summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2')
-rw-r--r--src/3rdparty/pcre2/AUTHORS6
-rw-r--r--src/3rdparty/pcre2/LICENCE27
-rw-r--r--src/3rdparty/pcre2/qt_attribution.json6
-rw-r--r--src/3rdparty/pcre2/src/config.h3
-rw-r--r--src/3rdparty/pcre2/src/pcre2.h374
-rw-r--r--src/3rdparty/pcre2/src/pcre2_auto_possess.c26
-rw-r--r--src/3rdparty/pcre2/src/pcre2_compile.c11755
-rw-r--r--src/3rdparty/pcre2/src/pcre2_config.c24
-rw-r--r--src/3rdparty/pcre2/src/pcre2_context.c119
-rw-r--r--src/3rdparty/pcre2/src/pcre2_dfa_match.c441
-rw-r--r--src/3rdparty/pcre2/src/pcre2_error.c41
-rw-r--r--src/3rdparty/pcre2/src/pcre2_find_bracket.c2
-rw-r--r--src/3rdparty/pcre2/src/pcre2_internal.h158
-rw-r--r--src/3rdparty/pcre2/src/pcre2_intmodedep.h180
-rw-r--r--src/3rdparty/pcre2/src/pcre2_jit_compile.c3276
-rw-r--r--src/3rdparty/pcre2/src/pcre2_jit_match.c8
-rw-r--r--src/3rdparty/pcre2/src/pcre2_match.c8429
-rw-r--r--src/3rdparty/pcre2/src/pcre2_match_data.c6
-rw-r--r--src/3rdparty/pcre2/src/pcre2_ord2utf.c2
-rw-r--r--src/3rdparty/pcre2/src/pcre2_pattern_info.c32
-rw-r--r--src/3rdparty/pcre2/src/pcre2_serialize.c5
-rw-r--r--src/3rdparty/pcre2/src/pcre2_study.c192
-rw-r--r--src/3rdparty/pcre2/src/pcre2_substitute.c14
-rw-r--r--src/3rdparty/pcre2/src/pcre2_tables.c448
-rw-r--r--src/3rdparty/pcre2/src/pcre2_ucd.c5447
-rw-r--r--src/3rdparty/pcre2/src/pcre2_ucp.h44
-rw-r--r--src/3rdparty/pcre2/src/pcre2_valid_utf.c14
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitConfig.h14
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h77
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c12
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitLir.c559
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitLir.h639
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c1210
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c224
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c845
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c267
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c270
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c555
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c115
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c162
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c462
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c27
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c152
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c2
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c38
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c154
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c147
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c774
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitUtils.c69
49 files changed, 20215 insertions, 17638 deletions
diff --git a/src/3rdparty/pcre2/AUTHORS b/src/3rdparty/pcre2/AUTHORS
index d9a0e15690..e056ad6868 100644
--- a/src/3rdparty/pcre2/AUTHORS
+++ b/src/3rdparty/pcre2/AUTHORS
@@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2016 University of Cambridge
+Copyright (c) 1997-2017 University of Cambridge
All rights reserved
@@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2016 Zoltan Herczeg
+Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
@@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2016 Zoltan Herczeg
+Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
####
diff --git a/src/3rdparty/pcre2/LICENCE b/src/3rdparty/pcre2/LICENCE
index 6600a65907..2b34d3f62b 100644
--- a/src/3rdparty/pcre2/LICENCE
+++ b/src/3rdparty/pcre2/LICENCE
@@ -5,9 +5,10 @@ PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
-specified below. The documentation for PCRE2, supplied in the "doc"
-directory, is distributed under the same terms as the software itself. The data
-in the testdata directory is not copyrighted and is in the public domain.
+specified below, with one exemption for certain binary redistributions. The
+documentation for PCRE2, supplied in the "doc" directory, is distributed under
+the same terms as the software itself. The data in the testdata directory is
+not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to
@@ -25,7 +26,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
-Copyright (c) 1997-2016 University of Cambridge
+Copyright (c) 1997-2017 University of Cambridge
All rights reserved.
@@ -36,7 +37,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2010-2016 Zoltan Herczeg
+Copyright(c) 2010-2017 Zoltan Herczeg
All rights reserved.
@@ -47,7 +48,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Emain domain: freemail.hu
-Copyright(c) 2009-2016 Zoltan Herczeg
+Copyright(c) 2009-2017 Zoltan Herczeg
All rights reserved.
@@ -57,11 +58,11 @@ THE "BSD" LICENCE
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
+ * Redistributions of source code must retain the above copyright notices,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
+ notices, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of any
@@ -80,4 +81,14 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
+
+EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES
+------------------------------------------
+
+The second condition in the BSD licence (covering binary redistributions) does
+not apply all the way down a chain of software. If binary package A includes
+PCRE2, it must respect the condition, but if package B is software that
+includes package A, the condition is not imposed on package B unless it uses
+PCRE2 independently.
+
End
diff --git a/src/3rdparty/pcre2/qt_attribution.json b/src/3rdparty/pcre2/qt_attribution.json
index bcf7650993..33c1a58878 100644
--- a/src/3rdparty/pcre2/qt_attribution.json
+++ b/src/3rdparty/pcre2/qt_attribution.json
@@ -6,12 +6,12 @@
"Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.",
"Homepage": "http://www.pcre.org/",
- "Version": "10.22",
+ "Version": "10.30",
"License": "BSD 3-clause \"New\" or \"Revised\" License",
"LicenseId": "BSD-3-Clause",
"LicenseFile": "LICENCE",
- "Copyright": "Copyright (c) 1997-2016 University of Cambridge
-Copyright (c) 2009-2016 Zoltan Herczeg
+ "Copyright": "Copyright (c) 1997-2017 University of Cambridge
+Copyright (c) 2009-2017 Zoltan Herczeg
Copyright (c) 2007-2012 Google Inc.
Copyright (c) 2013-2013 Tilera Corporation (jiwang@tilera.com)"
}
diff --git a/src/3rdparty/pcre2/src/config.h b/src/3rdparty/pcre2/src/config.h
index fbebfe6be0..eeade9d9ce 100644
--- a/src/3rdparty/pcre2/src/config.h
+++ b/src/3rdparty/pcre2/src/config.h
@@ -7,8 +7,9 @@
#define HAVE_STRING_H 1
#define LINK_SIZE 2
+#define HEAP_LIMIT 20000000
#define MATCH_LIMIT 10000000
-#define MATCH_LIMIT_RECURSION MATCH_LIMIT
+#define MATCH_LIMIT_DEPTH MATCH_LIMIT
#define MAX_NAME_COUNT 10000
#define MAX_NAME_SIZE 32
#define NEWLINE_DEFAULT 2
diff --git a/src/3rdparty/pcre2/src/pcre2.h b/src/3rdparty/pcre2/src/pcre2.h
index 20d221b803..5a4533909d 100644
--- a/src/3rdparty/pcre2/src/pcre2.h
+++ b/src/3rdparty/pcre2/src/pcre2.h
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016 University of Cambridge
+ Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -36,15 +36,15 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
-#ifndef _PCRE2_H
-#define _PCRE2_H
+#ifndef PCRE2_H_IDEMPOTENT_GUARD
+#define PCRE2_H_IDEMPOTENT_GUARD
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
-#define PCRE2_MINOR 22
+#define PCRE2_MINOR 30
#define PCRE2_PRERELEASE
-#define PCRE2_DATE 2016-07-29
+#define PCRE2_DATE 2017-08-14
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@@ -67,6 +67,20 @@ don't change existing definitions of PCRE2_EXP_DECL. */
# endif
#endif
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+ void __cdecl function(....)
+
+might be needed. In order so make this easy, all the exported functions have
+PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE2_CALL_CONVENTION
+#define PCRE2_CALL_CONVENTION
+#endif
+
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
uint8_t, UCHAR_MAX, etc are defined. */
@@ -87,6 +101,7 @@ others can be added next to them */
#define PCRE2_ANCHORED 0x80000000u
#define PCRE2_NO_UTF_CHECK 0x40000000u
+#define PCRE2_ENDANCHORED 0x20000000u
/* The following option bits can be passed only to pcre2_compile(). However,
they may affect compilation, JIT compilation, and/or interpretive execution.
@@ -122,6 +137,15 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
+#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
+#define PCRE2_LITERAL 0x02000000u /* C */
+
+/* An additional compile options word is available in the compile context. */
+
+#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
+#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
+#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
+#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
/* These are for pcre2_jit_compile(). */
@@ -160,6 +184,16 @@ ignored for pcre2_jit_match(). */
#define PCRE2_NO_JIT 0x00002000u
+/* Options for pcre2_pattern_convert(). */
+
+#define PCRE2_CONVERT_UTF 0x00000001u
+#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
+#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
+#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
+#define PCRE2_CONVERT_GLOB 0x00000010u
+#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
+#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
+
/* Newline and \R settings, for use in compile contexts. The newline values
must be kept in step with values set in config.h and both sets must all be
greater than zero. */
@@ -169,6 +203,7 @@ greater than zero. */
#define PCRE2_NEWLINE_CRLF 3
#define PCRE2_NEWLINE_ANY 4
#define PCRE2_NEWLINE_ANYCRLF 5
+#define PCRE2_NEWLINE_NUL 6
#define PCRE2_BSR_UNICODE 1
#define PCRE2_BSR_ANYCRLF 2
@@ -242,7 +277,8 @@ numbers must not be changed. */
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
#define PCRE2_ERROR_NULL (-51)
#define PCRE2_ERROR_RECURSELOOP (-52)
-#define PCRE2_ERROR_RECURSIONLIMIT (-53)
+#define PCRE2_ERROR_DEPTHLIMIT (-53)
+#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
#define PCRE2_ERROR_UNAVAILABLE (-54)
#define PCRE2_ERROR_UNSET (-55)
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
@@ -252,6 +288,9 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
+#define PCRE2_ERROR_HEAPLIMIT (-63)
+#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
+
/* Request types for pcre2_pattern_info() */
@@ -276,9 +315,12 @@ numbers must not be changed. */
#define PCRE2_INFO_NAMEENTRYSIZE 18
#define PCRE2_INFO_NAMETABLE 19
#define PCRE2_INFO_NEWLINE 20
-#define PCRE2_INFO_RECURSIONLIMIT 21
+#define PCRE2_INFO_DEPTHLIMIT 21
+#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
+#define PCRE2_INFO_FRAMESIZE 24
+#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */
@@ -289,11 +331,13 @@ numbers must not be changed. */
#define PCRE2_CONFIG_MATCHLIMIT 4
#define PCRE2_CONFIG_NEWLINE 5
#define PCRE2_CONFIG_PARENSLIMIT 6
-#define PCRE2_CONFIG_RECURSIONLIMIT 7
-#define PCRE2_CONFIG_STACKRECURSE 8
+#define PCRE2_CONFIG_DEPTHLIMIT 7
+#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
+#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
+#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */
@@ -328,6 +372,9 @@ typedef struct pcre2_real_compile_context pcre2_compile_context; \
struct pcre2_real_match_context; \
typedef struct pcre2_real_match_context pcre2_match_context; \
\
+struct pcre2_real_convert_context; \
+typedef struct pcre2_real_convert_context pcre2_convert_context; \
+\
struct pcre2_real_code; \
typedef struct pcre2_real_code pcre2_code; \
\
@@ -386,170 +433,220 @@ expanded for each width below. Start with functions that give general
information. */
#define PCRE2_GENERAL_INFO_FUNCTIONS \
-PCRE2_EXP_DECL int pcre2_config(uint32_t, void *);
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
/* Functions for manipulating contexts. */
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL \
- pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
-PCRE2_EXP_DECL \
- pcre2_general_context *pcre2_general_context_create( \
- void *(*)(PCRE2_SIZE, void *), \
- void (*)(void *, void *), void *); \
-PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
+PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
+ *pcre2_general_context_copy(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
+ *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
+ void (*)(void *, void *), void *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_general_context_free(pcre2_general_context *);
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL \
- pcre2_compile_context *pcre2_compile_context_copy(pcre2_compile_context *); \
-PCRE2_EXP_DECL \
- pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
-PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
-PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
-PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
- const unsigned char *); \
-PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
- PCRE2_SIZE); \
-PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
-PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
- uint32_t); \
-PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
- pcre2_compile_context *, int (*)(uint32_t, void *), \
- void *);
+PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
+ *pcre2_compile_context_copy(pcre2_compile_context *); \
+PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
+ *pcre2_compile_context_create(pcre2_general_context *);\
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_compile_context_free(pcre2_compile_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_newline(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
+ int (*)(uint32_t, void *), void *);
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL \
- pcre2_match_context *pcre2_match_context_copy(pcre2_match_context *); \
-PCRE2_EXP_DECL \
- pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
-PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
-PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
- int (*)(pcre2_callout_block *, void *), void *); \
-PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
- uint32_t); \
-PCRE2_EXP_DECL int pcre2_set_offset_limit(pcre2_match_context *, \
- PCRE2_SIZE); \
-PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
- uint32_t); \
-PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
- pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \
- void (*)(void *, void *), void *);
+PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
+ *pcre2_match_context_copy(pcre2_match_context *); \
+PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
+ *pcre2_match_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_match_context_free(pcre2_match_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_callout(pcre2_match_context *, \
+ int (*)(pcre2_callout_block *, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_recursion_memory_management(pcre2_match_context *, \
+ void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
+
+#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
+PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
+ *pcre2_convert_context_copy(pcre2_convert_context *); \
+PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
+ *pcre2_convert_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_convert_context_free(pcre2_convert_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
/* Functions concerned with compiling a pattern to PCRE internal code. */
#define PCRE2_COMPILE_FUNCTIONS \
-PCRE2_EXP_DECL \
- pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
- int *, PCRE2_SIZE *, pcre2_compile_context *); \
-PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); \
-PCRE2_EXP_DECL \
- pcre2_code *pcre2_code_copy(const pcre2_code *);
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+ *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
+ pcre2_compile_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_code_free(pcre2_code *); \
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+ *pcre2_code_copy(const pcre2_code *); \
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+ *pcre2_code_copy_with_tables(const pcre2_code *);
/* Functions that give information about a compiled pattern. */
#define PCRE2_PATTERN_INFO_FUNCTIONS \
-PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
- void *); \
-PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \
- int (*)(pcre2_callout_enumerate_block *, void *), \
- void *);
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_callout_enumerate(const pcre2_code *, \
+ int (*)(pcre2_callout_enumerate_block *, void *), void *);
/* Functions for running a match and inspecting the result. */
#define PCRE2_MATCH_FUNCTIONS \
-PCRE2_EXP_DECL \
- pcre2_match_data *pcre2_match_data_create(uint32_t, \
- pcre2_general_context *); \
-PCRE2_EXP_DECL \
- pcre2_match_data *pcre2_match_data_create_from_pattern(\
- const pcre2_code *, \
- pcre2_general_context *); \
-PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
- PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
- pcre2_match_data *, pcre2_match_context *, int *, \
- PCRE2_SIZE); \
-PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
- PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
- pcre2_match_data *, pcre2_match_context *); \
-PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
-PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
+PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
+ *pcre2_match_data_create(uint32_t, pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
+ *pcre2_match_data_create_from_pattern(const pcre2_code *, \
+ pcre2_general_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+ uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+ uint32_t, pcre2_match_data *, pcre2_match_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_match_data_free(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
+ pcre2_get_mark(pcre2_match_data *); \
+PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
+ pcre2_get_ovector_count(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+ *pcre2_get_ovector_pointer(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+ pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. */
#define PCRE2_SUBSTRING_FUNCTIONS \
-PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
- PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
-PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
- uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
-PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
-PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
- PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
-PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
- uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
-PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
- PCRE2_SPTR, PCRE2_SIZE *); \
-PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
- uint32_t, PCRE2_SIZE *); \
-PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
- PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
-PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
- const pcre2_code *, PCRE2_SPTR); \
-PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
-PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
- PCRE2_UCHAR ***, PCRE2_SIZE **);
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
+ PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
+ PCRE2_SIZE *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_substring_free(PCRE2_UCHAR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
+ PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
+ PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
+ PCRE2_SPTR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_substring_list_free(PCRE2_SPTR *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
/* Functions for serializing / deserializing compiled patterns. */
#define PCRE2_SERIALIZE_FUNCTIONS \
-PCRE2_EXP_DECL int32_t pcre2_serialize_encode(const pcre2_code **, \
- int32_t, uint8_t **, PCRE2_SIZE *, \
- pcre2_general_context *); \
-PCRE2_EXP_DECL int32_t pcre2_serialize_decode(pcre2_code **, int32_t, \
- const uint8_t *, pcre2_general_context *); \
-PCRE2_EXP_DECL int32_t pcre2_serialize_get_number_of_codes(const uint8_t *); \
-PCRE2_EXP_DECL void pcre2_serialize_free(uint8_t *);
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+ pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
+ PCRE2_SIZE *, pcre2_general_context *); \
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+ pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
+ pcre2_general_context *); \
+PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
+ pcre2_serialize_get_number_of_codes(const uint8_t *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_serialize_free(uint8_t *);
/* Convenience function for match + substitute. */
#define PCRE2_SUBSTITUTE_FUNCTION \
-PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \
- PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
- pcre2_match_data *, pcre2_match_context *, \
- PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \
- PCRE2_SIZE *);
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+ uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
+ PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
+
+
+/* Functions for converting pattern source strings. */
+
+#define PCRE2_CONVERT_FUNCTIONS \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
+ PCRE2_SIZE *, pcre2_convert_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_converted_pattern_free(PCRE2_UCHAR *);
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
-PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
-PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
- PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
- pcre2_match_data *, pcre2_match_context *); \
-PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \
-PCRE2_EXP_DECL \
- pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, \
- pcre2_general_context *); \
-PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_match_context *, \
- pcre2_jit_callback, void *); \
-PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_jit_compile(pcre2_code *, uint32_t); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
+ uint32_t, pcre2_match_data *, pcre2_match_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_jit_free_unused_memory(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
+ *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+ pcre2_jit_stack_free(pcre2_jit_stack *);
/* Other miscellaneous functions. */
#define PCRE2_OTHER_FUNCTIONS \
-PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
-PCRE2_EXP_DECL \
- const uint8_t *pcre2_maketables(pcre2_general_context *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+ pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
+PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
+ *pcre2_maketables(pcre2_general_context *); \
/* Define macros that generate width-specific names from generic versions. The
@@ -576,6 +673,7 @@ pcre2_compile are called by application code. */
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
+#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
@@ -587,6 +685,7 @@ pcre2_compile are called by application code. */
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
+#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
@@ -595,12 +694,17 @@ pcre2_compile are called by application code. */
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
+#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
+#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
+#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
+#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
+#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
@@ -624,6 +728,7 @@ pcre2_compile are called by application code. */
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
+#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
@@ -632,14 +737,17 @@ pcre2_compile are called by application code. */
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
+#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
+#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
+#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
+#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
+#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
-#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
-#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
@@ -653,6 +761,11 @@ pcre2_compile are called by application code. */
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
+/* Keep this old function name for backwards compatibility */
+#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
+
+/* Keep this obsolete function for backwards compatibility: it is now a noop. */
+#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
/* Now generate all three sets of width-specific structures and function
prototypes. */
@@ -663,6 +776,8 @@ PCRE2_STRUCTURE_LIST \
PCRE2_GENERAL_INFO_FUNCTIONS \
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
+PCRE2_CONVERT_CONTEXT_FUNCTIONS \
+PCRE2_CONVERT_FUNCTIONS \
PCRE2_MATCH_CONTEXT_FUNCTIONS \
PCRE2_COMPILE_FUNCTIONS \
PCRE2_PATTERN_INFO_FUNCTIONS \
@@ -692,6 +807,7 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
#undef PCRE2_GENERAL_INFO_FUNCTIONS
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
+#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
#undef PCRE2_COMPILE_FUNCTIONS
#undef PCRE2_PATTERN_INFO_FUNCTIONS
@@ -729,4 +845,6 @@ PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
} /* extern "C" */
#endif
-#endif /* End of pcre2.h */
+#endif /* PCRE2_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2.h */
diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c
index 8d0fa896ec..ad3543f627 100644
--- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c
+++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -588,7 +588,7 @@ for(;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
+
/* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking
a previous iterator, this is not possible. */
@@ -600,12 +600,14 @@ for(;;)
continue;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
next_code = code + GET(code, 1);
code += PRIV(OP_lengths)[c];
+ /* Check each branch. We have to recurse a level for all but the last
+ branch. */
+
while (*next_code == OP_ALT)
{
if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
@@ -621,8 +623,8 @@ for(;;)
case OP_BRAMINZERO:
next_code = code + 1;
- if (*next_code != OP_BRA && *next_code != OP_CBRA
- && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
+ if (*next_code != OP_BRA && *next_code != OP_CBRA &&
+ *next_code != OP_ONCE) return FALSE;
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
@@ -1046,8 +1048,10 @@ but some compilers complain about an unreachable statement. */
/* Replaces single character iterations with their possessive alternatives
if appropriate. This function modifies the compiled opcode! Hitting a
-non-existant opcode may indicate a bug in PCRE2, but it can also be caused if a
-bad UTF string was compiled with PCRE2_NO_UTF_CHECK.
+non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
+bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
+overly complicated or large patterns. In these cases, the check just stops,
+leaving the remainder of the pattern unpossessified.
Arguments:
code points to start of the byte code
@@ -1061,17 +1065,17 @@ Returns: 0 for success
int
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
{
-register PCRE2_UCHAR c;
+PCRE2_UCHAR c;
PCRE2_SPTR end;
PCRE2_UCHAR *repeat_opcode;
uint32_t list[8];
-int rec_limit;
+int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
for (;;)
{
c = *code;
- if (c > OP_TABLE_LENGTH) return -1; /* Something gone wrong */
+ if (c >= OP_TABLE_LENGTH) return -1; /* Something gone wrong */
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{
@@ -1080,7 +1084,6 @@ for (;;)
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
- rec_limit = 1000;
if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
{
switch(c)
@@ -1137,7 +1140,6 @@ for (;;)
list[1] = (c & 1) == 0;
- rec_limit = 1000;
if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
{
switch (c)
diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c
index bb9736cd51..44ee2502c8 100644
--- a/src/3rdparty/pcre2/src/pcre2_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -58,9 +58,14 @@ POSSIBILITY OF SUCH DAMAGE.
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
#include "pcre2_printint.c"
-#define CALL_PRINTINT
+#define DEBUG_CALL_PRINTINT
#endif
+/* Other debugging code can be enabled by these defines. */
+
+// #define DEBUG_SHOW_CAPTURES
+// #define DEBUG_SHOW_PARSED
+
/* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */
@@ -79,16 +84,56 @@ by defining macros in order to minimize #if usage. */
#endif
#endif
+/* Macros to store and retrieve a PCRE2_SIZE value in the parsed pattern, which
+consists of uint32_t elements. Assume that if uint32_t can't hold it, two of
+them will be able to (i.e. assume a 64-bit world). */
+
+#if PCRE2_SIZE_MAX <= UINT32_MAX
+#define PUTOFFSET(s,p) *p++ = s
+#define GETOFFSET(s,p) s = *p++
+#define GETPLUSOFFSET(s,p) s = *(++p)
+#define READPLUSOFFSET(s,p) s = p[1]
+#define SKIPOFFSET(p) p++
+#define SIZEOFFSET 1
+#else
+#define PUTOFFSET(s,p) \
+ { *p++ = (uint32_t)(s >> 32); *p++ = (uint32_t)(s & 0xffffffff); }
+#define GETOFFSET(s,p) \
+ { s = ((PCRE2_SIZE)p[0] << 32) | (PCRE2_SIZE)p[1]; p += 2; }
+#define GETPLUSOFFSET(s,p) \
+ { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; p += 2; }
+#define READPLUSOFFSET(s,p) \
+ { s = ((PCRE2_SIZE)p[1] << 32) | (PCRE2_SIZE)p[2]; }
+#define SKIPOFFSET(p) p += 2
+#define SIZEOFFSET 2
+#endif
+
+/* Macros for manipulating elements of the parsed pattern vector. */
+
+#define META_CODE(x) (x & 0xffff0000u)
+#define META_DATA(x) (x & 0x0000ffffu)
+#define META_DIFF(x,y) ((x-y)>>16)
+
/* Function definitions to allow mutual recursion */
+#ifdef SUPPORT_UNICODE
static unsigned int
- add_list_to_class(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *,
- const uint32_t *, unsigned int);
+ add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t,
+ compile_block *, const uint32_t *, unsigned int);
+#endif
+
+static int
+ compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
+ uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
+ compile_block *, PCRE2_SIZE *);
+
+static int
+ get_branchlength(uint32_t **, int *, int *, parsed_recurse_check *,
+ compile_block *);
static BOOL
- compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL,
- uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *,
- branch_chain *, compile_block *, size_t *);
+ set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *,
+ compile_block *);
@@ -96,9 +141,15 @@ static BOOL
* Code parameters and static tables *
*************************************************/
-/* This value specifies the size of stack workspace, which is used in different
-ways in the different pattern scans. The group-identifying pre-scan uses it to
-handle nesting, and needs it to be 16-bit aligned.
+#define MAX_GROUP_NUMBER 65535u
+#define MAX_REPEAT_COUNT 65535u
+#define REPEAT_UNLIMITED (MAX_REPEAT_COUNT+1)
+
+/* COMPILE_WORK_SIZE specifies the size of stack workspace, which is used in
+different ways in the different pattern scans. The parsing and group-
+identifying pre-scan uses it to handle nesting, and needs it to be 16-bit
+aligned for this. Having defined the size in code units, we set up
+C16_WORK_SIZE as the number of elements in the 16-bit vector.
During the first compiling phase, when determining how much memory is required,
the regex is partly compiled into this space, but the compiled parts are
@@ -107,16 +158,18 @@ overrun. The code does, however, check for an overrun, which can occur for
pathological patterns. The size of the workspace depends on LINK_SIZE because
the length of compiled items varies with this.
-In the real compile phase, the workspace is used for remembering data about
-numbered groups, provided there are not too many of them (if there are, extra
-memory is acquired). For this phase the memory must be 32-bit aligned. Having
-defined the size in code units, we set up C32_WORK_SIZE as the number of
-elements in the 32-bit vector. */
+In the real compile phase, this workspace is not currently used. */
-#define COMPILE_WORK_SIZE (2048*LINK_SIZE) /* Size in code units */
+#define COMPILE_WORK_SIZE (3000*LINK_SIZE) /* Size in code units */
-#define C32_WORK_SIZE \
- ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint32_t))
+#define C16_WORK_SIZE \
+ ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint16_t))
+
+/* A uint32_t vector is used for caching information about the size of
+capturing groups, to improve performance. A default is created on the stack of
+this size. */
+
+#define GROUPINFO_DEFAULT_SIZE 256
/* The overrun tests check for a slightly smaller size so that they detect the
overrun before it actually does run off the end of the data block. */
@@ -130,25 +183,176 @@ value is the number of slots in the list. */
#define NAMED_GROUP_LIST_SIZE 20
-/* The original PCRE required patterns to be zero-terminated, and it simplifies
-the compiling code if it is guaranteed that there is a zero code unit at the
-end of the pattern, because this means that tests for coding sequences such as
-(*SKIP) or even just (?<= can check a sequence of code units without having to
-keep checking for the end of the pattern. The new PCRE2 API allows zero code
-units within patterns if a positive length is given, but in order to keep most
-of the compiling code as it was, we copy such patterns and add a zero on the
-end. This value determines the size of space on the stack that is used if the
-pattern fits; if not, heap memory is used. */
+/* The pre-compiling pass over the pattern creates a parsed pattern in a vector
+of uint32_t. For short patterns this lives on the stack, with this size. Heap
+memory is used for longer patterns. */
-#define COPIED_PATTERN_SIZE 1024
+#define PARSED_PATTERN_DEFAULT_SIZE 1024
/* Maximum length value to check against when making sure that the variable
that holds the compiled pattern length does not overflow. We make it a bit less
-than INT_MAX to allow for adding in group terminating bytes, so that we don't
-have to check them every time. */
+than INT_MAX to allow for adding in group terminating code units, so that we
+don't have to check them every time. */
#define OFLOW_MAX (INT_MAX - 20)
+/* Code values for parsed patterns, which are stored in a vector of 32-bit
+unsigned ints. Values less than META_END are literal data values. The coding
+for identifying the item is in the top 16-bits, leaving 16 bits for the
+additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
+macros are used to manipulate parsed pattern elements.
+
+NOTE: When these definitions are changed, the table of extra lengths for each
+code (meta_extra_lengths, just below) must be updated to remain in step. */
+
+#define META_END 0x80000000u /* End of pattern */
+
+#define META_ALT 0x80010000u /* alternation */
+#define META_ATOMIC 0x80020000u /* atomic group */
+#define META_BACKREF 0x80030000u /* Back ref */
+#define META_BACKREF_BYNAME 0x80040000u /* \k'name' */
+#define META_BIGVALUE 0x80050000u /* Next is a literal > META_END */
+#define META_CALLOUT_NUMBER 0x80060000u /* (?C with numerical argument */
+#define META_CALLOUT_STRING 0x80070000u /* (?C with string argument */
+#define META_CAPTURE 0x80080000u /* Capturing parenthesis */
+#define META_CIRCUMFLEX 0x80090000u /* ^ metacharacter */
+#define META_CLASS 0x800a0000u /* start non-empty class */
+#define META_CLASS_EMPTY 0x800b0000u /* empty class */
+#define META_CLASS_EMPTY_NOT 0x800c0000u /* negative empty class */
+#define META_CLASS_END 0x800d0000u /* end of non-empty class */
+#define META_CLASS_NOT 0x800e0000u /* start non-empty negative class */
+#define META_COND_ASSERT 0x800f0000u /* (?(?assertion)... */
+#define META_COND_DEFINE 0x80100000u /* (?(DEFINE)... */
+#define META_COND_NAME 0x80110000u /* (?(<name>)... */
+#define META_COND_NUMBER 0x80120000u /* (?(digits)... */
+#define META_COND_RNAME 0x80130000u /* (?(R&name)... */
+#define META_COND_RNUMBER 0x80140000u /* (?(Rdigits)... */
+#define META_COND_VERSION 0x80150000u /* (?(VERSION<op>x.y)... */
+#define META_DOLLAR 0x80160000u /* $ metacharacter */
+#define META_DOT 0x80170000u /* . metacharacter */
+#define META_ESCAPE 0x80180000u /* \d and friends */
+#define META_KET 0x80190000u /* closing parenthesis */
+#define META_NOCAPTURE 0x801a0000u /* no capture parens */
+#define META_OPTIONS 0x801b0000u /* (?i) and friends */
+#define META_POSIX 0x801c0000u /* POSIX class item */
+#define META_POSIX_NEG 0x801d0000u /* negative POSIX class item */
+#define META_RANGE_ESCAPED 0x801e0000u /* range with at least one escape */
+#define META_RANGE_LITERAL 0x801f0000u /* range defined literally */
+#define META_RECURSE 0x80200000u /* Recursion */
+#define META_RECURSE_BYNAME 0x80210000u /* (?&name) */
+
+/* These must be kept together to make it easy to check that an assertion
+is present where expected in a conditional group. */
+
+#define META_LOOKAHEAD 0x80220000u /* (?= */
+#define META_LOOKAHEADNOT 0x80230000u /* (?! */
+#define META_LOOKBEHIND 0x80240000u /* (?<= */
+#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
+
+/* These must be kept in this order, with consecutive values, and the _ARG
+versions of PRUNE, SKIP, and THEN immediately after their non-argument
+versions. */
+
+#define META_MARK 0x80260000u /* (*MARK) */
+#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
+#define META_COMMIT 0x80280000u /* (*COMMIT) */
+#define META_FAIL 0x80290000u /* (*FAIL) */
+#define META_PRUNE 0x802a0000u /* These pairs must */
+#define META_PRUNE_ARG 0x802b0000u /* be */
+#define META_SKIP 0x802c0000u /* kept */
+#define META_SKIP_ARG 0x802d0000u /* in */
+#define META_THEN 0x802e0000u /* this */
+#define META_THEN_ARG 0x802f0000u /* order */
+
+/* These must be kept in groups of adjacent 3 values, and all together. */
+
+#define META_ASTERISK 0x80300000u /* * */
+#define META_ASTERISK_PLUS 0x80310000u /* *+ */
+#define META_ASTERISK_QUERY 0x80320000u /* *? */
+#define META_PLUS 0x80330000u /* + */
+#define META_PLUS_PLUS 0x80340000u /* ++ */
+#define META_PLUS_QUERY 0x80350000u /* +? */
+#define META_QUERY 0x80360000u /* ? */
+#define META_QUERY_PLUS 0x80370000u /* ?+ */
+#define META_QUERY_QUERY 0x80380000u /* ?? */
+#define META_MINMAX 0x80390000u /* {n,m} repeat */
+#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */
+#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */
+
+#define META_FIRST_QUANTIFIER META_ASTERISK
+#define META_LAST_QUANTIFIER META_MINMAX_QUERY
+
+/* Table of extra lengths for each of the meta codes. Must be kept in step with
+the definitions above. For some items these values are a basic length to which
+a variable amount has to be added. */
+
+static unsigned char meta_extra_lengths[] = {
+ 0, /* META_END */
+ 0, /* META_ALT */
+ 0, /* META_ATOMIC */
+ 0, /* META_BACKREF - more if group is >= 10 */
+ 1+SIZEOFFSET, /* META_BACKREF_BYNAME */
+ 1, /* META_BIGVALUE */
+ 3, /* META_CALLOUT_NUMBER */
+ 3+SIZEOFFSET, /* META_CALLOUT_STRING */
+ 0, /* META_CAPTURE */
+ 0, /* META_CIRCUMFLEX */
+ 0, /* META_CLASS */
+ 0, /* META_CLASS_EMPTY */
+ 0, /* META_CLASS_EMPTY_NOT */
+ 0, /* META_CLASS_END */
+ 0, /* META_CLASS_NOT */
+ 0, /* META_COND_ASSERT */
+ SIZEOFFSET, /* META_COND_DEFINE */
+ 1+SIZEOFFSET, /* META_COND_NAME */
+ 1+SIZEOFFSET, /* META_COND_NUMBER */
+ 1+SIZEOFFSET, /* META_COND_RNAME */
+ 1+SIZEOFFSET, /* META_COND_RNUMBER */
+ 3, /* META_COND_VERSION */
+ 0, /* META_DOLLAR */
+ 0, /* META_DOT */
+ 0, /* META_ESCAPE - more for ESC_P, ESC_p, ESC_g, ESC_k */
+ 0, /* META_KET */
+ 0, /* META_NOCAPTURE */
+ 1, /* META_OPTIONS */
+ 1, /* META_POSIX */
+ 1, /* META_POSIX_NEG */
+ 0, /* META_RANGE_ESCAPED */
+ 0, /* META_RANGE_LITERAL */
+ SIZEOFFSET, /* META_RECURSE */
+ 1+SIZEOFFSET, /* META_RECURSE_BYNAME */
+ 0, /* META_LOOKAHEAD */
+ 0, /* META_LOOKAHEADNOT */
+ SIZEOFFSET, /* META_LOOKBEHIND */
+ SIZEOFFSET, /* META_LOOKBEHINDNOT */
+ 1, /* META_MARK - plus the string length */
+ 0, /* META_ACCEPT */
+ 0, /* META_COMMIT */
+ 0, /* META_FAIL */
+ 0, /* META_PRUNE */
+ 1, /* META_PRUNE_ARG - plus the string length */
+ 0, /* META_SKIP */
+ 1, /* META_SKIP_ARG - plus the string length */
+ 0, /* META_THEN */
+ 1, /* META_THEN_ARG - plus the string length */
+ 0, /* META_ASTERISK */
+ 0, /* META_ASTERISK_PLUS */
+ 0, /* META_ASTERISK_QUERY */
+ 0, /* META_PLUS */
+ 0, /* META_PLUS_PLUS */
+ 0, /* META_PLUS_QUERY */
+ 0, /* META_QUERY */
+ 0, /* META_QUERY_PLUS */
+ 0, /* META_QUERY_QUERY */
+ 2, /* META_MINMAX */
+ 2, /* META_MINMAX_PLUS */
+ 2 /* META_MINMAX_QUERY */
+};
+
+/* Types for skipping parts of a parsed pattern. */
+
+enum { PSKIP_ALT, PSKIP_CLASS, PSKIP_KET };
+
/* Macro for setting individual bits in class bitmaps. It took some
experimenting to figure out how to stop gcc 5.3.0 from warning with
-Wconversion. This version gets a warning:
@@ -170,17 +374,10 @@ compiler is clever with identical subexpressions. */
/* These flags are used in the groupinfo vector. */
-#define GI_SET_COULD_BE_EMPTY 0x80000000u
-#define GI_COULD_BE_EMPTY 0x40000000u
-#define GI_NOT_FIXED_LENGTH 0x20000000u
-#define GI_SET_FIXED_LENGTH 0x10000000u
+#define GI_SET_FIXED_LENGTH 0x80000000u
+#define GI_NOT_FIXED_LENGTH 0x40000000u
#define GI_FIXED_LENGTH_MASK 0x0000ffffu
-/* This bit (which is greater than any UTF value) is used to indicate that a
-variable contains a number of code units instead of an actual code point. */
-
-#define UTF_LENGTH 0x10000000l
-
/* This simple test for a decimal digit works for both ASCII/Unicode and EBCDIC
and is fast (a good compiler can turn it into a subtraction and unsigned
comparison). */
@@ -191,8 +388,8 @@ comparison). */
locale, and may mark arbitrary characters as digits. We want to recognize only
0-9, a-z, and A-Z as hex digits, which is why we have a private table here. It
costs 256 bytes, but it is a lot faster than doing character value tests (at
-least in some simple cases I timed), and in some applications one wants PCRE to
-compile efficiently as well as match efficiently. The value in the table is
+least in some simple cases I timed), and in some applications one wants PCRE2
+to compile efficiently as well as match efficiently. The value in the table is
the binary hex digit value, or 0xff for non-hex digits. */
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
@@ -380,9 +577,9 @@ string is built from string macros so that it works in UTF-8 mode on EBCDIC
platforms. */
typedef struct verbitem {
- int len; /* Length of verb name */
- int op; /* Op when no arg, or -1 if arg mandatory */
- int op_arg; /* Op when arg present, or -1 if not allowed */
+ unsigned int len; /* Length of verb name */
+ uint32_t meta; /* Base META_ code */
+ int has_arg; /* Argument requirement */
} verbitem;
static const char verbnames[] =
@@ -397,32 +594,30 @@ static const char verbnames[] =
STRING_THEN;
static const verbitem verbs[] = {
- { 0, -1, OP_MARK },
- { 4, -1, OP_MARK },
- { 6, OP_ACCEPT, -1 },
- { 6, OP_COMMIT, -1 },
- { 1, OP_FAIL, -1 },
- { 4, OP_FAIL, -1 },
- { 5, OP_PRUNE, OP_PRUNE_ARG },
- { 4, OP_SKIP, OP_SKIP_ARG },
- { 4, OP_THEN, OP_THEN_ARG }
+ { 0, META_MARK, +1 }, /* > 0 => must have an argument */
+ { 4, META_MARK, +1 },
+ { 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */
+ { 6, META_COMMIT, -1 },
+ { 1, META_FAIL, -1 },
+ { 4, META_FAIL, -1 },
+ { 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */
+ { 4, META_SKIP, 0 },
+ { 4, META_THEN, 0 }
};
static const int verbcount = sizeof(verbs)/sizeof(verbitem);
+/* Verb opcodes, indexed by their META code offset from META_MARK. */
-/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in
-another regex library. */
-
-static const PCRE2_UCHAR sub_start_of_word[] = {
- CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
- CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' };
+static const uint32_t verbops[] = {
+ OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
+ OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
-static const PCRE2_UCHAR sub_end_of_word[] = {
- CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
- CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w,
- CHAR_RIGHT_PARENTHESIS, '\0' };
+/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
+static uint32_t chartypeoffset[] = {
+ OP_STAR - OP_STAR, OP_STARI - OP_STAR,
+ OP_NOTSTAR - OP_STAR, OP_NOTSTARI - OP_STAR };
/* Tables of names of POSIX character classes and their lengths. The names are
now all in a single string, to reduce the number of relocations when a shared
@@ -444,7 +639,6 @@ static const uint8_t posix_name_lengths[] = {
#define PC_PRINT 9
#define PC_PUNCT 10
-
/* Table of class bit maps for each POSIX class. Each class is formed from a
base map, with an optional addition or removal of another map. Then, for some
classes, there is some additional tweaking: for [:blank:] the vertical space
@@ -472,130 +666,53 @@ static const int posix_class_maps[] = {
cbit_xdigit,-1, 0 /* xdigit */
};
-/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
-Unicode property escapes. */
-
#ifdef SUPPORT_UNICODE
-static const PCRE2_UCHAR string_PNd[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pNd[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PXsp[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pXsp[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PXwd[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pXwd[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-
-static PCRE2_SPTR substitutes[] = {
- string_PNd, /* \D */
- string_pNd, /* \d */
- string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */
- string_pXsp, /* \s */ /* space and POSIX space are the same. */
- string_PXwd, /* \W */
- string_pXwd /* \w */
-};
-/* The POSIX class substitutes must be in the order of the POSIX class names,
-defined above, and there are both positive and negative cases. NULL means no
-general substitute of a Unicode property escape (\p or \P). However, for some
-POSIX classes (e.g. graph, print, punct) a special property code is compiled
-directly. */
-
-static const PCRE2_UCHAR string_pCc[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pL[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pLl[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pLu[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_pXan[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_h[] = {
- CHAR_BACKSLASH, CHAR_h, '\0' };
-static const PCRE2_UCHAR string_pXps[] = {
- CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PCc[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PL[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PLl[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PLu[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_PXan[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-static const PCRE2_UCHAR string_H[] = {
- CHAR_BACKSLASH, CHAR_H, '\0' };
-static const PCRE2_UCHAR string_PXps[] = {
- CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
- CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
-
-static PCRE2_SPTR posix_substitutes[] = {
- string_pL, /* alpha */
- string_pLl, /* lower */
- string_pLu, /* upper */
- string_pXan, /* alnum */
- NULL, /* ascii */
- string_h, /* blank */
- string_pCc, /* cntrl */
- string_pNd, /* digit */
- NULL, /* graph */
- NULL, /* print */
- NULL, /* punct */
- string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */
- string_pXwd, /* word */ /* Perl and POSIX space are the same */
- NULL, /* xdigit */
- /* Negated cases */
- string_PL, /* ^alpha */
- string_PLl, /* ^lower */
- string_PLu, /* ^upper */
- string_PXan, /* ^alnum */
- NULL, /* ^ascii */
- string_H, /* ^blank */
- string_PCc, /* ^cntrl */
- string_PNd, /* ^digit */
- NULL, /* ^graph */
- NULL, /* ^print */
- NULL, /* ^punct */
- string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */
- string_PXwd, /* ^word */ /* Perl and POSIX space are the same */
- NULL /* ^xdigit */
+/* The POSIX class Unicode property substitutes that are used in UCP mode must
+be in the order of the POSIX class names, defined above. */
+
+static int posix_substitutes[] = {
+ PT_GC, ucp_L, /* alpha */
+ PT_PC, ucp_Ll, /* lower */
+ PT_PC, ucp_Lu, /* upper */
+ PT_ALNUM, 0, /* alnum */
+ -1, 0, /* ascii, treat as non-UCP */
+ -1, 1, /* blank, treat as \h */
+ PT_PC, ucp_Cc, /* cntrl */
+ PT_PC, ucp_Nd, /* digit */
+ PT_PXGRAPH, 0, /* graph */
+ PT_PXPRINT, 0, /* print */
+ PT_PXPUNCT, 0, /* punct */
+ PT_PXSPACE, 0, /* space */ /* Xps is POSIX space, but from 8.34 */
+ PT_WORD, 0, /* word */ /* Perl and POSIX space are the same */
+ -1, 0 /* xdigit, treat as non-UCP */
};
-#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
+#define POSIX_SUBSIZE (sizeof(posix_substitutes) / (2*sizeof(uint32_t)))
#endif /* SUPPORT_UNICODE */
-/* Masks for checking option settings. */
+/* Masks for checking option settings. When PCRE2_LITERAL is set, only a subset
+are allowed. */
+
+#define PUBLIC_LITERAL_COMPILE_OPTIONS \
+ (PCRE2_ANCHORED|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_ENDANCHORED| \
+ PCRE2_FIRSTLINE|PCRE2_LITERAL|PCRE2_NO_START_OPTIMIZE| \
+ PCRE2_NO_UTF_CHECK|PCRE2_USE_OFFSET_LIMIT|PCRE2_UTF)
#define PUBLIC_COMPILE_OPTIONS \
- (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
- PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
- PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
- PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
- PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
- PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
- PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
- PCRE2_UTF)
+ (PUBLIC_LITERAL_COMPILE_OPTIONS| \
+ PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
+ PCRE2_ALT_VERBNAMES|PCRE2_DOLLAR_ENDONLY|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MATCH_UNSET_BACKREF| \
+ PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C|PCRE2_NEVER_UCP| \
+ PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE|PCRE2_NO_AUTO_POSSESS| \
+ PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY)
+
+#define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \
+ (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD)
+
+#define PUBLIC_COMPILE_EXTRA_OPTIONS \
+ (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
@@ -611,21 +728,8 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
- ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88 };
-
-/* Error codes that correspond to negative error codes returned by
-find_fixedlength(). */
-
-static int fixed_length_errors[] =
- {
- ERR0, /* Not an error */
- ERR0, /* Not an error; -1 is used for "process later" */
- ERR25, /* Lookbehind is not fixed length */
- ERR36, /* \C in lookbehind is not allowed */
- ERR87, /* Lookbehind is too long */
- ERR86, /* Pattern too complicated */
- ERR70 /* Internal error: unknown opcode encountered */
- };
+ ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
+ ERR91, ERR92};
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -636,8 +740,9 @@ enum { PSO_OPT, /* Value is an option bit */
PSO_FLG, /* Value is a flag bit */
PSO_NL, /* Value is a newline type */
PSO_BSR, /* Value is a \R type */
+ PSO_LIMH, /* Read integer value for heap limit */
PSO_LIMM, /* Read integer value for match limit */
- PSO_LIMR }; /* Read integer value for recursion limit */
+ PSO_LIMD }; /* Read integer value for depth limit */
typedef struct pso {
const uint8_t *name;
@@ -658,12 +763,15 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
+ { (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
- { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMR, 0 },
+ { (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
+ { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
{ (uint8_t *)STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR },
{ (uint8_t *)STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF },
{ (uint8_t *)STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF },
{ (uint8_t *)STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY },
+ { (uint8_t *)STRING_NUL_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_NUL },
{ (uint8_t *)STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, PCRE2_NEWLINE_ANYCRLF },
{ (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF },
{ (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE }
@@ -728,1056 +836,535 @@ static const uint8_t opcode_possessify[] = {
};
-
+#ifdef DEBUG_SHOW_PARSED
/*************************************************
-* Copy compiled code *
+* Show the parsed pattern for debugging *
*************************************************/
-/* Compiled JIT code cannot be copied, so the new compiled block has no
-associated JIT data. */
+/* For debugging the pre-scan, this code, which outputs the parsed data vector,
+can be enabled. */
-PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
-pcre2_code_copy(const pcre2_code *code)
+static void show_parsed(compile_block *cb)
{
-PCRE2_SIZE* ref_count;
-pcre2_code *newcode;
+uint32_t *pptr = cb->parsed_pattern;
-if (code == NULL) return NULL;
-newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
-if (newcode == NULL) return NULL;
-memcpy(newcode, code, code->blocksize);
-newcode->executable_jit = NULL;
-
-/* If the code is one that has been deserialized, increment the reference count
-in the decoded tables. */
-
-if ((code->flags & PCRE2_DEREF_TABLES) != 0)
+for (;;)
{
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
- (*ref_count)++;
- }
-
-return newcode;
-}
+ int max, min;
+ PCRE2_SIZE offset;
+ uint32_t i;
+ uint32_t length;
+ uint32_t meta_arg = META_DATA(*pptr);
+ fprintf(stderr, "+++ %02d %.8x ", (int)(pptr - cb->parsed_pattern), *pptr);
-
-/*************************************************
-* Free compiled code *
-*************************************************/
-
-PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
-pcre2_code_free(pcre2_code *code)
-{
-PCRE2_SIZE* ref_count;
-
-if (code != NULL)
- {
- if (code->executable_jit != NULL)
- PRIV(jit_free)(code->executable_jit, &code->memctl);
-
- if ((code->flags & PCRE2_DEREF_TABLES) != 0)
+ if (*pptr < META_END)
{
- /* Decoded tables belong to the codes after deserialization, and they must
- be freed when there are no more reference to them. The *ref_count should
- always be > 0. */
-
- ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
- if (*ref_count > 0)
- {
- (*ref_count)--;
- if (*ref_count == 0)
- code->memctl.free((void *)code->tables, code->memctl.memory_data);
- }
+ if (*pptr > 32 && *pptr < 128) fprintf(stderr, "%c", *pptr);
+ pptr++;
}
- code->memctl.free(code, code->memctl.memory_data);
- }
-}
-
-
-
-/*************************************************
-* Insert an automatic callout point *
-*************************************************/
-
-/* This function is called when the PCRE2_AUTO_CALLOUT option is set, to insert
-callout points before each pattern item.
-
-Arguments:
- code current code pointer
- ptr current pattern pointer
- cb general compile-time data
-
-Returns: new code pointer
-*/
-
-static PCRE2_UCHAR *
-auto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb)
-{
-code[0] = OP_CALLOUT;
-PUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */
-PUT(code, 1 + LINK_SIZE, 0); /* Default length */
-code[1 + 2*LINK_SIZE] = 255;
-return code + PRIV(OP_lengths)[OP_CALLOUT];
-}
-
-
-
-/*************************************************
-* Complete a callout item *
-*************************************************/
-
-/* A callout item contains the length of the next item in the pattern, which
-we can't fill in till after we have reached the relevant point. This is used
-for both automatic and manual callouts.
-
-Arguments:
- previous_callout points to previous callout item
- ptr current pattern pointer
- cb general compile-time data
-
-Returns: nothing
-*/
-
-static void
-complete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr,
- compile_block *cb)
-{
-size_t length = (size_t)(ptr - cb->start_pattern - GET(previous_callout, 1));
-PUT(previous_callout, 1 + LINK_SIZE, length);
-}
-
-
-
-/*************************************************
-* Find the fixed length of a branch *
-*************************************************/
-
-/* Scan a branch and compute the fixed length of subject that will match it, if
-the length is fixed. This is needed for dealing with lookbehind assertions. In
-UTF mode, the result is in code units rather than bytes. The branch is
-temporarily terminated with OP_END when this function is called.
-
-This function is called when a lookbehind assertion is encountered, so that if
-it fails, the error message can point to the correct place in the pattern.
-However, we cannot do this when the assertion contains subroutine calls,
-because they can be forward references. We solve this by remembering this case
-and doing the check at the end; a flag specifies which mode we are running in.
-
-Lookbehind lengths are held in 16-bit fields and the maximum value is defined
-as LOOKBEHIND_MAX.
-
-Arguments:
- code points to the start of the pattern (the bracket)
- utf TRUE in UTF mode
- atend TRUE if called when the pattern is complete
- cb the "compile data" structure
- recurses chain of recurse_check to catch mutual recursion
- countptr pointer to counter, to catch over-complexity
-
-Returns: if non-negative, the fixed length,
- or -1 if an OP_RECURSE item was encountered and atend is FALSE
- or -2 if there is no fixed length,
- or -3 if \C was encountered (in UTF mode only)
- or -4 if length is too long
- or -5 if regex is too complicated
- or -6 if an unknown opcode was encountered (internal error)
-*/
-
-#define FFL_LATER (-1)
-#define FFL_NOTFIXED (-2)
-#define FFL_BACKSLASHC (-3)
-#define FFL_TOOLONG (-4)
-#define FFL_TOOCOMPLICATED (-5)
-#define FFL_UNKNOWNOP (-6)
-
-static int
-find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
- recurse_check *recurses, int *countptr)
-{
-uint32_t length = 0xffffffffu; /* Unset */
-uint32_t group = 0;
-uint32_t groupinfo = 0;
-recurse_check this_recurse;
-register uint32_t branchlength = 0;
-register PCRE2_UCHAR *cc = code + 1 + LINK_SIZE;
-
-/* If this is a capturing group, we may have the answer cached, but we can only
-use this information if there are no (?| groups in the pattern, because
-otherwise group numbers are not unique. */
-
-if (*code == OP_CBRA || *code == OP_CBRAPOS || *code == OP_SCBRA ||
- *code == OP_SCBRAPOS)
- {
- group = GET2(cc, 0);
- cc += IMM2_SIZE;
- groupinfo = cb->groupinfo[group];
- if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0)
+ else switch (META_CODE(*pptr++))
{
- if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return FFL_NOTFIXED;
- if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
- return groupinfo & GI_FIXED_LENGTH_MASK;
- }
- }
-
-/* A large and/or complex regex can take too long to process. This can happen
-more often when (?| groups are present in the pattern. */
-
-if ((*countptr)++ > 2000) return FFL_TOOCOMPLICATED;
+ default:
+ fprintf(stderr, "**** OOPS - unknown META value - giving up ****\n");
+ return;
-/* Scan along the opcodes for this branch. If we get to the end of the
-branch, check the length against that of the other branches. */
+ case META_END:
+ fprintf(stderr, "META_END\n");
+ return;
-for (;;)
- {
- int d;
- PCRE2_UCHAR *ce, *cs;
- register PCRE2_UCHAR op = *cc;
+ case META_CAPTURE:
+ fprintf(stderr, "META_CAPTURE %d", meta_arg);
+ break;
- if (branchlength > LOOKBEHIND_MAX) return FFL_TOOLONG;
+ case META_RECURSE:
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "META_RECURSE %d %zd", meta_arg, offset);
+ break;
- switch (op)
- {
- /* We only need to continue for OP_CBRA (normal capturing bracket) and
- OP_BRA (normal non-capturing bracket) because the other variants of these
- opcodes are all concerned with unlimited repeated groups, which of course
- are not of fixed length. */
-
- case OP_CBRA:
- case OP_BRA:
- case OP_ONCE:
- case OP_ONCE_NC:
- case OP_COND:
- d = find_fixedlength(cc, utf, atend, cb, recurses, countptr);
- if (d < 0) return d;
- branchlength += (uint32_t)d;
- do cc += GET(cc, 1); while (*cc == OP_ALT);
- cc += 1 + LINK_SIZE;
+ case META_BACKREF:
+ if (meta_arg < 10)
+ offset = cb->small_ref_offset[meta_arg];
+ else
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "META_BACKREF %d %zd", meta_arg, offset);
break;
- /* Reached end of a branch; if it's a ket it is the end of a nested call.
- If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
- an ALT. If it is END it's the end of the outer call. All can be handled by
- the same code. Note that we must not include the OP_KETRxxx opcodes here,
- because they all imply an unlimited repeat. */
-
- case OP_ALT:
- case OP_KET:
- case OP_END:
- case OP_ACCEPT:
- case OP_ASSERT_ACCEPT:
- if (length == 0xffffffffu) length = branchlength;
- else if (length != branchlength) goto ISNOTFIXED;
- if (*cc != OP_ALT)
+ case META_ESCAPE:
+ if (meta_arg == ESC_P || meta_arg == ESC_p)
+ {
+ uint32_t ptype = *pptr >> 16;
+ uint32_t pvalue = *pptr++ & 0xffff;
+ fprintf(stderr, "META \\%c %d %d", (meta_arg == ESC_P)? 'P':'p',
+ ptype, pvalue);
+ }
+ else
{
- if (group > 0)
+ uint32_t cc;
+ /* There's just one escape we might have here that isn't negated in the
+ escapes table. */
+ if (meta_arg == ESC_g) cc = CHAR_g;
+ else for (cc = ESCAPES_FIRST; cc <= ESCAPES_LAST; cc++)
{
- groupinfo |= (uint32_t)(GI_SET_FIXED_LENGTH | length);
- cb->groupinfo[group] = groupinfo;
+ if (meta_arg == (uint32_t)(-escapes[cc - ESCAPES_FIRST])) break;
}
- return (int)length;
+ if (cc > ESCAPES_LAST) cc = CHAR_QUESTION_MARK;
+ fprintf(stderr, "META \\%c", cc);
}
- cc += 1 + LINK_SIZE;
- branchlength = 0;
break;
- /* A true recursion implies not fixed length, but a subroutine call may
- be OK. If the subroutine is a forward reference, we can't deal with
- it until the end of the pattern, so return FFL_LATER. */
-
- case OP_RECURSE:
- if (!atend) return FFL_LATER;
- cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */
- do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */
- if (cc > cs && cc < ce) goto ISNOTFIXED; /* Recursion */
- else /* Check for mutual recursion */
- {
- recurse_check *r = recurses;
- for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
- if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
- }
- this_recurse.prev = recurses;
- this_recurse.group = cs;
- d = find_fixedlength(cs, utf, atend, cb, &this_recurse, countptr);
- if (d < 0) return d;
- branchlength += (uint32_t)d;
- cc += 1 + LINK_SIZE;
+ case META_MINMAX:
+ min = *pptr++;
+ max = *pptr++;
+ if (max != REPEAT_UNLIMITED)
+ fprintf(stderr, "META {%d,%d}", min, max);
+ else
+ fprintf(stderr, "META {%d,}", min);
break;
- /* Skip over assertive subpatterns. Note that we must increment cc by
- 1 + LINK_SIZE at the end, not by OP_length[*cc] because in a recursive
- situation this assertion may be the one that is ultimately being checked
- for having a fixed length, in which case its terminating OP_KET will have
- been temporarily replaced by OP_END. */
-
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- do cc += GET(cc, 1); while (*cc == OP_ALT);
- cc += 1 + LINK_SIZE;
+ case META_MINMAX_QUERY:
+ min = *pptr++;
+ max = *pptr++;
+ if (max != REPEAT_UNLIMITED)
+ fprintf(stderr, "META {%d,%d}?", min, max);
+ else
+ fprintf(stderr, "META {%d,}?", min);
break;
- /* Skip over things that don't match chars */
-
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- cc += cc[1] + PRIV(OP_lengths)[*cc];
+ case META_MINMAX_PLUS:
+ min = *pptr++;
+ max = *pptr++;
+ if (max != REPEAT_UNLIMITED)
+ fprintf(stderr, "META {%d,%d}+", min, max);
+ else
+ fprintf(stderr, "META {%d,}+", min);
break;
- case OP_CALLOUT:
- case OP_CIRC:
- case OP_CIRCM:
- case OP_CLOSE:
- case OP_COMMIT:
- case OP_CREF:
- case OP_FALSE:
- case OP_TRUE:
- case OP_DNCREF:
- case OP_DNRREF:
- case OP_DOLL:
- case OP_DOLLM:
- case OP_EOD:
- case OP_EODN:
- case OP_FAIL:
- case OP_NOT_WORD_BOUNDARY:
- case OP_PRUNE:
- case OP_REVERSE:
- case OP_RREF:
- case OP_SET_SOM:
- case OP_SKIP:
- case OP_SOD:
- case OP_SOM:
- case OP_THEN:
- case OP_WORD_BOUNDARY:
- cc += PRIV(OP_lengths)[*cc];
+ case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
+ case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
+ case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
+ case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
+ case META_DOT: fprintf(stderr, "META_DOT"); break;
+ case META_ASTERISK: fprintf(stderr, "META *"); break;
+ case META_ASTERISK_QUERY: fprintf(stderr, "META *?"); break;
+ case META_ASTERISK_PLUS: fprintf(stderr, "META *+"); break;
+ case META_PLUS: fprintf(stderr, "META +"); break;
+ case META_PLUS_QUERY: fprintf(stderr, "META +?"); break;
+ case META_PLUS_PLUS: fprintf(stderr, "META ++"); break;
+ case META_QUERY: fprintf(stderr, "META ?"); break;
+ case META_QUERY_QUERY: fprintf(stderr, "META ??"); break;
+ case META_QUERY_PLUS: fprintf(stderr, "META ?+"); break;
+
+ case META_ATOMIC: fprintf(stderr, "META (?>"); break;
+ case META_NOCAPTURE: fprintf(stderr, "META (?:"); break;
+ case META_LOOKAHEAD: fprintf(stderr, "META (?="); break;
+ case META_LOOKAHEADNOT: fprintf(stderr, "META (?!"); break;
+ case META_KET: fprintf(stderr, "META )"); break;
+ case META_ALT: fprintf(stderr, "META | %d", meta_arg); break;
+
+ case META_CLASS: fprintf(stderr, "META ["); break;
+ case META_CLASS_NOT: fprintf(stderr, "META [^"); break;
+ case META_CLASS_END: fprintf(stderr, "META ]"); break;
+ case META_CLASS_EMPTY: fprintf(stderr, "META []"); break;
+ case META_CLASS_EMPTY_NOT: fprintf(stderr, "META [^]"); break;
+
+ case META_RANGE_LITERAL: fprintf(stderr, "META - (literal)"); break;
+ case META_RANGE_ESCAPED: fprintf(stderr, "META - (escaped)"); break;
+
+ case META_POSIX: fprintf(stderr, "META_POSIX %d", *pptr++); break;
+ case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
+
+ case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
+ case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
+ case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
+ case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
+ case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
+ case META_THEN: fprintf(stderr, "META (*THEN)"); break;
+
+ case META_OPTIONS: fprintf(stderr, "META_OPTIONS 0x%02x", *pptr++); break;
+
+ case META_LOOKBEHIND:
+ fprintf(stderr, "META (?<= %d offset=", meta_arg);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
break;
- case OP_CALLOUT_STR:
- cc += GET(cc, 1 + 2*LINK_SIZE);
+ case META_LOOKBEHINDNOT:
+ fprintf(stderr, "META (?<! %d offset=", meta_arg);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
break;
- /* Handle literal characters */
+ case META_CALLOUT_NUMBER:
+ fprintf(stderr, "META (?C%d) next=%d/%d", pptr[2], pptr[0],
+ pptr[1]);
+ pptr += 3;
+ break;
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- branchlength++;
- cc += 2;
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
+ case META_CALLOUT_STRING:
+ {
+ uint32_t patoffset = *pptr++; /* Offset of next pattern item */
+ uint32_t patlength = *pptr++; /* Length of next pattern item */
+ fprintf(stderr, "META (?Cstring) length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd next=%d/%d", offset, patoffset, patlength);
+ }
break;
- /* Handle exact repetitions. The count is already in characters, but we
- need to skip over a multibyte character in UTF8 mode. */
+ case META_RECURSE_BYNAME:
+ fprintf(stderr, "META (?(&name) length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
+ break;
- case OP_EXACT:
- case OP_EXACTI:
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- branchlength += GET2(cc,1);
- cc += 2 + IMM2_SIZE;
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
-#endif
+ case META_BACKREF_BYNAME:
+ fprintf(stderr, "META_BACKREF_BYNAME length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
break;
- case OP_TYPEEXACT:
- branchlength += GET2(cc,1);
- if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
- cc += 2;
- cc += 1 + IMM2_SIZE + 1;
+ case META_COND_NUMBER:
+ fprintf(stderr, "META_COND_NUMBER %d offset=", pptr[SIZEOFFSET]);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
+ pptr++;
break;
- /* Handle single-char matchers */
+ case META_COND_DEFINE:
+ fprintf(stderr, "META (?(DEFINE) offset=");
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
+ break;
- case OP_PROP:
- case OP_NOTPROP:
- cc += 2;
- /* Fall through */
+ case META_COND_VERSION:
+ fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
+ fprintf(stderr, "%d.", *pptr++);
+ fprintf(stderr, "%d)", *pptr++);
+ break;
- case OP_HSPACE:
- case OP_VSPACE:
- case OP_NOT_HSPACE:
- case OP_NOT_VSPACE:
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
- case OP_ANY:
- case OP_ALLANY:
- branchlength++;
- cc++;
+ case META_COND_NAME:
+ fprintf(stderr, "META (?(<name>) length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
break;
- /* The single-byte matcher isn't allowed. This only happens in UTF-8 or
- UTF-16 mode; otherwise \C is coded as OP_ALLANY. */
+ case META_COND_RNAME:
+ fprintf(stderr, "META (?(R&name) length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
+ break;
- case OP_ANYBYTE:
- return FFL_BACKSLASHC;
+ /* This is kept as a name, because it might be. */
- /* Check a class for variable quantification */
+ case META_COND_RNUMBER:
+ fprintf(stderr, "META (?(Rnumber) length=%d offset=", *pptr++);
+ GETOFFSET(offset, pptr);
+ fprintf(stderr, "%zd", offset);
+ break;
- case OP_CLASS:
- case OP_NCLASS:
-#ifdef SUPPORT_WIDE_CHARS
- case OP_XCLASS:
- /* The original code caused an unsigned overflow in 64 bit systems,
- so now we use a conditional statement. */
- if (op == OP_XCLASS)
- cc += GET(cc, 1);
- else
- cc += PRIV(OP_lengths)[OP_CLASS];
-#else
- cc += PRIV(OP_lengths)[OP_CLASS];
-#endif
+ case META_MARK:
+ fprintf(stderr, "META (*MARK:");
+ goto SHOWARG;
- switch (*cc)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- case OP_CRPOSSTAR:
- case OP_CRPOSPLUS:
- case OP_CRPOSQUERY:
- goto ISNOTFIXED;
+ case META_PRUNE_ARG:
+ fprintf(stderr, "META (*PRUNE:");
+ goto SHOWARG;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- case OP_CRPOSRANGE:
- if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) goto ISNOTFIXED;
- branchlength += GET2(cc,1);
- cc += 1 + 2 * IMM2_SIZE;
- break;
+ case META_SKIP_ARG:
+ fprintf(stderr, "META (*SKIP:");
+ goto SHOWARG;
- default:
- branchlength++;
+ case META_THEN_ARG:
+ fprintf(stderr, "META (*THEN:");
+ SHOWARG:
+ length = *pptr++;
+ for (i = 0; i < length; i++)
+ {
+ uint32_t cc = *pptr++;
+ if (cc > 32 && cc < 128) fprintf(stderr, "%c", cc);
+ else fprintf(stderr, "\\x{%x}", cc);
}
+ fprintf(stderr, ") length=%u", length);
break;
-
- /* Anything else is variable length */
-
- case OP_ANYNL:
- case OP_BRAMINZERO:
- case OP_BRAPOS:
- case OP_BRAPOSZERO:
- case OP_BRAZERO:
- case OP_CBRAPOS:
- case OP_EXTUNI:
- case OP_KETRMAX:
- case OP_KETRMIN:
- case OP_KETRPOS:
- case OP_MINPLUS:
- case OP_MINPLUSI:
- case OP_MINQUERY:
- case OP_MINQUERYI:
- case OP_MINSTAR:
- case OP_MINSTARI:
- case OP_MINUPTO:
- case OP_MINUPTOI:
- case OP_NOTMINPLUS:
- case OP_NOTMINPLUSI:
- case OP_NOTMINQUERY:
- case OP_NOTMINQUERYI:
- case OP_NOTMINSTAR:
- case OP_NOTMINSTARI:
- case OP_NOTMINUPTO:
- case OP_NOTMINUPTOI:
- case OP_NOTPLUS:
- case OP_NOTPLUSI:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSPLUSI:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSQUERYI:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSSTARI:
- case OP_NOTPOSUPTO:
- case OP_NOTPOSUPTOI:
- case OP_NOTQUERY:
- case OP_NOTQUERYI:
- case OP_NOTSTAR:
- case OP_NOTSTARI:
- case OP_NOTUPTO:
- case OP_NOTUPTOI:
- case OP_PLUS:
- case OP_PLUSI:
- case OP_POSPLUS:
- case OP_POSPLUSI:
- case OP_POSQUERY:
- case OP_POSQUERYI:
- case OP_POSSTAR:
- case OP_POSSTARI:
- case OP_POSUPTO:
- case OP_POSUPTOI:
- case OP_QUERY:
- case OP_QUERYI:
- case OP_REF:
- case OP_REFI:
- case OP_DNREF:
- case OP_DNREFI:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCBRA:
- case OP_SCBRAPOS:
- case OP_SCOND:
- case OP_SKIPZERO:
- case OP_STAR:
- case OP_STARI:
- case OP_TYPEMINPLUS:
- case OP_TYPEMINQUERY:
- case OP_TYPEMINSTAR:
- case OP_TYPEMINUPTO:
- case OP_TYPEPLUS:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSUPTO:
- case OP_TYPEQUERY:
- case OP_TYPESTAR:
- case OP_TYPEUPTO:
- case OP_UPTO:
- case OP_UPTOI:
- goto ISNOTFIXED;
-
- /* Catch unrecognized opcodes so that when new ones are added they
- are not forgotten, as has happened in the past. */
-
- default:
- return FFL_UNKNOWNOP;
}
+ fprintf(stderr, "\n");
}
-/* Control never gets here except by goto. */
-
-ISNOTFIXED:
-if (group > 0)
- {
- groupinfo |= GI_NOT_FIXED_LENGTH;
- cb->groupinfo[group] = groupinfo;
- }
-return FFL_NOTFIXED;
+return;
}
+#endif /* DEBUG_SHOW_PARSED */
/*************************************************
-* Find first significant op code *
+* Copy compiled code *
*************************************************/
-/* This is called by several functions that scan a compiled expression looking
-for a fixed first character, or an anchoring op code etc. It skips over things
-that do not influence this. For some calls, it makes sense to skip negative
-forward and all backward assertions, and also the \b assertion; for others it
-does not.
-
-Arguments:
- code pointer to the start of the group
- skipassert TRUE if certain assertions are to be skipped
-
-Returns: pointer to the first significant opcode
-*/
+/* Compiled JIT code cannot be copied, so the new compiled block has no
+associated JIT data. */
-static const PCRE2_UCHAR*
-first_significant_code(PCRE2_SPTR code, BOOL skipassert)
+PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
+pcre2_code_copy(const pcre2_code *code)
{
-for (;;)
- {
- switch ((int)*code)
- {
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- if (!skipassert) return code;
- do code += GET(code, 1); while (*code == OP_ALT);
- code += PRIV(OP_lengths)[*code];
- break;
-
- case OP_WORD_BOUNDARY:
- case OP_NOT_WORD_BOUNDARY:
- if (!skipassert) return code;
- /* Fall through */
+PCRE2_SIZE* ref_count;
+pcre2_code *newcode;
- case OP_CALLOUT:
- case OP_CREF:
- case OP_DNCREF:
- case OP_RREF:
- case OP_DNRREF:
- case OP_FALSE:
- case OP_TRUE:
- code += PRIV(OP_lengths)[*code];
- break;
+if (code == NULL) return NULL;
+newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
+if (newcode == NULL) return NULL;
+memcpy(newcode, code, code->blocksize);
+newcode->executable_jit = NULL;
- case OP_CALLOUT_STR:
- code += GET(code, 1 + 2*LINK_SIZE);
- break;
+/* If the code is one that has been deserialized, increment the reference count
+in the decoded tables. */
- default:
- return code;
- }
+if ((code->flags & PCRE2_DEREF_TABLES) != 0)
+ {
+ ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ (*ref_count)++;
}
-/* Control never reaches here */
+
+return newcode;
}
/*************************************************
-* Scan compiled branch for non-emptiness *
+* Copy compiled code and character tables *
*************************************************/
-/* This function scans through a branch of a compiled pattern to see whether it
-can match the empty string. It is called at the end of compiling to check the
-entire pattern, and from compile_branch() when checking for an unlimited repeat
-of a group that can match nothing. In the latter case it is called only when
-doing the real compile, not during the pre-compile that measures the size of
-the compiled pattern.
-
-Note that first_significant_code() skips over backward and negative forward
-assertions when its final argument is TRUE. If we hit an unclosed bracket, we
-return "empty" - this means we've struck an inner bracket whose current branch
-will already have been scanned.
-
-Arguments:
- code points to start of search
- endcode points to where to stop
- utf TRUE if in UTF mode
- cb compile data
- atend TRUE if being called to check an entire pattern
- recurses chain of recurse_check to catch mutual recursion
- countptr pointer to count to catch over-complicated pattern
-
-Returns: 0 if what is matched cannot be empty
- 1 if what is matched could be empty
- -1 if the pattern is too complicated
-*/
-
-#define CBE_NOTEMPTY 0
-#define CBE_EMPTY 1
-#define CBE_TOOCOMPLICATED (-1)
-
+/* Compiled JIT code cannot be copied, so the new compiled block has no
+associated JIT data. This version of code_copy also makes a separate copy of
+the character tables. */
-static int
-could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
- compile_block *cb, BOOL atend, recurse_check *recurses, int *countptr)
+PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
+pcre2_code_copy_with_tables(const pcre2_code *code)
{
-uint32_t group = 0;
-uint32_t groupinfo = 0;
-register PCRE2_UCHAR c;
-recurse_check this_recurse;
-
-/* If what we are checking has already been set as "could be empty", we know
-the answer. */
-
-if (*code >= OP_SBRA && *code <= OP_SCOND) return CBE_EMPTY;
+PCRE2_SIZE* ref_count;
+pcre2_code *newcode;
+uint8_t *newtables;
-/* If this is a capturing group, we may have the answer cached, but we can only
-use this information if there are no (?| groups in the pattern, because
-otherwise group numbers are not unique. */
+if (code == NULL) return NULL;
+newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
+if (newcode == NULL) return NULL;
+memcpy(newcode, code, code->blocksize);
+newcode->executable_jit = NULL;
-if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 &&
- (*code == OP_CBRA || *code == OP_CBRAPOS))
+newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
+ code->memctl.memory_data);
+if (newtables == NULL)
{
- group = GET2(code, 1 + LINK_SIZE);
- groupinfo = cb->groupinfo[group];
- if ((groupinfo & GI_SET_COULD_BE_EMPTY) != 0)
- return ((groupinfo & GI_COULD_BE_EMPTY) != 0)? CBE_EMPTY : CBE_NOTEMPTY;
+ code->memctl.free((void *)newcode, code->memctl.memory_data);
+ return NULL;
}
+memcpy(newtables, code->tables, tables_length);
+ref_count = (PCRE2_SIZE *)(newtables + tables_length);
+*ref_count = 1;
-/* A large and/or complex regex can take too long to process. We have to assume
-it can match an empty string. This can happen more often when (?| groups are
-present in the pattern and the caching is disabled. Setting the cap at 1100
-allows the test for more than 1023 capturing patterns to work. */
+newcode->tables = newtables;
+newcode->flags |= PCRE2_DEREF_TABLES;
+return newcode;
+}
-if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
-/* Scan the opcodes for this branch. */
-for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
- code < endcode;
- code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
- {
- PCRE2_SPTR ccode;
+/*************************************************
+* Free compiled code *
+*************************************************/
- c = *code;
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_code_free(pcre2_code *code)
+{
+PCRE2_SIZE* ref_count;
- /* Skip over forward assertions; the other assertions are skipped by
- first_significant_code() with a TRUE final argument. */
+if (code != NULL)
+ {
+ if (code->executable_jit != NULL)
+ PRIV(jit_free)(code->executable_jit, &code->memctl);
- if (c == OP_ASSERT)
+ if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
- do code += GET(code, 1); while (*code == OP_ALT);
- c = *code;
- continue;
+ /* Decoded tables belong to the codes after deserialization, and they must
+ be freed when there are no more reference to them. The *ref_count should
+ always be > 0. */
+
+ ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
+ if (*ref_count > 0)
+ {
+ (*ref_count)--;
+ if (*ref_count == 0)
+ code->memctl.free((void *)code->tables, code->memctl.memory_data);
+ }
}
- /* For a recursion/subroutine call we can scan the recursion when this
- function is called at the end, to check a complete pattern. Before then,
- recursions just have the group number as their argument and in any case may
- be forward references. In that situation, we return CBE_EMPTY, just in case.
- It means that unlimited repeats of groups that contain recursions are always
- treated as "could be empty" - which just adds a bit more processing time
- because of the runtime check. */
+ code->memctl.free(code, code->memctl.memory_data);
+ }
+}
- if (c == OP_RECURSE)
- {
- PCRE2_SPTR scode, endgroup;
- BOOL empty_branch;
- if (!atend) goto ISTRUE;
- scode = cb->start_code + GET(code, 1);
- endgroup = scode;
- /* We need to detect whether this is a recursive call, as otherwise there
- will be an infinite loop. If it is a recursion, just skip over it. Simple
- recursions are easily detected. For mutual recursions we keep a chain on
- the stack. */
+/*************************************************
+* Read a number, possibly signed *
+*************************************************/
- do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
- if (code >= scode && code <= endgroup) continue; /* Simple recursion */
- else
- {
- recurse_check *r = recurses;
- for (r = recurses; r != NULL; r = r->prev)
- if (r->group == scode) break;
- if (r != NULL) continue; /* Mutual recursion */
- }
+/* This function is used to read numbers in the pattern. The initial pointer
+must be the sign or first digit of the number. When relative values (introduced
+by + or -) are allowed, they are relative group numbers, and the result must be
+greater than zero.
- /* Scan the referenced group, remembering it on the stack chain to detect
- mutual recursions. */
+Arguments:
+ ptrptr points to the character pointer variable
+ ptrend points to the end of the input string
+ allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this
+ max_value the largest number allowed
+ max_error the error to give for an over-large number
+ intptr where to put the result
+ errcodeptr where to put an error code
+
+Returns: TRUE - a number was read
+ FALSE - errorcode == 0 => no number was found
+ errorcode != 0 => an error occurred
+*/
- empty_branch = FALSE;
- this_recurse.prev = recurses;
- this_recurse.group = scode;
+static BOOL
+read_number(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, int32_t allow_sign,
+ uint32_t max_value, uint32_t max_error, int *intptr, int *errorcodeptr)
+{
+int sign = 0;
+uint32_t n = 0;
+PCRE2_SPTR ptr = *ptrptr;
+BOOL yield = FALSE;
- do
- {
- int rc = could_be_empty_branch(scode, endcode, utf, cb, atend,
- &this_recurse, countptr);
- if (rc < 0) return rc;
- if (rc > 0)
- {
- empty_branch = TRUE;
- break;
- }
- scode += GET(scode, 1);
- }
- while (*scode == OP_ALT);
+*errorcodeptr = 0;
- if (!empty_branch) goto ISFALSE; /* All branches are non-empty */
- continue;
+if (allow_sign >= 0 && ptr < ptrend)
+ {
+ if (*ptr == CHAR_PLUS)
+ {
+ sign = +1;
+ max_value -= allow_sign;
+ ptr++;
}
-
- /* Groups with zero repeats can of course be empty; skip them. */
-
- if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
- c == OP_BRAPOSZERO)
+ else if (*ptr == CHAR_MINUS)
{
- code += PRIV(OP_lengths)[c];
- do code += GET(code, 1); while (*code == OP_ALT);
- c = *code;
- continue;
+ sign = -1;
+ ptr++;
}
+ }
- /* A nested group that is already marked as "could be empty" can just be
- skipped. */
-
- if (c == OP_SBRA || c == OP_SBRAPOS ||
- c == OP_SCBRA || c == OP_SCBRAPOS)
+if (ptr >= ptrend || !IS_DIGIT(*ptr)) return FALSE;
+while (ptr < ptrend && IS_DIGIT(*ptr))
+ {
+ n = n * 10 + *ptr++ - CHAR_0;
+ if (n > max_value)
{
- do code += GET(code, 1); while (*code == OP_ALT);
- c = *code;
- continue;
+ *errorcodeptr = max_error;
+ goto EXIT;
}
+ }
- /* For other groups, scan the branches. */
-
- if (c == OP_BRA || c == OP_BRAPOS ||
- c == OP_CBRA || c == OP_CBRAPOS ||
- c == OP_ONCE || c == OP_ONCE_NC ||
- c == OP_COND || c == OP_SCOND)
+if (allow_sign >= 0 && sign != 0)
+ {
+ if (n == 0)
{
- BOOL empty_branch;
- if (GET(code, 1) == 0) goto ISTRUE; /* Hit unclosed bracket */
-
- /* If a conditional group has only one branch, there is a second, implied,
- empty branch, so just skip over the conditional, because it could be empty.
- Otherwise, scan the individual branches of the group. */
-
- if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
- code += GET(code, 1);
- else
- {
- empty_branch = FALSE;
- do
- {
- if (!empty_branch)
- {
- int rc = could_be_empty_branch(code, endcode, utf, cb, atend,
- recurses, countptr);
- if (rc < 0) return rc;
- if (rc > 0) empty_branch = TRUE;
- }
- code += GET(code, 1);
- }
- while (*code == OP_ALT);
- if (!empty_branch) goto ISFALSE; /* All branches are non-empty */
- }
-
- c = *code;
- continue;
+ *errorcodeptr = ERR26; /* +0 and -0 are not allowed */
+ goto EXIT;
}
- /* Handle the other opcodes */
-
- switch (c)
+ if (sign > 0) n += allow_sign;
+ else if ((int)n > allow_sign)
{
- /* Check for quantifiers after a class. XCLASS is used for classes that
- cannot be represented just by a bit map. This includes negated single
- high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
- actual length is stored in the compiled code, so we must update "code"
- here. */
-
-#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- ccode = code += GET(code, 1);
- goto CHECK_CLASS_REPEAT;
-#endif
-
- case OP_CLASS:
- case OP_NCLASS:
- ccode = code + PRIV(OP_lengths)[OP_CLASS];
-
-#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- CHECK_CLASS_REPEAT:
-#endif
-
- switch (*ccode)
- {
- case OP_CRSTAR: /* These could be empty; continue */
- case OP_CRMINSTAR:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- case OP_CRPOSSTAR:
- case OP_CRPOSQUERY:
- break;
+ *errorcodeptr = ERR15; /* Non-existent subpattern */
+ goto EXIT;
+ }
+ else n = allow_sign + 1 - n;
+ }
- default: /* Non-repeat => class must match */
- case OP_CRPLUS: /* These repeats aren't empty */
- case OP_CRMINPLUS:
- case OP_CRPOSPLUS:
- goto ISFALSE;
+yield = TRUE;
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- case OP_CRPOSRANGE:
- if (GET2(ccode, 1) > 0) goto ISFALSE; /* Minimum > 0 */
- break;
- }
- break;
+EXIT:
+*intptr = n;
+*ptrptr = ptr;
+return yield;
+}
- /* Opcodes that must match a character */
-
- case OP_ANY:
- case OP_ALLANY:
- case OP_ANYBYTE:
-
- case OP_PROP:
- case OP_NOTPROP:
- case OP_ANYNL:
-
- case OP_NOT_HSPACE:
- case OP_HSPACE:
- case OP_NOT_VSPACE:
- case OP_VSPACE:
- case OP_EXTUNI:
-
- case OP_NOT_DIGIT:
- case OP_DIGIT:
- case OP_NOT_WHITESPACE:
- case OP_WHITESPACE:
- case OP_NOT_WORDCHAR:
- case OP_WORDCHAR:
-
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
-
- case OP_PLUS:
- case OP_PLUSI:
- case OP_MINPLUS:
- case OP_MINPLUSI:
-
- case OP_NOTPLUS:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUS:
- case OP_NOTMINPLUSI:
-
- case OP_POSPLUS:
- case OP_POSPLUSI:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSPLUSI:
-
- case OP_EXACT:
- case OP_EXACTI:
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
-
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEPOSPLUS:
- case OP_TYPEEXACT:
- goto ISFALSE;
-
- /* These are going to continue, as they may be empty, but we have to
- fudge the length for the \p and \P cases. */
-
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPOSSTAR:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
- /* Same for these */
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEPOSUPTO:
- if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
- code += 2;
- break;
+/*************************************************
+* Read repeat counts *
+*************************************************/
- /* End of branch */
+/* Read an item of the form {n,m} and return the values if non-NULL pointers
+are supplied. Repeat counts must be less than 65536 (MAX_REPEAT_COUNT); a
+larger value is used for "unlimited". We have to use signed arguments for
+read_number() because it is capable of returning a signed value.
- case OP_KET:
- case OP_KETRMAX:
- case OP_KETRMIN:
- case OP_KETRPOS:
- case OP_ALT:
- goto ISTRUE;
+Arguments:
+ ptrptr points to pointer to character after'{'
+ ptrend pointer to end of input
+ minp if not NULL, pointer to int for min
+ maxp if not NULL, pointer to int for max (-1 if no max)
+ returned as -1 if no max
+ errorcodeptr points to error code variable
- /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY,
- POSQUERY, UPTO, MINUPTO, and POSUPTO and their caseless and negative
- versions may be followed by a multibyte character. */
+Returns: FALSE if not a repeat quantifier, errorcode set zero
+ FALSE on error, with errorcode set non-zero
+ TRUE on success, with pointer updated to point after '}'
+*/
-#ifdef MAYBE_UTF_MULTI
- case OP_STAR:
- case OP_STARI:
- case OP_NOTSTAR:
- case OP_NOTSTARI:
-
- case OP_MINSTAR:
- case OP_MINSTARI:
- case OP_NOTMINSTAR:
- case OP_NOTMINSTARI:
-
- case OP_POSSTAR:
- case OP_POSSTARI:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSSTARI:
-
- case OP_QUERY:
- case OP_QUERYI:
- case OP_NOTQUERY:
- case OP_NOTQUERYI:
-
- case OP_MINQUERY:
- case OP_MINQUERYI:
- case OP_NOTMINQUERY:
- case OP_NOTMINQUERYI:
-
- case OP_POSQUERY:
- case OP_POSQUERYI:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSQUERYI:
- if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
- break;
+static BOOL
+read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
+ uint32_t *maxp, int *errorcodeptr)
+{
+PCRE2_SPTR p = *ptrptr;
+BOOL yield = FALSE;
+int32_t min = 0;
+int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */
- case OP_UPTO:
- case OP_UPTOI:
- case OP_NOTUPTO:
- case OP_NOTUPTOI:
-
- case OP_MINUPTO:
- case OP_MINUPTOI:
- case OP_NOTMINUPTO:
- case OP_NOTMINUPTOI:
-
- case OP_POSUPTO:
- case OP_POSUPTOI:
- case OP_NOTPOSUPTO:
- case OP_NOTPOSUPTOI:
- if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
- break;
-#endif /* MAYBE_UTF_MULTI */
+/* NB read_number() initializes the error code to zero. The only error is for a
+number that is too big. */
- /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument
- string. */
+if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
+ goto EXIT;
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- code += code[1];
- break;
+if (p >= ptrend) goto EXIT;
- /* None of the remaining opcodes are required to match a character. */
+if (*p == CHAR_RIGHT_CURLY_BRACKET)
+ {
+ p++;
+ max = min;
+ }
- default:
- break;
+else
+ {
+ if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
+ if (*p != CHAR_RIGHT_CURLY_BRACKET)
+ {
+ if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
+ errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET)
+ goto EXIT;
+ if (max < min)
+ {
+ *errorcodeptr = ERR4;
+ goto EXIT;
+ }
}
+ p++;
}
-ISTRUE:
-groupinfo |= GI_COULD_BE_EMPTY;
-
-ISFALSE:
-if (group > 0) cb->groupinfo[group] = groupinfo | GI_SET_COULD_BE_EMPTY;
-
-return ((groupinfo & GI_COULD_BE_EMPTY) != 0)? CBE_EMPTY : CBE_NOTEMPTY;
-}
-
-
-
-/*************************************************
-* Check for counted repeat *
-*************************************************/
+yield = TRUE;
+if (minp != NULL) *minp = (uint32_t)min;
+if (maxp != NULL) *maxp = (uint32_t)max;
-/* This function is called when a '{' is encountered in a place where it might
-start a quantifier. It looks ahead to see if it really is a quantifier, that
-is, one of the forms {ddd} {ddd,} or {ddd,ddd} where the ddds are digits.
+/* Update the pattern pointer on success, or after an error, but not when
+the result is "not a repeat quantifier". */
-Argument: pointer to the first char after '{'
-Returns: TRUE or FALSE
-*/
-
-static BOOL
-is_counted_repeat(PCRE2_SPTR p)
-{
-if (!IS_DIGIT(*p)) return FALSE;
-p++;
-while (IS_DIGIT(*p)) p++;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
+EXIT:
+if (yield || *errorcodeptr != 0) *ptrptr = p;
+return yield;
-if (*p++ != CHAR_COMMA) return FALSE;
-if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-if (!IS_DIGIT(*p)) return FALSE;
-p++;
-while (IS_DIGIT(*p)) p++;
-return (*p == CHAR_RIGHT_CURLY_BRACKET);
}
@@ -1789,21 +1376,14 @@ return (*p == CHAR_RIGHT_CURLY_BRACKET);
/* This function is called when a \ has been encountered. It either returns a
positive value for a simple escape such as \d, or 0 for a data character, which
is placed in chptr. A backreference to group n is returned as negative n. On
-entry, ptr is pointing at the \. On exit, it points the final code unit of the
-escape sequence.
+entry, ptr is pointing at the character after \. On exit, it points after the
+final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
-in replacement strings. In this case, the cb argument is NULL, and only
-sequences that define a data character are recognised. The isclass argument is
-not relevant, but the options argument is the final value of the compiled
-pattern's options.
-
-There is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is
-processed, it is replaced by a nested alternative sequence. If this contains a
-backslash (which is usually does), ptrend does not point to its end - it still
-points to the end of the whole pattern. However, we can detect this case
-because cb->nestptr[0] will be non-NULL. The nested sequences are all zero-
-terminated and there are only ever two levels of nesting.
+in replacement strings. In this case, the cb argument is NULL, and in the case
+of escapes that have further processing, only sequences that define a data
+character are recognised. The isclass argument is not relevant; the options
+argument is the final value of the compiled pattern's options.
Arguments:
ptrptr points to the input position pointer
@@ -1816,7 +1396,7 @@ Arguments:
Returns: zero => a data character
positive => a special escape sequence
- negative => a back reference
+ negative => a numerical back reference
on error, errorcodeptr is set non-zero
*/
@@ -1825,16 +1405,11 @@ PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
-PCRE2_SPTR ptr = *ptrptr + 1;
-register uint32_t c, cc;
+PCRE2_SPTR ptr = *ptrptr;
+uint32_t c, cc;
int escape = 0;
int i;
-/* Find the end of a nested insert. */
-
-if (cb != NULL && cb->nestptr[0] != NULL)
- ptrend = ptr + PRIV(strlen)(ptr);
-
/* If backslash is at the end of the string, it's an error. */
if (ptr >= ptrend)
@@ -1844,7 +1419,7 @@ if (ptr >= ptrend)
}
GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
-ptr--; /* Set pointer back to the last code unit */
+*errorcodeptr = 0; /* Be optimistic */
/* Non-alphanumerics are literals, so we just leave the value in c. An initial
value test saves a memory lookup for code points outside the alphanumeric
@@ -1858,7 +1433,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
if (i > 0) c = (uint32_t)i; else /* Positive is a data character */
{
escape = -i; /* Else return a special escape */
- if (escape == ESC_P || escape == ESC_p || escape == ESC_X)
+ if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
}
}
@@ -1870,8 +1445,8 @@ when BSUX is set). */
else
{
PCRE2_SPTR oldptr;
- BOOL braced, negated, overflow;
- unsigned int s;
+ BOOL overflow;
+ int s;
/* Filter calls from pcre2_substitute(). */
@@ -1900,26 +1475,31 @@ else
if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else
{
uint32_t xc;
- if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
+ if (ptrend - ptr < 4) break; /* Less than 4 chars */
+ if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
+ if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
+ cc = (cc << 4) | xc;
if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */
cc = (cc << 4) | xc;
if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
- cc = (cc << 4) | xc;
- if ((xc = XDIGIT(ptr[4])) == 0xff) break; /* Not a hex digit */
c = (cc << 4) | xc;
ptr += 4;
if (utf)
{
if (c > 0x10ffffU) *errorcodeptr = ERR77;
- else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+ else
+ if (c >= 0xd800 && c <= 0xdfff &&
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
+ *errorcodeptr = ERR73;
}
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
}
break;
- case CHAR_U:
/* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an
upper case letter. */
+
+ case CHAR_U:
if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37;
break;
@@ -1933,87 +1513,71 @@ else
(2) Perl 5.10 also supports \g{name} as a reference to a named group. This
is part of Perl's movement towards a unified syntax for back references. As
this is synonymous with \k{name}, we fudge it up by pretending it really
- was \k.
+ was \k{name}.
(3) For Oniguruma compatibility we also support \g followed by a name or a
number either in angle brackets or in single quotes. However, these are
- (possibly recursive) subroutine calls, _not_ backreferences. Just return
- the ESC_g code (cf \k). */
+ (possibly recursive) subroutine calls, _not_ backreferences. We return
+ the ESC_g code.
+
+ Summary: Return a negative number for a numerical back reference, ESC_k for
+ a named back reference, and ESC_g for a named or numbered subroutine call.
+ */
case CHAR_g:
if (isclass) break;
- if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
+
+ if (ptr >= ptrend)
+ {
+ *errorcodeptr = ERR57;
+ break;
+ }
+
+ if (*ptr == CHAR_LESS_THAN_SIGN || *ptr == CHAR_APOSTROPHE)
{
escape = ESC_g;
break;
}
- /* Handle the Perl-compatible cases */
+ /* If there is a brace delimiter, try to read a numerical reference. If
+ there isn't one, assume we have a name and treat it as \k. */
- if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
+ if (*ptr == CHAR_LEFT_CURLY_BRACKET)
{
- PCRE2_SPTR p;
- for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
- if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
- if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
+ PCRE2_SPTR p = ptr + 1;
+ if (!read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s,
+ errorcodeptr))
{
- escape = ESC_k;
+ if (*errorcodeptr == 0) escape = ESC_k; /* No number found */
break;
}
- braced = TRUE;
- ptr++;
+ if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET)
+ {
+ *errorcodeptr = ERR57;
+ break;
+ }
+ ptr = p + 1;
}
- else braced = FALSE;
- if (ptr[1] == CHAR_MINUS)
- {
- negated = TRUE;
- ptr++;
- }
- else negated = FALSE;
+ /* Read an undelimited number */
- /* The integer range is limited by the machine's int representation. */
- s = 0;
- overflow = FALSE;
- while (IS_DIGIT(ptr[1]))
+ else
{
- if (s > INT_MAX / 10 - 1) /* Integer overflow */
+ if (!read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &s,
+ errorcodeptr))
{
- overflow = TRUE;
+ if (*errorcodeptr == 0) *errorcodeptr = ERR57; /* No number found */
break;
}
- s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0);
- }
- if (overflow) /* Integer overflow */
- {
- while (IS_DIGIT(ptr[1])) ptr++;
- *errorcodeptr = ERR61;
- break;
}
- if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
+ if (s <= 0)
{
- *errorcodeptr = ERR57;
+ *errorcodeptr = ERR15;
break;
}
- if (s == 0)
- {
- *errorcodeptr = ERR58;
- break;
- }
-
- if (negated)
- {
- if (s > cb->bracount)
- {
- *errorcodeptr = ERR15;
- break;
- }
- s = cb->bracount - (s - 1);
- }
-
- escape = -(int)s;
+ escape = -s;
break;
/* The handling of escape sequences consisting of a string of digits
@@ -2036,31 +1600,18 @@ else
if (!isclass)
{
oldptr = ptr;
- /* The integer range is limited by the machine's int representation. */
- s = c - CHAR_0;
- overflow = FALSE;
- while (IS_DIGIT(ptr[1]))
- {
- if (s > INT_MAX / 10 - 1) /* Integer overflow */
- {
- overflow = TRUE;
- break;
- }
- s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0);
- }
- if (overflow) /* Integer overflow */
- {
- while (IS_DIGIT(ptr[1])) ptr++;
- *errorcodeptr = ERR61;
+ ptr--; /* Back to the digit */
+ if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
+ errorcodeptr))
break;
- }
/* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
are octal escapes if there are not that many previous captures. */
- if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount)
+ if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
{
- escape = -(int)s; /* Indicates a back reference */
+ if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
+ else escape = -s; /* Indicates a back reference */
break;
}
ptr = oldptr; /* Put the pointer back and fall through */
@@ -2068,13 +1619,12 @@ else
/* Handle a digit following \ when the number is not a back reference, or
we are within a character class. If the first digit is 8 or 9, Perl used to
- generate a binary zero byte and then treat the digit as a following
- literal. At least by Perl 5.18 this changed so as not to insert the binary
- zero. */
+ generate a binary zero and then treat the digit as a following literal. At
+ least by Perl 5.18 this changed so as not to insert the binary zero. */
- if ((c = *ptr) >= CHAR_8) break;
+ if (c >= CHAR_8) break;
- /* Fall through with a digit less than 8 */
+ /* Fall through */
/* \0 always starts an octal number, but we may drop through to here with a
larger first octal digit. The original code used just to take the least
@@ -2084,8 +1634,8 @@ else
case CHAR_0:
c -= CHAR_0;
- while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
- c = c * 8 + *(++ptr) - CHAR_0;
+ while(i++ < 2 && ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7)
+ c = c * 8 + *ptr++ - CHAR_0;
#if PCRE2_CODE_UNIT_WIDTH == 8
if (!utf && c > 0xff) *errorcodeptr = ERR51;
#endif
@@ -2095,13 +1645,18 @@ else
specifying character codes in octal. The only supported form is \o{ddd}. */
case CHAR_o:
- if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
- if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else
+ if (ptr >= ptrend || *ptr++ != CHAR_LEFT_CURLY_BRACKET)
+ {
+ ptr--;
+ *errorcodeptr = ERR55;
+ }
+ else if (ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
+ *errorcodeptr = ERR78;
+ else
{
- ptr += 2;
c = 0;
overflow = FALSE;
- while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
+ while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7)
{
cc = *ptr++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
@@ -2119,14 +1674,23 @@ else
}
if (overflow)
{
- while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
+ while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
*errorcodeptr = ERR34;
}
- else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
+ else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
+ {
+ if (utf && c >= 0xd800 && c <= 0xdfff && (cb == NULL ||
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0))
+ {
+ ptr--;
+ *errorcodeptr = ERR73;
+ }
+ }
+ else
{
- if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+ ptr--;
+ *errorcodeptr = ERR64;
}
- else *errorcodeptr = ERR64;
}
break;
@@ -2137,8 +1701,9 @@ else
if ((options & PCRE2_ALT_BSUX) != 0)
{
uint32_t xc;
- if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
- if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */
+ if (ptrend - ptr < 2) break; /* Less than 2 characters */
+ if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
+ if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
c = (cc << 4) | xc;
ptr += 2;
} /* End PCRE2_ALT_BSUX handling */
@@ -2152,10 +1717,9 @@ else
else
{
- if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
+ if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
- ptr += 2;
- if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
+ if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
{
*errorcodeptr = ERR78;
break;
@@ -2163,7 +1727,7 @@ else
c = 0;
overflow = FALSE;
- while ((cc = XDIGIT(*ptr)) != 0xff)
+ while (ptr < ptrend && (cc = XDIGIT(*ptr)) != 0xff)
{
ptr++;
if (c == 0 && cc == 0) continue; /* Leading zeroes */
@@ -2180,12 +1744,17 @@ else
if (overflow)
{
- while (XDIGIT(*ptr) != 0xff) ptr++;
+ while (ptr < ptrend && XDIGIT(*ptr) != 0xff) ptr++;
*errorcodeptr = ERR34;
}
- else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
+ else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
{
- if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
+ if (utf && c >= 0xd800 && c <= 0xdfff && (cb == NULL ||
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0))
+ {
+ ptr--;
+ *errorcodeptr = ERR73;
+ }
}
/* If the sequence of hex digits does not end with '}', give an error.
@@ -2193,18 +1762,22 @@ else
\x handling, but nowadays Perl gives an error, which seems much more
sensible, so we do too. */
- else *errorcodeptr = ERR67;
+ else
+ {
+ ptr--;
+ *errorcodeptr = ERR67;
+ }
} /* End of \x{} processing */
- /* Read a single-byte hex-defined char (up to two hex digits after \x) */
+ /* Read a up to two hex digits after \x */
else
{
c = 0;
- if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
+ if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */
ptr++;
c = cc;
- if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
+ if (ptr >= ptrend || (cc = XDIGIT(*ptr)) == 0xff) break; /* Not a hex digit */
ptr++;
c = (c << 4) | cc;
} /* End of \xdd handling */
@@ -2230,14 +1803,13 @@ else
#else
case CHAR_c:
#endif
-
- c = *(++ptr);
- if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
- if (c == CHAR_NULL && ptr >= ptrend)
+ if (ptr >= ptrend)
{
*errorcodeptr = ERR2;
break;
}
+ c = *ptr;
+ if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
/* Handle \c in an ASCII/Unicode environment. */
@@ -2267,6 +1839,7 @@ else
}
#endif /* EBCDIC */
+ ptr++;
break;
/* Any other alphanumeric following \ is an error. Perl gives an error only
@@ -2274,7 +1847,8 @@ else
default:
*errorcodeptr = ERR3;
- break;
+ *ptrptr = ptr - 1; /* Point to the character at fault */
+ return 0;
}
}
@@ -2282,16 +1856,16 @@ else
newline". PCRE does not support \N{name}. However, it does support
quantification such as \N{2,3}. */
-if (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
- !is_counted_repeat(ptr+2))
- *errorcodeptr = ERR37;
-
-/* If PCRE2_UCP is set, we change the values for \d etc. */
-
-if ((options & PCRE2_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
- escape += (ESC_DU - ESC_D);
+if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
+ ptrend - ptr > 2)
+ {
+ PCRE2_SPTR p = ptr + 1;
+ if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
+ *errorcodeptr == 0)
+ *errorcodeptr = ERR37;
+ }
-/* Set the pointer to the final character before returning. */
+/* Set the pointer to the next character before returning. */
*ptrptr = ptr;
*chptr = c;
@@ -2307,8 +1881,8 @@ return escape;
/* This function is called after \P or \p has been encountered, provided that
PCRE2 is compiled with support for UTF and Unicode properties. On entry, the
-contents of ptrptr are pointing at the P or p. On exit, it is left pointing at
-the final code unit of the escape sequence.
+contents of ptrptr are pointing after the P or p. On exit, it is left pointing
+after the final code unit of the escape sequence.
Arguments:
ptrptr the pattern position pointer
@@ -2322,31 +1896,34 @@ Returns: TRUE if the type value was found, or FALSE for an invalid type
*/
static BOOL
-get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, unsigned int *ptypeptr,
- unsigned int *pdataptr, int *errorcodeptr, compile_block *cb)
+get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
+ uint16_t *pdataptr, int *errorcodeptr, compile_block *cb)
{
-register PCRE2_UCHAR c;
-size_t i, bot, top;
+PCRE2_UCHAR c;
+PCRE2_SIZE i, bot, top;
PCRE2_SPTR ptr = *ptrptr;
PCRE2_UCHAR name[32];
+if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+c = *ptr++;
*negptr = FALSE;
-c = *(++ptr);
/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
negation. */
if (c == CHAR_LEFT_CURLY_BRACKET)
{
- if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
+ if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+ if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
{
*negptr = TRUE;
ptr++;
}
for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
{
- c = *(++ptr);
- if (c == CHAR_NULL) goto ERROR_RETURN;
+ if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+ c = *ptr++;
+ if (c == CHAR_NUL) goto ERROR_RETURN;
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
name[i] = c;
}
@@ -2397,217 +1974,6 @@ return FALSE;
/*************************************************
-* Read repeat counts *
-*************************************************/
-
-/* Read an item of the form {n,m} and return the values. This is called only
-after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
-so the syntax is guaranteed to be correct, but we need to check the values.
-
-Arguments:
- p pointer to first char after '{'
- minp pointer to int for min
- maxp pointer to int for max
- returned as -1 if no max
- errorcodeptr points to error code variable
-
-Returns: pointer to '}' on success;
- current ptr on error, with errorcodeptr set non-zero
-*/
-
-static PCRE2_SPTR
-read_repeat_counts(PCRE2_SPTR p, int *minp, int *maxp, int *errorcodeptr)
-{
-int min = 0;
-int max = -1;
-
-while (IS_DIGIT(*p))
- {
- min = min * 10 + (int)(*p++ - CHAR_0);
- if (min > 65535)
- {
- *errorcodeptr = ERR5;
- return p;
- }
- }
-
-if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
- {
- if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
- {
- max = 0;
- while(IS_DIGIT(*p))
- {
- max = max * 10 + (int)(*p++ - CHAR_0);
- if (max > 65535)
- {
- *errorcodeptr = ERR5;
- return p;
- }
- }
- if (max < min)
- {
- *errorcodeptr = ERR4;
- return p;
- }
- }
- }
-
-*minp = min;
-*maxp = max;
-return p;
-}
-
-
-
-/*************************************************
-* Scan compiled regex for recursion reference *
-*************************************************/
-
-/* This function scans through a compiled pattern until it finds an instance of
-OP_RECURSE.
-
-Arguments:
- code points to start of expression
- utf TRUE in UTF mode
-
-Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
-*/
-
-static PCRE2_SPTR
-find_recurse(PCRE2_SPTR code, BOOL utf)
-{
-for (;;)
- {
- register PCRE2_UCHAR c = *code;
- if (c == OP_END) return NULL;
- if (c == OP_RECURSE) return code;
-
- /* XCLASS is used for classes that cannot be represented just by a bit map.
- This includes negated single high-valued characters. CALLOUT_STR is used for
- callouts with string arguments. In both cases the length in the table is
- zero; the actual length is stored in the compiled code. */
-
- if (c == OP_XCLASS) code += GET(code, 1);
- else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
-
- /* Otherwise, we can get the item's length from the table, except that for
- repeated character types, we have to test for \p and \P, which have an extra
- two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
- must add in its length. */
-
- else
- {
- switch(c)
- {
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
-
- case OP_TYPEPOSUPTO:
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
- code += 2;
- break;
-
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- code += code[1];
- break;
- }
-
- /* Add in the fixed length from the table */
-
- code += PRIV(OP_lengths)[c];
-
- /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may
- be followed by a multi-unit character. The length in the table is a
- minimum, so we have to arrange to skip the extra units. */
-
-#ifdef MAYBE_UTF_MULTI
- if (utf) switch(c)
- {
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_EXACT:
- case OP_EXACTI:
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- case OP_UPTO:
- case OP_UPTOI:
- case OP_NOTUPTO:
- case OP_NOTUPTOI:
- case OP_MINUPTO:
- case OP_MINUPTOI:
- case OP_NOTMINUPTO:
- case OP_NOTMINUPTOI:
- case OP_POSUPTO:
- case OP_POSUPTOI:
- case OP_NOTPOSUPTO:
- case OP_NOTPOSUPTOI:
- case OP_STAR:
- case OP_STARI:
- case OP_NOTSTAR:
- case OP_NOTSTARI:
- case OP_MINSTAR:
- case OP_MINSTARI:
- case OP_NOTMINSTAR:
- case OP_NOTMINSTARI:
- case OP_POSSTAR:
- case OP_POSSTARI:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSSTARI:
- case OP_PLUS:
- case OP_PLUSI:
- case OP_NOTPLUS:
- case OP_NOTPLUSI:
- case OP_MINPLUS:
- case OP_MINPLUSI:
- case OP_NOTMINPLUS:
- case OP_NOTMINPLUSI:
- case OP_POSPLUS:
- case OP_POSPLUSI:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSPLUSI:
- case OP_QUERY:
- case OP_QUERYI:
- case OP_NOTQUERY:
- case OP_NOTQUERYI:
- case OP_MINQUERY:
- case OP_MINQUERYI:
- case OP_NOTMINQUERY:
- case OP_NOTMINQUERYI:
- case OP_POSQUERY:
- case OP_POSQUERYI:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSQUERYI:
- if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
- break;
- }
-#else
- (void)(utf); /* Keep compiler happy by referencing function argument */
-#endif /* MAYBE_UTF_MULTI */
- }
- }
-}
-
-
-
-/*************************************************
* Check for POSIX class syntax *
*************************************************/
@@ -2645,25 +2011,28 @@ seem right at all. PCRE does not allow closing square brackets in POSIX class
names.
Arguments:
- ptr pointer to the initial [
+ ptr pointer to the character after the initial [ (colon, dot, equals)
+ ptrend pointer to the end of the pattern
endptr where to return a pointer to the terminating ':', '.', or '='
Returns: TRUE or FALSE
*/
static BOOL
-check_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR *endptr)
+check_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR ptrend, PCRE2_SPTR *endptr)
{
PCRE2_UCHAR terminator; /* Don't combine these lines; the Solaris cc */
-terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
+terminator = *ptr++; /* compiler warns about "non-constant" initializer. */
-for (++ptr; *ptr != CHAR_NULL; ptr++)
+for (; ptrend - ptr >= 2; ptr++)
{
if (*ptr == CHAR_BACKSLASH &&
(ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH))
ptr++;
+
else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
+
else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{
*endptr = ptr;
@@ -2694,7 +2063,7 @@ static int
check_posix_name(PCRE2_SPTR ptr, int len)
{
const char *pn = posix_names;
-register int yield = 0;
+int yield = 0;
while (posix_name_lengths[yield] != 0)
{
if (len == posix_name_lengths[yield] &&
@@ -2707,765 +2076,1424 @@ return -1;
-#ifdef SUPPORT_UNICODE
/*************************************************
-* Get othercase range *
+* Read a subpattern or VERB name *
*************************************************/
-/* This function is passed the start and end of a class range in UCT mode. It
-searches up the characters, looking for ranges of characters in the "other"
-case. Each call returns the next one, updating the start address. A character
-with multiple other cases is returned on its own with a special return value.
+/* This function is called from parse_regex() below whenever it needs to read
+the name of a subpattern or a (*VERB). The initial pointer must be to the
+character before the name. If that character is '*' we are reading a verb name.
+The pointer is updated to point after the name, for a VERB, or after tha name's
+terminator for a subpattern name. Returning both the offset and the name
+pointer is redundant information, but some callers use one and some the other,
+so it is simplest just to return both.
Arguments:
- cptr points to starting character value; updated
- d end value
- ocptr where to put start of othercase range
- odptr where to put end of othercase range
-
-Yield: -1 when no more
- 0 when a range is returned
- >0 the CASESET offset for char with multiple other cases
- in this case, ocptr contains the original
+ ptrptr points to the character pointer variable
+ ptrend points to the end of the input string
+ terminator the terminator of a subpattern name must be this
+ offsetptr where to put the offset from the start of the pattern
+ nameptr where to put a pointer to the name in the input
+ namelenptr where to put the length of the name
+ errcodeptr where to put an error code
+ cb pointer to the compile data block
+
+Returns: TRUE if a name was read
+ FALSE otherwise, with error code set
*/
-static int
-get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr,
- uint32_t *odptr)
+static BOOL
+read_name(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t terminator,
+ PCRE2_SIZE *offsetptr, PCRE2_SPTR *nameptr, uint32_t *namelenptr,
+ int *errorcodeptr, compile_block *cb)
{
-uint32_t c, othercase, next;
-unsigned int co;
+PCRE2_SPTR ptr = *ptrptr;
+BOOL is_verb = (*ptr == CHAR_ASTERISK);
+uint32_t namelen = 0;
+uint32_t ctype = is_verb? ctype_letter : ctype_word;
-/* Find the first character that has an other case. If it has multiple other
-cases, return its case offset value. */
+if (++ptr >= ptrend)
+ {
+ *errorcodeptr = is_verb? ERR60: /* Verb not recognized or malformed */
+ ERR62; /* Subpattern name expected */
+ goto FAILED;
+ }
-for (c = *cptr; c <= d; c++)
+*nameptr = ptr;
+*offsetptr = (PCRE2_SIZE)(ptr - cb->start_pattern);
+
+if (IS_DIGIT(*ptr))
{
- if ((co = UCD_CASESET(c)) != 0)
+ *errorcodeptr = ERR44; /* Group name must not start with digit */
+ goto FAILED;
+ }
+
+while (ptr < ptrend && MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0)
+ {
+ ptr++;
+ namelen++;
+ if (namelen > MAX_NAME_SIZE)
{
- *ocptr = c++; /* Character that has the set */
- *cptr = c; /* Rest of input range */
- return (int)co;
+ *errorcodeptr = ERR48;
+ goto FAILED;
}
- if ((othercase = UCD_OTHERCASE(c)) != c) break;
}
-if (c > d) return -1; /* Reached end of range */
-
-/* Found a character that has a single other case. Search for the end of the
-range, which is either the end of the input range, or a character that has zero
-or more than one other cases. */
+/* Subpattern names must not be empty, and their terminator is checked here.
+(What follows a verb name is checked separately.) */
-*ocptr = othercase;
-next = othercase + 1;
-
-for (++c; c <= d; c++)
+if (!is_verb)
{
- if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
- next++;
+ if (namelen == 0)
+ {
+ *errorcodeptr = ERR62; /* Subpattern name expected */
+ goto FAILED;
+ }
+ if (ptr >= ptrend || *ptr != (PCRE2_UCHAR)terminator)
+ {
+ *errorcodeptr = ERR42;
+ goto FAILED;
+ }
+ ptr++;
}
-*odptr = next - 1; /* End of othercase range */
-*cptr = c; /* Rest of input range */
-return 0;
+*namelenptr = namelen;
+*ptrptr = ptr;
+return TRUE;
+
+FAILED:
+*ptrptr = ptr;
+return FALSE;
}
-#endif /* SUPPORT_UNICODE */
/*************************************************
-* Add a character or range to a class *
+* Manage callouts at start of cycle *
*************************************************/
-/* This function packages up the logic of adding a character or range of
-characters to a class. The character values in the arguments will be within the
-valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
-mutually recursive with the function immediately below.
+/* At the start of a new item in parse_regex() we are able to record the
+details of the previous item in a prior callout, and also to set up an
+automatic callout if enabled. Avoid having two adjacent automatic callouts,
+which would otherwise happen for items such as \Q that contribute nothing to
+the parsed pattern.
Arguments:
- classbits the bit map for characters < 256
- uchardptr points to the pointer for extra data
- options the options word
- cb compile data
- start start of range character
- end end of range character
+ ptr current pattern pointer
+ pcalloutptr points to a pointer to previous callout, or NULL
+ auto_callout TRUE if auto_callouts are enabled
+ parsed_pattern the parsed pattern pointer
+ cb compile block
-Returns: the number of < 256 characters added
- the pointer to extra data is updated
+Returns: possibly updated parsed_pattern pointer.
*/
-static unsigned int
-add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
- compile_block *cb, uint32_t start, uint32_t end)
+static uint32_t *
+manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, BOOL auto_callout,
+ uint32_t *parsed_pattern, compile_block *cb)
{
-uint32_t c;
-uint32_t classbits_end = (end <= 0xff ? end : 0xff);
-unsigned int n8 = 0;
+uint32_t *previous_callout = *pcalloutptr;
-/* If caseless matching is required, scan the range and process alternate
-cases. In Unicode, there are 8-bit characters that have alternate cases that
-are greater than 255 and vice-versa. Sometimes we can just extend the original
-range. */
+if (previous_callout != NULL) previous_callout[2] = ptr - cb->start_pattern -
+ (PCRE2_SIZE)previous_callout[1];
-if ((options & PCRE2_CASELESS) != 0)
+if (!auto_callout) previous_callout = NULL; else
{
-#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UTF) != 0)
+ if (previous_callout == NULL ||
+ previous_callout != parsed_pattern - 4 ||
+ previous_callout[3] != 255)
{
- int rc;
- uint32_t oc, od;
-
- options &= ~PCRE2_CASELESS; /* Remove for recursive calls */
- c = start;
+ previous_callout = parsed_pattern; /* Set up new automatic callout */
+ parsed_pattern += 4;
+ previous_callout[0] = META_CALLOUT_NUMBER;
+ previous_callout[2] = 0;
+ previous_callout[3] = 255;
+ }
+ previous_callout[1] = (uint32_t)(ptr - cb->start_pattern);
+ }
- while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
- {
- /* Handle a single character that has more than one other case. */
+*pcalloutptr = previous_callout;
+return parsed_pattern;
+}
- if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cb,
- PRIV(ucd_caseless_sets) + rc, oc);
- /* Do nothing if the other case range is within the original range. */
- else if (oc >= start && od <= end) continue;
+/*************************************************
+* Parse regex and identify named groups *
+*************************************************/
- /* Extend the original range if there is overlap, noting that if oc < c, we
- can't have od > end because a subrange is always shorter than the basic
- range. Otherwise, use a recursive call to add the additional range. */
+/* This function is called first of all. It scans the pattern and does two
+things: (1) It identifies capturing groups and makes a table of named capturing
+groups so that information about them is fully available to both the compiling
+scans. (2) It writes a parsed version of the pattern with comments omitted and
+escapes processed into the parsed_pattern vector.
- else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
- else if (od > end && oc <= end + 1)
- {
- end = od; /* Extend upwards */
- if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
- }
- else n8 += add_to_class(classbits, uchardptr, options, cb, oc, od);
- }
- }
- else
-#endif /* SUPPORT_UNICODE */
-
- /* Not UTF mode */
+Arguments:
+ ptr points to the start of the pattern
+ options compiling dynamic options (may change during the scan)
+ has_lookbehind points to a boolean, set TRUE if a lookbehind is found
+ cb pointer to the compile data block
- for (c = start; c <= classbits_end; c++)
- {
- SETBIT(classbits, cb->fcc[c]);
- n8++;
- }
- }
+Returns: zero on success or a non-zero error code, with the
+ error offset placed in the cb field
+*/
-/* Now handle the original range. Adjust the final value according to the bit
-length - this means that the same lists of (e.g.) horizontal spaces can be used
-in all cases. */
+/* A structure and some flags for dealing with nested groups. */
-if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
- end = MAX_NON_UTF_CHAR;
+typedef struct nest_save {
+ uint16_t nest_depth;
+ uint16_t reset_group;
+ uint16_t max_group;
+ uint16_t flags;
+ uint32_t options;
+} nest_save;
-/* Use the bitmap for characters < 256. Otherwise use extra data.*/
+#define NSF_RESET 0x0001u
+#define NSF_CONDASSERT 0x0002u
-for (c = start; c <= classbits_end; c++)
- {
- /* Regardless of start, c will always be <= 255. */
- SETBIT(classbits, c);
- n8++;
- }
+/* Of the options that are changeable within the pattern, these are tracked
+during parsing. The rest are used from META_OPTIONS items when compiling. */
-#ifdef SUPPORT_WIDE_CHARS
-if (start <= 0xff) start = 0xff + 1;
+#define PARSE_TRACKED_OPTIONS \
+ (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
-if (end >= start)
- {
- PCRE2_UCHAR *uchardata = *uchardptr;
+/* States used for analyzing ranges in character classes. The two OK values
+must be last. */
-#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UTF) != 0)
- {
- if (start < end)
- {
- *uchardata++ = XCL_RANGE;
- uchardata += PRIV(ord2utf)(start, uchardata);
- uchardata += PRIV(ord2utf)(end, uchardata);
- }
- else if (start == end)
- {
- *uchardata++ = XCL_SINGLE;
- uchardata += PRIV(ord2utf)(start, uchardata);
- }
- }
- else
-#endif /* SUPPORT_UNICODE */
+enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL };
- /* Without UTF support, character values are constrained by the bit length,
- and can only be > 256 for 16-bit and 32-bit libraries. */
+/* Only in 32-bit mode can there be literals > META_END. A macros encapsulates
+the storing of literal values in the parsed pattern. */
-#if PCRE2_CODE_UNIT_WIDTH == 8
- {}
-#else
- if (start < end)
- {
- *uchardata++ = XCL_RANGE;
- *uchardata++ = start;
- *uchardata++ = end;
- }
- else if (start == end)
- {
- *uchardata++ = XCL_SINGLE;
- *uchardata++ = start;
- }
-#endif
- *uchardptr = uchardata; /* Updata extra data pointer */
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define PARSED_LITERAL(c, p) \
+ { \
+ if (c >= META_END) *p++ = META_BIGVALUE; \
+ *p++ = c; \
+ okquantifier = TRUE; \
}
#else
- (void)uchardptr; /* Avoid compiler warning */
-#endif /* SUPPORT_WIDE_CHARS */
-
-return n8; /* Number of 8-bit characters */
-}
-
+#define PARSED_LITERAL(c, p) *p++ = c; okquantifier = TRUE;
+#endif
+/* Here's the actual function. */
-/*************************************************
-* Add a list of characters to a class *
-*************************************************/
+static int parse_regex(PCRE2_SPTR ptr, uint32_t options, BOOL *has_lookbehind,
+ compile_block *cb)
+{
+uint32_t c;
+uint32_t delimiter;
+uint32_t namelen;
+uint32_t class_range_state;
+uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */
+uint32_t *previous_callout = NULL;
+uint32_t *parsed_pattern = cb->parsed_pattern;
+uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
+uint32_t meta_quantifier = 0;
+uint16_t nest_depth = 0;
+int after_manual_callout = 0;
+int expect_cond_assert = 0;
+int errorcode = 0;
+int escape;
+int i;
+BOOL inescq = FALSE;
+BOOL inverbname = FALSE;
+BOOL utf = (options & PCRE2_UTF) != 0;
+BOOL auto_callout = (options & PCRE2_AUTO_CALLOUT) != 0;
+BOOL isdupname;
+BOOL negate_class;
+BOOL okquantifier = FALSE;
+PCRE2_SPTR thisptr;
+PCRE2_SPTR name;
+PCRE2_SPTR ptrend = cb->end_pattern;
+PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
+named_group *ng;
+nest_save *top_nest, *end_nests;
-/* This function is used for adding a list of case-equivalent characters to a
-class, and also for adding a list of horizontal or vertical whitespace. If the
-list is in order (which it should be), ranges of characters are detected and
-handled appropriately. This function is mutually recursive with the function
-above.
+/* Insert leading items for word and line matching (features provided for the
+benefit of pcre2grep). */
-Arguments:
- classbits the bit map for characters < 256
- uchardptr points to the pointer for extra data
- options the options word
- cb contains pointers to tables etc.
- p points to row of 32-bit values, terminated by NOTACHAR
- except character to omit; this is used when adding lists of
- case-equivalent characters to avoid including the one we
- already know about
+if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
+ {
+ *parsed_pattern++ = META_CIRCUMFLEX;
+ *parsed_pattern++ = META_NOCAPTURE;
+ }
+else if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + ESC_b;
+ *parsed_pattern++ = META_NOCAPTURE;
+ }
-Returns: the number of < 256 characters added
- the pointer to extra data is updated
-*/
+/* If the pattern is actually a literal string, process it separately to avoid
+cluttering up the main loop. */
-static unsigned int
-add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
- compile_block *cb, const uint32_t *p, unsigned int except)
-{
-unsigned int n8 = 0;
-while (p[0] < NOTACHAR)
+if ((options & PCRE2_LITERAL) != 0)
{
- unsigned int n = 0;
- if (p[0] != except)
+ while (ptr < ptrend)
{
- while(p[n+1] == p[0] + n + 1) n++;
- n8 += add_to_class(classbits, uchardptr, options, cb, p[0], p[n]);
+ if (parsed_pattern >= parsed_pattern_end)
+ {
+ errorcode = ERR63; /* Internal error (parsed pattern overflow) */
+ goto FAILED;
+ }
+ thisptr = ptr;
+ GETCHARINCTEST(c, ptr);
+ if (auto_callout)
+ parsed_pattern = manage_callouts(thisptr, &previous_callout,
+ auto_callout, parsed_pattern, cb);
+ PARSED_LITERAL(c, parsed_pattern);
}
- p += n + 1;
+ goto PARSED_END;
}
-return n8;
-}
+/* Process a real regex which may contain meta-characters. */
+top_nest = NULL;
+end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
-/*************************************************
-* Add characters not in a list to a class *
-*************************************************/
-
-/* This function is used for adding the complement of a list of horizontal or
-vertical whitespace to a class. The list must be in order.
-
-Arguments:
- classbits the bit map for characters < 256
- uchardptr points to the pointer for extra data
- options the options word
- cb contains pointers to tables etc.
- p points to row of 32-bit values, terminated by NOTACHAR
-
-Returns: the number of < 256 characters added
- the pointer to extra data is updated
-*/
+/* The size of the nest_save structure might not be a factor of the size of the
+workspace. Therefore we must round down end_nests so as to correctly avoid
+creating a nest_save that spans the end of the workspace. */
-static unsigned int
-add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
- uint32_t options, compile_block *cb, const uint32_t *p)
-{
-BOOL utf = (options & PCRE2_UTF) != 0;
-unsigned int n8 = 0;
-if (p[0] > 0)
- n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1);
-while (p[0] < NOTACHAR)
- {
- while (p[1] == p[0] + 1) p++;
- n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1,
- (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
- p++;
- }
-return n8;
-}
+end_nests = (nest_save *)((char *)end_nests -
+ ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save)));
+/* PCRE2_EXTENDED_MORE implies PCRE2_EXTENDED */
+if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
-/*************************************************
-* Process (*VERB) name for escapes *
-*************************************************/
+/* Now scan the pattern */
-/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to
-process the characters in a verb's name argument. It is called twice, once with
-codeptr == NULL, to find out the length of the processed name, and again to put
-the name into memory.
+while (ptr < ptrend)
+ {
+ int prev_expect_cond_assert;
+ uint32_t min_repeat, max_repeat;
+ uint32_t set, unset, *optset;
+ uint32_t terminator;
+ uint32_t prev_meta_quantifier;
+ BOOL prev_okquantifier;
+ PCRE2_SPTR tempptr;
+ PCRE2_SIZE offset;
+
+ if (parsed_pattern >= parsed_pattern_end)
+ {
+ errorcode = ERR63; /* Internal error (parsed pattern overflow) */
+ goto FAILED;
+ }
-Arguments:
- ptrptr pointer to the input pointer
- codeptr pointer to the compiled code pointer
- errorcodeptr pointer to the error code
- options the options bits
- utf TRUE if processing UTF
- cb compile data block
-
-Returns: length of the processed name, or < 0 on error
-*/
+ if (nest_depth > cb->cx->parens_nest_limit)
+ {
+ errorcode = ERR19;
+ goto FAILED; /* Parentheses too deeply nested */
+ }
-static int
-process_verb_name(PCRE2_SPTR *ptrptr, PCRE2_UCHAR **codeptr, int *errorcodeptr,
- uint32_t options, BOOL utf, compile_block *cb)
-{
-int32_t arglen = 0;
-BOOL inescq = FALSE;
-PCRE2_SPTR ptr = *ptrptr;
-PCRE2_UCHAR *code = (codeptr == NULL)? NULL : *codeptr;
+ /* Get next input character, save its position for callout handling. */
-for (; ptr < cb->end_pattern; ptr++)
- {
- uint32_t x = *ptr;
+ thisptr = ptr;
+ GETCHARINCTEST(c, ptr);
- /* Skip over literals */
+ /* Copy quoted literals until \E, allowing for the possibility of automatic
+ callouts, except when processing a (*VERB) "name". */
if (inescq)
{
- if (x == CHAR_BACKSLASH && ptr[1] == CHAR_E)
+ if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E)
{
inescq = FALSE;
- ptr++;;
- continue;
+ ptr++; /* Skip E */
}
+ else
+ {
+ if (expect_cond_assert > 0) /* A literal is not allowed if we are */
+ { /* expecting a conditional assertion, */
+ ptr--; /* but an empty \Q\E sequence is OK. */
+ errorcode = ERR28;
+ goto FAILED;
+ }
+ if (!inverbname && after_manual_callout-- <= 0)
+ parsed_pattern = manage_callouts(thisptr, &previous_callout,
+ auto_callout, parsed_pattern, cb);
+ PARSED_LITERAL(c, parsed_pattern);
+ meta_quantifier = 0;
+ }
+ continue; /* Next character */
}
- else /* Not a literal character */
+ /* If we are processing the "name" part of a (*VERB:NAME) item, all
+ characters up to the closing parenthesis are literals except when
+ PCRE2_ALT_VERBNAMES is set. That causes backslash interpretation, but only \Q
+ and \E and escaped characters are allowed (no character types such as \d). If
+ PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do
+ this by not entering the special (*VERB:NAME) processing - they are then
+ picked up below. Note that c is a character, not a code unit, so we must not
+ use MAX_255 to test its size because MAX_255 tests code units and is assumed
+ TRUE in 8-bit mode. */
+
+ if (inverbname &&
+ (
+ /* EITHER: not both options set */
+ ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
+ (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
+ /* OR: character > 255 */
+ c > 255 ||
+ /* OR: not a # comment or white space */
+ (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
+ ))
{
- if (x == CHAR_RIGHT_PARENTHESIS) break;
+ PCRE2_SIZE verbnamelength;
- /* Skip over comments and whitespace in extended mode. */
-
- if ((options & PCRE2_EXTENDED) != 0)
+ switch(c)
{
- PCRE2_SPTR wscptr = ptr;
- while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
- if (x == CHAR_NUMBER_SIGN)
+ default:
+ PARSED_LITERAL(c, parsed_pattern);
+ break;
+
+ case CHAR_RIGHT_PARENTHESIS:
+ inverbname = FALSE;
+ okquantifier = FALSE; /* Was probably set by literals */
+ /* This is the length in characters */
+ verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
+ /* But the limit on the length is in code units */
+ if (ptr - verbnamestart - 1 > (int)MAX_MARK)
{
- ptr++;
- while (*ptr != CHAR_NULL || ptr < cb->end_pattern)
- {
- if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
- { /* IS_NEWLINE sets cb->nllen. */
- ptr += cb->nllen;
- break;
- }
- ptr++;
-#ifdef SUPPORT_UNICODE
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
+ ptr--;
+ errorcode = ERR76;
+ goto FAILED;
}
+ *verblengthptr = (uint32_t)verbnamelength;
+ break;
- /* If we have skipped any characters, restart the loop. */
+ case CHAR_BACKSLASH:
+ if ((options & PCRE2_ALT_VERBNAMES) != 0)
+ {
+ escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
+ FALSE, cb);
+ if (errorcode != 0) goto FAILED;
+ }
+ else escape = 0; /* Treat all as literal */
- if (ptr > wscptr)
+ switch(escape)
{
- ptr--;
- continue;
+ case 0:
+ PARSED_LITERAL(c, parsed_pattern);
+ break;
+
+ case ESC_Q:
+ inescq = TRUE;
+ break;
+
+ case ESC_E: /* Ignore */
+ break;
+
+ default:
+ errorcode = ERR40; /* Invalid in verb name */
+ goto FAILED;
}
}
+ continue; /* Next character in pattern */
+ }
- /* Process escapes */
+ /* Not a verb name character. At this point we must process everything that
+ must not change the quantification state. This is mainly comments, but we
+ handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as
+ A+, as in Perl. An isolated \E is ignored. */
- if (x == '\\')
+ if (c == CHAR_BACKSLASH && ptr < ptrend)
+ {
+ if (*ptr == CHAR_Q || *ptr == CHAR_E)
{
- int rc;
- *errorcodeptr = 0;
- rc = PRIV(check_escape)(&ptr, cb->end_pattern, &x, errorcodeptr, options,
- FALSE, cb);
- *ptrptr = ptr; /* For possible error */
- if (*errorcodeptr != 0) return -1;
- if (rc != 0)
- {
- if (rc == ESC_Q)
- {
- inescq = TRUE;
- continue;
- }
- if (rc == ESC_E) continue;
- *errorcodeptr = ERR40;
- return -1;
- }
+ inescq = *ptr == CHAR_Q;
+ ptr++;
+ continue;
}
}
- /* We have the next character in the name. */
+ /* Skip over whitespace and # comments in extended mode. Note that c is a
+ character, not a code unit, so we must not use MAX_255 to test its size
+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
-#ifdef SUPPORT_UNICODE
- if (utf)
+ if ((options & PCRE2_EXTENDED) != 0)
{
- if (code == NULL) /* Just want the length */
+ if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
+ if (c == CHAR_NUMBER_SIGN)
{
-#if PCRE2_CODE_UNIT_WIDTH == 8
- int i;
- for (i = 0; i < PRIV(utf8_table1_size); i++)
- if ((int)x <= PRIV(utf8_table1)[i]) break;
- arglen += i;
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- if (x > 0xffff) arglen++;
+ while (ptr < ptrend)
+ {
+ if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
+ { /* IS_NEWLINE sets cb->nllen. */
+ ptr += cb->nllen;
+ break;
+ }
+ ptr++;
+#ifdef SUPPORT_UNICODE
+ if (utf) FORWARDCHARTEST(ptr, ptrend);
#endif
- }
- else
- {
- PCRE2_UCHAR cbuff[8];
- x = PRIV(ord2utf)(x, cbuff);
- memcpy(code, cbuff, CU2BYTES(x));
- code += x;
+ }
+ continue; /* Next character in pattern */
}
}
- else
-#endif /* SUPPORT_UNICODE */
- /* Not UTF */
+ /* Skip over bracketed comments */
+
+ if (c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 2 &&
+ ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
{
- if (code != NULL) *code++ = (PCRE2_UCHAR)x;
+ while (++ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS);
+ if (ptr >= ptrend)
+ {
+ errorcode = ERR18; /* A special error for missing ) in a comment */
+ goto FAILED; /* to make it easier to debug. */
+ }
+ ptr++;
+ continue; /* Next character in pattern */
}
- arglen++;
+ /* If the next item is not a quantifier, fill in length of any previous
+ callout and create an auto callout if required. */
- if ((unsigned int)arglen > MAX_MARK)
+ if (c != CHAR_ASTERISK && c != CHAR_PLUS && c != CHAR_QUESTION_MARK &&
+ (c != CHAR_LEFT_CURLY_BRACKET ||
+ (tempptr = ptr,
+ !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
{
- *errorcodeptr = ERR76;
- *ptrptr = ptr;
- return -1;
+ if (after_manual_callout-- <= 0)
+ parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
+ parsed_pattern, cb);
}
- }
-/* Update the pointers before returning. */
+ /* If expect_cond_assert is 2, we have just passed (?( and are expecting an
+ assertion, possibly preceded by a callout. If the value is 1, we have just
+ had the callout and expect an assertion. There must be at least 3 more
+ characters in all cases. When expect_cond_assert is 2, we know that the
+ current character is an opening parenthesis, as otherwise we wouldn't be
+ here. However, when it is 1, we need to check, and it's easiest just to check
+ always. Note that expect_cond_assert may be negative, since all callouts just
+ decrement it. */
-*ptrptr = ptr;
-if (codeptr != NULL) *codeptr = code;
-return arglen;
-}
+ if (expect_cond_assert > 0)
+ {
+ BOOL ok = c == CHAR_LEFT_PARENTHESIS && ptrend - ptr >= 3 &&
+ ptr[0] == CHAR_QUESTION_MARK;
+ if (ok) switch(ptr[1])
+ {
+ case CHAR_C:
+ ok = expect_cond_assert == 2;
+ break;
+ case CHAR_EQUALS_SIGN:
+ case CHAR_EXCLAMATION_MARK:
+ break;
+ case CHAR_LESS_THAN_SIGN:
+ ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
+ break;
-/*************************************************
-* Macro for the next two functions *
-*************************************************/
+ default:
+ ok = FALSE;
+ }
-/* Both scan_for_captures() and compile_branch() use this macro to generate a
-fragment of code that reads the characters of a name and sets its length
-(checking for not being too long). Count the characters dynamically, to avoid
-the possibility of integer overflow. The same macro is used for reading *VERB
-names. */
-
-#define READ_NAME(ctype, errno, errset) \
- namelen = 0; \
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0) \
- { \
- ptr++; \
- namelen++; \
- if (namelen > MAX_NAME_SIZE) \
- { \
- errset = errno; \
- goto FAILED; \
- } \
+ if (!ok)
+ {
+ ptr--; /* Adjust error offset */
+ errorcode = ERR28;
+ goto FAILED;
+ }
}
+ /* Remember whether we are expecting a conditional assertion, and set the
+ default for this item. */
+ prev_expect_cond_assert = expect_cond_assert;
+ expect_cond_assert = 0;
-/*************************************************
-* Scan regex to identify named groups *
-*************************************************/
+ /* Remember quantification status for the previous significant item, then set
+ default for this item. */
-/* This function is called first of all, to scan for named capturing groups so
-that information about them is fully available to both the compiling scans.
-It skips over everything except parenthesized items.
+ prev_okquantifier = okquantifier;
+ prev_meta_quantifier = meta_quantifier;
+ okquantifier = FALSE;
+ meta_quantifier = 0;
-Arguments:
- ptrptr points to pointer to the start of the pattern
- options compiling dynamic options
- cb pointer to the compile data block
+ /* If the previous significant item was a quantifier, adjust the parsed code
+ if there is a following modifier. The base meta value is always followed by
+ the PLUS and QUERY values, in that order. We do this here rather than after
+ reading a quantifier so that intervening comments and /x whitespace can be
+ ignored without having to replicate code. */
-Returns: zero on success or a non-zero error code, with pointer updated
-*/
-
-typedef struct nest_save {
- uint16_t nest_depth;
- uint16_t reset_group;
- uint16_t max_group;
- uint16_t flags;
-} nest_save;
+ if (prev_meta_quantifier != 0 && (c == CHAR_QUESTION_MARK || c == CHAR_PLUS))
+ {
+ parsed_pattern[(prev_meta_quantifier == META_MINMAX)? -3 : -1] =
+ prev_meta_quantifier + ((c == CHAR_QUESTION_MARK)?
+ 0x00020000u : 0x00010000u);
+ continue; /* Next character in pattern */
+ }
-#define NSF_RESET 0x0001u
-#define NSF_EXTENDED 0x0002u
-#define NSF_DUPNAMES 0x0004u
-static int scan_for_captures(PCRE2_SPTR *ptrptr, uint32_t options,
- compile_block *cb)
-{
-uint32_t c;
-uint32_t delimiter;
-uint32_t set, unset, *optset;
-uint32_t skiptoket = 0;
-uint16_t nest_depth = 0;
-int errorcode = 0;
-int escape;
-int namelen;
-int i;
-BOOL inescq = FALSE;
-BOOL isdupname;
-BOOL utf = (options & PCRE2_UTF) != 0;
-BOOL negate_class;
-PCRE2_SPTR name;
-PCRE2_SPTR start;
-PCRE2_SPTR ptr = *ptrptr;
-named_group *ng;
-nest_save *top_nest = NULL;
-nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
+ /* Process the next item in the main part of a pattern. */
-/* The size of the nest_save structure might not be a factor of the size of the
-workspace. Therefore we must round down end_nests so as to correctly avoid
-creating a nest_save that spans the end of the workspace. */
+ switch(c)
+ {
+ default: /* Non-special character */
+ PARSED_LITERAL(c, parsed_pattern);
+ break;
-end_nests = (nest_save *)((char *)end_nests -
- ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save)));
-/* Now scan the pattern */
+ /* ---- Escape sequence ---- */
-for (; ptr < cb->end_pattern; ptr++)
- {
- c = *ptr;
+ case CHAR_BACKSLASH:
+ tempptr = ptr;
+ escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
+ FALSE, cb);
+ if (errorcode != 0)
+ {
+ ESCAPE_FAILED:
+ if ((cb->cx->extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
+ goto FAILED;
+ ptr = tempptr;
+ if (ptr >= ptrend) c = CHAR_BACKSLASH; else
+ {
+ GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
+ }
+ escape = 0; /* Treat as literal character */
+ }
- /* Parenthesized groups set skiptoket when all following characters up to the
- next closing parenthesis must be ignored. The parenthesis itself must be
- processed (to end the nested parenthesized item). */
+ /* The escape was a data escape or literal character. */
- if (skiptoket != 0)
- {
- if (c != CHAR_RIGHT_PARENTHESIS) continue;
- skiptoket = 0;
- }
+ if (escape == 0)
+ {
+ PARSED_LITERAL(c, parsed_pattern);
+ }
- /* Skip over literals */
+ /* The escape was a back (or forward) reference. We keep the offset in
+ order to give a more useful diagnostic for a bad forward reference. For
+ references to groups numbered less than 10 we can't use more than two items
+ in parsed_pattern because they may be just two characters in the input (and
+ in a 64-bit world an offset may need two elements). So for them, the offset
+ of the first occurrent is held in a special vector. */
- if (inescq)
- {
- if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
+ else if (escape < 0)
{
- inescq = FALSE;
- ptr++;
+ offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1);
+ escape = -escape;
+ *parsed_pattern++ = META_BACKREF | (uint32_t)escape;
+ if (escape < 10)
+ {
+ if (cb->small_ref_offset[escape] == PCRE2_UNSET)
+ cb->small_ref_offset[escape] = offset;
+ }
+ else
+ {
+ PUTOFFSET(offset, parsed_pattern);
+ }
+ okquantifier = TRUE;
}
- continue;
- }
-
- /* Skip over # comments and whitespace in extended mode. */
- if ((options & PCRE2_EXTENDED) != 0)
- {
- PCRE2_SPTR wscptr = ptr;
- while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
- if (c == CHAR_NUMBER_SIGN)
+ /* The escape was a character class such as \d etc. or other special
+ escape indicator such as \A or \X. Most of them generate just a single
+ parsed item, but \P and \p are followed by a 16-bit type and a 16-bit
+ value. They are supported only when Unicode is available. The type and
+ value are packed into a single 32-bit value so that the whole sequences
+ uses only two elements in the parsed_vector. This is because the same
+ coding is used if \d (for example) is turned into \p{Nd} when PCRE2_UCP is
+ set.
+
+ There are also some cases where the escape sequence is followed by a name:
+ \k{name}, \k<name>, and \k'name' are backreferences by name, and \g<name>
+ and \g'name' are subroutine calls by name; \g{name} is a synonym for
+ \k{name}. Note that \g<number> and \g'number' are handled by check_escape()
+ and returned as a negative value (handled above). A name is coded as an
+ offset into the pattern and a length. */
+
+ else switch (escape)
{
- ptr++;
- while (ptr < cb->end_pattern)
+ case ESC_C:
+#ifdef NEVER_BACKSLASH_C
+ errorcode = ERR85;
+ goto ESCAPE_FAILED;
+#else
+ if ((options & PCRE2_NEVER_BACKSLASH_C) != 0)
{
- if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
- { /* IS_NEWLINE sets cb->nllen. */
- ptr += cb->nllen;
+ errorcode = ERR83;
+ goto ESCAPE_FAILED;
+ }
+#endif
+ okquantifier = TRUE;
+ *parsed_pattern++ = META_ESCAPE + escape;
+ break;
+
+ case ESC_X:
+#ifndef SUPPORT_UNICODE
+ errorcode = ERR45; /* Supported only with Unicode support */
+ goto ESCAPE_FAILED;
+#endif
+ case ESC_H:
+ case ESC_h:
+ case ESC_N:
+ case ESC_R:
+ case ESC_V:
+ case ESC_v:
+ okquantifier = TRUE;
+ *parsed_pattern++ = META_ESCAPE + escape;
+ break;
+
+ default: /* \A, \B, \b, \G, \K, \Z, \z cannot be quantified. */
+ *parsed_pattern++ = META_ESCAPE + escape;
+ break;
+
+ /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set
+ without Unicode support because it is checked when pcre2_compile() is
+ called. */
+
+ case ESC_d:
+ case ESC_D:
+ case ESC_s:
+ case ESC_S:
+ case ESC_w:
+ case ESC_W:
+ okquantifier = TRUE;
+ if ((options & PCRE2_UCP) == 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + escape;
+ }
+ else
+ {
+ *parsed_pattern++ = META_ESCAPE +
+ ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
+ ESC_p : ESC_P);
+ switch(escape)
+ {
+ case ESC_d:
+ case ESC_D:
+ *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
+ break;
+
+ case ESC_s:
+ case ESC_S:
+ *parsed_pattern++ = PT_SPACE << 16;
+ break;
+
+ case ESC_w:
+ case ESC_W:
+ *parsed_pattern++ = PT_WORD << 16;
break;
}
- ptr++;
+ }
+ break;
+
+ /* Unicode property matching */
+
+ case ESC_P:
+ case ESC_p:
#ifdef SUPPORT_UNICODE
- if (utf) FORWARDCHAR(ptr);
+ {
+ BOOL negated;
+ uint16_t ptype = 0, pdata = 0;
+ if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb))
+ goto ESCAPE_FAILED;
+ if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
+ *parsed_pattern++ = META_ESCAPE + escape;
+ *parsed_pattern++ = (ptype << 16) | pdata;
+ okquantifier = TRUE;
+ }
+#else
+ errorcode = ERR45;
+ goto ESCAPE_FAILED;
#endif
+ break; /* End \P and \p */
+
+ /* When \g is used with quotes or angle brackets as delimiters, it is a
+ numerical or named subroutine call, and control comes here. When used
+ with brace delimiters it is a numberical back reference and does not come
+ here because check_escape() returns it directly as a reference. \k is
+ always a named back reference. */
+
+ case ESC_g:
+ case ESC_k:
+ if (ptr >= ptrend || (*ptr != CHAR_LEFT_CURLY_BRACKET &&
+ *ptr != CHAR_LESS_THAN_SIGN && *ptr != CHAR_APOSTROPHE))
+ {
+ errorcode = (escape == ESC_g)? ERR57 : ERR69;
+ goto ESCAPE_FAILED;
}
+ terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
+ CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
+ CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
+
+ /* For a non-braced \g, check for a numerical recursion. */
+
+ if (escape == ESC_g && terminator != CHAR_RIGHT_CURLY_BRACKET)
+ {
+ PCRE2_SPTR p = ptr + 1;
+
+ if (read_number(&p, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i,
+ &errorcode))
+ {
+ if (p >= ptrend || *p != terminator)
+ {
+ errorcode = ERR57;
+ goto ESCAPE_FAILED;
+ }
+ ptr = p;
+ goto SET_RECURSION;
+ }
+ if (errorcode != 0) goto ESCAPE_FAILED;
+ }
+
+ /* Not a numerical recursion */
+
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
+ &errorcode, cb)) goto ESCAPE_FAILED;
+
+ /* \k and \g when used with braces are back references, whereas \g used
+ with quotes or angle brackets is a recursion */
+
+ *parsed_pattern++ =
+ (escape == ESC_k || terminator == CHAR_RIGHT_CURLY_BRACKET)?
+ META_BACKREF_BYNAME : META_RECURSE_BYNAME;
+ *parsed_pattern++ = namelen;
+
+ PUTOFFSET(offset, parsed_pattern);
+ okquantifier = TRUE;
+ break; /* End special escape processing */
}
+ break; /* End escape sequence processing */
+
+
+ /* ---- Single-character special items ---- */
+
+ case CHAR_CIRCUMFLEX_ACCENT:
+ *parsed_pattern++ = META_CIRCUMFLEX;
+ break;
+
+ case CHAR_DOLLAR_SIGN:
+ *parsed_pattern++ = META_DOLLAR;
+ break;
+
+ case CHAR_DOT:
+ *parsed_pattern++ = META_DOT;
+ okquantifier = TRUE;
+ break;
+
- /* If we skipped any characters, restart the loop. Otherwise, we didn't see
- a comment. */
+ /* ---- Single-character quantifiers ---- */
- if (ptr > wscptr)
+ case CHAR_ASTERISK:
+ meta_quantifier = META_ASTERISK;
+ goto CHECK_QUANTIFIER;
+
+ case CHAR_PLUS:
+ meta_quantifier = META_PLUS;
+ goto CHECK_QUANTIFIER;
+
+ case CHAR_QUESTION_MARK:
+ meta_quantifier = META_QUERY;
+ goto CHECK_QUANTIFIER;
+
+
+ /* ---- Potential {n,m} quantifier ---- */
+
+ case CHAR_LEFT_CURLY_BRACKET:
+ if (!read_repeat_counts(&ptr, ptrend, &min_repeat, &max_repeat,
+ &errorcode))
{
- ptr--;
- continue;
+ if (errorcode != 0) goto FAILED; /* Error in quantifier. */
+ PARSED_LITERAL(c, parsed_pattern); /* Not a quantifier */
+ break; /* No more quantifier processing */
}
- }
+ meta_quantifier = META_MINMAX;
+ /* Fall through */
- /* Process the next pattern item. */
- switch(c)
- {
- default: /* Most characters are just skipped */
- break;
+ /* ---- Quantifier post-processing ---- */
- /* Skip escapes except for \Q */
+ /* Check that a quantifier is allowed after the previous item. */
- case CHAR_BACKSLASH:
- errorcode = 0;
- escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode, options,
- FALSE, cb);
- if (errorcode != 0) goto FAILED;
- if (escape == ESC_Q) inescq = TRUE;
+ CHECK_QUANTIFIER:
+ if (!prev_okquantifier)
+ {
+ errorcode = ERR9;
+ goto FAILED_BACK;
+ }
+
+ /* Now we can put the quantifier into the parsed pattern vector. At this
+ stage, we have only the basic quantifier. The check for a following + or ?
+ modifier happens at the top of the loop, after any intervening comments
+ have been removed. */
+
+ *parsed_pattern++ = meta_quantifier;
+ if (c == CHAR_LEFT_CURLY_BRACKET)
+ {
+ *parsed_pattern++ = min_repeat;
+ *parsed_pattern++ = max_repeat;
+ }
break;
- /* Skip a character class. The syntax is complicated so we have to
- replicate some of what happens when a class is processed for real. */
+
+ /* ---- Character class ---- */
case CHAR_LEFT_SQUARE_BRACKET:
- if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0 ||
- PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
+ okquantifier = TRUE;
+
+ /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
+ used for "start of word" and "end of word". As these are otherwise illegal
+ sequences, we don't break anything by recognizing them. They are replaced
+ by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are
+ erroneous and are handled by the normal code below. */
+
+ if (ptrend - ptr >= 6 &&
+ (PRIV(strncmp_c8)(ptr, STRING_WEIRD_STARTWORD, 6) == 0 ||
+ PRIV(strncmp_c8)(ptr, STRING_WEIRD_ENDWORD, 6) == 0))
{
+ *parsed_pattern++ = META_ESCAPE + ESC_b;
+
+ if (ptr[2] == CHAR_LESS_THAN_SIGN)
+ {
+ *parsed_pattern++ = META_LOOKAHEAD;
+ }
+ else
+ {
+ *parsed_pattern++ = META_LOOKBEHIND;
+ *has_lookbehind = TRUE;
+
+ /* The offset is used only for the "non-fixed length" error; this won't
+ occur here, so just store zero. */
+
+ PUTOFFSET((PCRE2_SIZE)0, parsed_pattern);
+ }
+
+ if ((options & PCRE2_UCP) == 0)
+ *parsed_pattern++ = META_ESCAPE + ESC_w;
+ else
+ {
+ *parsed_pattern++ = META_ESCAPE + ESC_p;
+ *parsed_pattern++ = PT_WORD << 16;
+ }
+ *parsed_pattern++ = META_KET;
ptr += 6;
break;
}
- /* If the first character is '^', set the negation flag (not actually used
- here, except to recognize only one ^) and skip it. If the first few
- characters (either before or after ^) are \Q\E or \E we skip them too. This
- makes for compatibility with Perl. */
+ /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
+ they are encountered at the top level, so we'll do that too. */
+
+ if (ptr < ptrend && (*ptr == CHAR_COLON || *ptr == CHAR_DOT ||
+ *ptr == CHAR_EQUALS_SIGN) &&
+ check_posix_syntax(ptr, ptrend, &tempptr))
+ {
+ errorcode = (*ptr-- == CHAR_COLON)? ERR12 : ERR13;
+ goto FAILED;
+ }
+
+ /* Process a regular character class. If the first character is '^', set
+ the negation flag. If the first few characters (either before or after ^)
+ are \Q\E or \E or space or tab in extended-more mode, we skip them too.
+ This makes for compatibility with Perl. */
negate_class = FALSE;
- for (;;)
+ while (ptr < ptrend)
{
- c = *(++ptr); /* First character in class */
+ GETCHARINCTEST(c, ptr);
if (c == CHAR_BACKSLASH)
{
- if (ptr[1] == CHAR_E)
- ptr++;
- else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
+ if (ptr < ptrend && *ptr == CHAR_E) ptr++;
+ else if (ptrend - ptr >= 3 &&
+ PRIV(strncmp_c8)(ptr, STR_Q STR_BACKSLASH STR_E, 3) == 0)
ptr += 3;
else
break;
}
+ else if ((options & PCRE2_EXTENDED_MORE) != 0 &&
+ (c == CHAR_SPACE || c == CHAR_HT)) /* Note: just these two */
+ continue;
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
negate_class = TRUE;
else break;
}
+ /* Now the real contents of the class; c has the first "real" character.
+ Empty classes are permitted only if the option is set. */
+
if (c == CHAR_RIGHT_SQUARE_BRACKET &&
(cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)
- break;
+ {
+ *parsed_pattern++ = negate_class? META_CLASS_EMPTY_NOT : META_CLASS_EMPTY;
+ break; /* End of class processing */
+ }
+
+ /* Process a non-empty class. */
+
+ *parsed_pattern++ = negate_class? META_CLASS_NOT : META_CLASS;
+ class_range_state = RANGE_NO;
+
+ /* In an EBCDIC environment, Perl treats alphabetic ranges specially
+ because there are holes in the encoding, and simply using the range A-Z
+ (for example) would include the characters in the holes. This applies only
+ to ranges where both values are literal; [\xC1-\xE9] is different to [A-Z]
+ in this respect. In order to accommodate this, we keep track of whether
+ character values are literal or not, and a state variable for handling
+ ranges. */
/* Loop for the contents of the class */
for (;;)
{
- PCRE2_SPTR tempptr;
-
- if (c == CHAR_NULL && ptr >= cb->end_pattern)
- {
- errorcode = ERR6; /* Missing terminating ']' */
- goto FAILED;
- }
-
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(c))
- { /* Braces are required because the */
- GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
- }
-#endif
+ BOOL char_is_literal = TRUE;
/* Inside \Q...\E everything is literal except \E */
if (inescq)
{
- if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */
+ if (c == CHAR_BACKSLASH && ptr < ptrend && *ptr == CHAR_E)
{
inescq = FALSE; /* Reset literal state */
ptr++; /* Skip the 'E' */
+ goto CLASS_CONTINUE;
}
- goto CONTINUE_CLASS;
+ goto CLASS_LITERAL;
}
- /* Skip POSIX class names. */
+ /* Skip over space and tab (only) in extended-more mode. */
+
+ if ((options & PCRE2_EXTENDED_MORE) != 0 &&
+ (c == CHAR_SPACE || c == CHAR_HT))
+ goto CLASS_CONTINUE;
+
+ /* Handle POSIX class names. Perl allows a negation extension of the
+ form [:^name:]. A square bracket that doesn't match the syntax is
+ treated as a literal. We also recognize the POSIX constructions
+ [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
+ 5.6 and 5.8 do. */
+
if (c == CHAR_LEFT_SQUARE_BRACKET &&
- (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
- ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
+ ptrend - ptr >= 3 &&
+ (*ptr == CHAR_COLON || *ptr == CHAR_DOT ||
+ *ptr == CHAR_EQUALS_SIGN) &&
+ check_posix_syntax(ptr, ptrend, &tempptr))
{
- ptr = tempptr + 1;
+ BOOL posix_negate = FALSE;
+ int posix_class;
+
+ /* Perl treats a hyphen before a POSIX class as a literal, not the
+ start of a range. However, it gives a warning in its warning mode. PCRE
+ does not have a warning mode, so we give an error, because this is
+ likely an error on the user's part. */
+
+ if (class_range_state == RANGE_STARTED)
+ {
+ errorcode = ERR50;
+ goto FAILED;
+ }
+
+ if (*ptr != CHAR_COLON)
+ {
+ errorcode = ERR13;
+ goto FAILED_BACK;
+ }
+
+ if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT)
+ {
+ posix_negate = TRUE;
+ ptr++;
+ }
+
+ posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
+ if (posix_class < 0)
+ {
+ errorcode = ERR30;
+ goto FAILED;
+ }
+ ptr = tempptr + 2;
+
+ /* Perl treats a hyphen after a POSIX class as a literal, not the
+ start of a range. However, it gives a warning in its warning mode
+ unless the hyphen is the last character in the class. PCRE does not
+ have a warning mode, so we give an error, because this is likely an
+ error on the user's part. */
+
+ if (ptr < ptrend - 1 && *ptr == CHAR_MINUS &&
+ ptr[1] != CHAR_RIGHT_SQUARE_BRACKET)
+ {
+ errorcode = ERR50;
+ goto FAILED;
+ }
+
+ /* Set "a hyphen is not the start of a range" for the -] case, and also
+ in case the POSIX class is followed by \E or \Q\E (possibly repeated -
+ fuzzers do that kind of thing) and *then* a hyphen. This causes that
+ hyphen to be treated as a literal. I don't think it's worth setting up
+ special apparatus to do otherwise. */
+
+ class_range_state = RANGE_NO;
+
+ /* When PCRE2_UCP is set, some of the POSIX classes are converted to
+ use Unicode properties \p or \P or, in one case, \h or \H. The
+ substitutes table has two values per class, containing the type and
+ value of a \p or \P item. The special cases are specified with a
+ negative type: a non-zero value causes \h or \H to be used, and a zero
+ value falls through to behave like a non-UCP POSIX class. */
+
+#ifdef SUPPORT_UNICODE
+ if ((options & PCRE2_UCP) != 0)
+ {
+ int ptype = posix_substitutes[2*posix_class];
+ int pvalue = posix_substitutes[2*posix_class + 1];
+ if (ptype >= 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p);
+ *parsed_pattern++ = (ptype << 16) | pvalue;
+ goto CLASS_CONTINUE;
+ }
+
+ if (pvalue != 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_H : ESC_h);
+ goto CLASS_CONTINUE;
+ }
+
+ /* Fall through */
+ }
+#endif /* SUPPORT_UNICODE */
+
+ /* Non-UCP POSIX class */
+
+ *parsed_pattern++ = posix_negate? META_POSIX_NEG : META_POSIX;
+ *parsed_pattern++ = posix_class;
+ }
+
+ /* Handle potential start of range */
+
+ else if (c == CHAR_MINUS && class_range_state >= RANGE_OK_ESCAPED)
+ {
+ *parsed_pattern++ = (class_range_state == RANGE_OK_LITERAL)?
+ META_RANGE_LITERAL : META_RANGE_ESCAPED;
+ class_range_state = RANGE_STARTED;
+ }
+
+ /* Handle a literal character */
+
+ else if (c != CHAR_BACKSLASH)
+ {
+ CLASS_LITERAL:
+ if (class_range_state == RANGE_STARTED)
+ {
+ if (c == parsed_pattern[-2]) /* Optimize one-char range */
+ parsed_pattern--;
+ else if (parsed_pattern[-2] > c) /* Check range is in order */
+ {
+ errorcode = ERR8;
+ goto FAILED_BACK;
+ }
+ else
+ {
+ if (!char_is_literal && parsed_pattern[-1] == META_RANGE_LITERAL)
+ parsed_pattern[-1] = META_RANGE_ESCAPED;
+ PARSED_LITERAL(c, parsed_pattern);
+ }
+ class_range_state = RANGE_NO;
+ }
+ else /* Potential start of range */
+ {
+ class_range_state = char_is_literal?
+ RANGE_OK_LITERAL : RANGE_OK_ESCAPED;
+ PARSED_LITERAL(c, parsed_pattern);
+ }
}
- else if (c == CHAR_BACKSLASH)
+
+ /* Handle escapes in a class */
+
+ else
{
- errorcode = 0;
- escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode,
+ tempptr = ptr;
+ escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
options, TRUE, cb);
- if (errorcode != 0) goto FAILED;
- if (escape == ESC_Q) inescq = TRUE;
+
+ if (errorcode != 0)
+ {
+ CLASS_ESCAPE_FAILED:
+ if ((cb->cx->extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
+ goto FAILED;
+ ptr = tempptr;
+ if (ptr >= ptrend) c = CHAR_BACKSLASH; else
+ {
+ GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
+ }
+ escape = 0; /* Treat as literal character */
+ }
+
+ if (escape == 0) /* Escaped character code point is in c */
+ {
+ char_is_literal = FALSE;
+ goto CLASS_LITERAL;
+ }
+
+ /* These three escapes do not alter the class range state. */
+
+ if (escape == ESC_b)
+ {
+ c = CHAR_BS; /* \b is backspace in a class */
+ char_is_literal = FALSE;
+ goto CLASS_LITERAL;
+ }
+
+ else if (escape == ESC_Q)
+ {
+ inescq = TRUE; /* Enter literal mode */
+ goto CLASS_CONTINUE;
+ }
+
+ else if (escape == ESC_E) /* Ignore orphan \E */
+ goto CLASS_CONTINUE;
+
+ /* The second part of a range can be a single-character escape
+ sequence (detected above), but not any of the other escapes. Perl
+ treats a hyphen as a literal in such circumstances. However, in Perl's
+ warning mode, a warning is given, so PCRE now faults it, as it is
+ almost certainly a mistake on the user's part. */
+
+ if (class_range_state == RANGE_STARTED)
+ {
+ errorcode = ERR50;
+ goto CLASS_ESCAPE_FAILED;
+ }
+
+ /* Of the remaining escapes, only those that define characters are
+ allowed in a class. None may start a range. */
+
+ class_range_state = RANGE_NO;
+ switch(escape)
+ {
+ case ESC_N:
+ errorcode = ERR71; /* Not supported in a class */
+ goto CLASS_ESCAPE_FAILED;
+
+ case ESC_H:
+ case ESC_h:
+ case ESC_V:
+ case ESC_v:
+ *parsed_pattern++ = META_ESCAPE + escape;
+ break;
+
+ /* These escapes are converted to Unicode property tests when
+ PCRE2_UCP is set. */
+
+ case ESC_d:
+ case ESC_D:
+ case ESC_s:
+ case ESC_S:
+ case ESC_w:
+ case ESC_W:
+ if ((options & PCRE2_UCP) == 0)
+ {
+ *parsed_pattern++ = META_ESCAPE + escape;
+ }
+ else
+ {
+ *parsed_pattern++ = META_ESCAPE +
+ ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
+ ESC_p : ESC_P);
+ switch(escape)
+ {
+ case ESC_d:
+ case ESC_D:
+ *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
+ break;
+
+ case ESC_s:
+ case ESC_S:
+ *parsed_pattern++ = PT_SPACE << 16;
+ break;
+
+ case ESC_w:
+ case ESC_W:
+ *parsed_pattern++ = PT_WORD << 16;
+ break;
+ }
+ }
+ break;
+
+ /* Explicit Unicode property matching */
+
+ case ESC_P:
+ case ESC_p:
+#ifdef SUPPORT_UNICODE
+ {
+ BOOL negated;
+ uint16_t ptype = 0, pdata = 0;
+ if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcode, cb))
+ goto FAILED;
+ if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
+ *parsed_pattern++ = META_ESCAPE + escape;
+ *parsed_pattern++ = (ptype << 16) | pdata;
+ }
+#else
+ errorcode = ERR45;
+ goto CLASS_ESCAPE_FAILED;
+#endif
+ break; /* End \P and \p */
+
+ default: /* All others are not allowed in a class */
+ errorcode = ERR7;
+ ptr--;
+ goto CLASS_ESCAPE_FAILED;
+ }
+
+ /* Perl gives a warning unless a following hyphen is the last character
+ in the class. PCRE throws an error. */
+
+ if (ptr < ptrend - 1 && *ptr == CHAR_MINUS &&
+ ptr[1] != CHAR_RIGHT_SQUARE_BRACKET)
+ {
+ errorcode = ERR50;
+ goto FAILED;
+ }
}
- CONTINUE_CLASS:
- c = *(++ptr);
+ /* Proceed to next thing in the class. */
+
+ CLASS_CONTINUE:
+ if (ptr >= ptrend)
+ {
+ errorcode = ERR6; /* Missing terminating ']' */
+ goto FAILED;
+ }
+ GETCHARINCTEST(c, ptr);
if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
} /* End of class-processing loop */
- break;
- /* This is the real work of this function - handling parentheses. */
+ if (class_range_state == RANGE_STARTED)
+ {
+ parsed_pattern[-1] = CHAR_MINUS;
+ class_range_state = RANGE_NO;
+ }
+
+ *parsed_pattern++ = META_CLASS_END;
+ break; /* End of character class */
+
+
+ /* ---- Opening parenthesis ---- */
case CHAR_LEFT_PARENTHESIS:
- nest_depth++;
+ if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
- if (ptr[1] != CHAR_QUESTION_MARK)
+ /* If ( is not followed by ? it is either a capture or a special verb. */
+
+ if (*ptr != CHAR_QUESTION_MARK)
{
- if (ptr[1] != CHAR_ASTERISK)
+ const char *vn;
+
+ /* Handle capturing brackets (or non-capturing if auto-capture is turned
+ off). */
+
+ if (*ptr != CHAR_ASTERISK)
{
- if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
+ nest_depth++;
+ if ((options & PCRE2_NO_AUTO_CAPTURE) == 0)
+ {
+ cb->bracount++;
+ *parsed_pattern++ = META_CAPTURE | cb->bracount;
+ }
+ else *parsed_pattern++ = META_NOCAPTURE;
}
- /* (*something) - skip over a name, and then just skip to closing ket
- unless PCRE2_ALT_VERBNAMES is set, in which case we have to process
- escapes in the string after a verb name terminated by a colon. */
- else
+ /* ---- Handle (*VERB) and (*VERB:NAME) ---- */
+
+ /* Do nothing for (*) so it gives a "bad quantifier" error rather than
+ "(*MARK) must have an argument". */
+
+ else if (ptrend - ptr > 1 && ptr[1] != CHAR_RIGHT_PARENTHESIS)
{
- ptr += 2;
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
- if (*ptr == CHAR_COLON && (options & PCRE2_ALT_VERBNAMES) != 0)
+ vn = verbnames;
+ if (!read_name(&ptr, ptrend, 0, &offset, &name, &namelen, &errorcode,
+ cb)) goto FAILED;
+ if (ptr >= ptrend || (*ptr != CHAR_COLON &&
+ *ptr != CHAR_RIGHT_PARENTHESIS))
{
- ptr++;
- if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
+ errorcode = ERR60; /* Malformed */
+ goto FAILED;
+ }
+
+ /* Scan the table of verb names */
+
+ for (i = 0; i < verbcount; i++)
+ {
+ if (namelen == verbs[i].len &&
+ PRIV(strncmp_c8)(name, vn, namelen) == 0)
+ break;
+ vn += verbs[i].len + 1;
+ }
+
+ if (i >= verbcount)
+ {
+ errorcode = ERR60; /* Verb not recognized */
+ goto FAILED;
+ }
+
+ /* An empty argument is treated as no argument. */
+
+ if (*ptr == CHAR_COLON && ptr + 1 < ptrend &&
+ ptr[1] == CHAR_RIGHT_PARENTHESIS)
+ ptr++; /* Advance to the closing parens */
+
+ /* Check for mandatory non-empty argument; this is (*MARK) */
+
+ if (verbs[i].has_arg > 0 && *ptr != CHAR_COLON)
+ {
+ errorcode = ERR66;
+ goto FAILED;
+ }
+
+ /* It appears that Perl allows any characters whatsoever, other than a
+ closing parenthesis, to appear in arguments ("names"), so we no longer
+ insist on letters, digits, and underscores. Perl does not, however, do
+ any interpretation within arguments, and has no means of including a
+ closing parenthesis. PCRE supports escape processing but only when it
+ is requested by an option. We set inverbname TRUE here, and let the
+ main loop take care of this so that escape and \x processing is done by
+ the main code above. */
+
+ if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
+ {
+ if (verbs[i].has_arg < 0) /* Argument is forbidden */
+ {
+ errorcode = ERR59;
goto FAILED;
+ }
+ *parsed_pattern++ = verbs[i].meta +
+ ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+ verblengthptr = parsed_pattern++;
+ verbnamestart = ptr;
+ inverbname = TRUE;
}
- else
+ else /* No verb "name" argument */
{
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
- ptr++;
+ *parsed_pattern++ = verbs[i].meta;
}
- nest_depth--;
- }
- }
+ } /* End of (*VERB) handling */
+ break; /* Done with this parenthesis */
+ } /* End of groups that don't start with (? */
+
+
+ /* ---- Items starting (? ---- */
+
+ /* The type of item is determined by what follows (?. Handle (?| and option
+ changes under "default" because both need a new block on the nest stack.
+ Comments starting with (?# are handled above. Note that there is some
+ ambiguity about the sequence (?- because if a digit follows it's a relative
+ recursion or subroutine call whereas otherwise it's an option unsetting. */
- /* Handle (?...) groups */
+ if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
- else switch(ptr[2])
+ switch(*ptr)
{
default:
- ptr += 2;
- if (ptr[0] == CHAR_R || /* (?R) */
- ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */
- IS_DIGIT(ptr[0]) || /* (?n) */
- (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */
- {
- skiptoket = ptr[0];
- break;
- }
+ if (*ptr == CHAR_MINUS && ptrend - ptr > 1 && IS_DIGIT(ptr[1]))
+ goto RECURSION_BYNUMBER; /* The + case is handled by CHAR_PLUS */
- /* Handle (?| and (?imsxJU: which are the only other valid forms. Both
- need a new block on the nest stack. */
+ /* We now have either (?| or a (possibly empty) option setting,
+ optionally followed by a non-capturing group. */
+ nest_depth++;
if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace);
else if (++top_nest >= end_nests)
{
@@ -3474,8 +3502,10 @@ for (; ptr < cb->end_pattern; ptr++)
}
top_nest->nest_depth = nest_depth;
top_nest->flags = 0;
- if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED;
- if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES;
+ top_nest->options = options & PARSE_TRACKED_OPTIONS;
+
+ /* Start of non-capturing group that resets the capture count for each
+ branch. */
if (*ptr == CHAR_VERTICAL_LINE)
{
@@ -3483,76 +3513,226 @@ for (; ptr < cb->end_pattern; ptr++)
top_nest->max_group = (uint16_t)cb->bracount;
top_nest->flags |= NSF_RESET;
cb->external_flags |= PCRE2_DUPCAPUSED;
- break;
+ *parsed_pattern++ = META_NOCAPTURE;
+ ptr++;
}
- /* Scan options */
+ /* Scan for options imnsxJU to be set or unset. */
- top_nest->reset_group = 0;
- top_nest->max_group = 0;
+ else
+ {
+ top_nest->reset_group = 0;
+ top_nest->max_group = 0;
+ set = unset = 0;
+ optset = &set;
- set = unset = 0;
- optset = &set;
+ while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
+ *ptr != CHAR_COLON)
+ {
+ switch (*ptr++)
+ {
+ case CHAR_MINUS: optset = &unset; break;
- /* Need only track (?x: and (?J: at this stage */
+ case CHAR_J: /* Record that it changed in the external options */
+ *optset |= PCRE2_DUPNAMES;
+ cb->external_flags |= PCRE2_JCHANGED;
+ break;
- while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
- {
- switch (*ptr++)
- {
- case CHAR_MINUS: optset = &unset; break;
+ case CHAR_i: *optset |= PCRE2_CASELESS; break;
+ case CHAR_m: *optset |= PCRE2_MULTILINE; break;
+ case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
+ case CHAR_s: *optset |= PCRE2_DOTALL; break;
+ case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
- case CHAR_x: *optset |= PCRE2_EXTENDED; break;
+ /* If x appears twice it sets the extended extended option. */
- case CHAR_J:
- *optset |= PCRE2_DUPNAMES;
- cb->external_flags |= PCRE2_JCHANGED;
- break;
+ case CHAR_x:
+ *optset |= PCRE2_EXTENDED;
+ if (ptr < ptrend && *ptr == CHAR_x)
+ {
+ *optset |= PCRE2_EXTENDED_MORE;
+ ptr++;
+ }
+ break;
- case CHAR_i:
- case CHAR_m:
- case CHAR_s:
- case CHAR_U:
- break;
+ default:
+ errorcode = ERR11;
+ ptr--; /* Correct the offset */
+ goto FAILED;
+ }
+ }
- default:
- errorcode = ERR11;
- ptr--; /* Correct the offset */
- goto FAILED;
+ /* If we are setting extended without extended-more, ensure that any
+ existing extended-more gets unset. Also, unsetting extended must also
+ unset extended-more. */
+
+ if ((set & (PCRE2_EXTENDED|PCRE2_EXTENDED_MORE)) == PCRE2_EXTENDED ||
+ (unset & PCRE2_EXTENDED) != 0)
+ unset |= PCRE2_EXTENDED_MORE;
+
+ options = (options | set) & (~unset);
+
+ /* If the options ended with ')' this is not the start of a nested
+ group with option changes, so the options change at this level.
+ In this case, if the previous level set up a nest block, discard the
+ one we have just created. Otherwise adjust it for the previous level.
+ If the options ended with ':' we are starting a non-capturing group,
+ possibly with an options setting. */
+
+ if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
+ if (*ptr++ == CHAR_RIGHT_PARENTHESIS)
+ {
+ nest_depth--; /* This is not a nested group after all. */
+ if (top_nest > (nest_save *)(cb->start_workspace) &&
+ (top_nest-1)->nest_depth == nest_depth) top_nest--;
+ else top_nest->nest_depth = nest_depth;
+ }
+ else *parsed_pattern++ = META_NOCAPTURE;
+
+ /* If nothing changed, no need to record. */
+
+ if (set != 0 || unset != 0)
+ {
+ *parsed_pattern++ = META_OPTIONS;
+ *parsed_pattern++ = options;
}
+ } /* End options processing */
+ break; /* End default case after (? */
+
+
+ /* ---- Python syntax support ---- */
+
+ case CHAR_P:
+ if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
+
+ /* (?P<name> is the same as (?<name>, which defines a named group. */
+
+ if (*ptr == CHAR_LESS_THAN_SIGN)
+ {
+ terminator = CHAR_GREATER_THAN_SIGN;
+ goto DEFINE_NAME;
}
- options = (options | set) & (~unset);
+ /* (?P>name) is the same as (?&name), which is a recursion or subroutine
+ call. */
+
+ if (*ptr == CHAR_GREATER_THAN_SIGN) goto RECURSE_BY_NAME;
- /* If the options ended with ')' this is not the start of a nested
- group with option changes, so the options change at this level. If the
- previous level set up a nest block, discard the one we have just created.
- Otherwise adjust it for the previous level. */
+ /* (?P=name) is the same as \k<name>, a back reference by name. Anything
+ else after (?P is an error. */
- if (*ptr == CHAR_RIGHT_PARENTHESIS)
+ if (*ptr != CHAR_EQUALS_SIGN)
{
- nest_depth--;
- if (top_nest > (nest_save *)(cb->start_workspace) &&
- (top_nest-1)->nest_depth == nest_depth) top_nest --;
- else top_nest->nest_depth = nest_depth;
+ errorcode = ERR41;
+ goto FAILED;
}
- break;
+ if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name,
+ &namelen, &errorcode, cb)) goto FAILED;
+ *parsed_pattern++ = META_BACKREF_BYNAME;
+ *parsed_pattern++ = namelen;
+ PUTOFFSET(offset, parsed_pattern);
+ okquantifier = TRUE;
+ break; /* End of (?P processing */
- /* Skip over a numerical or string argument for a callout. */
- case CHAR_C:
- ptr += 2;
- if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
- if (IS_DIGIT(ptr[1]))
+ /* ---- Recursion/subroutine calls by number ---- */
+
+ case CHAR_R:
+ i = 0; /* (?R) == (?R0) */
+ ptr++;
+ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
{
- while (IS_DIGIT(ptr[1])) ptr++;
+ errorcode = ERR58;
+ goto FAILED;
}
+ goto SET_RECURSION;
- /* Handle a string argument */
+ /* An item starting (?- followed by a digit comes here via the "default"
+ case because (?- followed by a non-digit is an options setting. */
- else
+ case CHAR_PLUS:
+ if (ptrend - ptr < 2 || !IS_DIGIT(ptr[1]))
{
- ptr++;
+ errorcode = ERR29; /* Missing number */
+ goto FAILED;
+ }
+ /* Fall through */
+
+ case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
+ case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
+ RECURSION_BYNUMBER:
+ if (!read_number(&ptr, ptrend,
+ (IS_DIGIT(*ptr))? -1:(int)(cb->bracount), /* + and - are relative */
+ MAX_GROUP_NUMBER, ERR61,
+ &i, &errorcode)) goto FAILED;
+ if (i < 0) /* NB (?0) is permitted */
+ {
+ errorcode = ERR15; /* Unknown group */
+ goto FAILED_BACK;
+ }
+ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
+ goto UNCLOSED_PARENTHESIS;
+
+ SET_RECURSION:
+ *parsed_pattern++ = META_RECURSE | (uint32_t)i;
+ offset = (PCRE2_SIZE)(ptr - cb->start_pattern);
+ ptr++;
+ PUTOFFSET(offset, parsed_pattern);
+ okquantifier = TRUE;
+ break; /* End of recursive call by number handling */
+
+
+ /* ---- Recursion/subroutine calls by name ---- */
+
+ case CHAR_AMPERSAND:
+ RECURSE_BY_NAME:
+ if (!read_name(&ptr, ptrend, CHAR_RIGHT_PARENTHESIS, &offset, &name,
+ &namelen, &errorcode, cb)) goto FAILED;
+ *parsed_pattern++ = META_RECURSE_BYNAME;
+ *parsed_pattern++ = namelen;
+ PUTOFFSET(offset, parsed_pattern);
+ okquantifier = TRUE;
+ break;
+
+ /* ---- Callout with numerical or string argument ---- */
+
+ case CHAR_C:
+ if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
+
+ /* If the previous item was a condition starting (?(? an assertion,
+ optionally preceded by a callout, is expected. This is checked later on,
+ during actual compilation. However we need to identify this kind of
+ assertion in this pass because it must not be qualified. The value of
+ expect_cond_assert is set to 2 after (?(? is processed. We decrement it
+ for a callout - still leaving a positive value that identifies the
+ assertion. Multiple callouts or any other items will make it zero or
+ less, which doesn't matter because they will cause an error later. */
+
+ expect_cond_assert = prev_expect_cond_assert - 1;
+
+ /* If previous_callout is not NULL, it means this follows a previous
+ callout. If it was a manual callout, do nothing; this means its "length
+ of next pattern item" field will remain zero. If it was an automatic
+ callout, abolish it. */
+
+ if (previous_callout != NULL && (options & PCRE2_AUTO_CALLOUT) != 0 &&
+ previous_callout == parsed_pattern - 4 &&
+ parsed_pattern[-1] == 255)
+ parsed_pattern = previous_callout;
+
+ /* Save for updating next pattern item length, and skip one item before
+ completing. */
+
+ previous_callout = parsed_pattern;
+ after_manual_callout = 1;
+
+ /* Handle a string argument; specific delimiter is required. */
+
+ if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
+ {
+ PCRE2_SIZE calloutlength;
+ PCRE2_SPTR startptr = ptr;
+
delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
{
@@ -3562,130 +3742,338 @@ for (; ptr < cb->end_pattern; ptr++)
break;
}
}
-
if (delimiter == 0)
{
errorcode = ERR82;
goto FAILED;
}
- start = ptr;
- do
+ *parsed_pattern = META_CALLOUT_STRING;
+ parsed_pattern += 3; /* Skip pattern info */
+
+ for (;;)
{
- if (++ptr >= cb->end_pattern)
+ if (++ptr >= ptrend)
{
errorcode = ERR81;
- ptr = start; /* To give a more useful message */
+ ptr = startptr; /* To give a more useful message */
+ goto FAILED;
+ }
+ if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
+ break;
+ }
+
+ calloutlength = (PCRE2_SIZE)(ptr - startptr);
+ if (calloutlength > UINT32_MAX)
+ {
+ errorcode = ERR72;
+ goto FAILED;
+ }
+ *parsed_pattern++ = (uint32_t)calloutlength;
+ offset = (PCRE2_SIZE)(startptr - cb->start_pattern);
+ PUTOFFSET(offset, parsed_pattern);
+ }
+
+ /* Handle a callout with an optional numerical argument, which must be
+ less than or equal to 255. A missing argument gives 0. */
+
+ else
+ {
+ int n = 0;
+ *parsed_pattern = META_CALLOUT_NUMBER; /* Numerical callout */
+ parsed_pattern += 3; /* Skip pattern info */
+ while (ptr < ptrend && IS_DIGIT(*ptr))
+ {
+ n = n * 10 + *ptr++ - CHAR_0;
+ if (n > 255)
+ {
+ errorcode = ERR38;
goto FAILED;
}
- if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
}
- while (ptr[0] != delimiter);
+ *parsed_pattern++ = n;
}
- /* Check terminating ) */
+ /* Both formats must have a closing parenthesis */
- if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
+ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR39;
- ptr++;
goto FAILED;
}
- break;
+ ptr++;
- /* Conditional group */
+ /* Remember the offset to the next item in the pattern, and set a default
+ length. This should get updated after the next item is read. */
- case CHAR_LEFT_PARENTHESIS:
- if (ptr[3] != CHAR_QUESTION_MARK) /* Not assertion or callout */
- {
- nest_depth++;
- ptr += 2;
- break;
- }
+ previous_callout[1] = ptr - cb->start_pattern;
+ previous_callout[2] = 0;
+ break; /* End callout */
- /* Must be an assertion or a callout */
- switch(ptr[4])
- {
- case CHAR_LESS_THAN_SIGN:
- if (ptr[5] != CHAR_EXCLAMATION_MARK && ptr[5] != CHAR_EQUALS_SIGN)
- goto MISSING_ASSERTION;
- /* Fall through */
-
- case CHAR_C:
- case CHAR_EXCLAMATION_MARK:
- case CHAR_EQUALS_SIGN:
- ptr++;
- break;
+ /* ---- Conditional group ---- */
- default:
- MISSING_ASSERTION:
- ptr += 3; /* To improve error message */
- errorcode = ERR28;
- goto FAILED;
- }
- break;
+ /* A condition can be an assertion, a number (referring to a numbered
+ group's having been set), a name (referring to a named group), or 'R',
+ referring to overall recursion. R<digits> and R&name are also permitted
+ for recursion state tests. Numbers may be preceded by + or - to specify a
+ relative group number.
- case CHAR_COLON:
- case CHAR_GREATER_THAN_SIGN:
- case CHAR_EQUALS_SIGN:
- case CHAR_EXCLAMATION_MARK:
- case CHAR_AMPERSAND:
- case CHAR_PLUS:
- ptr += 2;
- break;
+ There are several syntaxes for testing a named group: (?(name)) is used
+ by Python; Perl 5.10 onwards uses (?(<name>) or (?('name')).
- case CHAR_P:
- if (ptr[3] != CHAR_LESS_THAN_SIGN)
+ There are two unfortunate ambiguities. 'R' can be the recursive thing or
+ the name 'R' (and similarly for 'R' followed by digits). 'DEFINE' can be
+ the Perl DEFINE feature or the Python named test. We look for a name
+ first; if not found, we try the other case.
+
+ For compatibility with auto-callouts, we allow a callout to be specified
+ before a condition that is an assertion. */
+
+ case CHAR_LEFT_PARENTHESIS:
+ if (++ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
+ nest_depth++;
+
+ /* If the next character is ? there must be an assertion next (optionally
+ preceded by a callout). We do not check this here, but instead we set
+ expect_cond_assert to 2. If this is still greater than zero (callouts
+ decrement it) when the next assertion is read, it will be marked as a
+ condition that must not be repeated. A value greater than zero also
+ causes checking that an assertion (possibly with callout) follows. */
+
+ if (*ptr == CHAR_QUESTION_MARK)
{
- ptr += 3;
- break;
+ *parsed_pattern++ = META_COND_ASSERT;
+ ptr--; /* Pull pointer back to the opening parenthesis. */
+ expect_cond_assert = 2;
+ break; /* End of conditional */
}
- ptr++;
- c = CHAR_GREATER_THAN_SIGN; /* Terminator */
- goto DEFINE_NAME;
- case CHAR_LESS_THAN_SIGN:
- if (ptr[3] == CHAR_EQUALS_SIGN || ptr[3] == CHAR_EXCLAMATION_MARK)
+ /* Handle (?([+-]number)... */
+
+ if (read_number(&ptr, ptrend, cb->bracount, MAX_GROUP_NUMBER, ERR61, &i,
+ &errorcode))
{
- ptr += 3;
- break;
+ if (i <= 0)
+ {
+ errorcode = ERR15;
+ goto FAILED;
+ }
+ *parsed_pattern++ = META_COND_NUMBER;
+ offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
+ PUTOFFSET(offset, parsed_pattern);
+ *parsed_pattern++ = i;
}
- c = CHAR_GREATER_THAN_SIGN; /* Terminator */
- goto DEFINE_NAME;
-
- case CHAR_APOSTROPHE:
- c = CHAR_APOSTROPHE; /* Terminator */
+ else if (errorcode != 0) goto FAILED; /* Number too big */
- DEFINE_NAME:
- name = ptr = ptr + 3;
+ /* No number found. Handle the special case (?(VERSION[>]=n.m)... */
- if (*ptr == c) /* Empty name */
+ else if (ptrend - ptr >= 10 &&
+ PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
+ ptr[7] != CHAR_RIGHT_PARENTHESIS)
{
- errorcode = ERR62;
- goto FAILED;
+ uint32_t ge = 0;
+ int major = 0;
+ int minor = 0;
+
+ ptr += 7;
+ if (*ptr == CHAR_GREATER_THAN_SIGN)
+ {
+ ge = 1;
+ ptr++;
+ }
+
+ /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
+ references its argument twice. */
+
+ if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
+ goto BAD_VERSION_CONDITION;
+
+ if (!read_number(&ptr, ptrend, -1, 1000, ERR79, &major, &errorcode))
+ goto FAILED;
+
+ if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
+ if (*ptr == CHAR_DOT)
+ {
+ if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
+ if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
+ goto FAILED;
+ if (minor < 10) minor *= 10;
+ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
+ goto BAD_VERSION_CONDITION;
+ }
+
+ *parsed_pattern++ = META_COND_VERSION;
+ *parsed_pattern++ = ge;
+ *parsed_pattern++ = major;
+ *parsed_pattern++ = minor;
}
- if (IS_DIGIT(*ptr))
+ /* All the remaining cases now require us to read a name. We cannot at
+ this stage distinguish ambiguous cases such as (?(R12) which might be a
+ recursion test by number or a name, because the named groups have not yet
+ all been identified. Those cases are treated as names, but given a
+ different META code. */
+
+ else
{
- errorcode = ERR44; /* Group name must start with non-digit */
- goto FAILED;
- }
+ BOOL was_r_ampersand = FALSE;
+
+ if (*ptr == CHAR_R && ptrend - ptr > 1 && ptr[1] == CHAR_AMPERSAND)
+ {
+ terminator = CHAR_RIGHT_PARENTHESIS;
+ was_r_ampersand = TRUE;
+ ptr++;
+ }
+ else if (*ptr == CHAR_LESS_THAN_SIGN)
+ terminator = CHAR_GREATER_THAN_SIGN;
+ else if (*ptr == CHAR_APOSTROPHE)
+ terminator = CHAR_APOSTROPHE;
+ else
+ {
+ terminator = CHAR_RIGHT_PARENTHESIS;
+ ptr--; /* Point to char before name */
+ }
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
+ &errorcode, cb)) goto FAILED;
+
+ /* Handle (?(R&name) */
- if (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) == 0)
+ if (was_r_ampersand)
+ {
+ *parsed_pattern = META_COND_RNAME;
+ ptr--; /* Back to closing parens */
+ }
+
+ /* Handle (?(name). If the name is "DEFINE" we identify it with a
+ special code. Likewise if the name consists of R followed only by
+ digits. Otherwise, handle it like a quoted name. */
+
+ else if (terminator == CHAR_RIGHT_PARENTHESIS)
+ {
+ if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
+ *parsed_pattern = META_COND_DEFINE;
+ else
+ {
+ for (i = 1; i < (int)namelen; i++)
+ if (!IS_DIGIT(name[i])) break;
+ *parsed_pattern = (*name == CHAR_R && i >= (int)namelen)?
+ META_COND_RNUMBER : META_COND_NAME;
+ }
+ ptr--; /* Back to closing parens */
+ }
+
+ /* Handle (?('name') or (?(<name>) */
+
+ else *parsed_pattern = META_COND_NAME;
+
+ /* All these cases except DEFINE end with the name length and offset;
+ DEFINE just has an offset (for the "too many branches" error). */
+
+ if (*parsed_pattern++ != META_COND_DEFINE) *parsed_pattern++ = namelen;
+ PUTOFFSET(offset, parsed_pattern);
+ } /* End cases that read a name */
+
+ /* Check the closing parenthesis of the condition */
+
+ if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR24;
goto FAILED;
}
+ ptr++;
+ break; /* End of condition processing */
+
+
+ /* ---- Atomic group ---- */
+
+ case CHAR_GREATER_THAN_SIGN:
+ *parsed_pattern++ = META_ATOMIC;
+ nest_depth++;
+ ptr++;
+ break;
+
- /* Advance ptr, set namelen and check its length. */
- READ_NAME(ctype_word, ERR48, errorcode);
+ /* ---- Lookahead assertions ---- */
- if (*ptr != c)
+ case CHAR_EQUALS_SIGN:
+ *parsed_pattern++ = META_LOOKAHEAD;
+ ptr++;
+ goto POST_ASSERTION;
+
+ case CHAR_EXCLAMATION_MARK:
+ *parsed_pattern++ = META_LOOKAHEADNOT;
+ ptr++;
+ goto POST_ASSERTION;
+
+
+ /* ---- Lookbehind assertions ---- */
+
+ /* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
+ start of the name of a capturing group. */
+
+ case CHAR_LESS_THAN_SIGN:
+ if (ptrend - ptr <= 1 ||
+ (ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
{
- errorcode = ERR42;
- goto FAILED;
+ terminator = CHAR_GREATER_THAN_SIGN;
+ goto DEFINE_NAME;
+ }
+ *parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
+ META_LOOKBEHIND : META_LOOKBEHINDNOT;
+ *has_lookbehind = TRUE;
+ offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
+ PUTOFFSET(offset, parsed_pattern);
+ ptr += 2;
+ /* Fall through */
+
+ /* If the previous item was a condition starting (?(? an assertion,
+ optionally preceded by a callout, is expected. This is checked later on,
+ during actual compilation. However we need to identify this kind of
+ assertion in this pass because it must not be qualified. The value of
+ expect_cond_assert is set to 2 after (?(? is processed. We decrement it
+ for a callout - still leaving a positive value that identifies the
+ assertion. Multiple callouts or any other items will make it zero or
+ less, which doesn't matter because they will cause an error later. */
+
+ POST_ASSERTION:
+ nest_depth++;
+ if (prev_expect_cond_assert > 0)
+ {
+ if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace);
+ else if (++top_nest >= end_nests)
+ {
+ errorcode = ERR84;
+ goto FAILED;
+ }
+ top_nest->nest_depth = nest_depth;
+ top_nest->flags = NSF_CONDASSERT;
+ top_nest->options = options & PARSE_TRACKED_OPTIONS;
}
+ break;
+
+
+ /* ---- Define a named group ---- */
+
+ /* A named group may be defined as (?'name') or (?<name>). In the latter
+ case we jump to DEFINE_NAME from the disambiguation of (?< above with the
+ terminator set to '>'. */
+
+ case CHAR_APOSTROPHE:
+ terminator = CHAR_APOSTROPHE; /* Terminator */
+
+ DEFINE_NAME:
+ if (!read_name(&ptr, ptrend, terminator, &offset, &name, &namelen,
+ &errorcode, cb)) goto FAILED;
+
+ /* We have a name for this capturing group. It is also assigned a number,
+ which is its primary means of identification. */
+
+ cb->bracount++;
+ *parsed_pattern++ = META_CAPTURE | cb->bracount;
+ nest_depth++;
+
+ /* Check not too many names */
if (cb->names_found >= MAX_NAME_COUNT)
{
@@ -3693,13 +4081,11 @@ for (; ptr < cb->end_pattern; ptr++)
goto FAILED;
}
+ /* Adjust the entry size to accommodate the longest name found. */
+
if (namelen + IMM2_SIZE + 1 > cb->name_entry_size)
cb->name_entry_size = (uint16_t)(namelen + IMM2_SIZE + 1);
- /* We have a valid name for this capturing group. */
-
- cb->bracount++;
-
/* Scan the list to check for duplicates. For duplicate names, if the
number is the same, break the loop, which causes the name to be
discarded; otherwise, if DUPNAMES is not set, give an error.
@@ -3712,7 +4098,7 @@ for (; ptr < cb->end_pattern; ptr++)
for (i = 0; i < cb->names_found; i++, ng++)
{
if (namelen == ng->length &&
- PRIV(strncmp)(name, ng->name, (size_t)namelen) == 0)
+ PRIV(strncmp)(name, ng->name, (PCRE2_SIZE)namelen) == 0)
{
if (ng->number == cb->bracount) break;
if ((options & PCRE2_DUPNAMES) == 0)
@@ -3766,7 +4152,10 @@ for (; ptr < cb->end_pattern; ptr++)
} /* End of (? switch */
break; /* End of ( handling */
- /* At an alternation, reset the capture count if we are in a (?| group. */
+
+ /* ---- Branch terminators ---- */
+
+ /* Alternation: reset the capture count if we are in a (?| group. */
case CHAR_VERTICAL_LINE:
if (top_nest != NULL && top_nest->nest_depth == nest_depth &&
@@ -3776,48 +4165,617 @@ for (; ptr < cb->end_pattern; ptr++)
top_nest->max_group = (uint16_t)cb->bracount;
cb->bracount = top_nest->reset_group;
}
+ *parsed_pattern++ = META_ALT;
break;
- /* At a right parenthesis, reset the capture count to the maximum if we
- are in a (?| group and/or reset the extended option. */
+ /* End of group; reset the capture count to the maximum if we are in a (?|
+ group and/or reset the options that are tracked during parsing. Disallow
+ quantifier for a condition that is an assertion. */
case CHAR_RIGHT_PARENTHESIS:
+ okquantifier = TRUE;
if (top_nest != NULL && top_nest->nest_depth == nest_depth)
{
+ options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options;
if ((top_nest->flags & NSF_RESET) != 0 &&
top_nest->max_group > cb->bracount)
cb->bracount = top_nest->max_group;
- if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED;
- else options &= ~PCRE2_EXTENDED;
- if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES;
- else options &= ~PCRE2_DUPNAMES;
+ if ((top_nest->flags & NSF_CONDASSERT) != 0)
+ okquantifier = FALSE;
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
if (nest_depth == 0) /* Unmatched closing parenthesis */
{
errorcode = ERR22;
- goto FAILED;
+ goto FAILED_BACK;
}
nest_depth--;
+ *parsed_pattern++ = META_KET;
break;
- }
+ } /* End of switch on pattern character */
+ } /* End of main character scan loop */
+
+/* End of pattern reached. Check for missing ) at the end of a verb name. */
+
+if (inverbname && ptr >= ptrend)
+ {
+ errorcode = ERR60;
+ goto FAILED;
+ }
+
+/* Manage callout for the final item */
+
+PARSED_END:
+parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout,
+ parsed_pattern, cb);
+
+/* Insert trailing items for word and line matching (features provided for the
+benefit of pcre2grep). */
+
+if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
+ {
+ *parsed_pattern++ = META_KET;
+ *parsed_pattern++ = META_DOLLAR;
+ }
+else if ((cb->cx->extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
+ {
+ *parsed_pattern++ = META_KET;
+ *parsed_pattern++ = META_ESCAPE + ESC_b;
}
-if (nest_depth == 0)
+/* Terminate the parsed pattern, then return success if all groups are closed.
+Otherwise we have unclosed parentheses. */
+
+if (parsed_pattern >= parsed_pattern_end)
{
- cb->final_bracount = cb->bracount;
- return 0;
+ errorcode = ERR63; /* Internal error (parsed pattern overflow) */
+ goto FAILED;
}
-/* We give a special error for a missing closing parentheses after (?# because
-it might otherwise be hard to see where the missing character is. */
+*parsed_pattern = META_END;
+if (nest_depth == 0) return 0;
-errorcode = (skiptoket == CHAR_NUMBER_SIGN)? ERR18 : ERR14;
+UNCLOSED_PARENTHESIS:
+errorcode = ERR14;
+
+/* Come here for all failures. */
FAILED:
-*ptrptr = ptr;
+cb->erroroffset = (PCRE2_SIZE)(ptr - cb->start_pattern);
return errorcode;
+
+/* Some errors need to indicate the previous character. */
+
+FAILED_BACK:
+ptr--;
+goto FAILED;
+
+/* This failure happens several times. */
+
+BAD_VERSION_CONDITION:
+errorcode = ERR79;
+goto FAILED;
+}
+
+
+
+/*************************************************
+* Find first significant op code *
+*************************************************/
+
+/* This is called by several functions that scan a compiled expression looking
+for a fixed first character, or an anchoring op code etc. It skips over things
+that do not influence this. For some calls, it makes sense to skip negative
+forward and all backward assertions, and also the \b assertion; for others it
+does not.
+
+Arguments:
+ code pointer to the start of the group
+ skipassert TRUE if certain assertions are to be skipped
+
+Returns: pointer to the first significant opcode
+*/
+
+static const PCRE2_UCHAR*
+first_significant_code(PCRE2_SPTR code, BOOL skipassert)
+{
+for (;;)
+ {
+ switch ((int)*code)
+ {
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ if (!skipassert) return code;
+ do code += GET(code, 1); while (*code == OP_ALT);
+ code += PRIV(OP_lengths)[*code];
+ break;
+
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_WORD_BOUNDARY:
+ if (!skipassert) return code;
+ /* Fall through */
+
+ case OP_CALLOUT:
+ case OP_CREF:
+ case OP_DNCREF:
+ case OP_RREF:
+ case OP_DNRREF:
+ case OP_FALSE:
+ case OP_TRUE:
+ code += PRIV(OP_lengths)[*code];
+ break;
+
+ case OP_CALLOUT_STR:
+ code += GET(code, 1 + 2*LINK_SIZE);
+ break;
+
+ case OP_SKIPZERO:
+ code += 2 + GET(code, 2) + LINK_SIZE;
+ break;
+
+ case OP_COND:
+ case OP_SCOND:
+ if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */
+ code[GET(code, 1)] != OP_KET) /* More than one branch */
+ return code;
+ code += GET(code, 1) + 1 + LINK_SIZE;
+ break;
+
+ default:
+ return code;
+ }
+ }
+/* Control never reaches here */
+}
+
+
+
+#ifdef SUPPORT_UNICODE
+/*************************************************
+* Get othercase range *
+*************************************************/
+
+/* This function is passed the start and end of a class range in UCP mode. It
+searches up the characters, looking for ranges of characters in the "other"
+case. Each call returns the next one, updating the start address. A character
+with multiple other cases is returned on its own with a special return value.
+
+Arguments:
+ cptr points to starting character value; updated
+ d end value
+ ocptr where to put start of othercase range
+ odptr where to put end of othercase range
+
+Yield: -1 when no more
+ 0 when a range is returned
+ >0 the CASESET offset for char with multiple other cases
+ in this case, ocptr contains the original
+*/
+
+static int
+get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr,
+ uint32_t *odptr)
+{
+uint32_t c, othercase, next;
+unsigned int co;
+
+/* Find the first character that has an other case. If it has multiple other
+cases, return its case offset value. */
+
+for (c = *cptr; c <= d; c++)
+ {
+ if ((co = UCD_CASESET(c)) != 0)
+ {
+ *ocptr = c++; /* Character that has the set */
+ *cptr = c; /* Rest of input range */
+ return (int)co;
+ }
+ if ((othercase = UCD_OTHERCASE(c)) != c) break;
+ }
+
+if (c > d) return -1; /* Reached end of range */
+
+/* Found a character that has a single other case. Search for the end of the
+range, which is either the end of the input range, or a character that has zero
+or more than one other cases. */
+
+*ocptr = othercase;
+next = othercase + 1;
+
+for (++c; c <= d; c++)
+ {
+ if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
+ next++;
+ }
+
+*odptr = next - 1; /* End of othercase range */
+*cptr = c; /* Rest of input range */
+return 0;
+}
+#endif /* SUPPORT_UNICODE */
+
+
+
+/*************************************************
+* Add a character or range to a class (internal) *
+*************************************************/
+
+/* This function packages up the logic of adding a character or range of
+characters to a class. The character values in the arguments will be within the
+valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
+called only from within the "add to class" group of functions, some of which
+are recursive and mutually recursive. The external entry point is
+add_to_class().
+
+Arguments:
+ classbits the bit map for characters < 256
+ uchardptr points to the pointer for extra data
+ options the options word
+ cb compile data
+ start start of range character
+ end end of range character
+
+Returns: the number of < 256 characters added
+ the pointer to extra data is updated
+*/
+
+static unsigned int
+add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+ uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
+{
+uint32_t c;
+uint32_t classbits_end = (end <= 0xff ? end : 0xff);
+unsigned int n8 = 0;
+
+/* If caseless matching is required, scan the range and process alternate
+cases. In Unicode, there are 8-bit characters that have alternate cases that
+are greater than 255 and vice-versa. Sometimes we can just extend the original
+range. */
+
+if ((options & PCRE2_CASELESS) != 0)
+ {
+#ifdef SUPPORT_UNICODE
+ if ((options & PCRE2_UTF) != 0)
+ {
+ int rc;
+ uint32_t oc, od;
+
+ options &= ~PCRE2_CASELESS; /* Remove for recursive calls */
+ c = start;
+
+ while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
+ {
+ /* Handle a single character that has more than one other case. */
+
+ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, options, cb,
+ PRIV(ucd_caseless_sets) + rc, oc);
+
+ /* Do nothing if the other case range is within the original range. */
+
+ else if (oc >= cb->class_range_start && od <= cb->class_range_end) continue;
+
+ /* Extend the original range if there is overlap, noting that if oc < c, we
+ can't have od > end because a subrange is always shorter than the basic
+ range. Otherwise, use a recursive call to add the additional range. */
+
+ else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
+ else if (od > end && oc <= end + 1)
+ {
+ end = od; /* Extend upwards */
+ if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
+ }
+ else n8 += add_to_class_internal(classbits, uchardptr, options, cb, oc, od);
+ }
+ }
+ else
+#endif /* SUPPORT_UNICODE */
+
+ /* Not UTF mode */
+
+ for (c = start; c <= classbits_end; c++)
+ {
+ SETBIT(classbits, cb->fcc[c]);
+ n8++;
+ }
+ }
+
+/* Now handle the originally supplied range. Adjust the final value according
+to the bit length - this means that the same lists of (e.g.) horizontal spaces
+can be used in all cases. */
+
+if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
+ end = MAX_NON_UTF_CHAR;
+
+if (start > cb->class_range_start && end < cb->class_range_end) return n8;
+
+/* Use the bitmap for characters < 256. Otherwise use extra data.*/
+
+for (c = start; c <= classbits_end; c++)
+ {
+ /* Regardless of start, c will always be <= 255. */
+ SETBIT(classbits, c);
+ n8++;
+ }
+
+#ifdef SUPPORT_WIDE_CHARS
+if (start <= 0xff) start = 0xff + 1;
+
+if (end >= start)
+ {
+ PCRE2_UCHAR *uchardata = *uchardptr;
+
+#ifdef SUPPORT_UNICODE
+ if ((options & PCRE2_UTF) != 0)
+ {
+ if (start < end)
+ {
+ *uchardata++ = XCL_RANGE;
+ uchardata += PRIV(ord2utf)(start, uchardata);
+ uchardata += PRIV(ord2utf)(end, uchardata);
+ }
+ else if (start == end)
+ {
+ *uchardata++ = XCL_SINGLE;
+ uchardata += PRIV(ord2utf)(start, uchardata);
+ }
+ }
+ else
+#endif /* SUPPORT_UNICODE */
+
+ /* Without UTF support, character values are constrained by the bit length,
+ and can only be > 256 for 16-bit and 32-bit libraries. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ {}
+#else
+ if (start < end)
+ {
+ *uchardata++ = XCL_RANGE;
+ *uchardata++ = start;
+ *uchardata++ = end;
+ }
+ else if (start == end)
+ {
+ *uchardata++ = XCL_SINGLE;
+ *uchardata++ = start;
+ }
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+ *uchardptr = uchardata; /* Updata extra data pointer */
+ }
+#else /* SUPPORT_WIDE_CHARS */
+ (void)uchardptr; /* Avoid compiler warning */
+#endif /* SUPPORT_WIDE_CHARS */
+
+return n8; /* Number of 8-bit characters */
+}
+
+
+
+#ifdef SUPPORT_UNICODE
+/*************************************************
+* Add a list of characters to a class (internal) *
+*************************************************/
+
+/* This function is used for adding a list of case-equivalent characters to a
+class when in UTF mode. This function is called only from within
+add_to_class_internal(), with which it is mutually recursive.
+
+Arguments:
+ classbits the bit map for characters < 256
+ uchardptr points to the pointer for extra data
+ options the options word
+ cb contains pointers to tables etc.
+ p points to row of 32-bit values, terminated by NOTACHAR
+ except character to omit; this is used when adding lists of
+ case-equivalent characters to avoid including the one we
+ already know about
+
+Returns: the number of < 256 characters added
+ the pointer to extra data is updated
+*/
+
+static unsigned int
+add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+ uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
+{
+unsigned int n8 = 0;
+while (p[0] < NOTACHAR)
+ {
+ unsigned int n = 0;
+ if (p[0] != except)
+ {
+ while(p[n+1] == p[0] + n + 1) n++;
+ n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
+ }
+ p += n + 1;
+ }
+return n8;
+}
+#endif
+
+
+
+/*************************************************
+* External entry point for add range to class *
+*************************************************/
+
+/* This function sets the overall range so that the internal functions can try
+to avoid duplication when handling case-independence.
+
+Arguments:
+ classbits the bit map for characters < 256
+ uchardptr points to the pointer for extra data
+ options the options word
+ cb compile data
+ start start of range character
+ end end of range character
+
+Returns: the number of < 256 characters added
+ the pointer to extra data is updated
+*/
+
+static unsigned int
+add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
+ compile_block *cb, uint32_t start, uint32_t end)
+{
+cb->class_range_start = start;
+cb->class_range_end = end;
+return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
+}
+
+
+/*************************************************
+* External entry point for add list to class *
+*************************************************/
+
+/* This function is used for adding a list of horizontal or vertical whitespace
+characters to a class. The list must be in order so that ranges of characters
+can be detected and handled appropriately. This function sets the overall range
+so that the internal functions can try to avoid duplication when handling
+case-independence.
+
+Arguments:
+ classbits the bit map for characters < 256
+ uchardptr points to the pointer for extra data
+ options the options word
+ cb contains pointers to tables etc.
+ p points to row of 32-bit values, terminated by NOTACHAR
+ except character to omit; this is used when adding lists of
+ case-equivalent characters to avoid including the one we
+ already know about
+
+Returns: the number of < 256 characters added
+ the pointer to extra data is updated
+*/
+
+static unsigned int
+add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
+ compile_block *cb, const uint32_t *p, unsigned int except)
+{
+unsigned int n8 = 0;
+while (p[0] < NOTACHAR)
+ {
+ unsigned int n = 0;
+ if (p[0] != except)
+ {
+ while(p[n+1] == p[0] + n + 1) n++;
+ cb->class_range_start = p[0];
+ cb->class_range_end = p[n];
+ n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
+ }
+ p += n + 1;
+ }
+return n8;
+}
+
+
+
+/*************************************************
+* Add characters not in a list to a class *
+*************************************************/
+
+/* This function is used for adding the complement of a list of horizontal or
+vertical whitespace to a class. The list must be in order.
+
+Arguments:
+ classbits the bit map for characters < 256
+ uchardptr points to the pointer for extra data
+ options the options word
+ cb contains pointers to tables etc.
+ p points to row of 32-bit values, terminated by NOTACHAR
+
+Returns: the number of < 256 characters added
+ the pointer to extra data is updated
+*/
+
+static unsigned int
+add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+ uint32_t options, compile_block *cb, const uint32_t *p)
+{
+BOOL utf = (options & PCRE2_UTF) != 0;
+unsigned int n8 = 0;
+if (p[0] > 0)
+ n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1);
+while (p[0] < NOTACHAR)
+ {
+ while (p[1] == p[0] + 1) p++;
+ n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1,
+ (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
+ p++;
+ }
+return n8;
+}
+
+
+
+/*************************************************
+* Find details of duplicate group names *
+*************************************************/
+
+/* This is called from compile_branch() when it needs to know the index and
+count of duplicates in the names table when processing named backreferences,
+either directly, or as conditions.
+
+Arguments:
+ name points to the name
+ length the length of the name
+ indexptr where to put the index
+ countptr where to put the count of duplicates
+ errorcodeptr where to put an error code
+ cb the compile block
+
+Returns: TRUE if OK, FALSE if not, error code set
+*/
+
+static BOOL
+find_dupname_details(PCRE2_SPTR name, uint32_t length, int *indexptr,
+ int *countptr, int *errorcodeptr, compile_block *cb)
+{
+uint32_t i, groupnumber;
+int count;
+PCRE2_UCHAR *slot = cb->name_table;
+
+/* Find the first entry in the table */
+
+for (i = 0; i < cb->names_found; i++)
+ {
+ if (PRIV(strncmp)(name, slot+IMM2_SIZE, length) == 0 &&
+ slot[IMM2_SIZE+length] == 0) break;
+ slot += cb->name_entry_size;
+ }
+
+/* This should not occur, because this function is called only when we know we
+have duplicate names. Give an internal error. */
+
+if (i >= cb->names_found)
+ {
+ *errorcodeptr = ERR53;
+ cb->erroroffset = name - cb->start_pattern;
+ return FALSE;
+ }
+
+/* Record the index and then see how many duplicates there are, updating the
+backref map and maximum back reference as we do. */
+
+*indexptr = i;
+count = 0;
+
+for (;;)
+ {
+ count++;
+ groupnumber = GET2(slot,0);
+ cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
+ if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
+ if (++i >= cb->names_found) break;
+ slot += cb->name_entry_size;
+ if (PRIV(strncmp)(name, slot+IMM2_SIZE, length) != 0 ||
+ (slot+IMM2_SIZE)[length] != 0) break;
+ }
+
+*countptr = count;
+return TRUE;
}
@@ -3826,63 +4784,64 @@ return errorcode;
* Compile one branch *
*************************************************/
-/* Scan the pattern, compiling it into the a vector. If the options are
-changed during the branch, the pointer is used to change the external options
-bits. This function is used during the pre-compile phase when we are trying
-to find out the amount of memory needed, as well as during the real compile
-phase. The value of lengthptr distinguishes the two phases.
+/* Scan the parsed pattern, compiling it into the a vector of PCRE2_UCHAR. If
+the options are changed during the branch, the pointer is used to change the
+external options bits. This function is used during the pre-compile phase when
+we are trying to find out the amount of memory needed, as well as during the
+real compile phase. The value of lengthptr distinguishes the two phases.
Arguments:
optionsptr pointer to the option bits
codeptr points to the pointer to the current code point
- ptrptr points to the current pattern pointer
+ pptrptr points to the current parsed pattern pointer
errorcodeptr points to error code variable
firstcuptr place to put the first required code unit
firstcuflagsptr place to put the first code unit flags, or a negative number
reqcuptr place to put the last required code unit
reqcuflagsptr place to put the last required code unit flags, or a negative number
bcptr points to current branch chain
- cond_depth conditional nesting depth
cb contains pointers to tables etc.
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
-Returns: TRUE on success
- FALSE, with *errorcodeptr set non-zero on error
+Returns: 0 There's been an error, *errorcodeptr is non-zero
+ +1 Success, this branch must match at least one character
+ -1 Success, this branch may match an empty string
*/
-static BOOL
-compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr,
- PCRE2_SPTR *ptrptr, int *errorcodeptr,
- uint32_t *firstcuptr, int32_t *firstcuflagsptr,
- uint32_t *reqcuptr, int32_t *reqcuflagsptr,
- branch_chain *bcptr, int cond_depth,
- compile_block *cb, size_t *lengthptr)
+static int
+compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
+ int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
+ uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
+ compile_block *cb, PCRE2_SIZE *lengthptr)
{
-int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
int bravalue = 0;
+int okreturn = -1;
+int group_return = 0;
+uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */
uint32_t greedy_default, greedy_non_default;
uint32_t repeat_type, op_type;
uint32_t options = *optionsptr; /* May change dynamically */
uint32_t firstcu, reqcu;
-int32_t firstcuflags, reqcuflags;
uint32_t zeroreqcu, zerofirstcu;
+uint32_t escape;
+uint32_t *pptr = *pptrptr;
+uint32_t meta, meta_arg;
+int32_t firstcuflags, reqcuflags;
int32_t zeroreqcuflags, zerofirstcuflags;
int32_t req_caseopt, reqvary, tempreqvary;
-int after_manual_callout = 0;
-int escape;
-size_t length_prevgroup = 0;
-register uint32_t c;
-register PCRE2_UCHAR *code = *codeptr;
+PCRE2_SIZE offset = 0;
+PCRE2_SIZE length_prevgroup = 0;
+PCRE2_UCHAR *code = *codeptr;
PCRE2_UCHAR *last_code = code;
PCRE2_UCHAR *orig_code = code;
PCRE2_UCHAR *tempcode;
-BOOL inescq = FALSE;
-BOOL groupsetfirstcu = FALSE;
-PCRE2_SPTR ptr = *ptrptr;
-PCRE2_SPTR tempptr;
PCRE2_UCHAR *previous = NULL;
-PCRE2_UCHAR *previous_callout = NULL;
+PCRE2_UCHAR op_previous;
+BOOL groupsetfirstcu = FALSE;
+BOOL matched_char = FALSE;
+BOOL previous_matched_char = FALSE;
+const uint8_t *cbits = cb->cbits;
uint8_t classbits[32];
/* We can fish out the UTF setting once and for all into a BOOL, but we must
@@ -3891,10 +4850,6 @@ dynamically as we process the pattern. */
#ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0;
-#if PCRE2_CODE_UNIT_WIDTH != 32
-PCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */
-#endif
-
#else /* No UTF support */
BOOL utf = FALSE;
#endif
@@ -3935,57 +4890,36 @@ to record the case status of the value. This is used only for ASCII characters.
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
-/* Switch on next character until the end of the branch */
+/* Switch on next META item until the end of the branch */
-for (;; ptr++)
+for (;; pptr++)
{
+#ifdef SUPPORT_WIDE_CHARS
+ BOOL xclass_has_prop;
+#endif
BOOL negate_class;
BOOL should_flip_negation;
BOOL match_all_or_no_wide_chars;
BOOL possessive_quantifier;
- BOOL is_quantifier;
- BOOL is_recurse;
- BOOL is_dupname;
- BOOL reset_bracount;
+ BOOL note_group_empty;
int class_has_8bitchar;
- int class_one_char;
-#ifdef SUPPORT_WIDE_CHARS
- BOOL xclass_has_prop;
-#endif
- int recno; /* Must be signed */
- int refsign; /* Must be signed */
- int terminator; /* Must be signed */
- unsigned int mclength;
- unsigned int tempbracount;
- uint32_t ec;
- uint32_t newoptions;
+ int i;
+ uint32_t mclength;
uint32_t skipunits;
uint32_t subreqcu, subfirstcu;
+ uint32_t groupnumber;
+ uint32_t verbarglen, verbculen;
int32_t subreqcuflags, subfirstcuflags; /* Must be signed */
+ open_capitem *oc;
PCRE2_UCHAR mcbuffer[8];
- /* Come here to restart the loop. */
-
- REDO_LOOP:
-
- /* Get next character in the pattern */
+ /* Get next META item in the pattern and its potential argument. */
- c = *ptr;
-
- /* If we are at the end of a nested substitution, revert to the outer level
- string. Nesting only happens one or two levels deep, and the inserted string
- is always zero terminated. */
-
- if (c == CHAR_NULL && cb->nestptr[0] != NULL)
- {
- ptr = cb->nestptr[0];
- cb->nestptr[0] = cb->nestptr[1];
- cb->nestptr[1] = NULL;
- c = *ptr;
- }
+ meta = META_CODE(*pptr);
+ meta_arg = META_DATA(*pptr);
/* If we are in the pre-compile phase, accumulate the length used for the
- previous cycle of this loop. */
+ previous cycle of this loop, unless the next item is a quantifier. */
if (lengthptr != NULL)
{
@@ -3994,202 +4928,81 @@ for (;; ptr++)
{
*errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)?
ERR52 : ERR86;
- goto FAILED;
+ return 0;
}
/* There is at least one situation where code goes backwards: this is the
- case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
- the class is simply eliminated. However, it is created first, so we have to
- allow memory for it. Therefore, don't ever reduce the length at this point.
- */
+ case of a zero quantifier after a class (e.g. [ab]{0}). When the quantifier
+ is processed, the whole class is eliminated. However, it is created first,
+ so we have to allow memory for it. Therefore, don't ever reduce the length
+ at this point. */
if (code < last_code) code = last_code;
- /* Paranoid check for integer overflow */
-
- if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code))
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += (size_t)(code - last_code);
-
- /* If "previous" is set and it is not at the start of the work space, move
- it back to there, in order to avoid filling up the work space. Otherwise,
- if "previous" is NULL, reset the current code pointer to the start. */
+ /* If the next thing is not a quantifier, we add the length of the previous
+ item into the total, and reset the code pointer to the start of the
+ workspace. Otherwise leave the previous item available to be quantified. */
- if (previous != NULL)
- {
- if (previous > orig_code)
- {
- memmove(orig_code, previous, (size_t)CU2BYTES(code - previous));
- code -= previous - orig_code;
- previous = orig_code;
- }
- }
- else code = orig_code;
-
- /* Remember where this code item starts so we can pick up the length
- next time round. */
-
- last_code = code;
- }
-
- /* Before doing anything else we must handle all the special items that do
- nothing, and which may come between an item and its quantifier. Otherwise,
- when auto-callouts are enabled, a callout gets incorrectly inserted before
- the quantifier is recognized. After recognizing a "do nothing" item, restart
- the loop in case another one follows. */
-
- /* If c is not NULL we are not at the end of the pattern. If it is NULL, we
- may still be in the pattern with a NULL data item. In these cases, if we are
- in \Q...\E, check for the \E that ends the literal string; if not, we have a
- literal character. If not in \Q...\E, an isolated \E is ignored. */
-
- if (c != CHAR_NULL || ptr < cb->end_pattern)
- {
- if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
- {
- inescq = FALSE;
- ptr++;
- continue;
- }
- else if (inescq) /* Literal character */
+ if (meta < META_ASTERISK || meta > META_MINMAX_QUERY)
{
- if (previous_callout != NULL)
+ if (OFLOW_MAX - *lengthptr < (PCRE2_SIZE)(code - orig_code))
{
- if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
- complete_callout(previous_callout, ptr, cb);
- previous_callout = NULL;
+ *errorcodeptr = ERR20; /* Integer overflow */
+ return 0;
}
- if ((options & PCRE2_AUTO_CALLOUT) != 0)
+ *lengthptr += (PCRE2_SIZE)(code - orig_code);
+ if (*lengthptr > MAX_PATTERN_SIZE)
{
- previous_callout = code;
- code = auto_callout(code, ptr, cb);
+ *errorcodeptr = ERR20; /* Pattern is too large */
+ return 0;
}
- goto NORMAL_CHAR;
+ code = orig_code;
}
- /* Check for the start of a \Q...\E sequence. We must do this here rather
- than later in case it is immediately followed by \E, which turns it into a
- "do nothing" sequence. */
+ /* Remember where this code item starts so we can catch the "backwards"
+ case above next time round. */
- if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
- {
- inescq = TRUE;
- ptr++;
- continue;
- }
- }
-
- /* In extended mode, skip white space and #-comments that end at newline. */
-
- if ((options & PCRE2_EXTENDED) != 0)
- {
- PCRE2_SPTR wscptr = ptr;
- while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
- if (c == CHAR_NUMBER_SIGN)
- {
- ptr++;
- while (ptr < cb->end_pattern)
- {
- if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
- { /* IS_NEWLINE sets cb->nllen. */
- ptr += cb->nllen;
- break;
- }
- ptr++;
-#ifdef SUPPORT_UNICODE
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
- }
-
- /* If we skipped any characters, restart the loop. Otherwise, we didn't see
- a comment. */
-
- if (ptr > wscptr) goto REDO_LOOP;
- }
-
- /* Skip over (?# comments. */
-
- if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
- ptr[2] == CHAR_NUMBER_SIGN)
- {
- ptr += 3;
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR18;
- goto FAILED;
- }
- continue;
+ last_code = code;
}
- /* End of processing "do nothing" items. See if the next thing is a
- quantifier. */
-
- is_quantifier =
- c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
- (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
+ /* Process the next parsed pattern item. If it is not a quantifier, remember
+ where it starts so that it can be quantified when a quantifier follows.
+ Checking for the legality of quantifiers happens in parse_regex(), except for
+ a quantifier after an assertion that is a condition. */
- /* Fill in length of a previous callout and create an auto callout if
- required, except when the next thing is a quantifier or when processing a
- property substitution string for \w etc in UCP mode. */
-
- if (!is_quantifier && cb->nestptr[0] == NULL)
+ if (meta < META_ASTERISK || meta > META_MINMAX_QUERY)
{
- if (previous_callout != NULL && after_manual_callout-- <= 0)
- {
- if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
- complete_callout(previous_callout, ptr, cb);
- previous_callout = NULL;
- }
-
- if ((options & PCRE2_AUTO_CALLOUT) != 0)
- {
- previous_callout = code;
- code = auto_callout(code, ptr, cb);
- }
+ previous = code;
+ if (matched_char) okreturn = 1;
}
- /* Process the next pattern item. */
+ previous_matched_char = matched_char;
+ matched_char = FALSE;
+ note_group_empty = FALSE;
+ skipunits = 0; /* Default value for most subgroups */
- switch(c)
+ switch(meta)
{
/* ===================================================================*/
- /* The branch terminates at string end or | or ) */
-
- case CHAR_NULL:
- if (ptr < cb->end_pattern) goto NORMAL_CHAR; /* Zero data character */
- /* Fall through */
+ /* The branch terminates at pattern end or | or ) */
- case CHAR_VERTICAL_LINE:
- case CHAR_RIGHT_PARENTHESIS:
+ case META_END:
+ case META_ALT:
+ case META_KET:
*firstcuptr = firstcu;
*firstcuflagsptr = firstcuflags;
*reqcuptr = reqcu;
*reqcuflagsptr = reqcuflags;
*codeptr = code;
- *ptrptr = ptr;
- if (lengthptr != NULL)
- {
- if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code))
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += (size_t)(code - last_code); /* To include callout length */
- }
- return TRUE;
+ *pptrptr = pptr;
+ return okreturn;
/* ===================================================================*/
/* Handle single-character metacharacters. In multiline mode, ^ disables
the setting of any following char as a first character. */
- case CHAR_CIRCUMFLEX_ACCENT:
- previous = NULL;
+ case META_CIRCUMFLEX:
if ((options & PCRE2_MULTILINE) != 0)
{
if (firstcuflags == REQ_UNSET)
@@ -4199,117 +5012,112 @@ for (;; ptr++)
else *code++ = OP_CIRC;
break;
- case CHAR_DOLLAR_SIGN:
- previous = NULL;
+ case META_DOLLAR:
*code++ = ((options & PCRE2_MULTILINE) != 0)? OP_DOLLM : OP_DOLL;
break;
/* There can never be a first char if '.' is first, whatever happens about
repeats. The value of reqcu doesn't change either. */
- case CHAR_DOT:
+ case META_DOT:
+ matched_char = TRUE;
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
zerofirstcu = firstcu;
zerofirstcuflags = firstcuflags;
zeroreqcu = reqcu;
zeroreqcuflags = reqcuflags;
- previous = code;
*code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY;
break;
/* ===================================================================*/
- /* Character classes. If the included characters are all < 256, we build a
- 32-byte bitmap of the permitted characters, except in the special case
- where there is only one such character. For negated classes, we build the
- map as usual, then invert it at the end. However, we use a different opcode
- so that data characters > 255 can be handled correctly.
+ /* Empty character classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set.
+ Otherwise, an initial ']' is taken as a data character. When empty classes
+ are allowed, [] must always fail, so generate OP_FAIL, whereas [^] must
+ match any character, so generate OP_ALLANY. */
+
+ case META_CLASS_EMPTY:
+ case META_CLASS_EMPTY_NOT:
+ matched_char = TRUE;
+ *code++ = (meta == META_CLASS_EMPTY_NOT)? OP_ALLANY : OP_FAIL;
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ zerofirstcu = firstcu;
+ zerofirstcuflags = firstcuflags;
+ break;
+
+
+ /* ===================================================================*/
+ /* Non-empty character class. If the included characters are all < 256, we
+ build a 32-byte bitmap of the permitted characters, except in the special
+ case where there is only one such character. For negated classes, we build
+ the map as usual, then invert it at the end. However, we use a different
+ opcode so that data characters > 255 can be handled correctly.
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
- but those above are are explicitly listed afterwards. A flag byte tells
- whether the bitmap is present, and whether this is a negated class or not.
+ but those above are are explicitly listed afterwards. A flag code unit
+ tells whether the bitmap is present, and whether this is a negated class or
+ not. */
- An isolated ']' character is not treated specially, so is just another data
- character. In earlier versions of PCRE that used the original API there was
- a "JavaScript compatibility mode" in which it gave an error. However,
- JavaScript itself has changed in this respect so there is no longer any
- need for this special handling.
+ case META_CLASS_NOT:
+ case META_CLASS:
+ matched_char = TRUE;
+ negate_class = meta == META_CLASS_NOT;
- In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
- used for "start of word" and "end of word". As these are otherwise illegal
- sequences, we don't break anything by recognizing them. They are replaced
- by \b(?=\w) and \b(?<=\w) respectively. This can only happen at the top
- nesting level, as no other inserted sequences will contains these oddities.
- Sequences like [a[:<:]] are erroneous and are handled by the normal code
- below. */
+ /* We can optimize the case of a single character in a class by generating
+ OP_CHAR or OP_CHARI if it's positive, or OP_NOT or OP_NOTI if it's
+ negative. In the negative case there can be no first char if this item is
+ first, whatever repeat count may follow. In the case of reqcu, save the
+ previous value for reinstating. */
- case CHAR_LEFT_SQUARE_BRACKET:
- if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
- {
- cb->nestptr[0] = ptr + 7;
- ptr = sub_start_of_word;
- goto REDO_LOOP;
- }
+ /* NOTE: at present this optimization is not effective if the only
+ character in a class in 32-bit, non-UCP mode has its top bit set. */
- if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
+ if (pptr[1] < META_END && pptr[2] == META_CLASS_END)
{
- cb->nestptr[0] = ptr + 7;
- ptr = sub_end_of_word;
- goto REDO_LOOP;
- }
+#ifdef SUPPORT_UNICODE
+ uint32_t d;
+#endif
+ uint32_t c = pptr[1];
- /* Handle a real character class. */
+ pptr += 2; /* Move on to class end */
+ if (meta == META_CLASS) /* A positive one-char class can be */
+ { /* handled as a normal literal character. */
+ meta = c; /* Set up the character */
+ goto NORMAL_CHAR_SET;
+ }
- previous = code;
+ /* Handle a negative one-character class */
- /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
- they are encountered at the top level, so we'll do that too. */
-
- if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
- ptr[1] == CHAR_EQUALS_SIGN) &&
- check_posix_syntax(ptr, &tempptr))
- {
- *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR12 : ERR13;
- goto FAILED;
- }
+ zeroreqcu = reqcu;
+ zeroreqcuflags = reqcuflags;
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ zerofirstcu = firstcu;
+ zerofirstcuflags = firstcuflags;
- /* If the first character is '^', set the negation flag and skip it. Also,
- if the first few characters (either before or after ^) are \Q\E or \E we
- skip them too. This makes for compatibility with Perl. */
+ /* For caseless UTF mode, check whether this character has more than
+ one other case. If so, generate a special OP_NOTPROP item instead of
+ OP_NOTI. */
- negate_class = FALSE;
- for (;;)
- {
- c = *(++ptr);
- if (c == CHAR_BACKSLASH)
+#ifdef SUPPORT_UNICODE
+ if (utf && (options & PCRE2_CASELESS) != 0 &&
+ (d = UCD_CASESET(c)) != 0)
{
- if (ptr[1] == CHAR_E)
- ptr++;
- else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
- ptr += 3;
- else
- break;
+ *code++ = OP_NOTPROP;
+ *code++ = PT_CLIST;
+ *code++ = d;
+ break; /* We are finished with this class */
}
- else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
- negate_class = TRUE;
- else break;
- }
+#endif
+ /* Char has only one other case, or UCP not available */
- /* Empty classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise,
- an initial ']' is taken as a data character -- the code below handles
- that. When empty classes are allowed, [] must always fail, so generate
- OP_FAIL, whereas [^] must match any character, so generate OP_ALLANY. */
+ *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT;
+ code += PUTCHAR(c, code);
+ break; /* We are finished with this class */
+ } /* End of 1-char optimization */
- if (c == CHAR_RIGHT_SQUARE_BRACKET &&
- (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)
- {
- *code++ = negate_class? OP_ALLANY : OP_FAIL;
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- zerofirstcu = firstcu;
- zerofirstcuflags = firstcuflags;
- break;
- }
+ /* Handle character classes that contain more than just one literal
+ character. */
/* If a non-extended class contains a negative special such as \S, we need
to flip the negation flag at the end, so that support for characters > 255
@@ -4329,13 +5137,11 @@ for (;; ptr++)
#endif
/* For optimization purposes, we track some properties of the class:
- class_has_8bitchar will be non-zero if the class contains at least one 256
- character with a code point less than 256; class_one_char will be 1 if the
- class contains just one character; xclass_has_prop will be TRUE if Unicode
- property checks are present in the class. */
+ class_has_8bitchar will be non-zero if the class contains at least one
+ character with a code point less than 256; xclass_has_prop will be TRUE if
+ Unicode property checks are present in the class. */
class_has_8bitchar = 0;
- class_one_char = 0;
#ifdef SUPPORT_WIDE_CHARS
xclass_has_prop = FALSE;
#endif
@@ -4347,156 +5153,77 @@ for (;; ptr++)
memset(classbits, 0, 32 * sizeof(uint8_t));
- /* Process characters until ] is reached. As the test is at the end of the
- loop, an initial ] is taken as a data character. At the start of the loop,
- c contains the first code unit of the character. If it is zero, check for
- the end of the pattern, to allow binary zero as data. */
+ /* Process items until META_CLASS_END is reached. */
- for(;;)
+ while ((meta = *(++pptr)) != META_CLASS_END)
{
- PCRE2_SPTR oldptr;
-#ifdef EBCDIC
- BOOL range_is_literal = TRUE;
-#endif
-
- if (c == CHAR_NULL && ptr >= cb->end_pattern)
- {
- *errorcodeptr = ERR6; /* Missing terminating ']' */
- goto FAILED;
- }
-
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(c))
- { /* Braces are required because the */
- GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
- }
-#endif
+ /* Handle POSIX classes such as [:alpha:] etc. */
- /* Inside \Q...\E everything is literal except \E */
-
- if (inescq)
+ if (meta == META_POSIX || meta == META_POSIX_NEG)
{
- if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */
- {
- inescq = FALSE; /* Reset literal state */
- ptr++; /* Skip the 'E' */
- goto CONTINUE_CLASS; /* Carry on with next char */
- }
- goto CHECK_RANGE; /* Could be range if \E follows */
- }
-
- /* Handle POSIX class names. Perl allows a negation extension of the
- form [:^name:]. A square bracket that doesn't match the syntax is
- treated as a literal. We also recognize the POSIX constructions
- [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
- 5.6 and 5.8 do. */
-
- if (c == CHAR_LEFT_SQUARE_BRACKET &&
- (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
- ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
- {
- BOOL local_negate = FALSE;
- int posix_class, taboffset, tabopt;
- register const uint8_t *cbits = cb->cbits;
+ BOOL local_negate = (meta == META_POSIX_NEG);
+ int posix_class = *(++pptr);
+ int taboffset, tabopt;
uint8_t pbits[32];
- if (ptr[1] != CHAR_COLON)
- {
- *errorcodeptr = ERR13;
- goto FAILED;
- }
+ should_flip_negation = local_negate; /* Note negative special */
- ptr += 2;
- if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
- {
- local_negate = TRUE;
- should_flip_negation = TRUE; /* Note negative special */
- ptr++;
- }
-
- posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
- if (posix_class < 0)
- {
- *errorcodeptr = ERR30;
- goto FAILED;
- }
-
- /* If matching is caseless, upper and lower are converted to
- alpha. This relies on the fact that the class table starts with
- alpha, lower, upper as the first 3 entries. */
+ /* If matching is caseless, upper and lower are converted to alpha.
+ This relies on the fact that the class table starts with alpha,
+ lower, upper as the first 3 entries. */
if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
posix_class = 0;
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
- different escape sequences that use Unicode properties \p or \P. Others
- that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
- directly. UCP support is not available unless UTF support is.*/
+ different escape sequences that use Unicode properties \p or \P.
+ Others that are not available via \p or \P have to generate
+ XCL_PROP/XCL_NOTPROP directly, which is done here. */
#ifdef SUPPORT_UNICODE
- if ((options & PCRE2_UCP) != 0)
+ if ((options & PCRE2_UCP) != 0) switch(posix_class)
{
- unsigned int ptype = 0;
- int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
-
- /* The posix_substitutes table specifies which POSIX classes can be
- converted to \p or \P items. This can only happen at top nestling
- level, as there will never be a POSIX class in a string that is
- substituted for something else. */
-
- if (posix_substitutes[pc] != NULL)
- {
- cb->nestptr[0] = tempptr + 1;
- ptr = posix_substitutes[pc] - 1;
- goto CONTINUE_CLASS;
- }
-
- /* There are three other classes that generate special property calls
- that are recognized only in an XCLASS. */
+ case PC_GRAPH:
+ case PC_PRINT:
+ case PC_PUNCT:
+ *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+ *class_uchardata++ = (PCRE2_UCHAR)
+ ((posix_class == PC_GRAPH)? PT_PXGRAPH :
+ (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
+ *class_uchardata++ = 0;
+ xclass_has_prop = TRUE;
+ goto CONTINUE_CLASS;
- else switch(posix_class)
- {
- case PC_GRAPH:
- ptype = PT_PXGRAPH;
- /* Fall through */
- case PC_PRINT:
- if (ptype == 0) ptype = PT_PXPRINT;
- /* Fall through */
- case PC_PUNCT:
- if (ptype == 0) ptype = PT_PXPUNCT;
- *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
- *class_uchardata++ = (PCRE2_UCHAR)ptype;
- *class_uchardata++ = 0;
- xclass_has_prop = TRUE;
- ptr = tempptr + 1;
- goto CONTINUE_CLASS;
-
- /* For the other POSIX classes (ascii, xdigit) we are going to fall
- through to the non-UCP case and build a bit map for characters with
- code points less than 256. However, if we are in a negated POSIX
- class, characters with code points greater than 255 must either all
- match or all not match, depending on whether the whole class is not
- or is negated. For example, for [[:^ascii:]... they must all match,
- whereas for [^[:^xdigit:]... they must not.
-
- In the special case where there are no xclass items, this is
- automatically handled by the use of OP_CLASS or OP_NCLASS, but an
- explicit range is needed for OP_XCLASS. Setting a flag here causes
- the range to be generated later when it is known that OP_XCLASS is
- required. */
+ /* For the other POSIX classes (ascii, xdigit) we are going to
+ fall through to the non-UCP case and build a bit map for
+ characters with code points less than 256. However, if we are in
+ a negated POSIX class, characters with code points greater than
+ 255 must either all match or all not match, depending on whether
+ the whole class is not or is negated. For example, for
+ [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]...
+ they must not.
+
+ In the special case where there are no xclass items, this is
+ automatically handled by the use of OP_CLASS or OP_NCLASS, but an
+ explicit range is needed for OP_XCLASS. Setting a flag here
+ causes the range to be generated later when it is known that
+ OP_XCLASS is required. In the 8-bit library this is relevant only in
+ utf mode, since no wide characters can exist otherwise. */
- default:
- match_all_or_no_wide_chars |= local_negate;
- break;
- }
+ default:
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (utf)
+#endif
+ match_all_or_no_wide_chars |= local_negate;
+ break;
}
#endif /* SUPPORT_UNICODE */
/* In the non-UCP case, or when UCP makes no difference, we build the
- bit map for the POSIX class in a chunk of local store because we may be
- adding and subtracting from it, and we don't want to subtract bits that
- may be in the main map already. At the end we or the result into the
- bit map that is being built. */
+ bit map for the POSIX class in a chunk of local store because we may
+ be adding and subtracting from it, and we don't want to subtract bits
+ that may be in the main map already. At the end we or the result into
+ the bit map that is being built. */
posix_class *= 3;
@@ -4513,9 +5240,9 @@ for (;; ptr++)
if (taboffset >= 0)
{
if (tabopt >= 0)
- for (c = 0; c < 32; c++) pbits[c] |= cbits[(int)c + taboffset];
+ for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
else
- for (c = 0; c < 32; c++) pbits[c] &= ~cbits[(int)c + taboffset];
+ for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
}
/* Now see if we need to remove any special characters. An option
@@ -4529,432 +5256,223 @@ for (;; ptr++)
being built and we are done. */
if (local_negate)
- for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
+ for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
else
- for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
+ for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
- ptr = tempptr + 1;
/* Every class contains at least one < 256 character. */
+
class_has_8bitchar = 1;
- /* Every class contains at least two characters. */
- class_one_char = 2;
- goto CONTINUE_CLASS; /* End of POSIX syntax handling */
+ goto CONTINUE_CLASS; /* End of POSIX handling */
}
- /* Backslash may introduce a single character, or it may introduce one
- of the specials, which just set a flag. The sequence \b is a special
- case. Inside a class (and only there) it is treated as backspace. We
- assume that other escapes have more than one character in them, so
- speculatively set both class_has_8bitchar and class_one_char bigger
- than one. Unrecognized escapes fall through and are faulted. */
+ /* Other than POSIX classes, the only items we should encounter are
+ \d-type escapes and literal characters (possibly as ranges). */
- if (c == CHAR_BACKSLASH)
+ if (meta == META_BIGVALUE)
{
- escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
- options, TRUE, cb);
- if (*errorcodeptr != 0) goto FAILED;
- if (escape == 0) /* Escaped single char */
+ meta = *(++pptr);
+ goto CLASS_LITERAL;
+ }
+
+ /* Any other non-literal must be an escape */
+
+ if (meta >= META_END)
+ {
+ if (META_CODE(meta) != META_ESCAPE)
{
- c = ec;
-#ifdef EBCDIC
- range_is_literal = FALSE;
+#ifdef DEBUG_SHOW_PARSED
+ fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x "
+ "in character class\n", meta);
#endif
+ *errorcodeptr = ERR89; /* Internal error - unrecognized. */
+ return 0;
}
- else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
- else if (escape == ESC_N) /* \N is not supported in a class */
- {
- *errorcodeptr = ERR71;
- goto FAILED;
- }
- else if (escape == ESC_Q) /* Handle start of quoted string */
- {
- if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- {
- ptr += 2; /* avoid empty string */
- }
- else inescq = TRUE;
- goto CONTINUE_CLASS;
- }
- else if (escape == ESC_E) goto CONTINUE_CLASS; /* Ignore orphan \E */
+ escape = META_DATA(meta);
+
+ /* Every class contains at least one < 256 character. */
- else /* Handle \d-type escapes */
+ class_has_8bitchar++;
+
+ switch(escape)
{
- register const uint8_t *cbits = cb->cbits;
- /* Every class contains at least two < 256 characters. */
- class_has_8bitchar++;
- /* Every class contains at least two characters. */
- class_one_char += 2;
+ case ESC_d:
+ for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
+ break;
- switch (escape)
- {
-#ifdef SUPPORT_UNICODE
- case ESC_du: /* These are the values given for \d etc */
- case ESC_DU: /* when PCRE2_UCP is set. We replace the */
- case ESC_wu: /* escape sequence with an appropriate \p */
- case ESC_WU: /* or \P to test Unicode properties instead */
- case ESC_su: /* of the default ASCII testing. This might be */
- case ESC_SU: /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */
- cb->nestptr[1] = cb->nestptr[0];
- cb->nestptr[0] = ptr;
- ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */
- class_has_8bitchar--; /* Undo! */
- break;
-#endif
- case ESC_d:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
- break;
+ case ESC_D:
+ should_flip_negation = TRUE;
+ for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
+ break;
- case ESC_D:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
- break;
+ case ESC_w:
+ for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
+ break;
- case ESC_w:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
- break;
+ case ESC_W:
+ should_flip_negation = TRUE;
+ for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
+ break;
- case ESC_W:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
- break;
+ /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
+ 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
+ previously set by something earlier in the character class.
+ Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
+ we could just adjust the appropriate bit. From PCRE 8.34 we no
+ longer treat \s and \S specially. */
- /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
- 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
- previously set by something earlier in the character class.
- Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
- we could just adjust the appropriate bit. From PCRE 8.34 we no
- longer treat \s and \S specially. */
+ case ESC_s:
+ for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
+ break;
- case ESC_s:
- for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
- break;
+ case ESC_S:
+ should_flip_negation = TRUE;
+ for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
+ break;
- case ESC_S:
- should_flip_negation = TRUE;
- for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
- break;
+ /* When adding the horizontal or vertical space lists to a class, or
+ their complements, disable PCRE2_CASELESS, because it justs wastes
+ time, and in the "not-x" UTF cases can create unwanted duplicates in
+ the XCLASS list (provoked by characters that have more than one other
+ case and by both cases being in the same "not-x" sublist). */
- /* The rest apply in both UCP and non-UCP cases. */
+ case ESC_h:
+ (void)add_list_to_class(classbits, &class_uchardata,
+ options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
+ break;
- case ESC_h:
- (void)add_list_to_class(classbits, &class_uchardata, options, cb,
- PRIV(hspace_list), NOTACHAR);
- break;
+ case ESC_H:
+ (void)add_not_list_to_class(classbits, &class_uchardata,
+ options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
+ break;
- case ESC_H:
- (void)add_not_list_to_class(classbits, &class_uchardata, options,
- cb, PRIV(hspace_list));
- break;
+ case ESC_v:
+ (void)add_list_to_class(classbits, &class_uchardata,
+ options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
+ break;
- case ESC_v:
- (void)add_list_to_class(classbits, &class_uchardata, options, cb,
- PRIV(vspace_list), NOTACHAR);
- break;
+ case ESC_V:
+ (void)add_not_list_to_class(classbits, &class_uchardata,
+ options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
+ break;
- case ESC_V:
- (void)add_not_list_to_class(classbits, &class_uchardata, options,
- cb, PRIV(vspace_list));
- break;
+ /* If Unicode is not supported, \P and \p are not allowed and are
+ faulted at parse time, so will never appear here. */
- case ESC_p:
- case ESC_P:
#ifdef SUPPORT_UNICODE
- {
- BOOL negated;
- unsigned int ptype = 0, pdata = 0;
- if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
- goto FAILED;
- *class_uchardata++ = ((escape == ESC_p) != negated)?
- XCL_PROP : XCL_NOTPROP;
- *class_uchardata++ = ptype;
- *class_uchardata++ = pdata;
- xclass_has_prop = TRUE;
- class_has_8bitchar--; /* Undo! */
- }
- break;
-#else
- *errorcodeptr = ERR45;
- goto FAILED;
-#endif
- /* Unrecognized escapes are faulted. */
-
- default:
- *errorcodeptr = ERR7;
- goto FAILED;
+ case ESC_p:
+ case ESC_P:
+ {
+ uint32_t ptype = *(++pptr) >> 16;
+ uint32_t pdata = *pptr & 0xffff;
+ *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
+ *class_uchardata++ = ptype;
+ *class_uchardata++ = pdata;
+ xclass_has_prop = TRUE;
+ class_has_8bitchar--; /* Undo! */
}
-
- /* Handled \d-type escape */
-
- goto CONTINUE_CLASS;
+ break;
+#endif
}
- /* Control gets here if the escape just defined a single character.
- This is in c and may be greater than 256. */
-
- escape = 0;
- } /* End of backslash handling */
-
- /* A character may be followed by '-' to form a range. However, Perl does
- not permit ']' to be the end of the range. A '-' character at the end is
- treated as a literal. Perl ignores orphaned \E sequences entirely. The
- code for handling \Q and \E is messy. */
-
- CHECK_RANGE:
- while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
- {
- inescq = FALSE;
- ptr += 2;
- }
- oldptr = ptr;
-
- /* Remember if \r or \n were explicitly used */
-
- if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
+ goto CONTINUE_CLASS;
+ } /* End handling \d-type escapes */
- /* Check for range */
+ /* A literal character may be followed by a range meta. At parse time
+ there are checks for out-of-order characters, for ranges where the two
+ characters are equal, and for hyphens that cannot indicate a range. At
+ this point, therefore, no checking is needed. */
- if (!inescq && ptr[1] == CHAR_MINUS)
+ else
{
- uint32_t d;
- ptr += 2;
- while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
-
- /* If we hit \Q (not followed by \E) at this point, go into escaped
- mode. */
-
- while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
- {
- ptr += 2;
- if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
- { ptr += 2; continue; }
- inescq = TRUE;
- break;
- }
+ uint32_t c, d;
- /* Minus (hyphen) at the end of a class is treated as a literal, so put
- back the pointer and jump to handle the character that preceded it. */
-
- if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
- {
- ptr = oldptr;
- goto CLASS_SINGLE_CHARACTER;
- }
+ CLASS_LITERAL:
+ c = d = meta;
- /* Otherwise, we have a potential range; pick up the next character */
+ /* Remember if \r or \n were explicitly used */
-#ifdef SUPPORT_UNICODE
- if (utf)
- { /* Braces are required because the */
- GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */
- }
- else
-#endif
- d = *ptr; /* Not UTF mode */
+ if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
- /* The second part of a range can be a single-character escape
- sequence, but not any of the other escapes. Perl treats a hyphen as a
- literal in such circumstances. However, in Perl's warning mode, a
- warning is given, so PCRE now faults it as it is almost certainly a
- mistake on the user's part. */
+ /* Process a character range */
- if (!inescq)
+ if (pptr[1] == META_RANGE_LITERAL || pptr[1] == META_RANGE_ESCAPED)
{
- if (d == CHAR_BACKSLASH)
- {
- int descape;
- descape = PRIV(check_escape)(&ptr, cb->end_pattern, &d,
- errorcodeptr, options, TRUE, cb);
- if (*errorcodeptr != 0) goto FAILED;
#ifdef EBCDIC
- range_is_literal = FALSE;
+ BOOL range_is_literal = (pptr[1] == META_RANGE_LITERAL);
#endif
- /* 0 means a character was put into d; \b is backspace; any other
- special causes an error. */
-
- if (descape != 0)
- {
- if (descape == ESC_b) d = CHAR_BS; else
- {
- *errorcodeptr = ERR50;
- goto FAILED;
- }
- }
- }
+ pptr += 2;
+ d = *pptr;
+ if (d == META_BIGVALUE) d = *(++pptr);
- /* A hyphen followed by a POSIX class is treated in the same way. */
+ /* Remember an explicit \r or \n, and add the range to the class. */
- else if (d == CHAR_LEFT_SQUARE_BRACKET &&
- (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
- ptr[1] == CHAR_EQUALS_SIGN) &&
- check_posix_syntax(ptr, &tempptr))
- {
- *errorcodeptr = ERR50;
- goto FAILED;
- }
- }
+ if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
- /* Check that the two values are in the correct order. Optimize
- one-character ranges. */
-
- if (d < c)
- {
- *errorcodeptr = ERR8;
- goto FAILED;
- }
- if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */
-
- /* We have found a character range, so single character optimizations
- cannot be done anymore. Any value greater than 1 indicates that there
- is more than one character. */
-
- class_one_char = 2;
-
- /* Remember an explicit \r or \n, and add the range to the class. */
-
- if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
-
- /* In an EBCDIC environment, Perl treats alphabetic ranges specially
- because there are holes in the encoding, and simply using the range A-Z
- (for example) would include the characters in the holes. This applies
- only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
+ /* In an EBCDIC environment, Perl treats alphabetic ranges specially
+ because there are holes in the encoding, and simply using the range
+ A-Z (for example) would include the characters in the holes. This
+ applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
#ifdef EBCDIC
- if (range_is_literal &&
- (cb->ctypes[c] & ctype_letter) != 0 &&
- (cb->ctypes[d] & ctype_letter) != 0 &&
- (c <= CHAR_z) == (d <= CHAR_z))
- {
- uint32_t uc = (c <= CHAR_z)? 0 : 64;
- uint32_t C = c - uc;
- uint32_t D = d - uc;
-
- if (C <= CHAR_i)
- {
- class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
- ((D < CHAR_i)? D : CHAR_i) + uc);
- C = CHAR_j;
- }
-
- if (C <= D && C <= CHAR_r)
+ if (range_is_literal &&
+ (cb->ctypes[c] & ctype_letter) != 0 &&
+ (cb->ctypes[d] & ctype_letter) != 0 &&
+ (d <= CHAR_z) == (d <= CHAR_z))
{
- class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
- ((D < CHAR_r)? D : CHAR_r) + uc);
- C = CHAR_s;
- }
+ uint32_t uc = (d <= CHAR_z)? 0 : 64;
+ uint32_t C = d - uc;
+ uint32_t D = d - uc;
- if (C <= D)
- {
- class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
- D + uc);
- }
- }
- else
-#endif
- class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, cb, c, d);
- goto CONTINUE_CLASS; /* Go get the next char in the class */
- }
-
- /* Handle a single character - we can get here for a normal non-escape
- char, or after \ that introduces a single character or for an apparent
- range that isn't. Only the value 1 matters for class_one_char, so don't
- increase it if it is already 2 or more ... just in case there's a class
- with a zillion characters in it. */
-
- CLASS_SINGLE_CHARACTER:
- if (class_one_char < 2) class_one_char++;
-
- /* If class_one_char is 1 and xclass_has_prop is false, we have the first
- single character in the class, and there have been no prior ranges, or
- XCLASS items generated by escapes. If this is the final character in the
- class, we can optimize by turning the item into a 1-character OP_CHAR[I]
- if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
- can cause firstcu to be set. Otherwise, there can be no first char if
- this item is first, whatever repeat count may follow. In the case of
- reqcu, save the previous value for reinstating. */
-
- if (!inescq &&
-#ifdef SUPPORT_UNICODE
- !xclass_has_prop &&
-#endif
- class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
- {
- ptr++;
- zeroreqcu = reqcu;
- zeroreqcuflags = reqcuflags;
-
- if (negate_class)
- {
-#ifdef SUPPORT_UNICODE
- int d;
-#endif
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- zerofirstcu = firstcu;
- zerofirstcuflags = firstcuflags;
+ if (C <= CHAR_i)
+ {
+ class_has_8bitchar +=
+ add_to_class(classbits, &class_uchardata, options, cb, C + uc,
+ ((D < CHAR_i)? D : CHAR_i) + uc);
+ C = CHAR_j;
+ }
- /* For caseless UTF mode, check whether this character has more than
- one other case. If so, generate a special OP_NOTPROP item instead of
- OP_NOTI. */
+ if (C <= D && C <= CHAR_r)
+ {
+ class_has_8bitchar +=
+ add_to_class(classbits, &class_uchardata, options, cb, C + uc,
+ ((D < CHAR_r)? D : CHAR_r) + uc);
+ C = CHAR_s;
+ }
-#ifdef SUPPORT_UNICODE
- if (utf && (options & PCRE2_CASELESS) != 0 &&
- (d = UCD_CASESET(c)) != 0)
- {
- *code++ = OP_NOTPROP;
- *code++ = PT_CLIST;
- *code++ = d;
+ if (C <= D)
+ {
+ class_has_8bitchar +=
+ add_to_class(classbits, &class_uchardata, options, cb, C + uc,
+ D + uc);
+ }
}
else
#endif
- /* Char has only one other case, or UCP not available */
+ /* Not an EBCDIC special range */
- {
- *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT;
- code += PUTCHAR(c, code);
- }
-
- /* We are finished with this character class */
-
- goto END_CLASS;
- }
+ class_has_8bitchar +=
+ add_to_class(classbits, &class_uchardata, options, cb, c, d);
+ goto CONTINUE_CLASS; /* Go get the next char in the class */
+ } /* End of range handling */
- /* For a single, positive character, get the value into mcbuffer, and
- then we can handle this with the normal one-character code. */
- mclength = PUTCHAR(c, mcbuffer);
- goto ONE_CHAR;
- } /* End of 1-char optimization */
+ /* Handle a single character. */
- /* There is more than one character in the class, or an XCLASS item
- has been generated. Add this character to the class. */
-
- class_has_8bitchar +=
- add_to_class(classbits, &class_uchardata, options, cb, c, c);
+ class_has_8bitchar +=
+ add_to_class(classbits, &class_uchardata, options, cb, meta, meta);
+ }
- /* Continue to the next character in the class. Closing square bracket
- not within \Q..\E ends the class. A NULL character terminates a
- nested substitution string, but may be a data character in the main
- pattern (tested at the start of this loop). */
+ /* Continue to the next item in the class. */
CONTINUE_CLASS:
- c = *(++ptr);
- if (c == CHAR_NULL && cb->nestptr[0] != NULL)
- {
- ptr = cb->nestptr[0];
- cb->nestptr[0] = cb->nestptr[1];
- cb->nestptr[1] = NULL;
- c = *(++ptr);
- }
#ifdef SUPPORT_WIDE_CHARS
- /* If any wide characters have been encountered, set xclass = TRUE. Then,
- in the pre-compile phase, accumulate the length of the wide characters
- and reset the pointer. This is so that very large classes that contain a
- zillion wide characters do not overwrite the work space (which is on the
- stack). */
+ /* If any wide characters or Unicode properties have been encountered,
+ set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
+ of the extra data and reset the pointer. This is so that very large
+ classes that contain a zillion wide characters or Unicode property tests
+ do not overwrite the work space (which is on the stack). */
if (class_uchardata > class_uchardata_base)
{
@@ -4966,13 +5484,12 @@ for (;; ptr++)
}
}
#endif
- /* An unescaped ] ends the class */
- if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
+ continue; /* Needed to avoid error when not supporting wide chars */
} /* End of main class-processing loop */
- /* If this is the first thing in the branch, there can be no first char
- setting, whatever the repeat count. Any reqcu setting must remain
+ /* If this class is the first thing in the branch, there can be no first
+ char setting, whatever the repeat count. Any reqcu setting must remain
unchanged after any kind of repeat. */
if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
@@ -4993,23 +5510,46 @@ for (;; ptr++)
all wide characters (depending on whether the whole class is or is not
negated). This requirement is indicated by match_all_or_no_wide_chars being
true. We do this by including an explicit range, which works in both cases.
+ This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
+ cannot be any wide characters in 8-bit non-UTF mode.
+
+ When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit
+ class where \S etc is present without PCRE2_UCP, causing an extended class
+ to be compiled, we make sure that all characters > 255 are included by
+ forcing match_all_or_no_wide_chars to be true.
If, when generating an xclass, there are no characters < 256, we can omit
the bitmap in the actual compiled code. */
-#ifdef SUPPORT_WIDE_CHARS
+#ifdef SUPPORT_WIDE_CHARS /* Defined for 16/32 bits, or 8-bit with Unicode */
+ if (xclass && (
#ifdef SUPPORT_UNICODE
- if (xclass && (xclass_has_prop || !should_flip_negation ||
- (options & PCRE2_UCP) != 0))
-#elif PCRE2_CODE_UNIT_WIDTH != 8
- if (xclass && (xclass_has_prop || !should_flip_negation))
+ (options & PCRE2_UCP) != 0 ||
#endif
+ xclass_has_prop || !should_flip_negation))
{
- if (match_all_or_no_wide_chars)
+ if (match_all_or_no_wide_chars || (
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ utf &&
+#endif
+ should_flip_negation && !negate_class && (options & PCRE2_UCP) == 0))
{
*class_uchardata++ = XCL_RANGE;
- class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
- class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
+ if (utf) /* Will always be utf in the 8-bit library */
+ {
+ class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
+ }
+ else /* Can only happen for the 16-bit & 32-bit libraries */
+ {
+#if PCRE2_CODE_UNIT_WIDTH == 16
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffu;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffffffu;
+#endif
+ }
}
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;
@@ -5026,7 +5566,7 @@ for (;; ptr++)
memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop)
- for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
+ for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
}
@@ -5037,7 +5577,7 @@ for (;; ptr++)
PUT(previous, 1, (int)(code - previous));
break; /* End of class handling */
}
-#endif
+#endif /* SUPPORT_WIDE_CHARS */
/* If there are no characters > 255, or they are all to be included or
excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
@@ -5049,2421 +5589,1705 @@ for (;; ptr++)
if (lengthptr == NULL) /* Save time in the pre-compile phase */
{
if (negate_class)
- for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
+ for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
memcpy(code, classbits, 32);
}
code += 32 / sizeof(PCRE2_UCHAR);
-
- END_CLASS:
- break;
+ break; /* End of class processing */
/* ===================================================================*/
- /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
- has been tested above. */
+ /* Deal with (*VERB)s. */
- case CHAR_LEFT_CURLY_BRACKET:
- if (!is_quantifier) goto NORMAL_CHAR;
- ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
- if (*errorcodeptr != 0) goto FAILED;
- goto REPEAT;
-
- case CHAR_ASTERISK:
- repeat_min = 0;
- repeat_max = -1;
- goto REPEAT;
+ /* Check for open captures before ACCEPT and convert it to ASSERT_ACCEPT if
+ in an assertion. In the first pass, just accumulate the length required;
+ otherwise hitting (*ACCEPT) inside many nested parentheses can cause
+ workspace overflow. Do not set firstcu after *ACCEPT. */
- case CHAR_PLUS:
- repeat_min = 1;
- repeat_max = -1;
- goto REPEAT;
-
- case CHAR_QUESTION_MARK:
- repeat_min = 0;
- repeat_max = 1;
-
- REPEAT:
- if (previous == NULL)
+ case META_ACCEPT:
+ cb->had_accept = TRUE;
+ for (oc = cb->open_caps; oc != NULL; oc = oc->next)
{
- *errorcodeptr = ERR9;
- goto FAILED;
- }
-
- if (repeat_min == 0)
- {
- firstcu = zerofirstcu; /* Adjust for zero repeat */
- firstcuflags = zerofirstcuflags;
- reqcu = zeroreqcu; /* Ditto */
- reqcuflags = zeroreqcuflags;
+ if (lengthptr != NULL)
+ {
+ *lengthptr += CU2BYTES(1) + IMM2_SIZE;
+ }
+ else
+ {
+ *code++ = OP_CLOSE;
+ PUT2INC(code, 0, oc->number);
+ }
}
+ *code++ = (cb->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ break;
- /* Remember whether this is a variable length repeat */
-
- reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
-
- op_type = 0; /* Default single-char op codes */
- possessive_quantifier = FALSE; /* Default not possessive quantifier */
+ case META_PRUNE:
+ case META_SKIP:
+ cb->had_pruneorskip = TRUE;
+ /* Fall through */
+ case META_COMMIT:
+ case META_FAIL:
+ *code++ = verbops[(meta - META_MARK) >> 16];
+ break;
- /* Save start of previous item, in case we have to move it up in order to
- insert something before it. */
+ case META_THEN:
+ cb->external_flags |= PCRE2_HASTHEN;
+ *code++ = OP_THEN;
+ break;
- tempcode = previous;
+ /* Handle verbs with arguments. Arguments can be very long, especially in
+ 16- and 32-bit modes, and can overflow the workspace in the first pass.
+ However, the argument length is constrained to be small enough to fit in
+ one code unit. This check happens in parse_regex(). In the first pass,
+ instead of putting the argument into memory, we just update the length
+ counter and set up an empty argument. */
- /* Before checking for a possessive quantifier, we must skip over
- whitespace and comments in extended mode because Perl allows white space at
- this point. */
+ case META_THEN_ARG:
+ cb->external_flags |= PCRE2_HASTHEN;
+ goto VERB_ARG;
- if ((options & PCRE2_EXTENDED) != 0)
+ case META_PRUNE_ARG:
+ case META_SKIP_ARG:
+ cb->had_pruneorskip = TRUE;
+ /* Fall through */
+ case META_MARK:
+ VERB_ARG:
+ *code++ = verbops[(meta - META_MARK) >> 16];
+ /* The length is in characters. */
+ verbarglen = *(++pptr);
+ verbculen = 0;
+ tempcode = code++;
+ for (i = 0; i < (int)verbarglen; i++)
{
- ptr++;
- for (;;)
- {
- while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_space) != 0) ptr++;
- if (*ptr != CHAR_NUMBER_SIGN) break;
- ptr++;
- while (ptr < cb->end_pattern)
- {
- if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
- { /* IS_NEWLINE sets cb->nllen. */
- ptr += cb->nllen;
- break;
- }
- ptr++;
+ meta = *(++pptr);
#ifdef SUPPORT_UNICODE
- if (utf) FORWARDCHAR(ptr);
+ if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else
#endif
- } /* Loop for comment characters */
- } /* Loop for multiple comments */
- ptr--; /* Last code unit of previous character. */
+ {
+ mclength = 1;
+ mcbuffer[0] = meta;
+ }
+ if (lengthptr != NULL) *lengthptr += mclength; else
+ {
+ memcpy(code, mcbuffer, CU2BYTES(mclength));
+ code += mclength;
+ verbculen += mclength;
+ }
}
- /* If the next character is '+', we have a possessive quantifier. This
- implies greediness, whatever the setting of the PCRE2_UNGREEDY option.
- If the next character is '?' this is a minimizing repeat, by default,
- but if PCRE2_UNGREEDY is set, it works the other way round. We change the
- repeat type to the non-default. */
-
- if (ptr[1] == CHAR_PLUS)
- {
- repeat_type = 0; /* Force greedy */
- possessive_quantifier = TRUE;
- ptr++;
- }
- else if (ptr[1] == CHAR_QUESTION_MARK)
- {
- repeat_type = greedy_non_default;
- ptr++;
- }
- else repeat_type = greedy_default;
+ *tempcode = verbculen; /* Fill in the code unit length */
+ *code++ = 0; /* Terminating zero */
+ break;
- /* If the repeat is {1} we can ignore it. */
- if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
+ /* ===================================================================*/
+ /* Handle options change. The new setting must be passed back for use in
+ subsequent branches. Reset the greedy defaults and the case value for
+ firstcu and reqcu. */
+
+ case META_OPTIONS:
+ *optionsptr = options = *(++pptr);
+ greedy_default = ((options & PCRE2_UNGREEDY) != 0);
+ greedy_non_default = greedy_default ^ 1;
+ req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
+ break;
- /* If previous was a recursion call, wrap it in atomic brackets so that
- previous becomes the atomic group. All recursions were so wrapped in the
- past, but it no longer happens for non-repeated recursions. In fact, the
- repeated ones could be re-implemented independently so as not to need this,
- but for the moment we rely on the code for repeating groups. */
- if (*previous == OP_RECURSE)
+ /* ===================================================================*/
+ /* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous
+ because it could be a numerical check on recursion, or a name check on a
+ group's being set. The pre-pass sets up META_COND_RNUMBER as a name so that
+ we can handle it either way. We first try for a name; if not found, process
+ the number. */
+
+ case META_COND_RNUMBER: /* (?(Rdigits) */
+ case META_COND_NAME: /* (?(name) or (?'name') or ?(<name>) */
+ case META_COND_RNAME: /* (?(R&name) - test for recursion */
+ bravalue = OP_COND;
{
- memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
- *previous = OP_ONCE;
- PUT(previous, 1, 2 + 2*LINK_SIZE);
- previous[2 + 2*LINK_SIZE] = OP_KET;
- PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
- code += 2 + 2 * LINK_SIZE;
- length_prevgroup = 3 + 3*LINK_SIZE;
- }
+ int count, index;
+ PCRE2_SPTR name;
+ named_group *ng = cb->named_groups;
+ uint32_t length = *(++pptr);
- /* Now handle repetition for the different types of item. */
+ GETPLUSOFFSET(offset, pptr);
+ name = cb->start_pattern + offset;
- /* If previous was a character or negated character match, abolish the item
- and generate a repeat item instead. If a char item has a minimum of more
- than one, ensure that it is set in reqcu - it might not be if a sequence
- such as x{3} is the first thing in a branch because the x will have gone
- into firstcu instead. */
+ /* In the first pass, the names generated in the pre-pass are available,
+ but the main name table has not yet been created. Scan the list of names
+ generated in the pre-pass in order to get a number and whether or not
+ this name is duplicated. If it is not duplicated, we can handle it as a
+ numerical group. */
- if (*previous == OP_CHAR || *previous == OP_CHARI
- || *previous == OP_NOT || *previous == OP_NOTI)
- {
- switch (*previous)
+ for (i = 0; i < cb->names_found; i++, ng++)
{
- default: /* Make compiler happy. */
- case OP_CHAR: op_type = OP_STAR - OP_STAR; break;
- case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
- case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break;
- case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break;
+ if (length == ng->length &&
+ PRIV(strncmp)(name, ng->name, length) == 0)
+ {
+ if (!ng->isdup)
+ {
+ code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF;
+ PUT2(code, 2+LINK_SIZE, ng->number);
+ if (ng->number > cb->top_backref) cb->top_backref = ng->number;
+ skipunits = 1+IMM2_SIZE;
+ goto GROUP_PROCESS_NOTE_EMPTY;
+ }
+ break; /* Found a duplicated name */
+ }
}
- /* Deal with UTF characters that take up more than one code unit. It's
- easier to write this out separately than try to macrify it. Use c to
- hold the length of the character in code units, plus UTF_LENGTH to flag
- that it's a length rather than a small character. */
+ /* If the name was not found we have a bad reference, unless we are
+ dealing with R<digits>, which is treated as a recursion test by number.
+ */
-#ifdef MAYBE_UTF_MULTI
- if (utf && NOT_FIRSTCU(code[-1]))
+ if (i >= cb->names_found)
{
- PCRE2_UCHAR *lastchar = code - 1;
- BACKCHAR(lastchar);
- c = (int)(code - lastchar); /* Length of UTF character */
- memcpy(utf_units, lastchar, CU2BYTES(c)); /* Save the char */
- c |= UTF_LENGTH; /* Flag c as a length */
- }
- else
-#endif /* MAYBE_UTF_MULTI */
-
- /* Handle the case of a single charater - either with no UTF support, or
- with UTF disabled, or for a single-code-unit UTF character. */
- {
- c = code[-1];
- if (*previous <= OP_CHARI && repeat_min > 1)
+ groupnumber = 0;
+ if (meta == META_COND_RNUMBER)
{
- reqcu = c;
- reqcuflags = req_caseopt | cb->req_varyopt;
+ for (i = 1; i < (int)length; i++)
+ {
+ groupnumber = groupnumber * 10 + name[i] - CHAR_0;
+ if (groupnumber > MAX_GROUP_NUMBER)
+ {
+ *errorcodeptr = ERR61;
+ cb->erroroffset = offset + i;
+ return 0;
+ }
+ }
}
- }
- goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
- }
+ if (meta != META_COND_RNUMBER || groupnumber > cb->bracount)
+ {
+ *errorcodeptr = ERR15;
+ cb->erroroffset = offset;
+ return 0;
+ }
- /* If previous was a character type match (\d or similar), abolish it and
- create a suitable repeat item. The code is shared with single-character
- repeats by setting op_type to add a suitable offset into repeat_type. Note
- the the Unicode property types will be present only when SUPPORT_UNICODE is
- defined, but we don't wrap the little bits of code here because it just
- makes it horribly messy. */
+ /* (?Rdigits) treated as a recursion reference by number. A value of
+ zero (which is the result of both (?R) and (?R0)) means "any", and is
+ translated into RREF_ANY (which is 0xffff). */
- else if (*previous < OP_EODN)
- {
- PCRE2_UCHAR *oldcode;
- int prop_type, prop_value;
- op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
- c = *previous; /* Save previous opcode */
- if (c == OP_PROP || c == OP_NOTPROP)
- {
- prop_type = previous[1];
- prop_value = previous[2];
- }
- else
- {
- /* Come here from just above with a character in c */
- OUTPUT_SINGLE_REPEAT:
- prop_type = prop_value = -1;
+ if (groupnumber == 0) groupnumber = RREF_ANY;
+ code[1+LINK_SIZE] = OP_RREF;
+ PUT2(code, 2+LINK_SIZE, groupnumber);
+ skipunits = 1+IMM2_SIZE;
+ goto GROUP_PROCESS_NOTE_EMPTY;
}
- /* At this point we either have prop_type == prop_value == -1 and either
- a code point or a character type that is not OP_[NOT]PROP in c, or we
- have OP_[NOT]PROP in c and prop_type/prop_value not negative. */
+ /* A duplicated name was found. Note that if an R<digits> name is found
+ (META_COND_RNUMBER), it is a reference test, not a recursion test. */
- oldcode = code; /* Save where we were */
- code = previous; /* Usually overwrite previous item */
+ code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF;
- /* If the maximum is zero then the minimum must also be zero; Perl allows
- this case, so we do too - by simply omitting the item altogether. */
+ /* We have a duplicated name. In the compile pass we have to search the
+ main table in order to get the index and count values. */
- if (repeat_max == 0) goto END_REPEAT;
+ count = 0; /* Values for first pass (avoids compiler warning) */
+ index = 0;
+ if (lengthptr == NULL && !find_dupname_details(name, length, &index,
+ &count, errorcodeptr, cb)) return 0;
- /* Combine the op_type with the repeat_type */
+ /* Add one to the opcode to change CREF/RREF into DNCREF/DNRREF and
+ insert appropriate data values. */
- repeat_type += op_type;
+ code[1+LINK_SIZE]++;
+ skipunits = 1+2*IMM2_SIZE;
+ PUT2(code, 2+LINK_SIZE, index);
+ PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
+ }
+ goto GROUP_PROCESS_NOTE_EMPTY;
+
+ /* The DEFINE condition is always false. It's internal groups may never
+ be called, so matched_char must remain false, hence the jump to
+ GROUP_PROCESS rather than GROUP_PROCESS_NOTE_EMPTY. */
+
+ case META_COND_DEFINE:
+ bravalue = OP_COND;
+ GETPLUSOFFSET(offset, pptr);
+ code[1+LINK_SIZE] = OP_DEFINE;
+ skipunits = 1;
+ goto GROUP_PROCESS;
+
+ /* Conditional test of a group's being set. */
+
+ case META_COND_NUMBER:
+ bravalue = OP_COND;
+ GETPLUSOFFSET(offset, pptr);
+ groupnumber = *(++pptr);
+ if (groupnumber > cb->bracount)
+ {
+ *errorcodeptr = ERR15;
+ cb->erroroffset = offset;
+ return 0;
+ }
+ if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
+ offset -= 2; /* Point at initial ( for too many branches error */
+ code[1+LINK_SIZE] = OP_CREF;
+ skipunits = 1+IMM2_SIZE;
+ PUT2(code, 2+LINK_SIZE, groupnumber);
+ goto GROUP_PROCESS_NOTE_EMPTY;
+
+ /* Test for the PCRE2 version. */
+
+ case META_COND_VERSION:
+ bravalue = OP_COND;
+ if (pptr[1] > 0)
+ code[1+LINK_SIZE] = ((PCRE2_MAJOR > pptr[2]) ||
+ (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR >= pptr[3]))?
+ OP_TRUE : OP_FALSE;
+ else
+ code[1+LINK_SIZE] = (PCRE2_MAJOR == pptr[2] && PCRE2_MINOR == pptr[3])?
+ OP_TRUE : OP_FALSE;
+ skipunits = 1;
+ pptr += 3;
+ goto GROUP_PROCESS_NOTE_EMPTY;
- /* A minimum of zero is handled either as the special case * or ?, or as
- an UPTO, with the maximum given. */
+ /* The condition is an assertion, possibly preceded by a callout. */
- if (repeat_min == 0)
- {
- if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
- else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
- else
- {
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max);
- }
- }
+ case META_COND_ASSERT:
+ bravalue = OP_COND;
+ goto GROUP_PROCESS_NOTE_EMPTY;
- /* A repeat minimum of 1 is optimized into some special cases. If the
- maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
- left in place and, if the maximum is greater than 1, we use OP_UPTO with
- one less than the maximum. */
- else if (repeat_min == 1)
- {
- if (repeat_max == -1)
- *code++ = OP_PLUS + repeat_type;
- else
- {
- code = oldcode; /* Leave previous item in place */
- if (repeat_max == 1) goto END_REPEAT;
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max - 1);
- }
- }
+ /* ===================================================================*/
+ /* Handle all kinds of nested bracketed groups. The non-capturing,
+ non-conditional cases are here; others come to GROUP_PROCESS via goto. */
+
+ case META_LOOKAHEAD:
+ bravalue = OP_ASSERT;
+ cb->assert_depth += 1;
+ goto GROUP_PROCESS;
+
+ /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
+ thing to do, but Perl allows all assertions to be quantified, and when
+ they contain capturing parentheses there may be a potential use for
+ this feature. Not that that applies to a quantified (?!) but we allow
+ it for uniformity. */
+
+ case META_LOOKAHEADNOT:
+ if (pptr[1] == META_KET &&
+ (pptr[2] < META_ASTERISK || pptr[2] > META_MINMAX_QUERY))
+ {
+ *code++ = OP_FAIL;
+ pptr++;
+ }
+ else
+ {
+ bravalue = OP_ASSERT_NOT;
+ cb->assert_depth += 1;
+ goto GROUP_PROCESS;
+ }
+ break;
- /* The case {n,n} is just an EXACT, while the general case {n,m} is
- handled as an EXACT followed by an UPTO or STAR or QUERY. */
+ case META_LOOKBEHIND:
+ bravalue = OP_ASSERTBACK;
+ cb->assert_depth += 1;
+ goto GROUP_PROCESS;
- else
- {
- *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
- PUT2INC(code, 0, repeat_min);
+ case META_LOOKBEHINDNOT:
+ bravalue = OP_ASSERTBACK_NOT;
+ cb->assert_depth += 1;
+ goto GROUP_PROCESS;
- /* Unless repeat_max equals repeat_min, fill in the data for EXACT, and
- then generate the second opcode. In UTF mode, multi-code-unit
- characters have their length in c, with the UTF_LENGTH bit as a flag,
- and the code units in utf_units. For a repeated Unicode property match,
- there are two extra values that define the required property, and c
- never has the UTF_LENGTH bit set. */
+ case META_ATOMIC:
+ bravalue = OP_ONCE;
+ goto GROUP_PROCESS_NOTE_EMPTY;
- if (repeat_max != repeat_min)
- {
-#ifdef MAYBE_UTF_MULTI
- if (utf && (c & UTF_LENGTH) != 0)
- {
- memcpy(code, utf_units, CU2BYTES(c & 7));
- code += c & 7;
- }
- else
-#endif /* MAYBE_UTF_MULTI */
- {
- *code++ = c;
- if (prop_type >= 0)
- {
- *code++ = prop_type;
- *code++ = prop_value;
- }
- }
+ case META_NOCAPTURE:
+ bravalue = OP_BRA;
+ /* Fall through */
- /* Now set up the following opcode */
+ /* Process nested bracketed regex. The nesting depth is maintained for the
+ benefit of the stackguard function. The test for too deep nesting is now
+ done in parse_regex(). Assertion and DEFINE groups come to GROUP_PROCESS;
+ others come to GROUP_PROCESS_NOTE_EMPTY, to indicate that we need to take
+ note of whether or not they may match an empty string. */
- if (repeat_max < 0) *code++ = OP_STAR + repeat_type; else
- {
- repeat_max -= repeat_min;
- if (repeat_max == 1)
- {
- *code++ = OP_QUERY + repeat_type;
- }
- else
- {
- *code++ = OP_UPTO + repeat_type;
- PUT2INC(code, 0, repeat_max);
- }
- }
- }
- }
+ GROUP_PROCESS_NOTE_EMPTY:
+ note_group_empty = TRUE;
- /* Fill in the character or character type for the final opcode. */
+ GROUP_PROCESS:
+ cb->parens_depth += 1;
+ *code = bravalue;
+ pptr++;
+ tempcode = code;
+ tempreqvary = cb->req_varyopt; /* Save value before group */
+ length_prevgroup = 0; /* Initialize for pre-compile phase */
-#ifdef MAYBE_UTF_MULTI
- if (utf && (c & UTF_LENGTH) != 0)
- {
- memcpy(code, utf_units, CU2BYTES(c & 7));
- code += c & 7;
- }
- else
-#endif /* MAYBEW_UTF_MULTI */
- {
- *code++ = c;
- if (prop_type >= 0)
- {
- *code++ = prop_type;
- *code++ = prop_value;
- }
- }
- }
+ if ((group_return =
+ compile_regex(
+ options, /* The option state */
+ &tempcode, /* Where to put code (updated) */
+ &pptr, /* Input pointer (updated) */
+ errorcodeptr, /* Where to put an error message */
+ skipunits, /* Skip over bracket number */
+ &subfirstcu, /* For possible first char */
+ &subfirstcuflags,
+ &subreqcu, /* For possible last char */
+ &subreqcuflags,
+ bcptr, /* Current branch chain */
+ cb, /* Compile data block */
+ (lengthptr == NULL)? NULL : /* Actual compile phase */
+ &length_prevgroup /* Pre-compile phase */
+ )) == 0)
+ return 0; /* Error */
- /* If previous was a character class or a back reference, we put the repeat
- stuff after it, but just skip the item if the repeat was {0,0}. */
+ cb->parens_depth -= 1;
- else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
-#ifdef SUPPORT_WIDE_CHARS
- *previous == OP_XCLASS ||
-#endif
- *previous == OP_REF || *previous == OP_REFI ||
- *previous == OP_DNREF || *previous == OP_DNREFI)
- {
- if (repeat_max == 0)
- {
- code = previous;
- goto END_REPEAT;
- }
+ /* If that was a non-conditional significant group (not an assertion, not a
+ DEFINE) that matches at least one character, then the current item matches
+ a character. Conditionals are handled below. */
- if (repeat_min == 0 && repeat_max == -1)
- *code++ = OP_CRSTAR + repeat_type;
- else if (repeat_min == 1 && repeat_max == -1)
- *code++ = OP_CRPLUS + repeat_type;
- else if (repeat_min == 0 && repeat_max == 1)
- *code++ = OP_CRQUERY + repeat_type;
- else
- {
- *code++ = OP_CRRANGE + repeat_type;
- PUT2INC(code, 0, repeat_min);
- if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
- PUT2INC(code, 0, repeat_max);
- }
- }
+ if (note_group_empty && bravalue != OP_COND && group_return > 0)
+ matched_char = TRUE;
- /* If previous was a bracket group, we may have to replicate it in certain
- cases. Note that at this point we can encounter only the "basic" bracket
- opcodes such as BRA and CBRA, as this is the place where they get converted
- into the more special varieties such as BRAPOS and SBRA. A test for >=
- OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
- ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND.
- Originally, PCRE did not allow repetition of assertions, but now it does,
- for Perl compatibility. */
+ /* If we've just compiled an assertion, pop the assert depth. */
- else if (*previous >= OP_ASSERT && *previous <= OP_COND)
- {
- register int i;
- int len = (int)(code - previous);
- PCRE2_UCHAR *bralink = NULL;
- PCRE2_UCHAR *brazeroptr = NULL;
+ if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
+ cb->assert_depth -= 1;
- /* Repeating a DEFINE group (or any group where the condition is always
- FALSE and there is only one branch) is pointless, but Perl allows the
- syntax, so we just ignore the repeat. */
+ /* At the end of compiling, code is still pointing to the start of the
+ group, while tempcode has been updated to point past the end of the group.
+ The parsed pattern pointer (pptr) is on the closing META_KET.
- if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
- previous[GET(previous, 1)] != OP_ALT)
- goto END_REPEAT;
+ If this is a conditional bracket, check that there are no more than
+ two branches in the group, or just one if it's a DEFINE group. We do this
+ in the real compile phase, not in the pre-pass, where the whole group may
+ not be available. */
- /* There is no sense in actually repeating assertions. The only potential
- use of repetition is in cases when the assertion is optional. Therefore,
- if the minimum is greater than zero, just ignore the repeat. If the
- maximum is not zero or one, set it to 1. */
+ if (bravalue == OP_COND && lengthptr == NULL)
+ {
+ PCRE2_UCHAR *tc = code;
+ int condcount = 0;
- if (*previous < OP_ONCE) /* Assertion */
- {
- if (repeat_min > 0) goto END_REPEAT;
- if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
- }
+ do {
+ condcount++;
+ tc += GET(tc,1);
+ }
+ while (*tc != OP_KET);
- /* The case of a zero minimum is special because of the need to stick
- OP_BRAZERO in front of it, and because the group appears once in the
- data, whereas in other cases it appears the minimum number of times. For
- this reason, it is simplest to treat this case separately, as otherwise
- the code gets far too messy. There are several special subcases when the
- minimum is zero. */
+ /* A DEFINE group is never obeyed inline (the "condition" is always
+ false). It must have only one branch. Having checked this, change the
+ opcode to OP_FALSE. */
- if (repeat_min == 0)
+ if (code[LINK_SIZE+1] == OP_DEFINE)
{
- /* If the maximum is also zero, we used to just omit the group from the
- output altogether, like this:
-
- ** if (repeat_max == 0)
- ** {
- ** code = previous;
- ** goto END_REPEAT;
- ** }
-
- However, that fails when a group or a subgroup within it is referenced
- as a subroutine from elsewhere in the pattern, so now we stick in
- OP_SKIPZERO in front of it so that it is skipped on execution. As we
- don't have a list of which groups are referenced, we cannot do this
- selectively.
-
- If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
- and do no more at this point. */
-
- if (repeat_max <= 1) /* Covers 0, 1, and unlimited */
- {
- memmove(previous + 1, previous, CU2BYTES(len));
- code++;
- if (repeat_max == 0)
- {
- *previous++ = OP_SKIPZERO;
- goto END_REPEAT;
- }
- brazeroptr = previous; /* Save for possessive optimizing */
- *previous++ = OP_BRAZERO + repeat_type;
- }
-
- /* If the maximum is greater than 1 and limited, we have to replicate
- in a nested fashion, sticking OP_BRAZERO before each set of brackets.
- The first one has to be handled carefully because it's the original
- copy, which has to be moved up. The remainder can be handled by code
- that is common with the non-zero minimum case below. We have to
- adjust the value or repeat_max, since one less copy is required. */
-
- else
+ if (condcount > 1)
{
- int offset;
- memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
- code += 2 + LINK_SIZE;
- *previous++ = OP_BRAZERO + repeat_type;
- *previous++ = OP_BRA;
-
- /* We chain together the bracket offset fields that have to be
- filled in later when the ends of the brackets are reached. */
-
- offset = (bralink == NULL)? 0 : (int)(previous - bralink);
- bralink = previous;
- PUTINC(previous, 0, offset);
+ cb->erroroffset = offset;
+ *errorcodeptr = ERR54;
+ return 0;
}
-
- repeat_max--;
+ code[LINK_SIZE+1] = OP_FALSE;
+ bravalue = OP_DEFINE; /* A flag to suppress char handling below */
}
- /* If the minimum is greater than zero, replicate the group as many
- times as necessary, and adjust the maximum to the number of subsequent
- copies that we need. */
+ /* A "normal" conditional group. If there is just one branch, we must not
+ make use of its firstcu or reqcu, because this is equivalent to an
+ empty second branch. Also, it may match an empty string. If there are two
+ branches, this item must match a character if the group must. */
else
{
- if (repeat_min > 1)
+ if (condcount > 2)
{
- /* In the pre-compile phase, we don't actually do the replication. We
- just adjust the length as if we had. Do some paranoid checks for
- potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
- integer type when available, otherwise double. */
-
- if (lengthptr != NULL)
- {
- size_t delta = (repeat_min - 1)*length_prevgroup;
- if ((INT64_OR_DOUBLE)(repeat_min - 1)*
- (INT64_OR_DOUBLE)length_prevgroup >
- (INT64_OR_DOUBLE)INT_MAX ||
- OFLOW_MAX - *lengthptr < delta)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += delta;
- }
-
- /* This is compiling for real. If there is a set first byte for
- the group, and we have not yet set a "required byte", set it. */
-
- else
- {
- if (groupsetfirstcu && reqcuflags < 0)
- {
- reqcu = firstcu;
- reqcuflags = firstcuflags;
- }
- for (i = 1; i < repeat_min; i++)
- {
- memcpy(code, previous, CU2BYTES(len));
- code += len;
- }
- }
+ cb->erroroffset = offset;
+ *errorcodeptr = ERR27;
+ return 0;
}
-
- if (repeat_max > 0) repeat_max -= repeat_min;
+ if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE;
+ else if (group_return > 0) matched_char = TRUE;
}
+ }
- /* This code is common to both the zero and non-zero minimum cases. If
- the maximum is limited, it replicates the group in a nested fashion,
- remembering the bracket starts on a stack. In the case of a zero minimum,
- the first one was set up above. In all cases the repeat_max now specifies
- the number of additional copies needed. Again, we must remember to
- replicate entries on the forward reference list. */
+ /* In the pre-compile phase, update the length by the length of the group,
+ less the brackets at either end. Then reduce the compiled code to just a
+ set of non-capturing brackets so that it doesn't use much memory if it is
+ duplicated by a quantifier.*/
- if (repeat_max >= 0)
+ if (lengthptr != NULL)
+ {
+ if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
{
- /* In the pre-compile phase, we don't actually do the replication. We
- just adjust the length as if we had. For each repetition we must add 1
- to the length for BRAZERO and for all but the last repetition we must
- add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
- paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
- a 64-bit integer type when available, otherwise double. */
+ *errorcodeptr = ERR20;
+ return 0;
+ }
+ *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
+ code++; /* This already contains bravalue */
+ PUTINC(code, 0, 1 + LINK_SIZE);
+ *code++ = OP_KET;
+ PUTINC(code, 0, 1 + LINK_SIZE);
+ break; /* No need to waste time with special character handling */
+ }
- if (lengthptr != NULL && repeat_max > 0)
- {
- size_t delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
- 2 - 2*LINK_SIZE; /* Last one doesn't nest */
- if ((INT64_OR_DOUBLE)repeat_max *
- (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
- > (INT64_OR_DOUBLE)INT_MAX ||
- OFLOW_MAX - *lengthptr < delta)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += delta;
- }
+ /* Otherwise update the main code pointer to the end of the group. */
- /* This is compiling for real */
+ code = tempcode;
- else for (i = repeat_max - 1; i >= 0; i--)
- {
- *code++ = OP_BRAZERO + repeat_type;
+ /* For a DEFINE group, required and first character settings are not
+ relevant. */
- /* All but the final copy start a new nesting, maintaining the
- chain of brackets outstanding. */
+ if (bravalue == OP_DEFINE) break;
- if (i != 0)
- {
- int offset;
- *code++ = OP_BRA;
- offset = (bralink == NULL)? 0 : (int)(code - bralink);
- bralink = code;
- PUTINC(code, 0, offset);
- }
+ /* Handle updating of the required and first code units for other types of
+ group. Update for normal brackets of all kinds, and conditions with two
+ branches (see code above). If the bracket is followed by a quantifier with
+ zero repeat, we have to back off. Hence the definition of zeroreqcu and
+ zerofirstcu outside the main loop so that they can be accessed for the back
+ off. */
- memcpy(code, previous, CU2BYTES(len));
- code += len;
- }
+ zeroreqcu = reqcu;
+ zeroreqcuflags = reqcuflags;
+ zerofirstcu = firstcu;
+ zerofirstcuflags = firstcuflags;
+ groupsetfirstcu = FALSE;
- /* Now chain through the pending brackets, and fill in their length
- fields (which are holding the chain links pro tem). */
+ if (bravalue >= OP_ONCE) /* Not an assertion */
+ {
+ /* If we have not yet set a firstcu in this branch, take it from the
+ subpattern, remembering that it was set here so that a repeat of more
+ than one can replicate it as reqcu if necessary. If the subpattern has
+ no firstcu, set "none" for the whole branch. In both cases, a zero
+ repeat forces firstcu to "none". */
- while (bralink != NULL)
+ if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
+ {
+ if (subfirstcuflags >= 0)
{
- int oldlinkoffset;
- int offset = (int)(code - bralink + 1);
- PCRE2_UCHAR *bra = code - offset;
- oldlinkoffset = GET(bra, 1);
- bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
- *code++ = OP_KET;
- PUTINC(code, 0, offset);
- PUT(bra, 1, offset);
+ firstcu = subfirstcu;
+ firstcuflags = subfirstcuflags;
+ groupsetfirstcu = TRUE;
}
+ else firstcuflags = REQ_NONE;
+ zerofirstcuflags = REQ_NONE;
}
- /* If the maximum is unlimited, set a repeater in the final copy. For
- ONCE brackets, that's all we need to do. However, possessively repeated
- ONCE brackets can be converted into non-capturing brackets, as the
- behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
- deal with possessive ONCEs specially.
-
- Otherwise, when we are doing the actual compile phase, check to see
- whether this group is one that could match an empty string. If so,
- convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
- that runtime checking can be done. [This check is also applied to ONCE
- groups at runtime, but in a different way.]
-
- Then, if the quantifier was possessive and the bracket is not a
- conditional, we convert the BRA code to the POS form, and the KET code to
- KETRPOS. (It turns out to be convenient at runtime to detect this kind of
- subpattern at both the start and at the end.) The use of special opcodes
- makes it possible to reduce greatly the stack usage in pcre2_match(). If
- the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
-
- Then, if the minimum number of matches is 1 or 0, cancel the possessive
- flag so that the default action below, of wrapping everything inside
- atomic brackets, does not happen. When the minimum is greater than 1,
- there will be earlier copies of the group, and so we still have to wrap
- the whole thing. */
+ /* If firstcu was previously set, convert the subpattern's firstcu
+ into reqcu if there wasn't one, using the vary flag that was in
+ existence beforehand. */
- else
+ else if (subfirstcuflags >= 0 && subreqcuflags < 0)
{
- PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE;
- PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1);
+ subreqcu = subfirstcu;
+ subreqcuflags = subfirstcuflags | tempreqvary;
+ }
- /* Convert possessive ONCE brackets to non-capturing */
+ /* If the subpattern set a required code unit (or set a first code unit
+ that isn't really the first code unit - see above), set it. */
- if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
- possessive_quantifier) *bracode = OP_BRA;
+ if (subreqcuflags >= 0)
+ {
+ reqcu = subreqcu;
+ reqcuflags = subreqcuflags;
+ }
+ }
+
+ /* For a forward assertion, we take the reqcu, if set, provided that the
+ group has also set a firstcu. This can be helpful if the pattern that
+ follows the assertion doesn't set a different char. For example, it's
+ useful for /(?=abcde).+/. We can't set firstcu for an assertion, however
+ because it leads to incorrect effect for patterns such as /(?=a)a.+/ when
+ the "real" "a" would then become a reqcu instead of a firstcu. This is
+ overcome by a scan at the end if there's no firstcu, looking for an
+ asserted first char. A similar effect for patterns like /(?=.*X)X$/ means
+ we must only take the reqcu when the group also set a firstcu. Otherwise,
+ in that example, 'X' ends up set for both. */
+
+ else if (bravalue == OP_ASSERT && subreqcuflags >= 0 &&
+ subfirstcuflags >= 0)
+ {
+ reqcu = subreqcu;
+ reqcuflags = subreqcuflags;
+ }
- /* For non-possessive ONCE brackets, all we need to do is to
- set the KET. */
+ break; /* End of nested group handling */
- if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
- *ketcode = OP_KETRMAX + repeat_type;
- /* Handle non-ONCE brackets and possessive ONCEs (which have been
- converted to non-capturing above). */
+ /* ===================================================================*/
+ /* Handle named backreferences and recursions. */
- else
- {
- /* In the compile phase, check whether the group could match an empty
- string. */
+ case META_BACKREF_BYNAME:
+ case META_RECURSE_BYNAME:
+ {
+ int count, index;
+ PCRE2_SPTR name;
+ BOOL is_dupname = FALSE;
+ named_group *ng = cb->named_groups;
+ uint32_t length = *(++pptr);
- if (lengthptr == NULL)
- {
- PCRE2_UCHAR *scode = bracode;
- do
- {
- int count = 0;
- int rc = could_be_empty_branch(scode, ketcode, utf, cb, FALSE,
- NULL, &count);
- if (rc < 0)
- {
- *errorcodeptr = ERR86;
- goto FAILED;
- }
- if (rc > 0)
- {
- *bracode += OP_SBRA - OP_BRA;
- break;
- }
- scode += GET(scode, 1);
- }
- while (*scode == OP_ALT);
+ GETPLUSOFFSET(offset, pptr);
+ name = cb->start_pattern + offset;
- /* A conditional group with only one branch has an implicit empty
- alternative branch. */
+ /* In the first pass, the names generated in the pre-pass are available,
+ but the main name table has not yet been created. Scan the list of names
+ generated in the pre-pass in order to get a number and whether or not
+ this name is duplicated. */
- if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
- *bracode = OP_SCOND;
- }
+ groupnumber = 0;
+ for (i = 0; i < cb->names_found; i++, ng++)
+ {
+ if (length == ng->length &&
+ PRIV(strncmp)(name, ng->name, length) == 0)
+ {
+ is_dupname = ng->isdup;
+ groupnumber = ng->number;
- /* Handle possessive quantifiers. */
+ /* For a recursion, that's all that is needed. We can now go to
+ the code above that handles numerical recursion, applying it to
+ the first group with the given name. */
- if (possessive_quantifier)
+ if (meta == META_RECURSE_BYNAME)
{
- /* For COND brackets, we wrap the whole thing in a possessively
- repeated non-capturing bracket, because we have not invented POS
- versions of the COND opcodes. */
+ meta_arg = groupnumber;
+ goto HANDLE_NUMERICAL_RECURSION;
+ }
- if (*bracode == OP_COND || *bracode == OP_SCOND)
- {
- int nlen = (int)(code - bracode);
- memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
- code += 1 + LINK_SIZE;
- nlen += 1 + LINK_SIZE;
- *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
- *code++ = OP_KETRPOS;
- PUTINC(code, 0, nlen);
- PUT(bracode, 1, nlen);
- }
+ /* For a back reference, update the back reference map and the
+ maximum back reference. Then, for each group, we must check to
+ see if it is recursive, that is, it is inside the group that it
+ references. A flag is set so that the group can be made atomic.
+ */
- /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
+ cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
+ if (groupnumber > cb->top_backref)
+ cb->top_backref = groupnumber;
- else
+ for (oc = cb->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == groupnumber)
{
- *bracode += 1; /* Switch to xxxPOS opcodes */
- *ketcode = OP_KETRPOS;
+ oc->flag = TRUE;
+ break;
}
-
- /* If the minimum is zero, mark it as possessive, then unset the
- possessive flag when the minimum is 0 or 1. */
-
- if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
- if (repeat_min < 2) possessive_quantifier = FALSE;
}
-
- /* Non-possessive quantifier */
-
- else *ketcode = OP_KETRMAX + repeat_type;
}
}
- }
-
- /* If previous is OP_FAIL, it was generated by an empty class []
- (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be
- generated, that is by (*FAIL) or (?!), set previous to NULL, which gives a
- "nothing to repeat" error above. We can just ignore the repeat in empty
- class case. */
- else if (*previous == OP_FAIL) goto END_REPEAT;
+ /* If the name was not found we have a bad reference. */
- /* Else there's some kind of shambles */
-
- else
- {
- *errorcodeptr = ERR10;
- goto FAILED;
- }
-
- /* If the character following a repeat is '+', possessive_quantifier is
- TRUE. For some opcodes, there are special alternative opcodes for this
- case. For anything else, we wrap the entire repeated item inside OP_ONCE
- brackets. Logically, the '+' notation is just syntactic sugar, taken from
- Sun's Java package, but the special opcodes can optimize it.
-
- Some (but not all) possessively repeated subpatterns have already been
- completely handled in the code just above. For them, possessive_quantifier
- is always FALSE at this stage. Note that the repeated item starts at
- tempcode, not at previous, which might be the first part of a string whose
- (former) last char we repeated. */
-
- if (possessive_quantifier)
- {
- int len;
-
- /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
- However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
- {5,}, or {5,10}). We skip over an EXACT item; if the length of what
- remains is greater than zero, there's a further opcode that can be
- handled. If not, do nothing, leaving the EXACT alone. */
-
- switch(*tempcode)
+ if (groupnumber == 0)
{
- case OP_TYPEEXACT:
- tempcode += PRIV(OP_lengths)[*tempcode] +
- ((tempcode[1 + IMM2_SIZE] == OP_PROP
- || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
- break;
-
- /* CHAR opcodes are used for exacts whose count is 1. */
-
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_EXACT:
- case OP_EXACTI:
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- tempcode += PRIV(OP_lengths)[*tempcode];
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(tempcode[-1]))
- tempcode += GET_EXTRALEN(tempcode[-1]);
-#endif
- break;
-
- /* For the class opcodes, the repeat operator appears at the end;
- adjust tempcode to point to it. */
-
- case OP_CLASS:
- case OP_NCLASS:
- tempcode += 1 + 32/sizeof(PCRE2_UCHAR);
- break;
-
-#ifdef SUPPORT_WIDE_CHARS
- case OP_XCLASS:
- tempcode += GET(tempcode, 1);
- break;
-#endif
+ *errorcodeptr = ERR15;
+ cb->erroroffset = offset;
+ return 0;
}
- /* If tempcode is equal to code (which points to the end of the repeated
- item), it means we have skipped an EXACT item but there is no following
- QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
- all other cases, tempcode will be pointing to the repeat opcode, and will
- be less than code, so the value of len will be greater than 0. */
+ /* If a back reference name is not duplicated, we can handle it as
+ a numerical reference. */
- len = (int)(code - tempcode);
- if (len > 0)
+ if (!is_dupname)
{
- unsigned int repcode = *tempcode;
+ meta_arg = groupnumber;
+ goto HANDLE_SINGLE_REFERENCE;
+ }
- /* There is a table for possessifying opcodes, all of which are less
- than OP_CALLOUT. A zero entry means there is no possessified version.
- */
+ /* If a back reference name is duplicated, we generate a different
+ opcode to a numerical back reference. In the second pass we must
+ search for the index and count in the final name table. */
- if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
- *tempcode = opcode_possessify[repcode];
+ count = 0; /* Values for first pass (avoids compiler warning) */
+ index = 0;
+ if (lengthptr == NULL && !find_dupname_details(name, length, &index,
+ &count, errorcodeptr, cb)) return 0;
- /* For opcode without a special possessified version, wrap the item in
- ONCE brackets. */
-
- else
- {
- memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
- code += 1 + LINK_SIZE;
- len += 1 + LINK_SIZE;
- tempcode[0] = OP_ONCE;
- *code++ = OP_KET;
- PUTINC(code, 0, len);
- PUT(tempcode, 1, len);
- }
- }
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
+ PUT2INC(code, 0, index);
+ PUT2INC(code, 0, count);
}
-
- /* In all case we no longer have a previous item. We also set the
- "follows varying string" flag for subsequently encountered reqcus if
- it isn't already set and we have just passed a varying length item. */
-
- END_REPEAT:
- previous = NULL;
- cb->req_varyopt |= reqvary;
break;
/* ===================================================================*/
- /* Start of nested parenthesized sub-expression, or lookahead or lookbehind
- or option setting or condition or all the other extended parenthesis forms.
- We must save the current high-water-mark for the forward reference list so
- that we know where they start for this group. However, because the list may
- be extended when there are very many forward references (usually the result
- of a replicated inner group), we must use an offset rather than an absolute
- address. Note that (?# comments are dealt with at the top of the loop;
- they do not get this far. */
+ /* Handle a numerical callout. */
+
+ case META_CALLOUT_NUMBER:
+ code[0] = OP_CALLOUT;
+ PUT(code, 1, pptr[1]); /* Offset to next pattern item */
+ PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */
+ code[1 + 2*LINK_SIZE] = pptr[3];
+ pptr += 3;
+ code += PRIV(OP_lengths)[OP_CALLOUT];
+ break;
- case CHAR_LEFT_PARENTHESIS:
- ptr++;
- /* Deal with various "verbs" that can be introduced by '*'. */
+ /* ===================================================================*/
+ /* Handle a callout with a string argument. In the pre-pass we just compute
+ the length without generating anything. The length in pptr[3] includes both
+ delimiters; in the actual compile only the first one is copied, but a
+ terminating zero is added. Any doubled delimiters within the string make
+ this an overestimate, but it is not worth bothering about. */
- if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
- || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))
+ case META_CALLOUT_STRING:
+ if (lengthptr != NULL)
{
- int i, namelen;
- int arglen = 0;
- const char *vn = verbnames;
- PCRE2_SPTR name = ptr + 1;
- PCRE2_SPTR arg = NULL;
- previous = NULL;
- ptr++;
-
- /* Increment ptr, set namelen, check length */
-
- READ_NAME(ctype_letter, ERR60, *errorcodeptr);
+ *lengthptr += pptr[3] + (1 + 4*LINK_SIZE);
+ pptr += 3;
+ SKIPOFFSET(pptr);
+ }
- /* It appears that Perl allows any characters whatsoever, other than
- a closing parenthesis, to appear in arguments, so we no longer insist on
- letters, digits, and underscores. Perl does not, however, do any
- interpretation within arguments, and has no means of including a closing
- parenthesis. PCRE supports escape processing but only when it is
- requested by an option. Note that check_escape() will not return values
- greater than the code unit maximum when not in UTF mode. */
+ /* In the real compile we can copy the string. The starting delimiter is
+ included so that the client can discover it if they want. We also pass the
+ start offset to help a script language give better error messages. */
- if (*ptr == CHAR_COLON)
+ else
+ {
+ PCRE2_SPTR pp;
+ uint32_t delimiter;
+ uint32_t length = pptr[3];
+ PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
+
+ code[0] = OP_CALLOUT_STR;
+ PUT(code, 1, pptr[1]); /* Offset to next pattern item */
+ PUT(code, 1 + LINK_SIZE, pptr[2]); /* Length of next pattern item */
+
+ pptr += 3;
+ GETPLUSOFFSET(offset, pptr); /* Offset to string in pattern */
+ pp = cb->start_pattern + offset;
+ delimiter = *callout_string++ = *pp++;
+ if (delimiter == CHAR_LEFT_CURLY_BRACKET)
+ delimiter = CHAR_RIGHT_CURLY_BRACKET;
+ PUT(code, 1 + 3*LINK_SIZE, (int)(offset + 1)); /* One after delimiter */
+
+ /* The syntax of the pattern was checked in the parsing scan. The length
+ includes both delimiters, but we have passed the opening one just above,
+ so we reduce length before testing it. The test is for > 1 because we do
+ not want to copy the final delimiter. This also ensures that pp[1] is
+ accessible. */
+
+ while (--length > 1)
{
- arg = ++ptr;
-
- if ((options & PCRE2_ALT_VERBNAMES) == 0)
- {
- arglen = 0;
- while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
- {
- ptr++; /* Check length as we go */
- arglen++; /* along, to avoid the */
- if ((unsigned int)arglen > MAX_MARK) /* possibility of overflow. */
- {
- *errorcodeptr = ERR76;
- goto FAILED;
- }
- }
- }
- else
+ if (*pp == delimiter && pp[1] == delimiter)
{
- /* The length check is in process_verb_names() */
- arglen = process_verb_name(&ptr, NULL, errorcodeptr, options,
- utf, cb);
- if (arglen < 0) goto FAILED;
+ *callout_string++ = delimiter;
+ pp += 2;
+ length--;
}
+ else *callout_string++ = *pp++;
}
+ *callout_string++ = CHAR_NUL;
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR60;
- goto FAILED;
- }
-
- /* Scan the table of verb names */
+ /* Set the length of the entire item, the advance to its end. */
- for (i = 0; i < verbcount; i++)
- {
- if (namelen == verbs[i].len &&
- PRIV(strncmp_c8)(name, vn, namelen) == 0)
- {
- int setverb;
+ PUT(code, 1 + 2*LINK_SIZE, (int)(callout_string - code));
+ code = callout_string;
+ }
+ break;
- /* Check for open captures before ACCEPT and convert it to
- ASSERT_ACCEPT if in an assertion. */
- if (verbs[i].op == OP_ACCEPT)
- {
- open_capitem *oc;
- if (arglen != 0)
- {
- *errorcodeptr = ERR59;
- goto FAILED;
- }
- cb->had_accept = TRUE;
+ /* ===================================================================*/
+ /* Handle repetition. The different types are all sorted out in the parsing
+ pass. */
+
+ case META_MINMAX_PLUS:
+ case META_MINMAX_QUERY:
+ case META_MINMAX:
+ repeat_min = *(++pptr);
+ repeat_max = *(++pptr);
+ goto REPEAT;
- /* In the first pass, just accumulate the length required;
- otherwise hitting (*ACCEPT) inside many nested parentheses can
- cause workspace overflow. */
+ case META_ASTERISK:
+ case META_ASTERISK_PLUS:
+ case META_ASTERISK_QUERY:
+ repeat_min = 0;
+ repeat_max = REPEAT_UNLIMITED;
+ goto REPEAT;
- for (oc = cb->open_caps; oc != NULL; oc = oc->next)
- {
- if (lengthptr != NULL)
- {
- *lengthptr += CU2BYTES(1) + IMM2_SIZE;
- }
- else
- {
- *code++ = OP_CLOSE;
- PUT2INC(code, 0, oc->number);
- }
- }
- setverb = *code++ =
- (cb->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
+ case META_PLUS:
+ case META_PLUS_PLUS:
+ case META_PLUS_QUERY:
+ repeat_min = 1;
+ repeat_max = REPEAT_UNLIMITED;
+ goto REPEAT;
- /* Do not set firstcu after *ACCEPT */
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- }
+ case META_QUERY:
+ case META_QUERY_PLUS:
+ case META_QUERY_QUERY:
+ repeat_min = 0;
+ repeat_max = 1;
- /* Handle other cases with/without an argument */
+ REPEAT:
+ if (previous_matched_char && repeat_min > 0) matched_char = TRUE;
- else if (arglen == 0) /* There is no argument */
- {
- if (verbs[i].op < 0) /* Argument is mandatory */
- {
- *errorcodeptr = ERR66;
- goto FAILED;
- }
- setverb = *code++ = verbs[i].op;
- }
+ /* Remember whether this is a variable length repeat, and default to
+ single-char opcodes. */
- else /* An argument is present */
- {
- if (verbs[i].op_arg < 0) /* Argument is forbidden */
- {
- *errorcodeptr = ERR59;
- goto FAILED;
- }
- setverb = *code++ = verbs[i].op_arg;
+ reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
+ op_type = 0;
- /* Arguments can be very long, especially in 16- and 32-bit modes,
- and can overflow the workspace in the first pass. Instead of
- putting the argument into memory, we just update the length counter
- and set up an empty argument. */
+ /* If the repeat is {1} we can ignore it. */
- if (lengthptr != NULL)
- {
- *lengthptr += arglen;
- *code++ = 0;
- }
- else
- {
- *code++ = arglen;
- if ((options & PCRE2_ALT_VERBNAMES) != 0)
- {
- PCRE2_UCHAR *memcode = code; /* code is "register" */
- (void)process_verb_name(&arg, &memcode, errorcodeptr, options,
- utf, cb);
- code = memcode;
- }
- else /* No argument processing */
- {
- memcpy(code, arg, CU2BYTES(arglen));
- code += arglen;
- }
- }
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
- *code++ = 0;
- }
+ /* Adjust first and required code units for a zero repeat. */
- switch (setverb)
- {
- case OP_THEN:
- case OP_THEN_ARG:
- cb->external_flags |= PCRE2_HASTHEN;
- break;
+ if (repeat_min == 0)
+ {
+ firstcu = zerofirstcu;
+ firstcuflags = zerofirstcuflags;
+ reqcu = zeroreqcu;
+ reqcuflags = zeroreqcuflags;
+ }
- case OP_PRUNE:
- case OP_PRUNE_ARG:
- case OP_SKIP:
- case OP_SKIP_ARG:
- cb->had_pruneorskip = TRUE;
- break;
- }
+ /* Note the greediness and possessiveness. */
- break; /* Found verb, exit loop */
- }
+ switch (meta)
+ {
+ case META_MINMAX_PLUS:
+ case META_ASTERISK_PLUS:
+ case META_PLUS_PLUS:
+ case META_QUERY_PLUS:
+ repeat_type = 0; /* Force greedy */
+ possessive_quantifier = TRUE;
+ break;
- vn += verbs[i].len + 1;
- }
+ case META_MINMAX_QUERY:
+ case META_ASTERISK_QUERY:
+ case META_PLUS_QUERY:
+ case META_QUERY_QUERY:
+ repeat_type = greedy_non_default;
+ possessive_quantifier = FALSE;
+ break;
- if (i < verbcount) continue; /* Successfully handled a verb */
- *errorcodeptr = ERR60; /* Verb not recognized */
- goto FAILED;
+ default:
+ repeat_type = greedy_default;
+ possessive_quantifier = FALSE;
+ break;
}
- /* Initialization for "real" parentheses */
+ /* Save start of previous item, in case we have to move it up in order to
+ insert something before it, and remember what it was. */
- newoptions = options;
- skipunits = 0;
- bravalue = OP_CBRA;
- reset_bracount = FALSE;
+ tempcode = previous;
+ op_previous = *previous;
- /* Deal with the extended parentheses; all are introduced by '?', and the
- appearance of any of them means that this is not a capturing group. */
+ /* Now handle repetition for the different types of item. */
- if (*ptr == CHAR_QUESTION_MARK)
+ switch (op_previous)
{
- int i, count;
- int namelen; /* Must be signed */
- uint32_t index;
- uint32_t set, unset, *optset;
- named_group *ng;
- PCRE2_SPTR name;
- PCRE2_UCHAR *slot;
+ /* If previous was a character or negated character match, abolish the
+ item and generate a repeat item instead. If a char item has a minimum of
+ more than one, ensure that it is set in reqcu - it might not be if a
+ sequence such as x{3} is the first thing in a branch because the x will
+ have gone into firstcu instead. */
- switch (*(++ptr))
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ op_type = chartypeoffset[op_previous - OP_CHAR];
+
+ /* Deal with UTF characters that take up more than one code unit. */
+
+#ifdef MAYBE_UTF_MULTI
+ if (utf && NOT_FIRSTCU(code[-1]))
{
- /* ------------------------------------------------------------ */
- case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */
- reset_bracount = TRUE;
- /* Fall through */
-
- /* ------------------------------------------------------------ */
- case CHAR_COLON: /* Non-capturing bracket */
- bravalue = OP_BRA;
- ptr++;
- break;
+ PCRE2_UCHAR *lastchar = code - 1;
+ BACKCHAR(lastchar);
+ mclength = (uint32_t)(code - lastchar); /* Length of UTF character */
+ memcpy(mcbuffer, lastchar, CU2BYTES(mclength)); /* Save the char */
+ }
+ else
+#endif /* MAYBE_UTF_MULTI */
- /* ------------------------------------------------------------ */
- case CHAR_LEFT_PARENTHESIS:
- bravalue = OP_COND; /* Conditional group */
- tempptr = ptr;
+ /* Handle the case of a single code unit - either with no UTF support, or
+ with UTF disabled, or for a single-code-unit UTF character. */
+ {
+ mcbuffer[0] = code[-1];
+ mclength = 1;
+ if (op_previous <= OP_CHARI && repeat_min > 1)
+ {
+ reqcu = mcbuffer[0];
+ reqcuflags = req_caseopt | cb->req_varyopt;
+ }
+ }
+ goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
- /* A condition can be an assertion, a number (referring to a numbered
- group's having been set), a name (referring to a named group), or 'R',
- referring to recursion. R<digits> and R&name are also permitted for
- recursion tests.
+ /* If previous was a character class or a back reference, we put the
+ repeat stuff after it, but just skip the item if the repeat was {0,0}. */
- There are ways of testing a named group: (?(name)) is used by Python;
- Perl 5.10 onwards uses (?(<name>) or (?('name')).
+#ifdef SUPPORT_WIDE_CHARS
+ case OP_XCLASS:
+#endif
+ case OP_CLASS:
+ case OP_NCLASS:
+ case OP_REF:
+ case OP_REFI:
+ case OP_DNREF:
+ case OP_DNREFI:
- There is one unfortunate ambiguity, caused by history. 'R' can be the
- recursive thing or the name 'R' (and similarly for 'R' followed by
- digits). We look for a name first; if not found, we try the other case.
+ if (repeat_max == 0)
+ {
+ code = previous;
+ goto END_REPEAT;
+ }
- For compatibility with auto-callouts, we allow a callout to be
- specified before a condition that is an assertion. First, check for the
- syntax of a callout; if found, adjust the temporary pointer that is
- used to check for an assertion condition. That's all that is needed! */
+ if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
+ *code++ = OP_CRSTAR + repeat_type;
+ else if (repeat_min == 1 && repeat_max == REPEAT_UNLIMITED)
+ *code++ = OP_CRPLUS + repeat_type;
+ else if (repeat_min == 0 && repeat_max == 1)
+ *code++ = OP_CRQUERY + repeat_type;
+ else
+ {
+ *code++ = OP_CRRANGE + repeat_type;
+ PUT2INC(code, 0, repeat_min);
+ if (repeat_max == REPEAT_UNLIMITED) repeat_max = 0; /* 2-byte encoding for max */
+ PUT2INC(code, 0, repeat_max);
+ }
+ break;
- if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
- {
- if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS)
- {
- for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
- if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
- tempptr += i + 1;
- }
- else
- {
- uint32_t delimiter = 0;
- for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
- {
- if (ptr[3] == PRIV(callout_start_delims)[i])
- {
- delimiter = PRIV(callout_end_delims)[i];
- break;
- }
- }
- if (delimiter != 0)
- {
- for (i = 4; ptr + i < cb->end_pattern; i++)
- {
- if (ptr[i] == delimiter)
- {
- if (ptr[i+1] == delimiter) i++;
- else
- {
- if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2;
- break;
- }
- }
- }
- }
- }
+ /* If previous is OP_FAIL, it was generated by an empty class []
+ (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be
+ generated, that is by (*FAIL) or (?!), disallow a quantifier at parse
+ time. We can just ignore this repeat. */
+
+ case OP_FAIL:
+ goto END_REPEAT;
+
+ /* Prior to 10.30, repeated recursions were wrapped in OP_ONCE brackets
+ because pcre2_match() could not handle backtracking into recursively
+ called groups. Now that this backtracking is available, we no longer need
+ to do this. However, we still need to replicate recursions as we do for
+ groups so as to have independent backtracking points. We can replicate
+ for the minimum number of repeats directly. For optional repeats we now
+ wrap the recursion in OP_BRA brackets and make use of the bracket
+ repetition. */
+
+ case OP_RECURSE:
+
+ /* Generate unwrapped repeats for a non-zero minimum, except when the
+ minimum is 1 and the maximum unlimited, because that can be handled with
+ OP_BRA terminated by OP_KETRMAX/MIN. When the maximum is equal to the
+ minimum, we just need to generate the appropriate additional copies.
+ Otherwise we need to generate one more, to simulate the situation when
+ the minimum is zero. */
+
+ if (repeat_min > 0 && (repeat_min != 1 || repeat_max != REPEAT_UNLIMITED))
+ {
+ int replicate = repeat_min;
+ if (repeat_min == repeat_max) replicate--;
- /* tempptr should now be pointing to the opening parenthesis of the
- assertion condition. */
+ /* In the pre-compile phase, we don't actually do the replication. We
+ just adjust the length as if we had. Do some paranoid checks for
+ potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+ integer type when available, otherwise double. */
- if (*tempptr != CHAR_LEFT_PARENTHESIS)
+ if (lengthptr != NULL)
+ {
+ PCRE2_SIZE delta = replicate*(1 + LINK_SIZE);
+ if ((INT64_OR_DOUBLE)replicate*
+ (INT64_OR_DOUBLE)(1 + LINK_SIZE) >
+ (INT64_OR_DOUBLE)INT_MAX ||
+ OFLOW_MAX - *lengthptr < delta)
{
- *errorcodeptr = ERR28;
- goto FAILED;
+ *errorcodeptr = ERR20;
+ return 0;
}
+ *lengthptr += delta;
}
- /* For conditions that are assertions, check the syntax, and then exit
- the switch. This will take control down to where bracketed groups
- are processed. The assertion will be handled as part of the group,
- but we need to identify this case because the conditional assertion may
- not be quantifier. */
-
- if (tempptr[1] == CHAR_QUESTION_MARK &&
- (tempptr[2] == CHAR_EQUALS_SIGN ||
- tempptr[2] == CHAR_EXCLAMATION_MARK ||
- (tempptr[2] == CHAR_LESS_THAN_SIGN &&
- (tempptr[3] == CHAR_EQUALS_SIGN ||
- tempptr[3] == CHAR_EXCLAMATION_MARK))))
+ else for (i = 0; i < replicate; i++)
{
- cb->iscondassert = TRUE;
- break;
+ memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
+ previous = code;
+ code += 1 + LINK_SIZE;
}
- /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
- need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
-
- code[1+LINK_SIZE] = OP_CREF;
- skipunits = 1+IMM2_SIZE;
- refsign = -1; /* => not a number */
- namelen = -1; /* => not a name; must set to avoid warning */
- name = NULL; /* Always set to avoid warning */
- recno = 0; /* Always set to avoid warning */
+ /* If the number of repeats is fixed, we are done. Otherwise, adjust
+ the counts and fall through. */
- /* Point at character after (?( */
-
- ptr++;
-
- /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
- users of PCRE2 via an application can discover which release of PCRE2
- is being used. */
-
- if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
- ptr[7] != CHAR_RIGHT_PARENTHESIS)
- {
- BOOL ge = FALSE;
- int major = 0;
- int minor = 0;
-
- ptr += 7;
- if (*ptr == CHAR_GREATER_THAN_SIGN)
- {
- ge = TRUE;
- ptr++;
- }
+ if (repeat_min == repeat_max) break;
+ if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min;
+ repeat_min = 0;
+ }
- /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
- references its argument twice. */
+ /* Wrap the recursion call in OP_BRA brackets. */
- if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
- {
- *errorcodeptr = ERR79;
- goto FAILED;
- }
+ memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
+ op_previous = *previous = OP_BRA;
+ PUT(previous, 1, 2 + 2*LINK_SIZE);
+ previous[2 + 2*LINK_SIZE] = OP_KET;
+ PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
+ code += 2 + 2 * LINK_SIZE;
+ length_prevgroup = 3 + 3*LINK_SIZE;
+ group_return = -1; /* Set "may match empty string" */
+
+ /* Now treat as a repeated OP_BRA. */
+ /* Fall through */
+
+ /* If previous was a bracket group, we may have to replicate it in
+ certain cases. Note that at this point we can encounter only the "basic"
+ bracket opcodes such as BRA and CBRA, as this is the place where they get
+ converted into the more special varieties such as BRAPOS and SBRA.
+ Originally, PCRE did not allow repetition of assertions, but now it does,
+ for Perl compatibility. */
+
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ONCE:
+ case OP_BRA:
+ case OP_CBRA:
+ case OP_COND:
+ {
+ int len = (int)(code - previous);
+ PCRE2_UCHAR *bralink = NULL;
+ PCRE2_UCHAR *brazeroptr = NULL;
- while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
- if (*ptr == CHAR_DOT)
- {
- ptr++;
- while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
- if (minor < 10) minor *= 10;
- }
+ /* Repeating a DEFINE group (or any group where the condition is always
+ FALSE and there is only one branch) is pointless, but Perl allows the
+ syntax, so we just ignore the repeat. */
- if (*ptr != CHAR_RIGHT_PARENTHESIS || minor > 99)
- {
- *errorcodeptr = ERR79;
- goto FAILED;
- }
+ if (op_previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
+ previous[GET(previous, 1)] != OP_ALT)
+ goto END_REPEAT;
- if (ge)
- code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
- (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
- OP_TRUE : OP_FALSE;
- else
- code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
- OP_TRUE : OP_FALSE;
+ /* There is no sense in actually repeating assertions. The only
+ potential use of repetition is in cases when the assertion is optional.
+ Therefore, if the minimum is greater than zero, just ignore the repeat.
+ If the maximum is not zero or one, set it to 1. */
- ptr++;
- skipunits = 1;
- break; /* End of condition processing */
+ if (op_previous < OP_ONCE) /* Assertion */
+ {
+ if (repeat_min > 0) goto END_REPEAT;
+ if (repeat_max > 1) repeat_max = 1;
}
- /* Check for a test for recursion in a named group. */
+ /* The case of a zero minimum is special because of the need to stick
+ OP_BRAZERO in front of it, and because the group appears once in the
+ data, whereas in other cases it appears the minimum number of times. For
+ this reason, it is simplest to treat this case separately, as otherwise
+ the code gets far too messy. There are several special subcases when the
+ minimum is zero. */
- if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
+ if (repeat_min == 0)
{
- terminator = -1;
- ptr += 2;
- code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */
- }
+ /* If the maximum is also zero, we used to just omit the group from
+ the output altogether, like this:
- /* Check for a test for a named group's having been set, using the Perl
- syntax (?(<name>) or (?('name'), and also allow for the original PCRE
- syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
+ ** if (repeat_max == 0)
+ ** {
+ ** code = previous;
+ ** goto END_REPEAT;
+ ** }
- else if (*ptr == CHAR_LESS_THAN_SIGN)
- {
- terminator = CHAR_GREATER_THAN_SIGN;
- ptr++;
- }
- else if (*ptr == CHAR_APOSTROPHE)
- {
- terminator = CHAR_APOSTROPHE;
- ptr++;
- }
- else
- {
- terminator = CHAR_NULL;
- if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
- else if (IS_DIGIT(*ptr)) refsign = 0;
- }
+ However, that fails when a group or a subgroup within it is
+ referenced as a subroutine from elsewhere in the pattern, so now we
+ stick in OP_SKIPZERO in front of it so that it is skipped on
+ execution. As we don't have a list of which groups are referenced, we
+ cannot do this selectively.
- /* Handle a number */
+ If the maximum is 1 or unlimited, we just have to stick in the
+ BRAZERO and do no more at this point. */
- if (refsign >= 0)
- {
- while (IS_DIGIT(*ptr))
+ if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{
- if (recno > INT_MAX / 10 - 1) /* Integer overflow */
+ memmove(previous + 1, previous, CU2BYTES(len));
+ code++;
+ if (repeat_max == 0)
{
- while (IS_DIGIT(*ptr)) ptr++;
- *errorcodeptr = ERR61;
- goto FAILED;
+ *previous++ = OP_SKIPZERO;
+ goto END_REPEAT;
}
- recno = recno * 10 + (int)(*ptr - CHAR_0);
- ptr++;
+ brazeroptr = previous; /* Save for possessive optimizing */
+ *previous++ = OP_BRAZERO + repeat_type;
}
- }
- /* Otherwise we expect to read a name; anything else is an error. When
- the referenced name is one of a number of duplicates, a different
- opcode is used and it needs more memory. Unfortunately we cannot tell
- whether this is the case in the first pass, so we have to allow for
- more memory always. In the second pass, the additional to skipunits
- happens later. */
+ /* If the maximum is greater than 1 and limited, we have to replicate
+ in a nested fashion, sticking OP_BRAZERO before each set of brackets.
+ The first one has to be handled carefully because it's the original
+ copy, which has to be moved up. The remainder can be handled by code
+ that is common with the non-zero minimum case below. We have to
+ adjust the value or repeat_max, since one less copy is required. */
- else
- {
- if (IS_DIGIT(*ptr))
+ else
{
- *errorcodeptr = ERR44; /* Group name must start with non-digit */
- goto FAILED;
+ int linkoffset;
+ memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
+ code += 2 + LINK_SIZE;
+ *previous++ = OP_BRAZERO + repeat_type;
+ *previous++ = OP_BRA;
+
+ /* We chain together the bracket link offset fields that have to be
+ filled in later when the ends of the brackets are reached. */
+
+ linkoffset = (bralink == NULL)? 0 : (int)(previous - bralink);
+ bralink = previous;
+ PUTINC(previous, 0, linkoffset);
}
- if (!MAX_255(*ptr) || (cb->ctypes[*ptr] & ctype_word) == 0)
- {
- *errorcodeptr = ERR28; /* Assertion expected */
- goto FAILED;
- }
- name = ptr;
- /* Increment ptr, set namelen, check length */
- READ_NAME(ctype_word, ERR48, *errorcodeptr);
- if (lengthptr != NULL) skipunits += IMM2_SIZE;
- }
-
- /* Check the terminator */
- if ((terminator > 0 && *ptr++ != (PCRE2_UCHAR)terminator) ||
- *ptr++ != CHAR_RIGHT_PARENTHESIS)
- {
- ptr--; /* Error offset */
- *errorcodeptr = ERR26; /* Malformed number or name */
- goto FAILED;
+ if (repeat_max != REPEAT_UNLIMITED) repeat_max--;
}
- /* Do no further checking in the pre-compile phase. */
+ /* If the minimum is greater than zero, replicate the group as many
+ times as necessary, and adjust the maximum to the number of subsequent
+ copies that we need. */
- if (lengthptr != NULL) break;
-
- /* In the real compile we do the work of looking for the actual
- reference. If refsign is not negative, it means we have a number in
- recno. */
-
- if (refsign >= 0)
+ else
{
- if (recno <= 0)
- {
- *errorcodeptr = ERR35;
- goto FAILED;
- }
- if (refsign != 0) recno = (refsign == CHAR_MINUS)?
- (cb->bracount + 1) - recno : recno + cb->bracount;
- if (recno <= 0 || (uint32_t)recno > cb->final_bracount)
+ if (repeat_min > 1)
{
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- PUT2(code, 2+LINK_SIZE, recno);
- if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
- break;
- }
-
- /* Otherwise look for the name. */
+ /* In the pre-compile phase, we don't actually do the replication.
+ We just adjust the length as if we had. Do some paranoid checks for
+ potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+ integer type when available, otherwise double. */
- slot = cb->name_table;
- for (i = 0; i < cb->names_found; i++)
- {
- if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0) break;
- slot += cb->name_entry_size;
- }
-
- /* Found the named subpattern. If the name is duplicated, add one to
- the opcode to change CREF/RREF into DNCREF/DNRREF and insert
- appropriate data values. Otherwise, just insert the unique subpattern
- number. */
+ if (lengthptr != NULL)
+ {
+ PCRE2_SIZE delta = (repeat_min - 1)*length_prevgroup;
+ if ((INT64_OR_DOUBLE)(repeat_min - 1)*
+ (INT64_OR_DOUBLE)length_prevgroup >
+ (INT64_OR_DOUBLE)INT_MAX ||
+ OFLOW_MAX - *lengthptr < delta)
+ {
+ *errorcodeptr = ERR20;
+ return 0;
+ }
+ *lengthptr += delta;
+ }
- if (i < cb->names_found)
- {
- int offset = i; /* Offset of first name found */
+ /* This is compiling for real. If there is a set first code unit
+ for the group, and we have not yet set a "required code unit", set
+ it. */
- count = 0;
- for (;;)
- {
- recno = GET2(slot, 0); /* Number for last found */
- if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
- count++;
- if (++i >= cb->names_found) break;
- slot += cb->name_entry_size;
- if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
- (slot+IMM2_SIZE)[namelen] != 0) break;
+ else
+ {
+ if (groupsetfirstcu && reqcuflags < 0)
+ {
+ reqcu = firstcu;
+ reqcuflags = firstcuflags;
+ }
+ for (i = 1; (uint32_t)i < repeat_min; i++)
+ {
+ memcpy(code, previous, CU2BYTES(len));
+ code += len;
+ }
+ }
}
- if (count > 1)
- {
- PUT2(code, 2+LINK_SIZE, offset);
- PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
- skipunits += IMM2_SIZE;
- code[1+LINK_SIZE]++;
- }
- else /* Not a duplicated name */
- {
- PUT2(code, 2+LINK_SIZE, recno);
- }
+ if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min;
}
- /* If terminator == CHAR_NULL it means that the name followed directly
- after the opening parenthesis [e.g. (?(abc)...] and in this case there
- are some further alternatives to try. For the cases where terminator !=
- CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
- we have now checked all the possibilities, so give an error. */
+ /* This code is common to both the zero and non-zero minimum cases. If
+ the maximum is limited, it replicates the group in a nested fashion,
+ remembering the bracket starts on a stack. In the case of a zero
+ minimum, the first one was set up above. In all cases the repeat_max
+ now specifies the number of additional copies needed. Again, we must
+ remember to replicate entries on the forward reference list. */
- else if (terminator != CHAR_NULL)
+ if (repeat_max != REPEAT_UNLIMITED)
{
- *errorcodeptr = ERR15;
- goto FAILED;
- }
-
- /* Check for (?(R) for recursion. Allow digits after R to specify a
- specific group number. */
+ /* In the pre-compile phase, we don't actually do the replication. We
+ just adjust the length as if we had. For each repetition we must add
+ 1 to the length for BRAZERO and for all but the last repetition we
+ must add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
+ paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type
+ is a 64-bit integer type when available, otherwise double. */
- else if (*name == CHAR_R)
- {
- recno = 0;
- for (i = 1; i < namelen; i++)
+ if (lengthptr != NULL && repeat_max > 0)
{
- if (!IS_DIGIT(name[i]))
- {
- *errorcodeptr = ERR15; /* Non-existent subpattern */
- goto FAILED;
- }
- if (recno > INT_MAX / 10 - 1) /* Integer overflow */
+ PCRE2_SIZE delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
+ 2 - 2*LINK_SIZE; /* Last one doesn't nest */
+ if ((INT64_OR_DOUBLE)repeat_max *
+ (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+ > (INT64_OR_DOUBLE)INT_MAX ||
+ OFLOW_MAX - *lengthptr < delta)
{
- *errorcodeptr = ERR61;
- goto FAILED;
+ *errorcodeptr = ERR20;
+ return 0;
}
- recno = recno * 10 + name[i] - CHAR_0;
+ *lengthptr += delta;
}
- if (recno == 0) recno = RREF_ANY;
- code[1+LINK_SIZE] = OP_RREF; /* Change test type */
- PUT2(code, 2+LINK_SIZE, recno);
- }
- /* Similarly, check for the (?(DEFINE) "condition", which is always
- false. During compilation we set OP_DEFINE to distinguish this from
- other OP_FALSE conditions so that it can be checked for having only one
- branch, but after that the opcode is changed to OP_FALSE. */
+ /* This is compiling for real */
- else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
- {
- code[1+LINK_SIZE] = OP_DEFINE;
- skipunits = 1;
- }
+ else for (i = repeat_max - 1; i >= 0; i--)
+ {
+ *code++ = OP_BRAZERO + repeat_type;
- /* Reference to an unidentified subpattern. */
-
- else
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- break;
+ /* All but the final copy start a new nesting, maintaining the
+ chain of brackets outstanding. */
+ if (i != 0)
+ {
+ int linkoffset;
+ *code++ = OP_BRA;
+ linkoffset = (bralink == NULL)? 0 : (int)(code - bralink);
+ bralink = code;
+ PUTINC(code, 0, linkoffset);
+ }
- /* ------------------------------------------------------------ */
- case CHAR_EQUALS_SIGN: /* Positive lookahead */
- bravalue = OP_ASSERT;
- cb->assert_depth += 1;
- ptr++;
- break;
+ memcpy(code, previous, CU2BYTES(len));
+ code += len;
+ }
- /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
- thing to do, but Perl allows all assertions to be quantified, and when
- they contain capturing parentheses there may be a potential use for
- this feature. Not that that applies to a quantified (?!) but we allow
- it for uniformity. */
+ /* Now chain through the pending brackets, and fill in their length
+ fields (which are holding the chain links pro tem). */
- /* ------------------------------------------------------------ */
- case CHAR_EXCLAMATION_MARK: /* Negative lookahead */
- ptr++;
- if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
- ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
- (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
- {
- *code++ = OP_FAIL;
- previous = NULL;
- continue;
+ while (bralink != NULL)
+ {
+ int oldlinkoffset;
+ int linkoffset = (int)(code - bralink + 1);
+ PCRE2_UCHAR *bra = code - linkoffset;
+ oldlinkoffset = GET(bra, 1);
+ bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
+ *code++ = OP_KET;
+ PUTINC(code, 0, linkoffset);
+ PUT(bra, 1, linkoffset);
+ }
}
- bravalue = OP_ASSERT_NOT;
- cb->assert_depth += 1;
- break;
+ /* If the maximum is unlimited, set a repeater in the final copy. For
+ ONCE brackets, that's all we need to do. However, possessively repeated
+ ONCE brackets can be converted into non-capturing brackets, as the
+ behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
+ deal with possessive ONCEs specially.
+
+ Otherwise, when we are doing the actual compile phase, check to see
+ whether this group is one that could match an empty string. If so,
+ convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
+ that runtime checking can be done. [This check is also applied to ONCE
+ groups at runtime, but in a different way.]
+
+ Then, if the quantifier was possessive and the bracket is not a
+ conditional, we convert the BRA code to the POS form, and the KET code to
+ KETRPOS. (It turns out to be convenient at runtime to detect this kind of
+ subpattern at both the start and at the end.) The use of special opcodes
+ makes it possible to reduce greatly the stack usage in pcre2_match(). If
+ the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
+
+ Then, if the minimum number of matches is 1 or 0, cancel the possessive
+ flag so that the default action below, of wrapping everything inside
+ atomic brackets, does not happen. When the minimum is greater than 1,
+ there will be earlier copies of the group, and so we still have to wrap
+ the whole thing. */
- /* ------------------------------------------------------------ */
- case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */
- switch (ptr[1])
+ else
{
- case CHAR_EQUALS_SIGN: /* Positive lookbehind */
- bravalue = OP_ASSERTBACK;
- cb->assert_depth += 1;
- ptr += 2;
- break;
+ PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE;
+ PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1);
- case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */
- bravalue = OP_ASSERTBACK_NOT;
- cb->assert_depth += 1;
- ptr += 2;
- break;
+ /* Convert possessive ONCE brackets to non-capturing */
- /* Must be a name definition - as the syntax was checked in the
- pre-pass, we can assume here that it is valid. Skip over the name
- and go to handle the numbered group. */
-
- default:
- while (*(++ptr) != CHAR_GREATER_THAN_SIGN);
- ptr++;
- goto NUMBERED_GROUP;
- }
- break;
-
-
- /* ------------------------------------------------------------ */
- case CHAR_GREATER_THAN_SIGN: /* One-time brackets */
- bravalue = OP_ONCE;
- ptr++;
- break;
+ if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
+ /* For non-possessive ONCE brackets, all we need to do is to
+ set the KET. */
- /* ------------------------------------------------------------ */
- case CHAR_C: /* Callout */
- previous_callout = code; /* Save for later completion */
- after_manual_callout = 1; /* Skip one item before completing */
- ptr++; /* Character after (?C */
+ if (*bracode == OP_ONCE) *ketcode = OP_KETRMAX + repeat_type;
- /* A callout may have a string argument, delimited by one of a fixed
- number of characters, or an undelimited numerical argument, or no
- argument, which is the same as (?C0). Different opcodes are used for
- the two cases. */
-
- if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
- {
- uint32_t delimiter = 0;
+ /* Handle non-ONCE brackets and possessive ONCEs (which have been
+ converted to non-capturing above). */
- for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
+ else
{
- if (*ptr == PRIV(callout_start_delims)[i])
+ /* In the compile phase, adjust the opcode if the group can match
+ an empty string. For a conditional group with only one branch, the
+ value of group_return will not show "could be empty", so we must
+ check that separately. */
+
+ if (lengthptr == NULL)
{
- delimiter = PRIV(callout_end_delims)[i];
- break;
+ if (group_return < 0) *bracode += OP_SBRA - OP_BRA;
+ if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
+ *bracode = OP_SCOND;
}
- }
- if (delimiter == 0)
- {
- *errorcodeptr = ERR82;
- goto FAILED;
- }
+ /* Handle possessive quantifiers. */
- /* During the pre-compile phase, we parse the string and update the
- length. There is no need to generate any code. (In fact, the string
- has already been parsed in the pre-pass that looks for named
- parentheses, but it does no harm to leave this code in.) */
-
- if (lengthptr != NULL) /* Only check the string */
- {
- PCRE2_SPTR start = ptr;
- do
+ if (possessive_quantifier)
{
- if (++ptr >= cb->end_pattern)
+ /* For COND brackets, we wrap the whole thing in a possessively
+ repeated non-capturing bracket, because we have not invented POS
+ versions of the COND opcodes. */
+
+ if (*bracode == OP_COND || *bracode == OP_SCOND)
{
- *errorcodeptr = ERR81;
- ptr = start; /* To give a more useful message */
- goto FAILED;
+ int nlen = (int)(code - bracode);
+ memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
+ code += 1 + LINK_SIZE;
+ nlen += 1 + LINK_SIZE;
+ *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
+ *code++ = OP_KETRPOS;
+ PUTINC(code, 0, nlen);
+ PUT(bracode, 1, nlen);
}
- if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
- }
- while (ptr[0] != delimiter);
-
- /* Start points to the opening delimiter, ptr points to the
- closing delimiter. We must allow for including the delimiter and
- for the terminating zero. Any doubled delimiters within the string
- make this an overestimate, but it is not worth bothering about. */
-
- (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
- }
- /* In the real compile we can copy the string, knowing that it is
- syntactically OK. The starting delimiter is included so that the
- client can discover it if they want. We also pass the start offset to
- help a script language give better error messages. */
+ /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
- else
- {
- PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
- *callout_string++ = *ptr++;
- PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
- for(;;)
- {
- if (*ptr == delimiter)
+ else
{
- if (ptr[1] == delimiter) ptr++; else break;
+ *bracode += 1; /* Switch to xxxPOS opcodes */
+ *ketcode = OP_KETRPOS;
}
- *callout_string++ = *ptr++;
- }
- *callout_string++ = CHAR_NULL;
- code[0] = OP_CALLOUT_STR;
- PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */
- PUT(code, 1 + LINK_SIZE, 0); /* Default length */
- PUT(code, 1 + 2*LINK_SIZE, /* Compute size */
- (int)(callout_string - code));
- code = callout_string;
- }
- /* Advance to what should be the closing parenthesis, which is
- checked below. */
+ /* If the minimum is zero, mark it as possessive, then unset the
+ possessive flag when the minimum is 0 or 1. */
- ptr++;
- }
-
- /* Handle a callout with an optional numerical argument, which must be
- less than or equal to 255. A missing argument gives 0. */
-
- else
- {
- int n = 0;
- code[0] = OP_CALLOUT; /* Numerical callout */
- while (IS_DIGIT(*ptr))
- {
- n = n * 10 + *ptr++ - CHAR_0;
- if (n > 255)
- {
- *errorcodeptr = ERR38;
- goto FAILED;
+ if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
+ if (repeat_min < 2) possessive_quantifier = FALSE;
}
- }
- PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */
- PUT(code, 1 + LINK_SIZE, 0); /* Default length */
- code[1 + 2*LINK_SIZE] = n; /* Callout number */
- code += PRIV(OP_lengths)[OP_CALLOUT];
- }
- /* Both formats must have a closing parenthesis */
+ /* Non-possessive quantifier */
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR39;
- goto FAILED;
+ else *ketcode = OP_KETRMAX + repeat_type;
+ }
}
+ }
+ break;
- /* Callouts cannot be quantified. */
+ /* If previous was a character type match (\d or similar), abolish it and
+ create a suitable repeat item. The code is shared with single-character
+ repeats by setting op_type to add a suitable offset into repeat_type.
+ Note the the Unicode property types will be present only when
+ SUPPORT_UNICODE is defined, but we don't wrap the little bits of code
+ here because it just makes it horribly messy. */
- previous = NULL;
- continue;
+ default:
+ if (op_previous >= OP_EODN) /* Not a character type - internal error */
+ {
+ *errorcodeptr = ERR10;
+ return 0;
+ }
+ else
+ {
+ int prop_type, prop_value;
+ PCRE2_UCHAR *oldcode;
+ op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
+ mclength = 0; /* Not a character */
- /* ------------------------------------------------------------ */
- case CHAR_P: /* Python-style named subpattern handling */
- if (*(++ptr) == CHAR_EQUALS_SIGN ||
- *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */
+ if (op_previous == OP_PROP || op_previous == OP_NOTPROP)
{
- is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
- terminator = CHAR_RIGHT_PARENTHESIS;
- goto NAMED_REF_OR_RECURSE;
+ prop_type = previous[1];
+ prop_value = previous[2];
}
- else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */
+ else
{
- *errorcodeptr = ERR41;
- goto FAILED;
+ /* Come here from just above with a character in mcbuffer/mclength. */
+ OUTPUT_SINGLE_REPEAT:
+ prop_type = prop_value = -1;
}
- /* Fall through to handle (?P< as (?< is handled */
+ /* At this point, if prop_type == prop_value == -1 we either have a
+ character in mcbuffer when mclength is greater than zero, or we have
+ mclength zero, in which case there is a non-property character type in
+ op_previous. If prop_type/value are not negative, we have a property
+ character type in op_previous. */
- /* ------------------------------------------------------------ */
- case CHAR_APOSTROPHE: /* Define a name - note fall through above */
+ oldcode = code; /* Save where we were */
+ code = previous; /* Usually overwrite previous item */
- /* The syntax was checked and the list of names was set up in the
- pre-pass, so there is nothing to be done now except to skip over the
- name. */
+ /* If the maximum is zero then the minimum must also be zero; Perl allows
+ this case, so we do too - by simply omitting the item altogether. */
- terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
- CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
- while (*(++ptr) != (unsigned int)terminator);
- ptr++;
- goto NUMBERED_GROUP; /* Set up numbered group */
+ if (repeat_max == 0) goto END_REPEAT;
+ /* Combine the op_type with the repeat_type */
- /* ------------------------------------------------------------ */
- case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */
- terminator = CHAR_RIGHT_PARENTHESIS;
- is_recurse = TRUE;
- /* Fall through */
+ repeat_type += op_type;
- /* We come here from the Python syntax above that handles both
- references (?P=name) and recursion (?P>name), as well as falling
- through from the Perl recursion syntax (?&name). We also come here from
- the Perl \k<name> or \k'name' back reference syntax and the \k{name}
- .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
+ /* A minimum of zero is handled either as the special case * or ?, or as
+ an UPTO, with the maximum given. */
- NAMED_REF_OR_RECURSE:
- name = ++ptr;
- if (IS_DIGIT(*ptr))
+ if (repeat_min == 0)
{
- *errorcodeptr = ERR44; /* Group name must start with non-digit */
- goto FAILED;
+ if (repeat_max == REPEAT_UNLIMITED) *code++ = OP_STAR + repeat_type;
+ else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
+ else
+ {
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max);
+ }
}
- /* Increment ptr, set namelen, check length */
- READ_NAME(ctype_word, ERR48, *errorcodeptr);
- /* In the pre-compile phase, do a syntax check. */
+ /* A repeat minimum of 1 is optimized into some special cases. If the
+ maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
+ left in place and, if the maximum is greater than 1, we use OP_UPTO with
+ one less than the maximum. */
- if (lengthptr != NULL)
+ else if (repeat_min == 1)
{
- if (namelen == 0)
- {
- *errorcodeptr = ERR62;
- goto FAILED;
- }
- if (*ptr != (PCRE2_UCHAR)terminator)
+ if (repeat_max == REPEAT_UNLIMITED)
+ *code++ = OP_PLUS + repeat_type;
+ else
{
- *errorcodeptr = ERR42;
- goto FAILED;
+ code = oldcode; /* Leave previous item in place */
+ if (repeat_max == 1) goto END_REPEAT;
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max - 1);
}
}
- /* Scan the list of names generated in the pre-pass in order to get
- a number and whether or not this name is duplicated. */
+ /* The case {n,n} is just an EXACT, while the general case {n,m} is
+ handled as an EXACT followed by an UPTO or STAR or QUERY. */
- recno = 0;
- is_dupname = FALSE;
- ng = cb->named_groups;
-
- for (i = 0; i < cb->names_found; i++, ng++)
+ else
{
- if (namelen == ng->length &&
- PRIV(strncmp)(name, ng->name, namelen) == 0)
- {
- open_capitem *oc;
- is_dupname = ng->isdup;
- recno = ng->number;
-
- /* For a recursion, that's all that is needed. We can now go to the
- code that handles numerical recursion. */
+ *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
+ PUT2INC(code, 0, repeat_min);
- if (is_recurse) goto HANDLE_RECURSION;
+ /* Unless repeat_max equals repeat_min, fill in the data for EXACT,
+ and then generate the second opcode. For a repeated Unicode property
+ match, there are two extra values that define the required property,
+ and mclength is set zero to indicate this. */
- /* For a back reference, update the back reference map and the
- maximum back reference. Then for each group we must check to see if
- it is recursive, that is, it is inside the group that it
- references. A flag is set so that the group can be made atomic. */
-
- cb->backref_map |= (recno < 32)? (1u << recno) : 1;
- if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
-
- for (oc = cb->open_caps; oc != NULL; oc = oc->next)
+ if (repeat_max != repeat_min)
+ {
+ if (mclength > 0)
{
- if (oc->number == recno)
+ memcpy(code, mcbuffer, CU2BYTES(mclength));
+ code += mclength;
+ }
+ else
+ {
+ *code++ = op_previous;
+ if (prop_type >= 0)
{
- oc->flag = TRUE;
- break;
+ *code++ = prop_type;
+ *code++ = prop_value;
}
}
- }
- }
- /* If the name was not found we have a bad reference. */
+ /* Now set up the following opcode */
- if (recno == 0)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
-
- /* If a back reference name is not duplicated, we can handle it as a
- numerical reference. */
-
- if (!is_dupname) goto HANDLE_REFERENCE;
-
- /* If a back reference name is duplicated, we generate a different
- opcode to a numerical back reference. In the second pass we must search
- for the index and count in the final name table. */
-
- count = 0;
- index = 0;
-
- if (lengthptr == NULL)
- {
- slot = cb->name_table;
- for (i = 0; i < cb->names_found; i++)
- {
- if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0 &&
- slot[IMM2_SIZE+namelen] == 0)
+ if (repeat_max == REPEAT_UNLIMITED)
+ *code++ = OP_STAR + repeat_type;
+ else
{
- if (count == 0) index = i;
- count++;
+ repeat_max -= repeat_min;
+ if (repeat_max == 1)
+ {
+ *code++ = OP_QUERY + repeat_type;
+ }
+ else
+ {
+ *code++ = OP_UPTO + repeat_type;
+ PUT2INC(code, 0, repeat_max);
+ }
}
- slot += cb->name_entry_size;
- }
-
- if (count == 0)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
}
}
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- previous = code;
- *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
- PUT2INC(code, 0, index);
- PUT2INC(code, 0, count);
- continue; /* End of back ref handling */
+ /* Fill in the character or character type for the final opcode. */
-
- /* ------------------------------------------------------------ */
- case CHAR_R: /* Recursion, same as (?0) */
- recno = 0;
- if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
+ if (mclength > 0)
{
- *errorcodeptr = ERR29;
- goto FAILED;
+ memcpy(code, mcbuffer, CU2BYTES(mclength));
+ code += mclength;
}
- goto HANDLE_RECURSION;
-
-
- /* ------------------------------------------------------------ */
- case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */
- case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
- case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
+ else
{
- terminator = CHAR_RIGHT_PARENTHESIS;
-
- /* Come here from the \g<...> and \g'...' code (Oniguruma
- compatibility). However, the syntax has been checked to ensure that
- the ... are a (signed) number, so that neither ERR63 nor ERR29 will
- be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY
- ever be taken. */
-
- HANDLE_NUMERICAL_RECURSION:
-
- if ((refsign = *ptr) == CHAR_PLUS)
- {
- ptr++;
- if (!IS_DIGIT(*ptr))
- {
- *errorcodeptr = ERR63;
- goto FAILED;
- }
- }
- else if (refsign == CHAR_MINUS)
+ *code++ = op_previous;
+ if (prop_type >= 0)
{
- if (!IS_DIGIT(ptr[1]))
- goto OTHER_CHAR_AFTER_QUERY;
- ptr++;
+ *code++ = prop_type;
+ *code++ = prop_value;
}
+ }
+ }
+ break;
+ } /* End of switch on different op_previous values */
- recno = 0;
- while (IS_DIGIT(*ptr))
- {
- if (recno > INT_MAX / 10 - 1) /* Integer overflow */
- {
- while (IS_DIGIT(*ptr)) ptr++;
- *errorcodeptr = ERR61;
- goto FAILED;
- }
- recno = recno * 10 + *ptr++ - CHAR_0;
- }
- if (*ptr != (PCRE2_UCHAR)terminator)
- {
- *errorcodeptr = ERR29;
- goto FAILED;
- }
+ /* If the character following a repeat is '+', possessive_quantifier is
+ TRUE. For some opcodes, there are special alternative opcodes for this
+ case. For anything else, we wrap the entire repeated item inside OP_ONCE
+ brackets. Logically, the '+' notation is just syntactic sugar, taken from
+ Sun's Java package, but the special opcodes can optimize it.
- if (refsign == CHAR_MINUS)
- {
- if (recno == 0)
- {
- *errorcodeptr = ERR58;
- goto FAILED;
- }
- recno = (int)(cb->bracount + 1) - recno;
- if (recno <= 0)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- }
- else if (refsign == CHAR_PLUS)
- {
- if (recno == 0)
- {
- *errorcodeptr = ERR58;
- goto FAILED;
- }
- recno += cb->bracount;
- }
+ Some (but not all) possessively repeated subpatterns have already been
+ completely handled in the code just above. For them, possessive_quantifier
+ is always FALSE at this stage. Note that the repeated item starts at
+ tempcode, not at previous, which might be the first part of a string whose
+ (former) last char we repeated. */
- if ((uint32_t)recno > cb->final_bracount)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
+ if (possessive_quantifier)
+ {
+ int len;
- /* Come here from code above that handles a named recursion.
- We insert the number of the called group after OP_RECURSE. At the
- end of compiling the pattern is scanned and these numbers are
- replaced by offsets within the pattern. It is done like this to avoid
- problems with forward references and adjusting offsets when groups
- are duplicated and moved (as discovered in previous implementations).
- Note that a recursion does not have a set first character (relevant
- if it is repeated, because it will then be wrapped with ONCE
- brackets). */
-
- HANDLE_RECURSION:
- previous = code;
- *code = OP_RECURSE;
- PUT(code, 1, recno);
- code += 1 + LINK_SIZE;
- groupsetfirstcu = FALSE;
- cb->had_recurse = TRUE;
- }
+ /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
+ However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
+ {5,}, or {5,10}). We skip over an EXACT item; if the length of what
+ remains is greater than zero, there's a further opcode that can be
+ handled. If not, do nothing, leaving the EXACT alone. */
- /* Can't determine a first byte now */
+ switch(*tempcode)
+ {
+ case OP_TYPEEXACT:
+ tempcode += PRIV(OP_lengths)[*tempcode] +
+ ((tempcode[1 + IMM2_SIZE] == OP_PROP
+ || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
+ break;
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- continue;
+ /* CHAR opcodes are used for exacts whose count is 1. */
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_EXACT:
+ case OP_EXACTI:
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+ tempcode += PRIV(OP_lengths)[*tempcode];
+#ifdef SUPPORT_UNICODE
+ if (utf && HAS_EXTRALEN(tempcode[-1]))
+ tempcode += GET_EXTRALEN(tempcode[-1]);
+#endif
+ break;
- /* ------------------------------------------------------------ */
- default: /* Other characters: check option setting */
- OTHER_CHAR_AFTER_QUERY:
- set = unset = 0;
- optset = &set;
+ /* For the class opcodes, the repeat operator appears at the end;
+ adjust tempcode to point to it. */
- while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
- {
- switch (*ptr++)
- {
- case CHAR_MINUS: optset = &unset; break;
+ case OP_CLASS:
+ case OP_NCLASS:
+ tempcode += 1 + 32/sizeof(PCRE2_UCHAR);
+ break;
- case CHAR_J: /* Record that it changed in the external options */
- *optset |= PCRE2_DUPNAMES;
- cb->external_flags |= PCRE2_JCHANGED;
- break;
+#ifdef SUPPORT_WIDE_CHARS
+ case OP_XCLASS:
+ tempcode += GET(tempcode, 1);
+ break;
+#endif
+ }
- case CHAR_i: *optset |= PCRE2_CASELESS; break;
- case CHAR_m: *optset |= PCRE2_MULTILINE; break;
- case CHAR_s: *optset |= PCRE2_DOTALL; break;
- case CHAR_x: *optset |= PCRE2_EXTENDED; break;
- case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
+ /* If tempcode is equal to code (which points to the end of the repeated
+ item), it means we have skipped an EXACT item but there is no following
+ QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
+ all other cases, tempcode will be pointing to the repeat opcode, and will
+ be less than code, so the value of len will be greater than 0. */
- default: *errorcodeptr = ERR11;
- ptr--; /* Correct the offset */
- goto FAILED;
- }
- }
+ len = (int)(code - tempcode);
+ if (len > 0)
+ {
+ unsigned int repcode = *tempcode;
- /* Set up the changed option bits, but don't change anything yet. */
+ /* There is a table for possessifying opcodes, all of which are less
+ than OP_CALLOUT. A zero entry means there is no possessified version.
+ */
- newoptions = (options | set) & (~unset);
+ if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
+ *tempcode = opcode_possessify[repcode];
- /* If the options ended with ')' this is not the start of a nested
- group with option changes, so the options change at this level. They
- must also be passed back for use in subsequent branches. Reset the
- greedy defaults and the case value for firstcu and reqcu. */
+ /* For opcode without a special possessified version, wrap the item in
+ ONCE brackets. */
- if (*ptr == CHAR_RIGHT_PARENTHESIS)
+ else
{
- *optionsptr = options = newoptions;
- greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
- greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
- previous = NULL; /* This item can't be repeated */
- continue; /* It is complete */
+ memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
+ code += 1 + LINK_SIZE;
+ len += 1 + LINK_SIZE;
+ tempcode[0] = OP_ONCE;
+ *code++ = OP_KET;
+ PUTINC(code, 0, len);
+ PUT(tempcode, 1, len);
}
+ }
+ }
- /* If the options ended with ':' we are heading into a nested group
- with possible change of options. Such groups are non-capturing and are
- not assertions of any kind. All we need to do is skip over the ':';
- the newoptions value is handled below. */
-
- bravalue = OP_BRA;
- ptr++;
- } /* End of switch for character following (? */
- } /* End of (? handling */
+ /* We set the "follows varying string" flag for subsequently encountered
+ reqcus if it isn't already set and we have just passed a varying length
+ item. */
- /* Opening parenthesis not followed by '*' or '?'. If PCRE2_NO_AUTO_CAPTURE
- is set, all unadorned brackets become non-capturing and behave like (?:...)
- brackets. */
+ END_REPEAT:
+ cb->req_varyopt |= reqvary;
+ break;
- else if ((options & PCRE2_NO_AUTO_CAPTURE) != 0)
- {
- bravalue = OP_BRA;
- }
- /* Else we have a capturing group. */
+ /* ===================================================================*/
+ /* Handle a 32-bit data character with a value greater than META_END. */
- else
- {
- NUMBERED_GROUP:
- cb->bracount += 1;
- PUT2(code, 1+LINK_SIZE, cb->bracount);
- skipunits = IMM2_SIZE;
- }
+ case META_BIGVALUE:
+ pptr++;
+ goto NORMAL_CHAR;
- /* Process nested bracketed regex. First check for parentheses nested too
- deeply. */
- if ((cb->parens_depth += 1) > (int)(cb->cx->parens_nest_limit))
- {
- *errorcodeptr = ERR19;
- goto FAILED;
- }
+ /* ===============================================================*/
+ /* Handle a back reference by number, which is the meta argument. The
+ pattern offsets for back references to group numbers less than 10 are held
+ in a special vector, to avoid using more than two parsed pattern elements
+ in 64-bit environments. We only need the offset to the first occurrence,
+ because if that doesn't fail, subsequent ones will also be OK. */
- /* All assertions used not to be repeatable, but this was changed for Perl
- compatibility. All kinds can now be repeated except for assertions that are
- conditions (Perl also forbids these to be repeated). We copy code into a
- non-register variable (tempcode) in order to be able to pass its address
- because some compilers complain otherwise. At the start of a conditional
- group whose condition is an assertion, cb->iscondassert is set. We unset it
- here so as to allow assertions later in the group to be quantified. */
+ case META_BACKREF:
+ if (meta_arg < 10) offset = cb->small_ref_offset[meta_arg];
+ else GETPLUSOFFSET(offset, pptr);
- if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
- cb->iscondassert)
- {
- previous = NULL;
- cb->iscondassert = FALSE;
- }
- else
+ if (meta_arg > cb->bracount)
{
- previous = code;
+ cb->erroroffset = offset;
+ *errorcodeptr = ERR15; /* Non-existent subpattern */
+ return 0;
}
- *code = bravalue;
- tempcode = code;
- tempreqvary = cb->req_varyopt; /* Save value before bracket */
- tempbracount = cb->bracount; /* Save value before bracket */
- length_prevgroup = 0; /* Initialize for pre-compile phase */
-
- if (!compile_regex(
- newoptions, /* The complete new option state */
- &tempcode, /* Where to put code (updated) */
- &ptr, /* Input pointer (updated) */
- errorcodeptr, /* Where to put an error message */
- (bravalue == OP_ASSERTBACK ||
- bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
- reset_bracount, /* True if (?| group */
- skipunits, /* Skip over bracket number */
- cond_depth +
- ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */
- &subfirstcu, /* For possible first char */
- &subfirstcuflags,
- &subreqcu, /* For possible last char */
- &subreqcuflags,
- bcptr, /* Current branch chain */
- cb, /* Compile data block */
- (lengthptr == NULL)? NULL : /* Actual compile phase */
- &length_prevgroup /* Pre-compile phase */
- ))
- goto FAILED;
-
- cb->parens_depth -= 1;
-
- /* If this was an atomic group and there are no capturing groups within it,
- generate OP_ONCE_NC instead of OP_ONCE. */
+ /* Come here from named backref handling when the reference is to a
+ single group (that is, not to a duplicated name). The back reference
+ data will have already been updated. We must disable firstcu if not
+ set, to cope with cases like (?=(\w+))\1: which would otherwise set ':'
+ later. */
- if (bravalue == OP_ONCE && cb->bracount <= tempbracount)
- *code = OP_ONCE_NC;
+ HANDLE_SINGLE_REFERENCE:
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;
+ PUT2INC(code, 0, meta_arg);
- if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
- cb->assert_depth -= 1;
+ /* Update the map of back references, and keep the highest one. We
+ could do this in parse_regex() for numerical back references, but not
+ for named back references, because we don't know the numbers to which
+ named back references refer. So we do it all in this function. */
- /* At the end of compiling, code is still pointing to the start of the
- group, while tempcode has been updated to point past the end of the group.
- The pattern pointer (ptr) is on the bracket.
+ cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
+ if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
- If this is a conditional bracket, check that there are no more than
- two branches in the group, or just one if it's a DEFINE group. We do this
- in the real compile phase, not in the pre-pass, where the whole group may
- not be available. */
+ /* Check to see if this back reference is recursive, that it, it
+ is inside the group that it references. A flag is set so that the
+ group can be made atomic. */
- if (bravalue == OP_COND && lengthptr == NULL)
+ for (oc = cb->open_caps; oc != NULL; oc = oc->next)
{
- PCRE2_UCHAR *tc = code;
- int condcount = 0;
-
- do {
- condcount++;
- tc += GET(tc,1);
- }
- while (*tc != OP_KET);
-
- /* A DEFINE group is never obeyed inline (the "condition" is always
- false). It must have only one branch. Having checked this, change the
- opcode to OP_FALSE. */
-
- if (code[LINK_SIZE+1] == OP_DEFINE)
+ if (oc->number == meta_arg)
{
- if (condcount > 1)
- {
- *errorcodeptr = ERR54;
- goto FAILED;
- }
- code[LINK_SIZE+1] = OP_FALSE;
- bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
- }
-
- /* A "normal" conditional group. If there is just one branch, we must not
- make use of its firstcu or reqcu, because this is equivalent to an
- empty second branch. */
-
- else
- {
- if (condcount > 2)
- {
- *errorcodeptr = ERR27;
- goto FAILED;
- }
- if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE;
+ oc->flag = TRUE;
+ break;
}
}
+ break;
- /* At the end of a group, it's an error if we hit end of pattern or
- any non-closing parenthesis. This check also happens in the pre-scan,
- so should not trigger here, but leave this code as an insurance. */
-
- if (*ptr != CHAR_RIGHT_PARENTHESIS)
- {
- *errorcodeptr = ERR14;
- goto FAILED;
- }
- /* In the pre-compile phase, update the length by the length of the group,
- less the brackets at either end. Then reduce the compiled code to just a
- set of non-capturing brackets so that it doesn't use much memory if it is
- duplicated by a quantifier.*/
+ /* ===============================================================*/
+ /* Handle recursion by inserting the number of the called group (which is
+ the meta argument) after OP_RECURSE. At the end of compiling the pattern is
+ scanned and these numbers are replaced by offsets within the pattern. It is
+ done like this to avoid problems with forward references and adjusting
+ offsets when groups are duplicated and moved (as discovered in previous
+ implementations). Note that a recursion does not have a set first character
+ (relevant if it is repeated, because it will then be wrapped with ONCE
+ brackets). */
- if (lengthptr != NULL)
+ case META_RECURSE:
+ GETPLUSOFFSET(offset, pptr);
+ if (meta_arg > cb->bracount)
{
- if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
- {
- *errorcodeptr = ERR20;
- goto FAILED;
- }
- *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
- code++; /* This already contains bravalue */
- PUTINC(code, 0, 1 + LINK_SIZE);
- *code++ = OP_KET;
- PUTINC(code, 0, 1 + LINK_SIZE);
- break; /* No need to waste time with special character handling */
+ cb->erroroffset = offset;
+ *errorcodeptr = ERR15; /* Non-existent subpattern */
+ return 0;
}
-
- /* Otherwise update the main code pointer to the end of the group. */
-
- code = tempcode;
-
- /* For a DEFINE group, required and first character settings are not
- relevant. */
-
- if (bravalue == OP_DEFINE) break;
-
- /* Handle updating of the required and first characters for other types of
- group. Update for normal brackets of all kinds, and conditions with two
- branches (see code above). If the bracket is followed by a quantifier with
- zero repeat, we have to back off. Hence the definition of zeroreqcu and
- zerofirstcu outside the main loop so that they can be accessed for the
- back off. */
-
- zeroreqcu = reqcu;
- zeroreqcuflags = reqcuflags;
- zerofirstcu = firstcu;
- zerofirstcuflags = firstcuflags;
+ HANDLE_NUMERICAL_RECURSION:
+ *code = OP_RECURSE;
+ PUT(code, 1, meta_arg);
+ code += 1 + LINK_SIZE;
groupsetfirstcu = FALSE;
+ cb->had_recurse = TRUE;
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ break;
- if (bravalue >= OP_ONCE)
- {
- /* If we have not yet set a firstcu in this branch, take it from the
- subpattern, remembering that it was set here so that a repeat of more
- than one can replicate it as reqcu if necessary. If the subpattern has
- no firstcu, set "none" for the whole branch. In both cases, a zero
- repeat forces firstcu to "none". */
-
- if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
- {
- if (subfirstcuflags >= 0)
- {
- firstcu = subfirstcu;
- firstcuflags = subfirstcuflags;
- groupsetfirstcu = TRUE;
- }
- else firstcuflags = REQ_NONE;
- zerofirstcuflags = REQ_NONE;
- }
-
- /* If firstcu was previously set, convert the subpattern's firstcu
- into reqcu if there wasn't one, using the vary flag that was in
- existence beforehand. */
-
- else if (subfirstcuflags >= 0 && subreqcuflags < 0)
- {
- subreqcu = subfirstcu;
- subreqcuflags = subfirstcuflags | tempreqvary;
- }
-
- /* If the subpattern set a required byte (or set a first byte that isn't
- really the first byte - see above), set it. */
- if (subreqcuflags >= 0)
- {
- reqcu = subreqcu;
- reqcuflags = subreqcuflags;
- }
- }
+ /* ===============================================================*/
+ /* Handle capturing parentheses; the number is the meta argument. */
- /* For a forward assertion, we take the reqcu, if set. This can be
- helpful if the pattern that follows the assertion doesn't set a different
- char. For example, it's useful for /(?=abcde).+/. We can't set firstcu
- for an assertion, however because it leads to incorrect effect for patterns
- such as /(?=a)a.+/ when the "real" "a" would then become a reqcu instead
- of a firstcu. This is overcome by a scan at the end if there's no
- firstcu, looking for an asserted first char. */
+ case META_CAPTURE:
+ bravalue = OP_CBRA;
+ skipunits = IMM2_SIZE;
+ PUT2(code, 1+LINK_SIZE, meta_arg);
+ cb->lastcapture = meta_arg;
+ goto GROUP_PROCESS_NOTE_EMPTY;
- else if (bravalue == OP_ASSERT && subreqcuflags >= 0)
- {
- reqcu = subreqcu;
- reqcuflags = subreqcuflags;
- }
- break; /* End of processing '(' */
+ /* ===============================================================*/
+ /* Handle escape sequence items. For ones like \d, the ESC_values are
+ arranged to be the same as the corresponding OP_values in the default case
+ when PCRE2_UCP is not set (which is the only case in which they will appear
+ here).
- /* ===================================================================*/
- /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
- are arranged to be the negation of the corresponding OP_values in the
- default case when PCRE2_UCP is not set. For the back references, the values
- are negative the reference number. Only back references and those types
- that consume a character may be repeated. We can test for values between
- ESC_b and ESC_Z for the latter; this may have to change if any new ones are
- ever created.
+ Note: \Q and \E are never seen here, as they were dealt with in
+ parse_pattern(). Neither are numerical back references or recursions, which
+ were turned into META_BACKREF or META_RECURSE items, respectively. \k and
+ \g, when followed by names, are turned into META_BACKREF_BYNAME or
+ META_RECURSE_BYNAME. */
- Note: \Q and \E are handled at the start of the character-processing loop,
- not here. */
+ case META_ESCAPE:
- case CHAR_BACKSLASH:
- tempptr = ptr;
- escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
- options, FALSE, cb);
- if (*errorcodeptr != 0) goto FAILED;
+ /* We can test for escape sequences that consume a character because their
+ values lie between ESC_b and ESC_Z; this may have to change if any new ones
+ are ever created. For these sequences, we disable the setting of a first
+ character if it hasn't already been set. */
- if (escape == 0) /* The escape coded a single character */
- c = ec;
- else
+ if (meta_arg > ESC_b && meta_arg < ESC_Z)
{
- /* For metasequences that actually match a character, we disable the
- setting of a first character if it hasn't already been set. */
-
- if (firstcuflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
- firstcuflags = REQ_NONE;
-
- /* Set values to reset to if this is followed by a zero repeat. */
-
- zerofirstcu = firstcu;
- zerofirstcuflags = firstcuflags;
- zeroreqcu = reqcu;
- zeroreqcuflags = reqcuflags;
-
- /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
- is a subroutine call by number (Oniguruma syntax). In fact, the value
- ESC_g is returned only for these cases. So we don't need to check for <
- or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
- -n, and for the Perl syntax \g{name} the result is ESC_k (as
- that is a synonym for a named back reference). */
-
- if (escape == ESC_g)
- {
- PCRE2_SPTR p;
- uint32_t cf;
-
- terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
- CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
-
- /* These two statements stop the compiler for warning about possibly
- unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
- fact, because we do the check for a number below, the paths that
- would actually be in error are never taken. */
-
- skipunits = 0;
- reset_bracount = FALSE;
-
- /* If it's not a signed or unsigned number, treat it as a name. */
-
- cf = ptr[1];
- if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
- {
- is_recurse = TRUE;
- goto NAMED_REF_OR_RECURSE;
- }
-
- /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
- or a digit. */
-
- p = ptr + 2;
- while (IS_DIGIT(*p)) p++;
- if (*p != (PCRE2_UCHAR)terminator)
- {
- *errorcodeptr = ERR57;
- goto FAILED;
- }
- ptr++;
- goto HANDLE_NUMERICAL_RECURSION;
- }
-
- /* \k<name> or \k'name' is a back reference by name (Perl syntax).
- We also support \k{name} (.NET syntax). */
-
- if (escape == ESC_k)
- {
- if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
- ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
- {
- *errorcodeptr = ERR69;
- goto FAILED;
- }
- is_recurse = FALSE;
- terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
- CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
- CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
- goto NAMED_REF_OR_RECURSE;
- }
-
- /* Back references are handled specially; must disable firstcu if
- not set to cope with cases like (?=(\w+))\1: which would otherwise set
- ':' later. */
-
- if (escape < 0)
- {
- open_capitem *oc;
- recno = -escape;
+ matched_char = TRUE;
+ if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
+ }
- /* Come here from named backref handling when the reference is to a
- single group (i.e. not to a duplicated name). */
+ /* Set values to reset to if this is followed by a zero repeat. */
- HANDLE_REFERENCE:
- if (recno > (int)cb->final_bracount)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
- if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
- previous = code;
- *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;
- PUT2INC(code, 0, recno);
- cb->backref_map |= (recno < 32)? (1u << recno) : 1;
- if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
-
- /* Check to see if this back reference is recursive, that it, it
- is inside the group that it references. A flag is set so that the
- group can be made atomic. */
-
- for (oc = cb->open_caps; oc != NULL; oc = oc->next)
- {
- if (oc->number == recno)
- {
- oc->flag = TRUE;
- break;
- }
- }
- }
+ zerofirstcu = firstcu;
+ zerofirstcuflags = firstcuflags;
+ zeroreqcu = reqcu;
+ zeroreqcuflags = reqcuflags;
- /* So are Unicode property matches, if supported. */
+ /* If Unicode is not supported, \P and \p are not allowed and are
+ faulted at parse time, so will never appear here. */
#ifdef SUPPORT_UNICODE
- else if (escape == ESC_P || escape == ESC_p)
- {
- BOOL negated;
- unsigned int ptype = 0, pdata = 0;
- if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
- goto FAILED;
- previous = code;
- *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
- *code++ = ptype;
- *code++ = pdata;
- }
-#else
-
- /* If Unicode properties are not supported, \X, \P, and \p are not
- allowed. */
-
- else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
- {
- *errorcodeptr = ERR45;
- goto FAILED;
- }
+ if (meta_arg == ESC_P || meta_arg == ESC_p)
+ {
+ uint32_t ptype = *(++pptr) >> 16;
+ uint32_t pdata = *pptr & 0xffff;
+ *code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
+ *code++ = ptype;
+ *code++ = pdata;
+ break; /* End META_ESCAPE */
+ }
#endif
- /* The use of \C can be locked out. */
-
-#ifdef NEVER_BACKSLASH_C
- else if (escape == ESC_C)
- {
- *errorcodeptr = ERR85;
- goto FAILED;
- }
-#else
- else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0)
- {
- *errorcodeptr = ERR83;
- goto FAILED;
- }
-#endif
+ /* For the rest (including \X when Unicode is supported - if not it's
+ faulted at parse time), the OP value is the escape value when PCRE2_UCP is
+ not set; if it is set, these escapes do not show up here because they are
+ converted into Unicode property tests in parse_regex(). Note that \b and \B
+ do a one-character lookbehind, and \A also behaves as if it does. */
- /* For the rest (including \X when Unicode properties are supported), we
- can obtain the OP value by negating the escape value in the default
- situation when PCRE2_UCP is not set. When it *is* set, we substitute
- Unicode property tests. Note that \b and \B do a one-character
- lookbehind, and \A also behaves as if it does. */
+ if (meta_arg == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
+ if ((meta_arg == ESC_b || meta_arg == ESC_B || meta_arg == ESC_A) &&
+ cb->max_lookbehind == 0)
+ cb->max_lookbehind = 1;
- else
- {
- if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
- if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
- cb->max_lookbehind == 0)
- cb->max_lookbehind = 1;
-#ifdef SUPPORT_UNICODE
- if (escape >= ESC_DU && escape <= ESC_wu)
- {
- cb->nestptr[1] = cb->nestptr[0]; /* Back up if at 2nd level */
- cb->nestptr[0] = ptr + 1; /* Where to resume */
- ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */
- }
- else
-#endif
- /* In non-UTF mode, and for both 32-bit modes, we turn \C into
- OP_ALLANY instead of OP_ANYBYTE so that it works in DFA mode and in
- lookbehinds. */
+ /* In non-UTF mode, and for both 32-bit modes, we turn \C into OP_ALLANY
+ instead of OP_ANYBYTE so that it works in DFA mode and in lookbehinds. */
- {
- previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
#if PCRE2_CODE_UNIT_WIDTH == 32
- *code++ = (escape == ESC_C)? OP_ALLANY : escape;
+ *code++ = (meta_arg == ESC_C)? OP_ALLANY : meta_arg;
#else
- *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
+ *code++ = (!utf && meta_arg == ESC_C)? OP_ALLANY : meta_arg;
#endif
- }
- }
- continue;
- }
-
- /* We have a data character whose value is in c. In UTF-8 mode it may have
- a value > 127. We set its representation in the length/buffer, and then
- handle it as a data character. */
-
- mclength = PUTCHAR(c, mcbuffer);
- goto ONE_CHAR;
+ break; /* End META_ESCAPE */
/* ===================================================================*/
- /* Handle a literal character. It is guaranteed not to be whitespace or #
- when the extended flag is set. If we are in a UTF mode, it may be a
- multi-unit literal character. */
+ /* Handle an unrecognized meta value. A parsed pattern value less than
+ META_END is a literal. Otherwise we have a problem. */
default:
- NORMAL_CHAR:
- mclength = 1;
- mcbuffer[0] = c;
-
-#ifdef SUPPORT_UNICODE
- if (utf && HAS_EXTRALEN(c))
- ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
+ if (meta >= META_END)
+ {
+#ifdef DEBUG_SHOW_PARSED
+ fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x\n", *pptr);
#endif
+ *errorcodeptr = ERR89; /* Internal error - unrecognized. */
+ return 0;
+ }
- /* At this point we have the character's bytes in mcbuffer, and the length
- in mclength. When not in UTF mode, the length is always 1. */
+ /* Handle a literal character. We come here by goto in the case of a
+ 32-bit, non-UTF character whose value is greater than META_END. */
- ONE_CHAR:
- previous = code;
+ NORMAL_CHAR:
+ meta = *pptr; /* Get the full 32 bits */
+ NORMAL_CHAR_SET: /* Character is already in meta */
+ matched_char = TRUE;
/* For caseless UTF mode, check whether this character has more than one
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
@@ -7471,33 +7295,46 @@ for (;; ptr++)
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_CASELESS) != 0)
{
- GETCHAR(c, mcbuffer);
- if ((c = UCD_CASESET(c)) != 0)
+ uint32_t caseset = UCD_CASESET(meta);
+ if (caseset != 0)
{
*code++ = OP_PROP;
*code++ = PT_CLIST;
- *code++ = c;
+ *code++ = caseset;
if (firstcuflags == REQ_UNSET)
firstcuflags = zerofirstcuflags = REQ_NONE;
- break;
+ break; /* End handling this meta item */
}
}
#endif
- /* Caseful matches, or not one of the multicase characters. */
+ /* Caseful matches, or not one of the multicase characters. Get the
+ character's code units into mcbuffer, with the length in mclength. When not
+ in UTF mode, the length is always 1. */
+
+#ifdef SUPPORT_UNICODE
+ if (utf) mclength = PRIV(ord2utf)(meta, mcbuffer); else
+#endif
+ {
+ mclength = 1;
+ mcbuffer[0] = meta;
+ }
+
+ /* Generate the appropriate code */
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR;
- for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
+ memcpy(code, mcbuffer, CU2BYTES(mclength));
+ code += mclength;
/* Remember if \r or \n were seen */
if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
cb->external_flags |= PCRE2_HASCRORLF;
- /* Set the first and required bytes appropriately. If no previous first
- byte, set it from this character, but revert to none on a zero repeat.
- Otherwise, leave the firstcu value alone, and don't change it on a zero
- repeat. */
+ /* Set the first and required code units appropriately. If no previous
+ first code unit, set it from this character, but revert to none on a zero
+ repeat. Otherwise, leave the firstcu value alone, and don't change it on
+ a zero repeat. */
if (firstcuflags == REQ_UNSET)
{
@@ -7505,15 +7342,13 @@ for (;; ptr++)
zeroreqcu = reqcu;
zeroreqcuflags = reqcuflags;
- /* If the character is more than one byte long, we can set firstcu
+ /* If the character is more than one code unit long, we can set firstcu
only if it is not to be matched caselessly. */
if (mclength == 1 || req_caseopt == 0)
{
- firstcu = mcbuffer[0] | req_caseopt;
firstcu = mcbuffer[0];
firstcuflags = req_caseopt;
-
if (mclength != 1)
{
reqcu = code[-1];
@@ -7538,18 +7373,11 @@ for (;; ptr++)
reqcuflags = req_caseopt | cb->req_varyopt;
}
}
+ break; /* End default meta handling */
+ } /* End of big switch */
+ } /* End of big loop */
- break; /* End of literal character handling */
- }
- } /* end of big loop */
-
-/* Control never reaches here by falling through, only by a goto for all the
-error states. Pass back the position in the pattern so that it can be displayed
-to the user for diagnosing the error. */
-
-FAILED:
-*ptrptr = ptr;
-return FALSE;
+/* Control never reaches here. */
}
@@ -7558,22 +7386,19 @@ return FALSE;
* Compile regex: a sequence of alternatives *
*************************************************/
-/* On entry, ptr is pointing past the bracket character, but on return it
-points to the closing bracket, or vertical bar, or end of string. The code
-variable is pointing at the byte into which the BRA operator has been stored.
-This function is used during the pre-compile phase when we are trying to find
-out the amount of memory needed, as well as during the real compile phase. The
-value of lengthptr distinguishes the two phases.
+/* On entry, pptr is pointing past the bracket meta, but on return it points to
+the closing bracket or META_END. The code variable is pointing at the code unit
+into which the BRA operator has been stored. This function is used during the
+pre-compile phase when we are trying to find out the amount of memory needed,
+as well as during the real compile phase. The value of lengthptr distinguishes
+the two phases.
Arguments:
options option bits, including any changes for this subpattern
codeptr -> the address of the current code pointer
- ptrptr -> the address of the current pattern pointer
+ pptrptr -> the address of the current parsed pattern pointer
errorcodeptr -> pointer to error code variable
- lookbehind TRUE if this is a lookbehind assertion
- reset_bracount TRUE to reset the count for each branch
skipunits skip this many code units at start (for brackets and OP_COND)
- cond_depth depth of nesting for conditional subpatterns
firstcuptr place to put the first required code unit
firstcuflagsptr place to put the first code unit flags, or a negative number
reqcuptr place to put the last required code unit
@@ -7583,30 +7408,31 @@ Arguments:
lengthptr NULL during the real compile phase
points to length accumulator during pre-compile phase
-Returns: TRUE on success
+Returns: 0 There has been an error
+ +1 Success, this group must match at least one character
+ -1 Success, this group may match an empty string
*/
-static BOOL
-compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
- int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, uint32_t skipunits,
- int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
- uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
- compile_block *cb, size_t *lengthptr)
+static int
+compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
+ int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
+ int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
+ branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
-PCRE2_SPTR ptr = *ptrptr;
PCRE2_UCHAR *code = *codeptr;
PCRE2_UCHAR *last_branch = code;
PCRE2_UCHAR *start_bracket = code;
-PCRE2_UCHAR *reverse_count = NULL;
+BOOL lookbehind;
open_capitem capitem;
int capnumber = 0;
+int okreturn = 1;
+uint32_t *pptr = *pptrptr;
uint32_t firstcu, reqcu;
+uint32_t lookbehindlength;
int32_t firstcuflags, reqcuflags;
uint32_t branchfirstcu, branchreqcu;
int32_t branchfirstcuflags, branchreqcuflags;
-size_t length;
-unsigned int orig_bracount;
-unsigned int max_bracount;
+PCRE2_SIZE length;
branch_chain bc;
/* If set, call the external function that checks for stack availability. */
@@ -7615,7 +7441,7 @@ if (cb->cx->stack_guard != NULL &&
cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data))
{
*errorcodeptr= ERR33;
- return FALSE;
+ return 0;
}
/* Miscellaneous initialization */
@@ -7635,16 +7461,21 @@ the pre-compile phase so that the work space is not exceeded. */
length = 2 + 2*LINK_SIZE + skipunits;
-/* WARNING: If the above line is changed for any reason, you must also change
-the code that abstracts option settings at the start of the pattern and makes
-them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
-pre-compile phase to find out whether or not anything has yet been compiled.
+/* Remember if this is a lookbehind assertion, and if it is, save its length
+and skip over the pattern offset. */
-If this is a capturing subpattern, add to the chain of open capturing items
-so that we can detect them if (*ACCEPT) is encountered. This is also used to
-detect groups that contain recursive back references to themselves. Note that
-only OP_CBRA need be tested here; changing this opcode to one of its variants,
-e.g. OP_SCBRAPOS, happens later, after the group has been compiled. */
+lookbehind = *code == OP_ASSERTBACK || *code == OP_ASSERTBACK_NOT;
+if (lookbehind)
+ {
+ lookbehindlength = META_DATA(pptr[-1]);
+ pptr += SIZEOFFSET;
+ }
+else lookbehindlength = 0;
+
+/* If this is a capturing subpattern, add to the chain of open capturing items
+so that we can detect them if (*ACCEPT) is encountered. Note that only OP_CBRA
+need be tested here; changing this opcode to one of its variants, e.g.
+OP_SCBRAPOS, happens later, after the group has been compiled. */
if (*code == OP_CBRA)
{
@@ -7662,40 +7493,31 @@ code += 1 + LINK_SIZE + skipunits;
/* Loop for each alternative branch */
-orig_bracount = max_bracount = cb->bracount;
-
for (;;)
{
- /* For a (?| group, reset the capturing bracket count so that each branch
- uses the same numbers. */
+ int branch_return;
- if (reset_bracount) cb->bracount = orig_bracount;
+ /* Insert OP_REVERSE if this is as lookbehind assertion. */
- /* Set up dummy OP_REVERSE if lookbehind assertion */
-
- if (lookbehind)
+ if (lookbehind && lookbehindlength > 0)
{
*code++ = OP_REVERSE;
- reverse_count = code;
- PUTINC(code, 0, 0);
+ PUTINC(code, 0, lookbehindlength);
length += 1 + LINK_SIZE;
}
/* Now compile the branch; in the pre-compile phase its length gets added
into the length. */
- if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu,
- &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
- cond_depth, cb, (lengthptr == NULL)? NULL : &length))
- {
- *ptrptr = ptr;
- return FALSE;
- }
+ if ((branch_return =
+ compile_branch(&options, &code, &pptr, errorcodeptr, &branchfirstcu,
+ &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
+ cb, (lengthptr == NULL)? NULL : &length)) == 0)
+ return 0;
- /* Keep the highest bracket count in case (?| was used and some branch
- has fewer than the rest. */
+ /* If a branch can match an empty string, so can the whole group. */
- if (cb->bracount > max_bracount) max_bracount = cb->bracount;
+ if (branch_return < 0) okreturn = -1;
/* In the real compile phase, there is some post-processing to be done. */
@@ -7757,56 +7579,24 @@ for (;;)
reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY */
}
}
-
- /* If lookbehind, check that this branch matches a fixed-length string, and
- put the length into the OP_REVERSE item. Temporarily mark the end of the
- branch with OP_END. If the branch contains OP_RECURSE, the result is
- FFL_LATER (a negative value) because there may be forward references that
- we can't check here. Set a flag to cause another lookbehind check at the
- end. Why not do it all at the end? Because common errors can be picked up
- here and the offset of the problem can be shown. */
-
- if (lookbehind)
- {
- int fixed_length;
- int count = 0;
- *code = OP_END;
- fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0,
- FALSE, cb, NULL, &count);
- if (fixed_length == FFL_LATER)
- {
- cb->check_lookbehind = TRUE;
- }
- else if (fixed_length < 0)
- {
- *errorcodeptr = fixed_length_errors[-fixed_length];
- *ptrptr = ptr;
- return FALSE;
- }
- else
- {
- if (fixed_length > cb->max_lookbehind)
- cb->max_lookbehind = fixed_length;
- PUT(reverse_count, 0, fixed_length);
- }
- }
}
- /* Reached end of expression, either ')' or end of pattern. In the real
- compile phase, go back through the alternative branches and reverse the chain
- of offsets, with the field in the BRA item now becoming an offset to the
- first alternative. If there are no alternatives, it points to the end of the
- group. The length in the terminating ket is always the length of the whole
- bracketed item. Return leaving the pointer at the terminating char. */
+ /* Handle reaching the end of the expression, either ')' or end of pattern.
+ In the real compile phase, go back through the alternative branches and
+ reverse the chain of offsets, with the field in the BRA item now becoming an
+ offset to the first alternative. If there are no alternatives, it points to
+ the end of the group. The length in the terminating ket is always the length
+ of the whole bracketed item. Return leaving the pointer at the terminating
+ char. */
- if (*ptr != CHAR_VERTICAL_LINE)
+ if (META_CODE(*pptr) != META_ALT)
{
if (lengthptr == NULL)
{
- size_t branch_length = code - last_branch;
+ PCRE2_SIZE branch_length = code - last_branch;
do
{
- size_t prev_length = GET(last_branch, 1);
+ PCRE2_SIZE prev_length = GET(last_branch, 1);
PUT(last_branch, 1, branch_length);
branch_length = prev_length;
last_branch -= branch_length;
@@ -7841,14 +7631,10 @@ for (;;)
cb->open_caps = cb->open_caps->next;
}
- /* Retain the highest bracket number, in case resetting was used. */
-
- cb->bracount = max_bracount;
-
/* Set values to pass back */
*codeptr = code;
- *ptrptr = ptr;
+ *pptrptr = pptr;
*firstcuptr = firstcu;
*firstcuflagsptr = firstcuflags;
*reqcuptr = reqcu;
@@ -7858,11 +7644,11 @@ for (;;)
if (OFLOW_MAX - *lengthptr < length)
{
*errorcodeptr = ERR20;
- return FALSE;
+ return 0;
}
*lengthptr += length;
}
- return TRUE;
+ return okreturn;
}
/* Another branch follows. In the pre-compile phase, we can move the code
@@ -7887,9 +7673,11 @@ for (;;)
code += 1 + LINK_SIZE;
}
- /* Advance past the vertical bar */
+ /* Set the lookbehind length (if not in a lookbehind the value will be zero)
+ and then advance past the vertical bar. */
- ptr++;
+ lookbehindlength = META_DATA(*pptr);
+ pptr++;
}
/* Control never reaches here */
}
@@ -7933,25 +7721,27 @@ Arguments:
the less precise approach
cb points to the compile data block
atomcount atomic group level
+ inassert TRUE if in an assertion
Returns: TRUE or FALSE
*/
static BOOL
-is_anchored(register PCRE2_SPTR code, unsigned int bracket_map,
- compile_block *cb, int atomcount)
+is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
+ int atomcount, BOOL inassert)
{
do {
PCRE2_SPTR scode = first_significant_code(
code + PRIV(OP_lengths)[*code], FALSE);
- register int op = *scode;
+ int op = *scode;
/* Non-capturing brackets */
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ return FALSE;
}
/* Capturing brackets */
@@ -7961,33 +7751,44 @@ do {
{
int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
- if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE;
+ if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
- /* Positive forward assertions and conditions */
+ /* Positive forward assertion */
- else if (op == OP_ASSERT || op == OP_COND)
+ else if (op == OP_ASSERT)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
+ if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
+ }
+
+ /* Condition */
+
+ else if (op == OP_COND)
+ {
+ if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
+ return FALSE;
}
/* Atomic groups */
- else if (op == OP_ONCE || op == OP_ONCE_NC)
+ else if (op == OP_ONCE)
{
- if (!is_anchored(scode, bracket_map, cb, atomcount + 1))
+ if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
return FALSE;
}
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
it isn't in brackets that are or may be referenced or inside an atomic
- group. There is also an option that disables auto-anchoring. */
+ group or an assertion. Also the pattern must not contain *PRUNE or *SKIP,
+ because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/
+ with the subject "aab", which matches "b", i.e. not at the start of a line.
+ There is also an option that disables auto-anchoring. */
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
op == OP_TYPEPOSSTAR))
{
if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
- atomcount > 0 || cb->had_pruneorskip ||
+ atomcount > 0 || cb->had_pruneorskip || inassert ||
(cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
return FALSE;
}
@@ -8014,8 +7815,8 @@ matching and for non-DOTALL patterns that start with .* (which must start at
the beginning or after \n). As in the case of is_anchored() (see above), we
have to take account of back references to capturing brackets that contain .*
because in that case we can't make the assumption. Also, the appearance of .*
-inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
-count, because once again the assumption no longer holds.
+inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE
+or *SKIP does not count, because once again the assumption no longer holds.
Arguments:
code points to start of the compiled pattern or a group
@@ -8024,18 +7825,19 @@ Arguments:
the less precise approach
cb points to the compile data
atomcount atomic group level
+ inassert TRUE if in an assertion
Returns: TRUE or FALSE
*/
static BOOL
is_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
- int atomcount)
+ int atomcount, BOOL inassert)
{
do {
PCRE2_SPTR scode = first_significant_code(
code + PRIV(OP_lengths)[*code], FALSE);
- register int op = *scode;
+ int op = *scode;
/* If we are at the start of a conditional assertion group, *both* the
conditional assertion *and* what follows the condition must satisfy the test
@@ -8061,7 +7863,7 @@ do {
return FALSE;
default: /* Assertion */
- if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
do scode += GET(scode, 1); while (*scode == OP_ALT);
scode += 1 + LINK_SIZE;
break;
@@ -8075,7 +7877,8 @@ do {
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cb, atomcount, inassert))
+ return FALSE;
}
/* Capturing brackets */
@@ -8085,41 +7888,43 @@ do {
{
int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
- if (!is_startline(scode, new_map, cb, atomcount)) return FALSE;
+ if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
/* Positive forward assertions */
else if (op == OP_ASSERT)
{
- if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cb, atomcount, TRUE))
+ return FALSE;
}
/* Atomic brackets */
- else if (op == OP_ONCE || op == OP_ONCE_NC)
+ else if (op == OP_ONCE)
{
- if (!is_startline(scode, bracket_map, cb, atomcount + 1)) return FALSE;
+ if (!is_startline(scode, bracket_map, cb, atomcount + 1, inassert))
+ return FALSE;
}
/* .* means "start at start or after \n" if it isn't in atomic brackets or
- brackets that may be referenced, as long as the pattern does not contain
- *PRUNE or *SKIP, because these break the feature. Consider, for example,
- /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
- start of a line. There is also an option that disables this optimization. */
+ brackets that may be referenced or an assertion, and as long as the pattern
+ does not contain *PRUNE or *SKIP, because these break the feature. Consider,
+ for example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab",
+ i.e. not at the start of a line. There is also an option that disables this
+ optimization. */
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
{
if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 ||
- atomcount > 0 || cb->had_pruneorskip ||
+ atomcount > 0 || cb->had_pruneorskip || inassert ||
(cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
return FALSE;
}
/* Check for explicit circumflex; anything else gives a FALSE result. Note
- in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
- because the number of characters matched by .* cannot be adjusted inside
- them. */
+ in particular that this includes atomic brackets OP_ONCE because the number
+ of characters matched by .* cannot be adjusted inside them. */
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
@@ -8134,6 +7939,153 @@ return TRUE;
/*************************************************
+* Scan compiled regex for recursion reference *
+*************************************************/
+
+/* This function scans through a compiled pattern until it finds an instance of
+OP_RECURSE.
+
+Arguments:
+ code points to start of expression
+ utf TRUE in UTF mode
+
+Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
+*/
+
+static PCRE2_SPTR
+find_recurse(PCRE2_SPTR code, BOOL utf)
+{
+for (;;)
+ {
+ PCRE2_UCHAR c = *code;
+ if (c == OP_END) return NULL;
+ if (c == OP_RECURSE) return code;
+
+ /* XCLASS is used for classes that cannot be represented just by a bit map.
+ This includes negated single high-valued characters. CALLOUT_STR is used for
+ callouts with string arguments. In both cases the length in the table is
+ zero; the actual length is stored in the compiled code. */
+
+ if (c == OP_XCLASS) code += GET(code, 1);
+ else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
+
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
+ two code units of parameters, and for MARK/PRUNE/SKIP/THEN with an argument,
+ we must add in its length. */
+
+ else
+ {
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEPOSUPTO:
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+ code += 2;
+ break;
+
+ case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_SKIP_ARG:
+ case OP_THEN_ARG:
+ code += code[1];
+ break;
+ }
+
+ /* Add in the fixed length from the table */
+
+ code += PRIV(OP_lengths)[c];
+
+ /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may
+ be followed by a multi-unit character. The length in the table is a
+ minimum, so we have to arrange to skip the extra units. */
+
+#ifdef MAYBE_UTF_MULTI
+ if (utf) switch(c)
+ {
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_EXACT:
+ case OP_EXACTI:
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+ case OP_UPTO:
+ case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+ case OP_MINUPTO:
+ case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+ case OP_POSUPTO:
+ case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+ case OP_STAR:
+ case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+ case OP_MINSTAR:
+ case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+ case OP_POSSTAR:
+ case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+ case OP_PLUS:
+ case OP_PLUSI:
+ case OP_NOTPLUS:
+ case OP_NOTPLUSI:
+ case OP_MINPLUS:
+ case OP_MINPLUSI:
+ case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+ case OP_QUERY:
+ case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+ case OP_MINQUERY:
+ case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+ case OP_POSQUERY:
+ case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+ if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
+ break;
+ }
+#else
+ (void)(utf); /* Keep compiler happy by referencing function argument */
+#endif /* MAYBE_UTF_MULTI */
+ }
+ }
+}
+
+
+
+/*************************************************
* Check for asserted fixed first code unit *
*************************************************/
@@ -8158,7 +8110,7 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags
static uint32_t
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert)
{
-register uint32_t c = 0;
+uint32_t c = 0;
int cflags = REQ_NONE;
*flags = REQ_NONE;
@@ -8168,7 +8120,7 @@ do {
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
- register PCRE2_UCHAR op = *scode;
+ PCRE2_UCHAR op = *scode;
switch(op)
{
@@ -8183,7 +8135,6 @@ do {
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
- case OP_ONCE_NC:
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
if (dflags < 0)
return 0;
@@ -8241,18 +8192,19 @@ Arguments:
name the name to add
length the length of the name
groupno the group number
+ tablecount the count of names in the table so far
Returns: nothing
*/
static void
add_name_to_table(compile_block *cb, PCRE2_SPTR name, int length,
- unsigned int groupno)
+ unsigned int groupno, uint32_t tablecount)
{
-int i;
+uint32_t i;
PCRE2_UCHAR *slot = cb->name_table;
-for (i = 0; i < cb->names_found; i++)
+for (i = 0; i < tablecount; i++)
{
int crc = memcmp(name, slot+IMM2_SIZE, CU2BYTES(length));
if (crc == 0 && slot[IMM2_SIZE+length] != 0)
@@ -8266,7 +8218,7 @@ for (i = 0; i < cb->names_found; i++)
if (crc < 0)
{
memmove(slot + cb->name_entry_size, slot,
- CU2BYTES((cb->names_found - i) * cb->name_entry_size));
+ CU2BYTES((tablecount - i) * cb->name_entry_size));
break;
}
@@ -8277,7 +8229,6 @@ for (i = 0; i < cb->names_found; i++)
PUT2(slot, 0, groupno);
memcpy(slot + IMM2_SIZE, name, CU2BYTES(length));
-cb->names_found++;
/* Add a terminating zero and fill the rest of the slot with zeroes so that
the memory is all initialized. Otherwise valgrind moans about uninitialized
@@ -8290,6 +8241,723 @@ memset(slot + IMM2_SIZE + length, 0,
/*************************************************
+* Skip in parsed pattern *
+*************************************************/
+
+/* This function is called to skip parts of the parsed pattern when finding the
+length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
+the end of the branch, it is called to skip over an internal lookaround, and it
+is also called to skip to the end of a class, during which it will never
+encounter nested groups (but there's no need to have special code for that).
+
+When called to find the end of a branch or group, pptr must point to the first
+meta code inside the branch, not the branch-starting code. In other cases it
+can point to the item that causes the function to be called.
+
+Arguments:
+ pptr current pointer to skip from
+ skiptype PSKIP_CLASS when skipping to end of class
+ PSKIP_ALT when META_ALT ends the skip
+ PSKIP_KET when only META_KET ends the skip
+
+Returns: new value of pptr
+ NULL if META_END is reached - should never occur
+ or for an unknown meta value - likewise
+*/
+
+static uint32_t *
+parsed_skip(uint32_t *pptr, uint32_t skiptype)
+{
+uint32_t nestlevel = 0;
+
+for (;; pptr++)
+ {
+ uint32_t meta = META_CODE(*pptr);
+
+ switch(meta)
+ {
+ default: /* Just skip over most items */
+ if (meta < META_END) continue; /* Literal */
+ break;
+
+ /* This should never occur. */
+
+ case META_END:
+ return NULL;
+
+ /* The data for these items is variable in length. */
+
+ case META_BACKREF: /* Offset is present only if group >= 10 */
+ if (META_DATA(*pptr) >= 10) pptr += SIZEOFFSET;
+ break;
+
+ case META_ESCAPE: /* A few escapes are followed by data items. */
+ switch (META_DATA(*pptr))
+ {
+ case ESC_P:
+ case ESC_p:
+ pptr += 1;
+ break;
+
+ case ESC_g:
+ case ESC_k:
+ pptr += 1 + SIZEOFFSET;
+ break;
+ }
+ break;
+
+ case META_MARK: /* Add the length of the name. */
+ case META_PRUNE_ARG:
+ case META_SKIP_ARG:
+ case META_THEN_ARG:
+ pptr += pptr[1];
+ break;
+
+ /* These are the "active" items in this loop. */
+
+ case META_CLASS_END:
+ if (skiptype == PSKIP_CLASS) return pptr;
+ break;
+
+ case META_ATOMIC:
+ case META_CAPTURE:
+ case META_COND_ASSERT:
+ case META_COND_DEFINE:
+ case META_COND_NAME:
+ case META_COND_NUMBER:
+ case META_COND_RNAME:
+ case META_COND_RNUMBER:
+ case META_COND_VERSION:
+ case META_LOOKAHEAD:
+ case META_LOOKAHEADNOT:
+ case META_LOOKBEHIND:
+ case META_LOOKBEHINDNOT:
+ case META_NOCAPTURE:
+ nestlevel++;
+ break;
+
+ case META_ALT:
+ if (nestlevel == 0 && skiptype == PSKIP_ALT) return pptr;
+ break;
+
+ case META_KET:
+ if (nestlevel == 0) return pptr;
+ nestlevel--;
+ break;
+ }
+
+ /* The extra data item length for each meta is in a table. */
+
+ meta = (meta >> 16) & 0x7fff;
+ if (meta >= sizeof(meta_extra_lengths)) return NULL;
+ pptr += meta_extra_lengths[meta];
+ }
+/* Control never reaches here */
+return pptr;
+}
+
+
+
+/*************************************************
+* Find length of a parsed group *
+*************************************************/
+
+/* This is called for nested groups within a branch of a lookbehind whose
+length is being computed. If all the branches in the nested group have the same
+length, that is OK. On entry, the pointer must be at the first element after
+the group initializing code. On exit it points to OP_KET. Caching is used to
+improve processing speed when the same capturing group occurs many times.
+
+Arguments:
+ pptrptr pointer to pointer in the parsed pattern
+ isinline FALSE if a reference or recursion; TRUE for inline group
+ errcodeptr pointer to the errorcode
+ lcptr pointer to the loop counter
+ group number of captured group or -1 for a non-capturing group
+ recurses chain of recurse_check to catch mutual recursion
+ cb pointer to the compile data
+
+Returns: the group length or a negative number
+*/
+
+static int
+get_grouplength(uint32_t **pptrptr, BOOL isinline, int *errcodeptr, int *lcptr,
+ int group, parsed_recurse_check *recurses, compile_block *cb)
+{
+int branchlength;
+int grouplength = -1;
+
+/* The cache can be used only if there is no possibility of there being two
+groups with the same number. We do not need to set the end pointer for a group
+that is being processed as a back reference or recursion, but we must do so for
+an inline group. */
+
+if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
+ {
+ uint32_t groupinfo = cb->groupinfo[group];
+ if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
+ if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
+ {
+ if (isinline) *pptrptr = parsed_skip(*pptrptr, PSKIP_KET);
+ return groupinfo & GI_FIXED_LENGTH_MASK;
+ }
+ }
+
+/* Scan the group. In this case we find the end pointer of necessity. */
+
+for(;;)
+ {
+ branchlength = get_branchlength(pptrptr, errcodeptr, lcptr, recurses, cb);
+ if (branchlength < 0) goto ISNOTFIXED;
+ if (grouplength == -1) grouplength = branchlength;
+ else if (grouplength != branchlength) goto ISNOTFIXED;
+ if (**pptrptr == META_KET) break;
+ *pptrptr += 1; /* Skip META_ALT */
+ }
+
+if (group > 0)
+ cb->groupinfo[group] |= (uint32_t)(GI_SET_FIXED_LENGTH | grouplength);
+return grouplength;
+
+ISNOTFIXED:
+if (group > 0) cb->groupinfo[group] |= GI_NOT_FIXED_LENGTH;
+return -1;
+}
+
+
+
+/*************************************************
+* Find length of a parsed branch *
+*************************************************/
+
+/* Return a fixed length for a branch in a lookbehind, giving an error if the
+length is not fixed. If any lookbehinds are encountered on the way, they get
+their length set. On entry, *pptrptr points to the first element inside the
+branch. On exit it is set to point to the ALT or KET.
+
+Arguments:
+ pptrptr pointer to pointer in the parsed pattern
+ errcodeptr pointer to error code
+ lcptr pointer to loop counter
+ recurses chain of recurse_check to catch mutual recursion
+ cb pointer to compile block
+
+Returns: the length, or a negative value on error
+*/
+
+static int
+get_branchlength(uint32_t **pptrptr, int *errcodeptr, int *lcptr,
+ parsed_recurse_check *recurses, compile_block *cb)
+{
+int branchlength = 0;
+int grouplength;
+uint32_t lastitemlength = 0;
+uint32_t *pptr = *pptrptr;
+PCRE2_SIZE offset;
+parsed_recurse_check this_recurse;
+
+/* A large and/or complex regex can take too long to process. This can happen
+more often when (?| groups are present in the pattern because their length
+cannot be cached. */
+
+if ((*lcptr)++ > 2000)
+ {
+ *errcodeptr = ERR35; /* Lookbehind is too complicated */
+ return -1;
+ }
+
+/* Scan the branch, accumulating the length. */
+
+for (;; pptr++)
+ {
+ parsed_recurse_check *r;
+ uint32_t *gptr, *gptrend;
+ uint32_t escape;
+ uint32_t group = 0;
+ uint32_t itemlength = 0;
+
+ if (*pptr < META_END)
+ {
+ itemlength = 1;
+ }
+
+ else switch (META_CODE(*pptr))
+ {
+ case META_KET:
+ case META_ALT:
+ goto EXIT;
+
+ /* (*ACCEPT) and (*FAIL) terminate the branch, but we must skip to the
+ actual termination. */
+
+ case META_ACCEPT:
+ case META_FAIL:
+ pptr = parsed_skip(pptr, PSKIP_ALT);
+ if (pptr == NULL) goto PARSED_SKIP_FAILED;
+ goto EXIT;
+
+ case META_MARK:
+ case META_PRUNE_ARG:
+ case META_SKIP_ARG:
+ case META_THEN_ARG:
+ pptr += pptr[1] + 1;
+ break;
+
+ case META_CIRCUMFLEX:
+ case META_COMMIT:
+ case META_DOLLAR:
+ case META_PRUNE:
+ case META_SKIP:
+ case META_THEN:
+ break;
+
+ case META_OPTIONS:
+ pptr += 1;
+ break;
+
+ case META_BIGVALUE:
+ itemlength = 1;
+ pptr += 1;
+ break;
+
+ case META_CLASS:
+ case META_CLASS_NOT:
+ itemlength = 1;
+ pptr = parsed_skip(pptr, PSKIP_CLASS);
+ if (pptr == NULL) goto PARSED_SKIP_FAILED;
+ break;
+
+ case META_CLASS_EMPTY_NOT:
+ case META_DOT:
+ itemlength = 1;
+ break;
+
+ case META_CALLOUT_NUMBER:
+ pptr += 3;
+ break;
+
+ case META_CALLOUT_STRING:
+ pptr += 3 + SIZEOFFSET;
+ break;
+
+ /* Only some escapes consume a character. Of those, \R and \X are never
+ allowed because they might match more than character. \C is allowed only in
+ 32-bit and non-UTF 8/16-bit modes. */
+
+ case META_ESCAPE:
+ escape = META_DATA(*pptr);
+ if (escape == ESC_R || escape == ESC_X) return -1;
+ if (escape > ESC_b && escape < ESC_Z)
+ {
+#if PCRE2_CODE_UNIT_WIDTH != 32
+ if ((cb->external_options & PCRE2_UTF) != 0 && escape == ESC_C)
+ {
+ *errcodeptr = ERR36;
+ return -1;
+ }
+#endif
+ itemlength = 1;
+ if (escape == ESC_p || escape == ESC_P) pptr++; /* Skip prop data */
+ }
+ break;
+
+ /* Lookaheads can be ignored, but we must start the skip inside the group
+ so that it isn't treated as a group within the branch. */
+
+ case META_LOOKAHEAD:
+ case META_LOOKAHEADNOT:
+ pptr = parsed_skip(pptr + 1, PSKIP_KET);
+ if (pptr == NULL) goto PARSED_SKIP_FAILED;
+ break;
+
+ /* Lookbehinds can be ignored, but must themselves be checked. */
+
+ case META_LOOKBEHIND:
+ case META_LOOKBEHINDNOT:
+ if (!set_lookbehind_lengths(&pptr, errcodeptr, lcptr, recurses, cb))
+ return -1;
+ break;
+
+ /* Back references and recursions are handled by very similar code. At this
+ stage, the names generated in the parsing pass are available, but the main
+ name table has not yet been created. So for the named varieties, scan the
+ list of names in order to get the number of the first one in the pattern,
+ and whether or not this name is duplicated. */
+
+ case META_BACKREF_BYNAME:
+ if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0)
+ goto ISNOTFIXED;
+ /* Fall through */
+
+ case META_RECURSE_BYNAME:
+ {
+ int i;
+ PCRE2_SPTR name;
+ BOOL is_dupname = FALSE;
+ named_group *ng = cb->named_groups;
+ uint32_t meta_code = META_CODE(*pptr);
+ uint32_t length = *(++pptr);
+
+ GETPLUSOFFSET(offset, pptr);
+ name = cb->start_pattern + offset;
+ for (i = 0; i < cb->names_found; i++, ng++)
+ {
+ if (length == ng->length && PRIV(strncmp)(name, ng->name, length) == 0)
+ {
+ group = ng->number;
+ is_dupname = ng->isdup;
+ break;
+ }
+ }
+
+ if (group == 0)
+ {
+ *errcodeptr = ERR15; /* Non-existent subpattern */
+ cb->erroroffset = offset;
+ return -1;
+ }
+
+ /* A numerical back reference can be fixed length if duplicate capturing
+ groups are not being used. A non-duplicate named back reference can also
+ be handled. */
+
+ if (meta_code == META_RECURSE_BYNAME ||
+ (!is_dupname && (cb->external_flags & PCRE2_DUPCAPUSED) == 0))
+ goto RECURSE_OR_BACKREF_LENGTH; /* Handle as a numbered version. */
+ }
+ goto ISNOTFIXED; /* Duplicate name or number */
+
+ /* The offset values for back references < 10 are in a separate vector
+ because otherwise they would use more than two parsed pattern elements on
+ 64-bit systems. */
+
+ case META_BACKREF:
+ if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0 ||
+ (cb->external_flags & PCRE2_DUPCAPUSED) != 0)
+ goto ISNOTFIXED;
+ group = META_DATA(*pptr);
+ if (group < 10)
+ {
+ offset = cb->small_ref_offset[group];
+ goto RECURSE_OR_BACKREF_LENGTH;
+ }
+
+ /* Fall through */
+ /* For groups >= 10 - picking up group twice does no harm. */
+
+ /* A true recursion implies not fixed length, but a subroutine call may
+ be OK. Back reference "recursions" are also failed. */
+
+ case META_RECURSE:
+ group = META_DATA(*pptr);
+ GETPLUSOFFSET(offset, pptr);
+
+ RECURSE_OR_BACKREF_LENGTH:
+ if (group > cb->bracount)
+ {
+ cb->erroroffset = offset;
+ *errcodeptr = ERR15; /* Non-existent subpattern */
+ return -1;
+ }
+ if (group == 0) goto ISNOTFIXED; /* Local recursion */
+ for (gptr = cb->parsed_pattern; *gptr != META_END; gptr++)
+ {
+ if (META_CODE(*gptr) == META_BIGVALUE) gptr++;
+ else if (*gptr == (META_CAPTURE | group)) break;
+ }
+
+ /* We must start the search for the end of the group at the first meta code
+ inside the group. Otherwise it will be treated as an enclosed group. */
+
+ gptrend = parsed_skip(gptr + 1, PSKIP_KET);
+ if (gptrend == NULL) goto PARSED_SKIP_FAILED;
+ if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
+ for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
+ if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
+ this_recurse.prev = recurses;
+ this_recurse.groupptr = gptr;
+
+ /* We do not need to know the position of the end of the group, that is,
+ gptr is not used after the call to get_grouplength(). Setting the second
+ argument FALSE stops it scanning for the end when the length can be found
+ in the cache. */
+
+ gptr++;
+ grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
+ &this_recurse, cb);
+ if (grouplength < 0)
+ {
+ if (*errcodeptr == 0) goto ISNOTFIXED;
+ return -1; /* Error already set */
+ }
+ itemlength = grouplength;
+ break;
+
+ /* Check nested groups - advance past the initial data for each type and
+ then seek a fixed length with get_grouplength(). */
+
+ case META_COND_NAME:
+ case META_COND_NUMBER:
+ case META_COND_RNAME:
+ case META_COND_RNUMBER:
+ case META_COND_DEFINE:
+ pptr += 2 + SIZEOFFSET;
+ goto CHECK_GROUP;
+
+ case META_COND_ASSERT:
+ pptr += 1;
+ goto CHECK_GROUP;
+
+ case META_COND_VERSION:
+ pptr += 4;
+ goto CHECK_GROUP;
+
+ case META_CAPTURE:
+ group = META_DATA(*pptr);
+ /* Fall through */
+
+ case META_ATOMIC:
+ case META_NOCAPTURE:
+ pptr++;
+ CHECK_GROUP:
+ grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
+ recurses, cb);
+ if (grouplength < 0) return -1;
+ itemlength = grouplength;
+ break;
+
+ /* Exact repetition is OK; variable repetition is not. A repetition of zero
+ must subtract the length that has already been added. */
+
+ case META_MINMAX:
+ case META_MINMAX_PLUS:
+ case META_MINMAX_QUERY:
+ if (pptr[1] == pptr[2])
+ {
+ if (pptr[1] == 0) branchlength -= lastitemlength;
+ else itemlength = (pptr[1] - 1) * lastitemlength;
+ pptr += 2;
+ break;
+ }
+ /* Fall through */
+
+ /* Any other item means this branch does not have a fixed length. */
+
+ default:
+ ISNOTFIXED:
+ *errcodeptr = ERR25; /* Not fixed length */
+ return -1;
+ }
+
+ /* Add the item length to the branchlength, and save it for use if the next
+ thing is a quantifier. */
+
+ branchlength += itemlength;
+ lastitemlength = itemlength;
+
+ /* Ensure that the length does not overflow the limit. */
+
+ if (branchlength > LOOKBEHIND_MAX)
+ {
+ *errcodeptr = ERR87;
+ return -1;
+ }
+ }
+
+EXIT:
+*pptrptr = pptr;
+if (branchlength > cb->max_lookbehind) cb->max_lookbehind = branchlength;
+return branchlength;
+
+PARSED_SKIP_FAILED:
+*errcodeptr = ERR90;
+return -1;
+}
+
+
+
+/*************************************************
+* Set lengths in a lookbehind *
+*************************************************/
+
+/* This function is called for each lookbehind, to set the lengths in its
+branches. An error occurs if any branch does not have a fixed length that is
+less than the maximum (65535). On exit, the pointer must be left on the final
+ket.
+
+Arguments:
+ pptrptr pointer to pointer in the parsed pattern
+ errcodeptr pointer to error code
+ lcptr pointer to loop counter
+ recurses chain of recurse_check to catch mutual recursion
+ cb pointer to compile block
+
+Returns: TRUE if all is well
+ FALSE otherwise, with error code and offset set
+*/
+
+static BOOL
+set_lookbehind_lengths(uint32_t **pptrptr, int *errcodeptr, int *lcptr,
+ parsed_recurse_check *recurses, compile_block *cb)
+{
+PCRE2_SIZE offset;
+int branchlength;
+uint32_t *bptr = *pptrptr;
+
+READPLUSOFFSET(offset, bptr); /* Offset for error messages */
+*pptrptr += SIZEOFFSET;
+
+do
+ {
+ *pptrptr += 1;
+ branchlength = get_branchlength(pptrptr, errcodeptr, lcptr, recurses, cb);
+ if (branchlength < 0)
+ {
+ /* The errorcode and offset may already be set from a nested lookbehind. */
+ if (*errcodeptr == 0) *errcodeptr = ERR25;
+ if (cb->erroroffset == PCRE2_UNSET) cb->erroroffset = offset;
+ return FALSE;
+ }
+ *bptr |= branchlength; /* branchlength never more than 65535 */
+ bptr = *pptrptr;
+ }
+while (*bptr == META_ALT);
+
+return TRUE;
+}
+
+
+
+/*************************************************
+* Check parsed pattern lookbehinds *
+*************************************************/
+
+/* This function is called at the end of parsing a pattern if any lookbehinds
+were encountered. It scans the parsed pattern for them, calling
+set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
+the error offset is marked unset. The enables the functions above not to
+override settings from deeper nestings.
+
+Arguments cb points to the compile block
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
+*/
+
+static int
+check_lookbehinds(compile_block *cb)
+{
+uint32_t *pptr;
+int errorcode = 0;
+int loopcount = 0;
+
+cb->erroroffset = PCRE2_UNSET;
+
+for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
+ {
+ if (*pptr < META_END) continue; /* Literal */
+
+ switch (META_CODE(*pptr))
+ {
+ default:
+ return ERR70; /* Unrecognized meta code */
+
+ case META_ESCAPE:
+ if (*pptr - META_ESCAPE == ESC_P || *pptr - META_ESCAPE == ESC_p)
+ pptr += 1;
+ break;
+
+ case META_ACCEPT:
+ case META_ALT:
+ case META_ASTERISK:
+ case META_ASTERISK_PLUS:
+ case META_ASTERISK_QUERY:
+ case META_ATOMIC:
+ case META_BACKREF:
+ case META_CAPTURE:
+ case META_CIRCUMFLEX:
+ case META_CLASS:
+ case META_CLASS_EMPTY:
+ case META_CLASS_EMPTY_NOT:
+ case META_CLASS_END:
+ case META_CLASS_NOT:
+ case META_COMMIT:
+ case META_COND_ASSERT:
+ case META_DOLLAR:
+ case META_DOT:
+ case META_FAIL:
+ case META_KET:
+ case META_LOOKAHEAD:
+ case META_LOOKAHEADNOT:
+ case META_NOCAPTURE:
+ case META_PLUS:
+ case META_PLUS_PLUS:
+ case META_PLUS_QUERY:
+ case META_PRUNE:
+ case META_QUERY:
+ case META_QUERY_PLUS:
+ case META_QUERY_QUERY:
+ case META_RANGE_ESCAPED:
+ case META_RANGE_LITERAL:
+ case META_SKIP:
+ case META_THEN:
+ break;
+
+ case META_RECURSE:
+ pptr += SIZEOFFSET;
+ break;
+
+ case META_BACKREF_BYNAME:
+ case META_COND_DEFINE:
+ case META_COND_NAME:
+ case META_COND_NUMBER:
+ case META_COND_RNAME:
+ case META_COND_RNUMBER:
+ case META_RECURSE_BYNAME:
+ pptr += 1 + SIZEOFFSET;
+ break;
+
+ case META_CALLOUT_STRING:
+ pptr += 3 + SIZEOFFSET;
+ break;
+
+ case META_BIGVALUE:
+ case META_OPTIONS:
+ case META_POSIX:
+ case META_POSIX_NEG:
+ pptr += 1;
+ break;
+
+ case META_MINMAX:
+ case META_MINMAX_QUERY:
+ case META_MINMAX_PLUS:
+ pptr += 2;
+ break;
+
+ case META_CALLOUT_NUMBER:
+ case META_COND_VERSION:
+ pptr += 3;
+ break;
+
+ case META_MARK:
+ case META_PRUNE_ARG:
+ case META_SKIP_ARG:
+ case META_THEN_ARG:
+ pptr += 1 + pptr[1];
+ break;
+
+ case META_LOOKBEHIND:
+ case META_LOOKBEHINDNOT:
+ if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb))
+ return errorcode;
+ break;
+ }
+ }
+
+return 0;
+}
+
+
+
+/*************************************************
* External function to compile a pattern *
*************************************************/
@@ -8312,43 +8980,51 @@ PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
-BOOL utf; /* Set TRUE for UTF mode */
-pcre2_real_code *re = NULL; /* What we will return */
-compile_block cb; /* "Static" compile-time data */
-const uint8_t *tables; /* Char tables base pointer */
-
-PCRE2_UCHAR *code; /* Current pointer in compiled code */
-PCRE2_SPTR codestart; /* Start of compiled code */
-PCRE2_SPTR ptr; /* Current pointer in pattern */
-
-size_t length = 1; /* Allow or final END opcode */
-size_t usedlength; /* Actual length used */
-size_t re_blocksize; /* Size of memory block */
-
-int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
-uint32_t firstcu, reqcu; /* Value of first/req code unit */
-uint32_t setflags = 0; /* NL and BSR set flags */
-
-uint32_t skipatstart; /* When checking (*UTF) etc */
-uint32_t limit_match = UINT32_MAX; /* Unset match limits */
-uint32_t limit_recursion = UINT32_MAX;
-
-int newline = 0; /* Unset; can be set by the pattern */
-int bsr = 0; /* Unset; can be set by the pattern */
-int errorcode = 0; /* Initialize to avoid compiler warn */
+BOOL utf; /* Set TRUE for UTF mode */
+BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
+BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
+pcre2_real_code *re = NULL; /* What we will return */
+compile_block cb; /* "Static" compile-time data */
+const uint8_t *tables; /* Char tables base pointer */
+
+PCRE2_UCHAR *code; /* Current pointer in compiled code */
+PCRE2_SPTR codestart; /* Start of compiled code */
+PCRE2_SPTR ptr; /* Current pointer in pattern */
+uint32_t *pptr; /* Current pointer in parsed pattern */
+
+PCRE2_SIZE length = 1; /* Allow for final END opcode */
+PCRE2_SIZE usedlength; /* Actual length used */
+PCRE2_SIZE re_blocksize; /* Size of memory block */
+PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
+PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
+
+int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
+uint32_t firstcu, reqcu; /* Value of first/req code unit */
+uint32_t setflags = 0; /* NL and BSR set flags */
+
+uint32_t skipatstart; /* When checking (*UTF) etc */
+uint32_t limit_heap = UINT32_MAX;
+uint32_t limit_match = UINT32_MAX; /* Unset match limits */
+uint32_t limit_depth = UINT32_MAX;
+
+int newline = 0; /* Unset; can be set by the pattern */
+int bsr = 0; /* Unset; can be set by the pattern */
+int errorcode = 0; /* Initialize to avoid compiler warn */
+int regexrc; /* Return from compile */
+
+uint32_t i; /* Local loop counter */
/* Comments at the head of this file explain about these variables. */
-PCRE2_UCHAR *copied_pattern = NULL;
-PCRE2_UCHAR stack_copied_pattern[COPIED_PATTERN_SIZE];
+uint32_t stack_groupinfo[GROUPINFO_DEFAULT_SIZE];
+uint32_t stack_parsed_pattern[PARSED_PATTERN_DEFAULT_SIZE];
named_group named_groups[NAMED_GROUP_LIST_SIZE];
/* The workspace is used in different ways in the different compiling phases.
-It needs to be 16-bit aligned for the preliminary group scan, and 32-bit
-aligned for the group information cache. */
+It needs to be 16-bit aligned for the preliminary parsing scan. */
-uint32_t c32workspace[C32_WORK_SIZE];
-PCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c32workspace;
+uint32_t c16workspace[C16_WORK_SIZE];
+PCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c16workspace;
/* -------------- Check arguments and set up the pattern ----------------- */
@@ -8367,57 +9043,44 @@ if (pattern == NULL)
return NULL;
}
+/* A NULL compile context means "use a default context" */
+
+if (ccontext == NULL)
+ ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
+
/* Check that all undefined public option bits are zero. */
-if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
+if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0 ||
+ (ccontext->extra_options & ~PUBLIC_COMPILE_EXTRA_OPTIONS) != 0)
{
*errorptr = ERR17;
return NULL;
}
-/* A NULL compile context means "use a default context" */
-
-if (ccontext == NULL)
- ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
+if ((options & PCRE2_LITERAL) != 0 &&
+ ((options & ~PUBLIC_LITERAL_COMPILE_OPTIONS) != 0 ||
+ (ccontext->extra_options & ~PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS) != 0))
+ {
+ *errorptr = ERR92;
+ return NULL;
+ }
/* A zero-terminated pattern is indicated by the special length value
-PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
-to ensure that it is always possible to look one code unit beyond the end of
-the pattern's characters. In both cases, check that the pattern is overlong. */
+PCRE2_ZERO_TERMINATED. Check for an overlong pattern. */
-if (patlen == PCRE2_ZERO_TERMINATED)
- {
+if ((zero_terminated = (patlen == PCRE2_ZERO_TERMINATED)))
patlen = PRIV(strlen)(pattern);
- if (patlen > ccontext->max_pattern_length)
- {
- *errorptr = ERR88;
- return NULL;
- }
- }
-else
+
+if (patlen > ccontext->max_pattern_length)
{
- if (patlen > ccontext->max_pattern_length)
- {
- *errorptr = ERR88;
- return NULL;
- }
- if (patlen < COPIED_PATTERN_SIZE)
- copied_pattern = stack_copied_pattern;
- else
- {
- copied_pattern = ccontext->memctl.malloc(CU2BYTES(patlen + 1),
- ccontext->memctl.memory_data);
- if (copied_pattern == NULL)
- {
- *errorptr = ERR21;
- return NULL;
- }
- }
- memcpy(copied_pattern, pattern, CU2BYTES(patlen));
- copied_pattern[patlen] = 0;
- pattern = copied_pattern;
+ *errorptr = ERR88;
+ return NULL;
}
+/* From here on, all returns from this function should end up going via the
+EXIT label. */
+
+
/* ------------ Initialize the "static" compile data -------------- */
tables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
@@ -8428,16 +9091,16 @@ cb.cbits = tables + cbits_offset; /* tables */
cb.ctypes = tables + ctypes_offset;
cb.assert_depth = 0;
-cb.bracount = cb.final_bracount = 0;
+cb.bracount = 0;
cb.cx = ccontext;
cb.dupnames = FALSE;
cb.end_pattern = pattern + patlen;
-cb.nestptr[0] = cb.nestptr[1] = NULL;
+cb.erroroffset = 0;
cb.external_flags = 0;
cb.external_options = options;
-cb.groupinfo = c32workspace;
+cb.groupinfo = stack_groupinfo;
cb.had_recurse = FALSE;
-cb.iscondassert = FALSE;
+cb.lastcapture = 0;
cb.max_lookbehind = 0;
cb.name_entry_size = 0;
cb.name_table = NULL;
@@ -8446,6 +9109,7 @@ cb.named_group_list_size = NAMED_GROUP_LIST_SIZE;
cb.names_found = 0;
cb.open_caps = NULL;
cb.parens_depth = 0;
+cb.parsed_pattern = stack_parsed_pattern;
cb.req_varyopt = 0;
cb.start_code = cworkspace;
cb.start_pattern = pattern;
@@ -8459,77 +9123,103 @@ references to help in deciding whether (.*) can be treated as anchored or not.
cb.top_backref = 0;
cb.backref_map = 0;
+/* Escape sequences \1 to \9 are always back references, but as they are only
+two characters long, only two elements can be used in the parsed_pattern
+vector. The first contains the reference, and we'd like to use the second to
+record the offset in the pattern, so that forward references to non-existent
+groups can be diagnosed later with an offset. However, on 64-bit systems,
+PCRE2_SIZE won't fit. Instead, we have a vector of offsets for the first
+occurrence of \1 to \9, indexed by the second parsed_pattern value. All other
+references have enough space for the offset to be put into the parsed pattern.
+*/
+
+for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;
+
+
/* --------------- Start looking at the pattern --------------- */
-/* Check for global one-time option settings at the start of the pattern, and
-remember the offset to the actual regex. */
+/* Unless PCRE2_LITERAL is set, check for global one-time option settings at
+the start of the pattern, and remember the offset to the actual regex. With
+valgrind support, make the terminator of a zero-terminated pattern
+inaccessible. This catches bugs that would otherwise only show up for
+non-zero-terminated patterns. */
+
+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
+#endif
ptr = pattern;
skipatstart = 0;
-while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
- ptr[skipatstart+1] == CHAR_ASTERISK)
+if ((options & PCRE2_LITERAL) == 0)
{
- unsigned int i;
- for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
+ while (patlen - skipatstart >= 2 &&
+ ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
+ ptr[skipatstart+1] == CHAR_ASTERISK)
{
- pso *p = pso_list + i;
-
- if (PRIV(strncmp_c8)(ptr+skipatstart+2, (char *)(p->name), p->length) == 0)
+ for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
{
uint32_t c, pp;
+ pso *p = pso_list + i;
- skipatstart += p->length + 2;
- switch(p->type)
+ if (patlen - skipatstart - 2 >= p->length &&
+ PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name),
+ p->length) == 0)
{
- case PSO_OPT:
- cb.external_options |= p->value;
- break;
+ skipatstart += p->length + 2;
+ switch(p->type)
+ {
+ case PSO_OPT:
+ cb.external_options |= p->value;
+ break;
- case PSO_FLG:
- setflags |= p->value;
- break;
+ case PSO_FLG:
+ setflags |= p->value;
+ break;
- case PSO_NL:
- newline = p->value;
- setflags |= PCRE2_NL_SET;
- break;
+ case PSO_NL:
+ newline = p->value;
+ setflags |= PCRE2_NL_SET;
+ break;
- case PSO_BSR:
- bsr = p->value;
- setflags |= PCRE2_BSR_SET;
- break;
+ case PSO_BSR:
+ bsr = p->value;
+ setflags |= PCRE2_BSR_SET;
+ break;
- case PSO_LIMM:
- case PSO_LIMR:
- c = 0;
- pp = skipatstart;
- if (!IS_DIGIT(ptr[pp]))
- {
- errorcode = ERR60;
- ptr += pp;
- goto HAD_ERROR;
- }
- while (IS_DIGIT(ptr[pp]))
- {
- if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
- c = c*10 + (ptr[pp++] - CHAR_0);
- }
- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
- {
- errorcode = ERR60;
- ptr += pp;
- goto HAD_ERROR;
+ case PSO_LIMM:
+ case PSO_LIMD:
+ case PSO_LIMH:
+ c = 0;
+ pp = skipatstart;
+ if (!IS_DIGIT(ptr[pp]))
+ {
+ errorcode = ERR60;
+ ptr += pp;
+ goto HAD_EARLY_ERROR;
+ }
+ while (IS_DIGIT(ptr[pp]))
+ {
+ if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
+ c = c*10 + (ptr[pp++] - CHAR_0);
+ }
+ if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
+ {
+ errorcode = ERR60;
+ ptr += pp;
+ goto HAD_EARLY_ERROR;
+ }
+ if (p->type == PSO_LIMH) limit_heap = c;
+ else if (p->type == PSO_LIMM) limit_match = c;
+ else limit_depth = c;
+ skipatstart += pp - skipatstart;
+ break;
}
- if (p->type == PSO_LIMM) limit_match = c;
- else limit_recursion = c;
- skipatstart += pp - skipatstart;
- break;
+ break; /* Out of the table scan loop */
}
- break; /* Out of the table scan loop */
}
+ if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
- if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
/* End of pattern-start options; advance to start of real regex. */
@@ -8542,12 +9232,14 @@ ptr += skipatstart;
if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
{
errorcode = ERR32;
- goto HAD_ERROR;
+ goto HAD_EARLY_ERROR;
}
#endif
/* Check UTF. We have the original options in 'options', with that value as
-modified by (*UTF) etc in cb->external_options. */
+modified by (*UTF) etc in cb->external_options. The extra option
+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not permitted in UTF-16 mode because the
+surrogate code points cannot be represented in UTF-16. */
utf = (cb.external_options & PCRE2_UTF) != 0;
if (utf)
@@ -8555,11 +9247,19 @@ if (utf)
if ((options & PCRE2_NEVER_UTF) != 0)
{
errorcode = ERR74;
- goto HAD_ERROR;
+ goto HAD_EARLY_ERROR;
}
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
- goto HAD_UTF_ERROR;
+ goto HAD_ERROR; /* Offset was set by valid_utf() */
+
+#if PCRE2_CODE_UNIT_WIDTH == 16
+ if ((ccontext->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)
+ {
+ errorcode = ERR91;
+ goto HAD_EARLY_ERROR;
+ }
+#endif
}
/* Check UCP lockout. */
@@ -8568,7 +9268,7 @@ if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
(PCRE2_UCP|PCRE2_NEVER_UCP))
{
errorcode = ERR75;
- goto HAD_ERROR;
+ goto HAD_EARLY_ERROR;
}
/* Process the BSR setting. */
@@ -8591,6 +9291,11 @@ switch(newline)
cb.nl[0] = CHAR_NL;
break;
+ case PCRE2_NEWLINE_NUL:
+ cb.nllen = 1;
+ cb.nl[0] = CHAR_NUL;
+ break;
+
case PCRE2_NEWLINE_CRLF:
cb.nllen = 2;
cb.nl[0] = CHAR_CR;
@@ -8607,23 +9312,107 @@ switch(newline)
default:
errorcode = ERR56;
- goto HAD_ERROR;
+ goto HAD_EARLY_ERROR;
}
-/* Before we do anything else, do a pre-scan of the pattern in order to
-discover the named groups and their numerical equivalents, so that this
-information is always available for the remaining processing. */
+/* Pre-scan the pattern to do two things: (1) Discover the named groups and
+their numerical equivalents, so that this information is always available for
+the remaining processing. (2) At the same time, parse the pattern and put a
+processed version into the parsed_pattern vector. This has escapes interpreted
+and comments removed (amongst other things).
-errorcode = scan_for_captures(&ptr, cb.external_options, &cb);
-if (errorcode != 0) goto HAD_ERROR;
+In all but one case, when PCRE2_AUTO_CALLOUT is not set, the number of unsigned
+32-bit ints in the parsed pattern is bounded by the length of the pattern plus
+one (for the terminator) plus four if PCRE2_EXTRA_WORD or PCRE2_EXTRA_LINE is
+set. The exceptional case is when running in 32-bit, non-UTF mode, when literal
+characters greater than META_END (0x80000000) have to be coded as two units. In
+this case, therefore, we scan the pattern to check for such values. */
-/* For obscure debugging this code can be enabled. */
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (!utf)
+ {
+ PCRE2_SPTR p;
+ for (p = ptr; p < cb.end_pattern; p++) if (*p >= META_END) big32count++;
+ }
+#endif
-#if 0
+/* Ensure that the parsed pattern buffer is big enough. When PCRE2_AUTO_CALLOUT
+is set we have to assume a numerical callout (4 elements) for each character
+plus one at the end. This is overkill, but memory is plentiful these days. For
+many smaller patterns the vector on the stack (which was set up above) can be
+used. */
+
+parsed_size_needed = patlen - skipatstart + big32count;
+
+if ((ccontext->extra_options &
+ (PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_MATCH_LINE)) != 0)
+ parsed_size_needed += 4;
+
+if ((options & PCRE2_AUTO_CALLOUT) != 0)
+ parsed_size_needed = (parsed_size_needed + 1) * 5;
+
+if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
+ {
+ uint32_t *heap_parsed_pattern = ccontext->memctl.malloc(
+ (parsed_size_needed + 1) * sizeof(uint32_t), ccontext->memctl.memory_data);
+ if (heap_parsed_pattern == NULL)
+ {
+ *errorptr = ERR21;
+ goto EXIT;
+ }
+ cb.parsed_pattern = heap_parsed_pattern;
+ }
+cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
+
+/* Do the parsing scan. */
+
+errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb);
+if (errorcode != 0) goto HAD_CB_ERROR;
+
+/* Workspace is needed to remember information about numbered groups: whether a
+group can match an empty string and what its fixed length is. This is done to
+avoid the possibility of recursive references causing very long compile times
+when checking these features. Unnumbered groups do not have this exposure since
+they cannot be referenced. We use an indexed vector for this purpose. If there
+are sufficiently few groups, the default vector on the stack, as set up above,
+can be used. Otherwise we have to get/free a special vector. The vector must be
+initialized to zero. */
+
+if (cb.bracount >= GROUPINFO_DEFAULT_SIZE)
+ {
+ cb.groupinfo = ccontext->memctl.malloc(
+ (cb.bracount + 1)*sizeof(uint32_t), ccontext->memctl.memory_data);
+ if (cb.groupinfo == NULL)
+ {
+ errorcode = ERR21;
+ cb.erroroffset = 0;
+ goto HAD_CB_ERROR;
+ }
+ }
+memset(cb.groupinfo, 0, (cb.bracount + 1) * sizeof(uint32_t));
+
+/* If there were any lookbehinds, scan the parsed pattern to figure out their
+lengths. */
+
+if (has_lookbehind)
+ {
+ errorcode = check_lookbehinds(&cb);
+ if (errorcode != 0) goto HAD_CB_ERROR;
+ }
+
+/* For debugging, there is a function that shows the parsed data vector. */
+
+#ifdef DEBUG_SHOW_PARSED
+fprintf(stderr, "+++ Pre-scan complete:\n");
+show_parsed(&cb);
+#endif
+
+/* For debugging capturing information this code can be enabled. */
+
+#ifdef DEBUG_SHOW_CAPTURES
{
- int i;
named_group *ng = cb.named_groups;
- fprintf(stderr, "+++Captures: %d\n", cb.final_bracount);
+ fprintf(stderr, "+++Captures: %d\n", cb.bracount);
for (i = 0; i < cb.names_found; i++, ng++)
{
fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name);
@@ -8631,12 +9420,6 @@ if (errorcode != 0) goto HAD_ERROR;
}
#endif
-/* Reset current bracket count to zero and current pointer to the start of the
-pattern. */
-
-cb.bracount = 0;
-ptr = pattern + skipatstart;
-
/* Pretend to compile the pattern while actually just accumulating the amount
of memory required in the 'length' variable. This behaviour is triggered by
passing a non-NULL final argument to compile_regex(). We pass a block of
@@ -8645,24 +9428,26 @@ compiled code is discarded when it is no longer needed, so hopefully this
workspace will never overflow, though there is a test for its doing so.
On error, errorcode will be set non-zero, so we don't need to look at the
-result of the function. The initial options have been put into the cb block so
-that they can be changed if an option setting is found within the regex right
-at the beginning. Bringing initial option settings outside can help speed up
-starting point checks. We still have to pass a separate options variable (the
-first argument) because that may change as the pattern is processed. */
+result of the function. The initial options have been put into the cb block,
+but we still have to pass a separate options variable (the first argument)
+because the options may change as the pattern is processed. */
+cb.erroroffset = patlen; /* For any subsequent errors that do not set it */
+pptr = cb.parsed_pattern;
code = cworkspace;
*code = OP_BRA;
-(void)compile_regex(cb.external_options, &code, &ptr, &errorcode, FALSE,
- FALSE, 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
- &cb, &length);
+(void)compile_regex(cb.external_options, &code, &pptr, &errorcode, 0, &firstcu,
+ &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, &length);
+
+if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */
+
+/* This should be caught in compile_regex(), but just in case... */
-if (errorcode != 0) goto HAD_ERROR;
if (length > MAX_PATTERN_SIZE)
{
errorcode = ERR20;
- goto HAD_ERROR;
+ goto HAD_CB_ERROR;
}
/* Compute the size of, and then get and initialize, the data block for storing
@@ -8671,15 +9456,23 @@ possible because nowadays we limit the maximum value of cb.names_found and
cb.name_entry_size. */
re_blocksize = sizeof(pcre2_real_code) +
- CU2BYTES(length + cb.names_found * cb.name_entry_size);
+ CU2BYTES(length +
+ (PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);
re = (pcre2_real_code *)
ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data);
if (re == NULL)
{
errorcode = ERR21;
- goto HAD_ERROR;
+ goto HAD_CB_ERROR;
}
+/* The compiler may put padding at the end of the pcre2_real_code structure in
+order to round it up to a multiple of 4 or 8 bytes. This means that when a
+compiled pattern is copied (for example, when serialized) undefined bytes are
+read, and this annoys debuggers such as valgrind. To avoid this, we explicitly
+write to the last 8 bytes of the structure before setting the fields. */
+
+memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8);
re->memctl = ccontext->memctl;
re->tables = tables;
re->executable_jit = NULL;
@@ -8689,8 +9482,9 @@ re->magic_number = MAGIC_NUMBER;
re->compile_options = options;
re->overall_options = cb.external_options;
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
+re->limit_heap = limit_heap;
re->limit_match = limit_match;
-re->limit_recursion = limit_recursion;
+re->limit_depth = limit_depth;
re->first_codeunit = 0;
re->last_codeunit = 0;
re->bsr_convention = bsr;
@@ -8708,44 +9502,19 @@ code follows after that. */
codestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count;
-/* Workspace is needed to remember information about numbered groups: whether a
-group can match an empty string and what its fixed length is. This is done to
-avoid the possibility of recursive references causing very long compile times
-when checking these features. Unnumbered groups do not have this exposure since
-they cannot be referenced. We use an indexed vector for this purpose. If there
-are sufficiently few groups, it can be the c32workspace vector, as set up
-above. Otherwise we have to get/free a special vector. The vector must be
-initialized to zero. */
-
-if (cb.final_bracount >= C32_WORK_SIZE)
- {
- cb.groupinfo = ccontext->memctl.malloc(
- (cb.final_bracount + 1)*sizeof(uint32_t), ccontext->memctl.memory_data);
- if (cb.groupinfo == NULL)
- {
- errorcode = ERR21;
- goto HAD_ERROR;
- }
- }
-memset(cb.groupinfo, 0, (cb.final_bracount + 1) * sizeof(uint32_t));
-
/* Update the compile data block for the actual compile. The starting points of
the name/number translation table and of the code are passed around in the
compile data block. The start/end pattern and initial options are already set
-from the pre-compile phase, as is the name_entry_size field. Reset the bracket
-count and the names_found field. */
+from the pre-compile phase, as is the name_entry_size field. */
cb.parens_depth = 0;
cb.assert_depth = 0;
-cb.bracount = 0;
-cb.max_lookbehind = 0;
+cb.lastcapture = 0;
cb.name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
cb.start_code = codestart;
-cb.iscondassert = FALSE;
cb.req_varyopt = 0;
cb.had_accept = FALSE;
cb.had_pruneorskip = FALSE;
-cb.check_lookbehind = FALSE;
cb.open_caps = NULL;
/* If any named groups were found, create the name/number table from the list
@@ -8753,23 +9522,21 @@ created in the pre-pass. */
if (cb.names_found > 0)
{
- int i = cb.names_found;
named_group *ng = cb.named_groups;
- cb.names_found = 0;
- for (; i > 0; i--, ng++)
- add_name_to_table(&cb, ng->name, ng->length, ng->number);
+ for (i = 0; i < cb.names_found; i++, ng++)
+ add_name_to_table(&cb, ng->name, ng->length, ng->number, i);
}
/* Set up a starting, non-extracting bracket, then compile the expression. On
error, errorcode will be set non-zero, so we don't need to look at the result
of the function here. */
-ptr = pattern + skipatstart;
+pptr = cb.parsed_pattern;
code = (PCRE2_UCHAR *)codestart;
*code = OP_BRA;
-(void)compile_regex(re->overall_options, &code, &ptr, &errorcode, FALSE, FALSE,
- 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL);
-
+regexrc = compile_regex(re->overall_options, &code, &pptr, &errorcode, 0,
+ &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL);
+if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY;
re->top_bracket = cb.bracount;
re->top_backref = cb.top_backref;
re->max_lookbehind = cb.max_lookbehind;
@@ -8805,7 +9572,7 @@ if (errorcode == 0 && cb.had_recurse)
{
PCRE2_UCHAR *rcode;
PCRE2_SPTR rgroup;
- int ccount = 0;
+ unsigned int ccount = 0;
int start = RSCAN_CACHE_SIZE;
recurse_cache rc[RSCAN_CACHE_SIZE];
@@ -8813,16 +9580,16 @@ if (errorcode == 0 && cb.had_recurse)
rcode != NULL;
rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf))
{
- int i, p, recno;
+ int p, groupnumber;
- recno = (int)GET(rcode, 1);
- if (recno == 0) rgroup = codestart; else
+ groupnumber = (int)GET(rcode, 1);
+ if (groupnumber == 0) rgroup = codestart; else
{
PCRE2_SPTR search_from = codestart;
rgroup = NULL;
for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7)
{
- if (recno == rc[p].recno)
+ if (groupnumber == rc[p].groupnumber)
{
rgroup = rc[p].group;
break;
@@ -8832,19 +9599,19 @@ if (errorcode == 0 && cb.had_recurse)
search time below when the new group number is greater than any of the
previously found groups. */
- if (recno > rc[p].recno) search_from = rc[p].group;
+ if (groupnumber > rc[p].groupnumber) search_from = rc[p].group;
}
if (rgroup == NULL)
{
- rgroup = PRIV(find_bracket)(search_from, utf, recno);
+ rgroup = PRIV(find_bracket)(search_from, utf, groupnumber);
if (rgroup == NULL)
{
errorcode = ERR53;
break;
}
if (--start < 0) start = RSCAN_CACHE_SIZE - 1;
- rc[start].recno = recno;
+ rc[start].groupnumber = groupnumber;
rc[start].group = rgroup;
if (ccount < RSCAN_CACHE_SIZE) ccount++;
}
@@ -8857,93 +9624,27 @@ if (errorcode == 0 && cb.had_recurse)
/* In rare debugging situations we sometimes need to look at the compiled code
at this stage. */
-#ifdef CALL_PRINTINT
+#ifdef DEBUG_CALL_PRINTINT
pcre2_printint(re, stderr, TRUE);
fprintf(stderr, "Length=%lu Used=%lu\n", length, usedlength);
#endif
-/* After a successful compile, give an error if there's back reference to a
-non-existent capturing subpattern. Then, unless disabled, check whether any
-single character iterators can be auto-possessified. The function overwrites
-the appropriate opcode values, so the type of the pointer must be cast. NOTE:
-the intermediate variable "temp" is used in this code because at least one
-compiler gives a warning about loss of "const" attribute if the cast
-(PCRE2_UCHAR *)codestart is used directly in the function call. */
+/* Unless disabled, check whether any single character iterators can be
+auto-possessified. The function overwrites the appropriate opcode values, so
+the type of the pointer must be cast. NOTE: the intermediate variable "temp" is
+used in this code because at least one compiler gives a warning about loss of
+"const" attribute if the cast (PCRE2_UCHAR *)codestart is used directly in the
+function call. */
-if (errorcode == 0)
+if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
{
- if (re->top_backref > re->top_bracket) errorcode = ERR15;
- else if ((re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
- {
- PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
- if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
- }
- }
-
-/* If there were any lookbehind assertions that contained OP_RECURSE
-(recursions or subroutine calls), a flag is set for them to be checked here,
-because they may contain forward references. Actual recursions cannot be fixed
-length, but subroutine calls can. It is done like this so that those without
-OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
-exceptional ones forgo this. We scan the pattern to check that they are fixed
-length, and set their lengths. */
-
-if (errorcode == 0 && cb.check_lookbehind)
- {
- PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;
-
- /* Loop, searching for OP_REVERSE items, and process those that do not have
- their length set. (Actually, it will also re-process any that have a length
- of zero, but that is a pathological case, and it does no harm.) When we find
- one, we temporarily terminate the branch it is in while we scan it. Note that
- calling find_bracket() with a negative group number returns a pointer to the
- OP_REVERSE item, not the actual lookbehind. */
-
- for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1);
- cc != NULL;
- cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1))
- {
- if (GET(cc, 1) == 0)
- {
- int fixed_length;
- int count = 0;
- PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
- int end_op = *be;
- *be = OP_END;
- fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL, &count);
- *be = end_op;
- if (fixed_length < 0)
- {
- errorcode = fixed_length_errors[-fixed_length];
- break;
- }
- if (fixed_length > cb.max_lookbehind) cb.max_lookbehind = fixed_length;
- PUT(cc, 1, fixed_length);
- }
- cc += 1 + LINK_SIZE;
- }
-
- /* The previous value of the maximum lookbehind was transferred to the
- compiled regex block above. We could have updated this value in the loop
- above, but keep the two values in step, just in case some later code below
- uses the cb value. */
-
- re->max_lookbehind = cb.max_lookbehind;
+ PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
+ if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
}
-/* Failed to compile, or error while post-processing. Earlier errors get here
-via the dreaded goto. */
+/* Failed to compile, or error while post-processing. */
-if (errorcode != 0)
- {
- HAD_ERROR:
- *erroroffset = (int)(ptr - pattern);
- HAD_UTF_ERROR:
- *errorptr = errorcode;
- pcre2_code_free(re);
- re = NULL;
- goto EXIT;
- }
+if (errorcode != 0) goto HAD_CB_ERROR;
/* Successful compile. If the anchored option was not passed, set it if
we can determine that the pattern is anchored by virtue of ^ characters or \A
@@ -8952,17 +9653,22 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to
disable this case). */
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
- is_anchored(codestart, 0, &cb, 0))
+ is_anchored(codestart, 0, &cb, 0, FALSE))
re->overall_options |= PCRE2_ANCHORED;
-/* If the pattern is still not anchored and we do not have a first code unit,
-see if there is one that is asserted (these are not saved during the compile
-because they can cause conflicts with actual literals that follow). This code
-need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
-create will not be used. */
+/* Set up the first code unit or startline flag, the required code unit, and
+then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE
+is set, as the data it would create will not be used. Note that a first code
+unit (but not the startline flag) is useful for anchored patterns because it
+can still give a quick "no match" and also avoid searching for a last code
+unit. */
-if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
+if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
{
+ /* If we do not have a first code unit, see if there is one that is asserted
+ (these are not saved during the compile because they can cause conflicts with
+ actual literals that follow). */
+
if (firstcuflags < 0)
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
@@ -8995,87 +9701,86 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
}
}
- /* When there is no first code unit, see if we can set the PCRE2_STARTLINE
- flag. This is helpful for multiline matches when all branches start with ^
- and also when all branches start with non-atomic .* for non-DOTALL matches
- when *PRUNE and SKIP are not present. (There is an option that disables this
- case.) */
+ /* When there is no first code unit, for non-anchored patterns, see if we can
+ set the PCRE2_STARTLINE flag. This is helpful for multiline matches when all
+ branches start with ^ and also when all branches start with non-atomic .* for
+ non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option
+ that disables this case.) */
- else if (is_startline(codestart, 0, &cb, 0)) re->flags |= PCRE2_STARTLINE;
- }
+ else if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
+ is_startline(codestart, 0, &cb, 0, FALSE))
+ re->flags |= PCRE2_STARTLINE;
-/* Handle the "required code unit", if one is set. In the case of an anchored
-pattern, do this only if it follows a variable length item in the pattern.
-Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
+ /* Handle the "required code unit", if one is set. In the case of an anchored
+ pattern, do this only if it follows a variable length item in the pattern. */
-if (reqcuflags >= 0 &&
- ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0 ||
- (reqcuflags & REQ_VARY) != 0))
- {
- re->last_codeunit = reqcu;
- re->flags |= PCRE2_LASTSET;
+ if (reqcuflags >= 0 &&
+ ((re->overall_options & PCRE2_ANCHORED) == 0 ||
+ (reqcuflags & REQ_VARY) != 0))
+ {
+ re->last_codeunit = reqcu;
+ re->flags |= PCRE2_LASTSET;
- /* Handle caseless required code units as for first code units (above). */
+ /* Handle caseless required code units as for first code units (above). */
- if ((reqcuflags & REQ_CASELESS) != 0)
- {
- if (reqcu < 128 || (!utf && reqcu < 255))
+ if ((reqcuflags & REQ_CASELESS) != 0)
{
- if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
- }
+ if (reqcu < 128 || (!utf && reqcu < 255))
+ {
+ if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
+ }
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
- re->flags |= PCRE2_LASTCASELESS;
+ else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
+ re->flags |= PCRE2_LASTCASELESS;
#endif
+ }
}
- }
-/* Check for a pattern than can match an empty string, so that this information
-can be provided to applications. */
+ /* Finally, study the compiled pattern to set up information such as a bitmap
+ of starting code units and a minimum matching length. */
-do
- {
- int count = 0;
- int rc = could_be_empty_branch(codestart, code, utf, &cb, TRUE, NULL, &count);
- if (rc < 0)
+ if (PRIV(study)(re) != 0)
{
- errorcode = ERR86;
- goto HAD_ERROR;
- }
- if (rc > 0)
- {
- re->flags |= PCRE2_MATCH_EMPTY;
- break;
+ errorcode = ERR31;
+ goto HAD_CB_ERROR;
}
- codestart += GET(codestart, 1);
- }
-while (*codestart == OP_ALT);
-
-/* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern
-to set up information such as a bitmap of starting code units and a minimum
-matching length. */
+ } /* End of start-of-match optimizations. */
-if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
- PRIV(study)(re) != 0)
- {
- errorcode = ERR31;
- goto HAD_ERROR;
- }
-
-/* Control ends up here in all cases. If memory was obtained for a
-zero-terminated copy of the pattern, remember to free it before returning. Also
-free the list of named groups if a larger one had to be obtained, and likewise
-the group information vector. */
+/* Control ends up here in all cases. When running under valgrind, make a
+pattern's terminating zero defined again. If memory was obtained for the parsed
+version of the pattern, free it before returning. Also free the list of named
+groups if a larger one had to be obtained, and likewise the group information
+vector. */
EXIT:
-if (copied_pattern != stack_copied_pattern)
- ccontext->memctl.free(copied_pattern, ccontext->memctl.memory_data);
+#ifdef SUPPORT_VALGRIND
+if (zero_terminated) VALGRIND_MAKE_MEM_DEFINED(pattern + patlen, CU2BYTES(1));
+#endif
+if (cb.parsed_pattern != stack_parsed_pattern)
+ ccontext->memctl.free(cb.parsed_pattern, ccontext->memctl.memory_data);
if (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)
ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data);
-if (cb.groupinfo != c32workspace)
+if (cb.groupinfo != stack_groupinfo)
ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data);
-
return re; /* Will be NULL after an error */
+
+/* Errors discovered in parse_regex() set the offset value in the compile
+block. Errors discovered before it is called must compute it from the ptr
+value. After parse_regex() is called, the offset in the compile block is set to
+the end of the pattern, but certain errors in compile_regex() may reset it if
+an offset is available in the parsed pattern. */
+
+HAD_CB_ERROR:
+ptr = pattern + cb.erroroffset;
+
+HAD_EARLY_ERROR:
+*erroroffset = ptr - pattern;
+
+HAD_ERROR:
+*errorptr = errorcode;
+pcre2_code_free(re);
+re = NULL;
+goto EXIT;
}
/* End of pcre2_compile.c */
diff --git a/src/3rdparty/pcre2/src/pcre2_config.c b/src/3rdparty/pcre2/src/pcre2_config.c
index e99272f577..d009c0a676 100644
--- a/src/3rdparty/pcre2/src/pcre2_config.c
+++ b/src/3rdparty/pcre2/src/pcre2_config.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -84,13 +84,14 @@ if (where == NULL) /* Requests a length */
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
+ case PCRE2_CONFIG_HEAPLIMIT:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT:
+ case PCRE2_CONFIG_DEPTHLIMIT:
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_PARENSLIMIT:
- case PCRE2_CONFIG_RECURSIONLIMIT:
- case PCRE2_CONFIG_STACKRECURSE:
+ case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
case PCRE2_CONFIG_UNICODE:
return sizeof(uint32_t);
@@ -116,6 +117,10 @@ switch (what)
#endif
break;
+ case PCRE2_CONFIG_HEAPLIMIT:
+ *((uint32_t *)where) = HEAP_LIMIT;
+ break;
+
case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((uint32_t *)where) = 1;
@@ -143,6 +148,10 @@ switch (what)
*((uint32_t *)where) = MATCH_LIMIT;
break;
+ case PCRE2_CONFIG_DEPTHLIMIT:
+ *((uint32_t *)where) = MATCH_LIMIT_DEPTH;
+ break;
+
case PCRE2_CONFIG_NEWLINE:
*((uint32_t *)where) = NEWLINE_DEFAULT;
break;
@@ -151,16 +160,11 @@ switch (what)
*((uint32_t *)where) = PARENS_NEST_LIMIT;
break;
- case PCRE2_CONFIG_RECURSIONLIMIT:
- *((uint32_t *)where) = MATCH_LIMIT_RECURSION;
- break;
+ /* This is now obsolete. The stack is no longer used via recursion for
+ handling backtracking in pcre2_match(). */
case PCRE2_CONFIG_STACKRECURSE:
-#ifdef HEAP_MATCH_RECURSE
*((uint32_t *)where) = 0;
-#else
- *((uint32_t *)where) = 1;
-#endif
break;
case PCRE2_CONFIG_UNICODE_VERSION:
diff --git a/src/3rdparty/pcre2/src/pcre2_context.c b/src/3rdparty/pcre2/src/pcre2_context.c
index ae050fe92c..2c14df0080 100644
--- a/src/3rdparty/pcre2/src/pcre2_context.c
+++ b/src/3rdparty/pcre2/src/pcre2_context.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -138,7 +138,8 @@ const pcre2_compile_context PRIV(default_compile_context) = {
PCRE2_UNSET, /* Max pattern length */
BSR_DEFAULT, /* Backslash R default */
NEWLINE_DEFAULT, /* Newline convention */
- PARENS_NEST_LIMIT }; /* As it says */
+ PARENS_NEST_LIMIT, /* As it says */
+ 0 }; /* Extra options */
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
@@ -161,9 +162,6 @@ when no context is supplied to a match function. */
const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL },
-#ifdef HEAP_MATCH_RECURSE
- { default_malloc, default_free, NULL },
-#endif
#ifdef SUPPORT_JIT
NULL,
NULL,
@@ -171,8 +169,9 @@ const pcre2_match_context PRIV(default_match_context) = {
NULL,
NULL,
PCRE2_UNSET, /* Offset limit */
+ HEAP_LIMIT,
MATCH_LIMIT,
- MATCH_LIMIT_RECURSION };
+ MATCH_LIMIT_DEPTH };
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
@@ -190,6 +189,36 @@ return mcontext;
}
+/* A default convert context is set up to save having to initialize at run time
+when no context is supplied to the convert function. */
+
+const pcre2_convert_context PRIV(default_convert_context) = {
+ { default_malloc, default_free, NULL }, /* Default memory handling */
+#ifdef _WIN32
+ CHAR_BACKSLASH, /* Default path separator */
+ CHAR_GRAVE_ACCENT /* Default escape character */
+#else /* Not Windows */
+ CHAR_SLASH, /* Default path separator */
+ CHAR_BACKSLASH /* Default escape character */
+#endif
+ };
+
+/* The create function copies the default into the new memory, but must
+override the default memory handling functions if a gcontext was provided. */
+
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_create(pcre2_general_context *gcontext)
+{
+pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
+ sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
+if (ccontext == NULL) return NULL;
+*ccontext = PRIV(default_convert_context);
+if (gcontext != NULL)
+ *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
+return ccontext;
+}
+
+
/*************************************************
* Context copy functions *
*************************************************/
@@ -231,11 +260,22 @@ return new;
+PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
+pcre2_convert_context_copy(pcre2_convert_context *ccontext)
+{
+pcre2_convert_context *new =
+ ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
+ ccontext->memctl.memory_data);
+if (new == NULL) return NULL;
+memcpy(new, ccontext, sizeof(pcre2_real_convert_context));
+return new;
+}
+
+
/*************************************************
* Context free functions *
*************************************************/
-
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_general_context_free(pcre2_general_context *gcontext)
{
@@ -260,6 +300,12 @@ if (mcontext != NULL)
}
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_convert_context_free(pcre2_convert_context *ccontext)
+{
+if (ccontext != NULL)
+ ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
+}
/*************************************************
@@ -271,7 +317,7 @@ data is given. Only some of the functions are able to test the validity of the
data. */
-/* ------------ Compile contexts ------------ */
+/* ------------ Compile context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
@@ -313,6 +359,7 @@ switch(newline)
case PCRE2_NEWLINE_CRLF:
case PCRE2_NEWLINE_ANY:
case PCRE2_NEWLINE_ANYCRLF:
+ case PCRE2_NEWLINE_NUL:
ccontext->newline_convention = newline;
return 0;
@@ -329,6 +376,13 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
+{
+ccontext->extra_options = options;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t, void *), void *user_data)
{
@@ -338,7 +392,7 @@ return 0;
}
-/* ------------ Match contexts ------------ */
+/* ------------ Match context ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
@@ -350,6 +404,13 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->heap_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->match_limit = limit;
@@ -357,17 +418,26 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
+{
+mcontext->depth_limit = limit;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
{
mcontext->offset_limit = limit;
return 0;
}
+/* This function became obsolete at release 10.30. It is kept as a synonym for
+backwards compatibility. */
+
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
{
-mcontext->recursion_limit = limit;
-return 0;
+return pcre2_set_depth_limit(mcontext, limit);
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -375,17 +445,32 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata)
{
-#ifdef HEAP_MATCH_RECURSE
-mcontext->stack_memctl.malloc = mymalloc;
-mcontext->stack_memctl.free = myfree;
-mcontext->stack_memctl.memory_data = mydata;
-#else
(void)mcontext;
(void)mymalloc;
(void)myfree;
(void)mydata;
-#endif
+return 0;
+}
+
+/* ------------ Convert context ------------ */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
+{
+if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
+ separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
+ccontext->glob_separator = separator;
+return 0;
+}
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
+{
+if (escape > 255 || (escape != 0 && !ispunct(escape)))
+ return PCRE2_ERROR_BADDATA;
+ccontext->glob_escape = escape;
return 0;
}
/* End of pcre2_context.c */
+
diff --git a/src/3rdparty/pcre2/src/pcre2_dfa_match.c b/src/3rdparty/pcre2/src/pcre2_dfa_match.c
index 12b31b1b36..5ae13944c7 100644
--- a/src/3rdparty/pcre2/src/pcre2_dfa_match.c
+++ b/src/3rdparty/pcre2/src/pcre2_dfa_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -83,7 +83,7 @@ in others, so I abandoned this code. */
#include "pcre2_internal.h"
#define PUBLIC_DFA_MATCH_OPTIONS \
- (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
+ (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART)
@@ -172,7 +172,7 @@ static const uint8_t coptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
- 0, 0, /* ONCE, ONCE_NC */
+ 0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -245,7 +245,7 @@ static const uint8_t poptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
- 0, 0, /* ONCE, ONCE_NC */
+ 0, /* ONCE */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, DNCREF */
@@ -371,18 +371,14 @@ internal_dfa_match(
uint32_t offsetcount,
int *workspace,
int wscount,
- int rlevel)
+ uint32_t rlevel)
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
-
const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr;
PCRE2_SPTR end_code;
-PCRE2_SPTR first_op;
-
dfa_recursion_info new_recursive;
-
int active_count, new_count, match_count;
/* Some fields in the mb block are frequently referenced, so we load them into
@@ -400,7 +396,8 @@ BOOL utf = FALSE;
BOOL reset_could_continue = FALSE;
-rlevel++;
+if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
+if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
offsetcount &= (uint32_t)(-2); /* Round down */
wscount -= 2;
@@ -417,21 +414,15 @@ active_states = (stateblock *)(workspace + 2);
next_new_state = new_states = active_states + wscount;
new_count = 0;
-first_op = this_start_code + 1 + LINK_SIZE +
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
- ? IMM2_SIZE:0);
-
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
the alternative states onto the list, and find out where the end is. This
makes is possible to use this function recursively, when we want to stop at a
matching internal ket rather than at the end.
-If the first opcode in the first alternative is OP_REVERSE, we are dealing with
-a backward assertion. In that case, we have to find out the maximum amount to
-move back, and set up each alternative appropriately. */
+If we are dealing with a backward assertion we have to find out the maximum
+amount to move back, and set up each alternative appropriately. */
-if (*first_op == OP_REVERSE)
+if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
{
size_t max_back = 0;
size_t gone_back;
@@ -476,15 +467,17 @@ if (*first_op == OP_REVERSE)
if (current_subject < mb->start_used_ptr)
mb->start_used_ptr = current_subject;
- /* Now we can process the individual branches. */
+ /* Now we can process the individual branches. There will be an OP_REVERSE at
+ the start of each branch, except when the length of the branch is zero. */
end_code = this_start_code;
do
{
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+ uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
+ size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
if (back <= gone_back)
{
- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
+ int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
}
end_code += GET(end_code, 1);
@@ -697,7 +690,7 @@ for (;;)
case OP_TABLE_LENGTH +
((sizeof(coptable) == OP_TABLE_LENGTH) &&
(sizeof(poptable) == OP_TABLE_LENGTH)):
- break;
+ return 0;
/* ========================================================================== */
/* Reached a closing bracket. If not at the end of the pattern, carry
@@ -1386,8 +1379,46 @@ for (;;)
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = nptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(d, bptr);
+ }
+ else
+#endif
+ d = *bptr;
+ if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
ncount++;
- lgb = rgb;
nptr += dlen;
}
count++;
@@ -1648,8 +1679,46 @@ for (;;)
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = nptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(d, bptr);
+ }
+ else
+#endif
+ d = *bptr;
+ if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
ncount++;
- lgb = rgb;
nptr += dlen;
}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
@@ -1919,8 +1988,46 @@ for (;;)
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = nptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(d, bptr);
+ }
+ else
+#endif
+ d = *bptr;
+ if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
ncount++;
- lgb = rgb;
nptr += dlen;
}
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
@@ -2109,8 +2216,46 @@ for (;;)
if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
rgb = UCD_GRAPHBREAK(d);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = nptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(d, bptr);
+ }
+ else
+#endif
+ d = *bptr;
+ if (UCD_GRAPHBREAK(d) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
ncount++;
- lgb = rgb;
nptr += dlen;
}
if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
@@ -2136,6 +2281,7 @@ for (;;)
case 0x2029:
#endif /* Not EBCDIC */
if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
+ /* Fall through */
case CHAR_LF:
ADD_NEW(state_offset + 1, 0);
@@ -2539,11 +2685,13 @@ for (;;)
if (isinclass)
{
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
- if (*ecode == OP_CRPOSRANGE)
+
+ if (*ecode == OP_CRPOSRANGE && count >= (int)GET2(ecode, 1))
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
+
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
@@ -2591,7 +2739,7 @@ for (;;)
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
- if (rc == PCRE2_ERROR_DFA_UITEM) return rc;
+ if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
}
@@ -2710,7 +2858,7 @@ for (;;)
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
- if (rc == PCRE2_ERROR_DFA_UITEM) return rc;
+ if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) ==
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
@@ -2889,7 +3037,6 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
- case OP_ONCE_NC:
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
@@ -3069,7 +3216,7 @@ for (;;)
)
)
match_count = PCRE2_ERROR_PARTIAL;
- break; /* In effect, "return", but see the comment below */
+ break; /* Exit from loop along the subject string */
}
/* One or more states are active for the next character. */
@@ -3077,11 +3224,13 @@ for (;;)
ptr += clen; /* Advance to next subject character */
} /* Loop to move along the subject string */
-/* Control gets here from "break" a few lines above. We do it this way because
-if we use "return" above, we have compiler trouble. Some compilers warn if
-there's nothing here because they think the function doesn't return a value. On
-the other hand, if we put a dummy statement here, some more clever compilers
-complain that it can't be reached. Sigh. */
+/* Control gets here from "break" a few lines above. If we have a match and
+PCRE2_ENDANCHORED is set, the match fails. */
+
+if (match_count >= 0 &&
+ ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
+ ptr < end_subject)
+ match_count = PCRE2_ERROR_NOMATCH;
return match_count;
}
@@ -3115,7 +3264,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
- pcre2_match_context *mcontext, int *workspace, size_t wscount)
+ pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
@@ -3154,6 +3303,13 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
+/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
+time. */
+
+if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
+ ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
@@ -3216,6 +3372,8 @@ if (mcontext == NULL)
{
mb->callout = NULL;
mb->memctl = re->memctl;
+ mb->match_limit = PRIV(default_match_context).match_limit;
+ mb->match_limit_depth = PRIV(default_match_context).depth_limit;
}
else
{
@@ -3228,8 +3386,16 @@ else
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl;
+ mb->match_limit = mcontext->match_limit;
+ mb->match_limit_depth = mcontext->depth_limit;
}
+if (mb->match_limit > re->limit_match)
+ mb->match_limit = re->limit_match;
+
+if (mb->match_limit_depth > re->limit_depth)
+ mb->match_limit_depth = re->limit_depth;
+
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->tables = re->tables;
@@ -3238,6 +3404,7 @@ mb->end_subject = end_subject;
mb->start_offset = start_offset;
mb->moptions = options;
mb->poptions = re->overall_options;
+mb->match_call_count = 0;
/* Process the \R and newline settings. */
@@ -3255,6 +3422,11 @@ switch(re->newline_convention)
mb->nl[0] = CHAR_NL;
break;
+ case PCRE2_NEWLINE_NUL:
+ mb->nllen = 1;
+ mb->nl[0] = CHAR_NUL;
+ break;
+
case PCRE2_NEWLINE_CRLF:
mb->nllen = 2;
mb->nl[0] = CHAR_CR;
@@ -3321,34 +3493,27 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
}
#endif /* SUPPORT_UNICODE */
-/* Set up the first code unit to match, if available. The first_codeunit value
-is never set for an anchored regular expression, but the anchoring may be
-forced at run time, so we have to test for anchoring. The first code unit may
-be unset for an unanchored pattern, of course. If there's no first code unit
-there may be a bitmap of possible first characters. */
+/* Set up the first code unit to match, if available. If there's no first code
+unit there may be a bitmap of possible first characters. */
-if (!anchored)
+if ((re->flags & PCRE2_FIRSTSET) != 0)
{
- if ((re->flags & PCRE2_FIRSTSET) != 0)
+ has_first_cu = TRUE;
+ first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
+ if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{
- has_first_cu = TRUE;
- first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
- if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
- {
- first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
+ first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && first_cu > 127)
- first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
+ if (utf && first_cu > 127)
+ first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
#endif
- }
}
- else
- if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
- start_bits = re->start_bitmap;
}
+else
+ if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
+ start_bits = re->start_bitmap;
-/* For anchored or unanchored matches, there may be a "last known required
-character" set. */
+/* There may be a "last known required code unit" set. */
if ((re->flags & PCRE2_LASTSET) != 0)
{
@@ -3394,8 +3559,8 @@ for (;;)
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the
first newline. Implement this by temporarily adjusting end_subject so that
- we stop the optimization scans at a newline. If the match fails at the
- newline, later code breaks this loop. */
+ we stop the optimization scans for a first code unit at a newline. If the
+ match fails at the newline, later code breaks this loop. */
if (firstline)
{
@@ -3415,69 +3580,137 @@ for (;;)
end_subject = t;
}
- /* Advance to a unique first code unit if there is one. */
+ /* Anchored: check the first code unit if one is recorded. This may seem
+ pointless but it can help in detecting a no match case without scanning for
+ the required code unit. */
- if (has_first_cu)
+ if (anchored)
{
- PCRE2_UCHAR smc;
- if (first_cu != first_cu2)
- while (start_match < end_subject &&
- (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
- start_match++;
- else
- while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
- start_match++;
+ if (has_first_cu || start_bits != NULL)
+ {
+ BOOL ok = start_match < end_subject;
+ if (ok)
+ {
+ PCRE2_UCHAR c = UCHAR21TEST(start_match);
+ ok = has_first_cu && (c == first_cu || c == first_cu2);
+ if (!ok && start_bits != NULL)
+ {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ if (c > 255) c = 255;
+#endif
+ ok = (start_bits[c/8] & (1 << (c&7))) != 0;
+ }
+ }
+ if (!ok) break;
+ }
}
- /* Or to just after a linebreak for a multiline match */
+ /* Not anchored. Advance to a unique first code unit if there is one. In
+ 8-bit mode, the use of memchr() gives a big speed up, even though we have
+ to call it twice in caseless mode, in order to find the earliest occurrence
+ of the character in either of its cases. */
- else if (startline)
+ else
{
- if (start_match > mb->start_subject + start_offset)
+ if (has_first_cu)
{
-#ifdef SUPPORT_UNICODE
- if (utf)
+ if (first_cu != first_cu2) /* Caseless */
{
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
- {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ PCRE2_UCHAR smc;
+ while (start_match < end_subject &&
+ (smc = UCHAR21TEST(start_match)) != first_cu &&
+ smc != first_cu2)
start_match++;
- ACROSSCHAR(start_match < end_subject, *start_match,
- start_match++);
- }
+#else /* 8-bit code units */
+ PCRE2_SPTR pp1 =
+ memchr(start_match, first_cu, end_subject-start_match);
+ PCRE2_SPTR pp2 =
+ memchr(start_match, first_cu2, end_subject-start_match);
+ if (pp1 == NULL)
+ start_match = (pp2 == NULL)? end_subject : pp2;
+ else
+ start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
+#endif
}
+
+ /* The caseful case */
+
else
+ {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ while (start_match < end_subject && UCHAR21TEST(start_match) !=
+ first_cu)
+ start_match++;
+#else
+ start_match = memchr(start_match, first_cu, end_subject - start_match);
+ if (start_match == NULL) start_match = end_subject;
#endif
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
- start_match++;
+ }
- /* If we have just passed a CR and the newline option is ANY or
- ANYCRLF, and we are now at a LF, advance the match position by one more
- code unit. */
+ /* If we can't find the required code unit, break the bumpalong loop,
+ to force a match failure, except when doing partial matching, when we
+ let the next cycle run at the end of the subject. To see why, consider
+ the pattern /(?<=abc)def/, which partially matches "abc", even though
+ the string does not contain the starting character "d". */
- if (start_match[-1] == CHAR_CR &&
- (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
- start_match < end_subject &&
- UCHAR21TEST(start_match) == CHAR_NL)
- start_match++;
+ if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
+ start_match >= end_subject)
+ break;
}
- }
- /* Or to a non-unique first code unit if any have been identified. The
- bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
- code units greater than 254 set the 255 bit. */
+ /* If there's no first code unit, advance to just after a linebreak for a
+ multiline match if required. */
- else if (start_bits != NULL)
- {
- while (start_match < end_subject)
+ else if (startline)
+ {
+ if (start_match > mb->start_subject + start_offset)
+ {
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ {
+ start_match++;
+ ACROSSCHAR(start_match < end_subject, *start_match,
+ start_match++);
+ }
+ }
+ else
+#endif
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ start_match++;
+
+ /* If we have just passed a CR and the newline option is ANY or
+ ANYCRLF, and we are now at a LF, advance the match position by one
+ more code unit. */
+
+ if (start_match[-1] == CHAR_CR &&
+ (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
+ start_match < end_subject &&
+ UCHAR21TEST(start_match) == CHAR_NL)
+ start_match++;
+ }
+ }
+
+ /* If there's no first code unit or a requirement for a multiline line
+ start, advance to a non-unique first code unit if any have been
+ identified. The bitmap contains only 256 bits. When code units are 16 or
+ 32 bits wide, all code units greater than 254 set the 255 bit. */
+
+ else if (start_bits != NULL)
{
- register uint32_t c = UCHAR21TEST(start_match);
+ while (start_match < end_subject)
+ {
+ uint32_t c = UCHAR21TEST(start_match);
#if PCRE2_CODE_UNIT_WIDTH != 8
- if (c > 255) c = 255;
+ if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
- start_match++;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+ start_match++;
+ }
}
- }
+ } /* End of first code unit handling */
/* Restore fudged end_subject */
@@ -3510,7 +3743,7 @@ for (;;)
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
{
- register PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
+ PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
@@ -3521,7 +3754,7 @@ for (;;)
{
while (p < end_subject)
{
- register uint32_t pp = UCHAR21INCTEST(p);
+ uint32_t pp = UCHAR21INCTEST(p);
if (pp == req_cu || pp == req_cu2) { p--; break; }
}
}
diff --git a/src/3rdparty/pcre2/src/pcre2_error.c b/src/3rdparty/pcre2/src/pcre2_error.c
index 77fd5f4124..d98cae9963 100644
--- a/src/3rdparty/pcre2/src/pcre2_error.c
+++ b/src/3rdparty/pcre2/src/pcre2_error.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -91,13 +91,13 @@ static const unsigned char compile_error_texts[] =
"failed to allocate heap memory\0"
"unmatched closing parenthesis\0"
"internal error: code overflow\0"
- "letter or underscore expected after (?< or (?'\0"
+ "missing closing parenthesis for condition\0"
/* 25 */
"lookbehind assertion is not fixed length\0"
- "malformed number or name after (?(\0"
+ "a relative value of zero is not allowed\0"
"conditional group contains more than two branches\0"
"assertion expected after (?( or (?(?C)\0"
- "(?R or (?[+-]digits must be followed by )\0"
+ "digit expected after (?+ or (?-\0"
/* 30 */
"unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0"
@@ -105,7 +105,7 @@ static const unsigned char compile_error_texts[] =
"parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0"
/* 35 */
- "invalid condition (?(0)\0"
+ "lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0"
@@ -132,13 +132,13 @@ static const unsigned char compile_error_texts[] =
"missing opening brace after \\o\0"
"internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
- "a numbered reference must not be zero\0"
+ "(?R (recursive pattern call) must be followed by a closing parenthesis\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
"(*VERB) not recognized or malformed\0"
- "number is too big\0"
+ "group number is too big\0"
"subpattern name expected\0"
- "digit expected after (?+\0"
+ "internal error: parsed pattern overflow\0"
"non-octal character in \\o{} (closing brace missing?)\0"
/* 65 */
"different names for subpatterns of the same number are not allowed\0"
@@ -151,9 +151,9 @@ static const unsigned char compile_error_texts[] =
#endif
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
- "internal error: unknown opcode in find_fixedlength()\0"
+ "internal error: unknown meta code in check_lookbehinds()\0"
"\\N is not supported in a class\0"
- "SPARE ERROR\0"
+ "callout string is too long\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"using UTF is disabled by the application\0"
/* 75 */
@@ -161,7 +161,7 @@ static const unsigned char compile_error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
- "syntax error in (?(VERSION condition\0"
+ "syntax error or number too big in (?(VERSION condition\0"
/* 80 */
"internal error: unknown opcode in auto_possessify()\0"
"missing terminating delimiter for callout with string argument\0"
@@ -173,6 +173,11 @@ static const unsigned char compile_error_texts[] =
"regular expression is too complicated\0"
"lookbehind assertion is too long\0"
"pattern string is longer than the limit set by the application\0"
+ "internal error: unknown code in parsed pattern\0"
+ /* 90 */
+ "internal error: bad code value in parsed_skip()\0"
+ "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
+ "invalid option bits with PCRE2_LITERAL\0"
;
/* Match-time and UTF error texts are in the same format. */
@@ -241,7 +246,7 @@ static const unsigned char match_error_texts[] =
"non-unique substring name\0"
"NULL argument passed\0"
"nested recursion at the same subject position\0"
- "recursion limit exceeded\0"
+ "matching depth limit exceeded\0"
"requested value is not available\0"
/* 55 */
"requested value is not set\0"
@@ -253,6 +258,8 @@ static const unsigned char match_error_texts[] =
"match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
+ "heap limit exceeded\0"
+ "invalid syntax\0"
;
@@ -268,17 +275,17 @@ distinct.
Arguments:
enumber error number
buffer where to put the message (zero terminated)
- size size of the buffer
+ size size of the buffer in code units
Returns: length of message if all is well
negative on error
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
-pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
+pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
{
const unsigned char *message;
-size_t i;
+PCRE2_SIZE i;
int n;
if (size == 0) return PCRE2_ERROR_NOMEMORY;
@@ -301,8 +308,8 @@ else /* Invalid error number */
for (; n > 0; n--)
{
- while (*message++ != CHAR_NULL) {};
- if (*message == CHAR_NULL) return PCRE2_ERROR_BADDATA;
+ while (*message++ != CHAR_NUL) {};
+ if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
}
for (i = 0; *message != 0; i++)
diff --git a/src/3rdparty/pcre2/src/pcre2_find_bracket.c b/src/3rdparty/pcre2/src/pcre2_find_bracket.c
index 803e719765..357385a11c 100644
--- a/src/3rdparty/pcre2/src/pcre2_find_bracket.c
+++ b/src/3rdparty/pcre2/src/pcre2_find_bracket.c
@@ -71,7 +71,7 @@ PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
{
for (;;)
{
- register PCRE2_UCHAR c = *code;
+ PCRE2_UCHAR c = *code;
if (c == OP_END) return NULL;
diff --git a/src/3rdparty/pcre2/src/pcre2_internal.h b/src/3rdparty/pcre2/src/pcre2_internal.h
index 56908708aa..9ccce25d47 100644
--- a/src/3rdparty/pcre2/src/pcre2_internal.h
+++ b/src/3rdparty/pcre2/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -142,20 +142,6 @@ pcre2_match() because of the way it backtracks. */
#define PCRE2_SPTR CUSTOM_SUBJECT_PTR
#endif
-/* When compiling with the MSVC compiler, it is sometimes necessary to include
-a "calling convention" before exported function names. (This is secondhand
-information; I know nothing about MSVC myself). For example, something like
-
- void __cdecl function(....)
-
-might be needed. In order so make this easy, all the exported functions have
-PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
-set, we ensure here that it has no effect. */
-
-#ifndef PCRE2_CALL_CONVENTION
-#define PCRE2_CALL_CONVENTION
-#endif
-
/* When checking for integer overflow in pcre2_compile(), we need to handle
large integers. If a 64-bit integer type is available, we can use that.
Otherwise we have to cast to double, which of course requires floating point
@@ -254,6 +240,16 @@ not rely on this. */
#define COMPILE_ERROR_BASE 100
+/* The initial frames vector for remembering backtracking points in
+pcre2_match() is allocated on the system stack, of this size (bytes). The size
+must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
+multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
+on the number of capturing parentheses) so 20K handles quite a few frames. A
+larger vector on the heap is obtained for patterns that need more frames. The
+maximum size of this can be limited. */
+
+#define START_FRAMES_SIZE 20480
+
/* Define the default BSR convention. */
#ifdef BSR_ANYCRLF
@@ -561,9 +557,14 @@ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
/* The maximum remaining length of subject we are prepared to search for a
-req_unit match. */
+req_unit match. In 8-bit mode, memchr() is used and is much faster than the
+search loop that has to be used in 16-bit and 32-bit modes. */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define REQ_CU_MAX 2000
+#else
#define REQ_CU_MAX 1000
+#endif
/* Offsets for the bitmap tables in the cbits set of tables. Each table
contains a set of bits for a class map. Some classes are built by combining
@@ -682,7 +683,7 @@ a positive value. */
/* The remaining definitions work in both environments. */
-#define CHAR_NULL '\0'
+#define CHAR_NUL '\0'
#define CHAR_HT '\t'
#define CHAR_VT '\v'
#define CHAR_FF '\f'
@@ -923,6 +924,7 @@ a positive value. */
#define STRING_CRLF_RIGHTPAR "CRLF)"
#define STRING_ANY_RIGHTPAR "ANY)"
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
+#define STRING_NUL_RIGHTPAR "NUL)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR "UTF8)"
@@ -936,7 +938,9 @@ a positive value. */
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
+#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
+#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#define STRING_MARK "MARK"
@@ -958,7 +962,7 @@ only. */
#define CHAR_ESC '\033'
#define CHAR_DEL '\177'
-#define CHAR_NULL '\0'
+#define CHAR_NUL '\0'
#define CHAR_SPACE '\040'
#define CHAR_EXCLAMATION_MARK '\041'
#define CHAR_QUOTATION_MARK '\042'
@@ -1196,6 +1200,7 @@ only. */
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_NUL_RIGHTPAR STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
@@ -1209,7 +1214,9 @@ only. */
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#define STRING_MARK STR_M STR_A STR_R STR_K
@@ -1298,23 +1305,16 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript
compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
like \N.
-The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
-when PCRE2_UCP is set and replacement of \d etc by \p sequences is required.
-They must be contiguous, and remain in order so that the replacements can be
-looked up from a table.
-
Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
-check_escape(). There are two tests in the code for an escape
-greater than ESC_b and less than ESC_Z to detect the types that may be
-repeated. These are the types that consume characters. If any new escapes are
-put in between that don't consume a character, that code will have to change.
-*/
+check_escape(). There are tests in the code for an escape greater than ESC_b
+and less than ESC_Z to detect the types that may be repeated. These are the
+types that consume characters. If any new escapes are put in between that don't
+consume a character, that code will have to change. */
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
- ESC_E, ESC_Q, ESC_g, ESC_k,
- ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
+ ESC_E, ESC_Q, ESC_g, ESC_k };
/********************** Opcode definitions ******************/
@@ -1380,7 +1380,8 @@ enum {
OP_CIRC, /* 27 Start of line - not multiline */
OP_CIRCM, /* 28 Start of line - multiline */
- /* Single characters; caseful must precede the caseless ones */
+ /* Single characters; caseful must precede the caseless ones, and these
+ must remain in this order, and adjacent. */
OP_CHAR, /* 29 Match one character, casefully */
OP_CHARI, /* 30 Match one character, caselessly */
@@ -1530,68 +1531,67 @@ enum {
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
- /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
- after the assertions, with ONCE first, as there's a test for >= ONCE for a
- subpattern that isn't an assertion. The POS versions must immediately follow
- the non-POS versions in each case. */
+ /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
+ assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
+ that isn't an assertion. The POS versions must immediately follow the non-POS
+ versions in each case. */
OP_ONCE, /* 130 Atomic group, contains captures */
- OP_ONCE_NC, /* 131 Atomic group containing no captures */
- OP_BRA, /* 132 Start of non-capturing bracket */
- OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 134 Start of capturing bracket */
- OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 136 Conditional group */
+ OP_BRA, /* 131 Start of non-capturing bracket */
+ OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 133 Start of capturing bracket */
+ OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 139 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 141 Conditional group, check empty */
+ OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 138 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 142 Used to hold a capture number as condition */
- OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
- OP_RREF, /* 144 Used to hold a recursion number as condition */
- OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
- OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
- OP_TRUE, /* 147 Always true (used by VERSION) */
+ OP_CREF, /* 141 Used to hold a capture number as condition */
+ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
+ OP_RREF, /* 143 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
+ OP_FALSE, /* 145 Always false (used by DEFINE and VERSION) */
+ OP_TRUE, /* 146 Always true (used by VERSION) */
- OP_BRAZERO, /* 148 These two must remain together and in this */
- OP_BRAMINZERO, /* 149 order. */
- OP_BRAPOSZERO, /* 150 */
+ OP_BRAZERO, /* 147 These two must remain together and in this */
+ OP_BRAMINZERO, /* 148 order. */
+ OP_BRAPOSZERO, /* 149 */
/* These are backtracking control verbs */
- OP_MARK, /* 151 always has an argument */
- OP_PRUNE, /* 152 */
- OP_PRUNE_ARG, /* 153 same, but with argument */
- OP_SKIP, /* 154 */
- OP_SKIP_ARG, /* 155 same, but with argument */
- OP_THEN, /* 156 */
- OP_THEN_ARG, /* 157 same, but with argument */
- OP_COMMIT, /* 158 */
+ OP_MARK, /* 150 always has an argument */
+ OP_PRUNE, /* 151 */
+ OP_PRUNE_ARG, /* 152 same, but with argument */
+ OP_SKIP, /* 153 */
+ OP_SKIP_ARG, /* 154 same, but with argument */
+ OP_THEN, /* 155 */
+ OP_THEN_ARG, /* 156 same, but with argument */
+ OP_COMMIT, /* 157 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 159 */
- OP_ACCEPT, /* 160 */
- OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
- OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 158 */
+ OP_ACCEPT, /* 159 */
+ OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
+ OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 163 */
+ OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
- OP_DEFINE, /* 164 */
+ OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -1638,7 +1638,7 @@ some cases doesn't actually use these names at all). */
"Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
- "Once", "Once_NC", \
+ "Once", \
"Bra", "BraPos", "CBra", "CBraPos", \
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
@@ -1722,7 +1722,6 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* ONCE */ \
- 1+LINK_SIZE, /* ONCE_NC */ \
1+LINK_SIZE, /* BRA */ \
1+LINK_SIZE, /* BRAPOS */ \
1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \
@@ -1794,10 +1793,17 @@ typedef struct {
/* UCD access macros */
#define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (PRIV(ucd_records) + \
+#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? \
+ PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
+#else
+#define GET_UCD(ch) REAL_GET_UCD(ch)
+#endif
+
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
@@ -1852,8 +1858,12 @@ extern const uint8_t PRIV(utf8_table4)[];
#define _pcre2_callout_end_delims PCRE2_SUFFIX(_pcre2_callout_end_delims_)
#define _pcre2_callout_start_delims PCRE2_SUFFIX(_pcre2_callout_start_delims_)
#define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
+#define _pcre2_default_convert_context PCRE2_SUFFIX(_pcre2_default_convert_context_)
#define _pcre2_default_match_context PCRE2_SUFFIX(_pcre2_default_match_context_)
#define _pcre2_default_tables PCRE2_SUFFIX(_pcre2_default_tables_)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define _pcre2_dummy_ucd_record PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
+#endif
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
@@ -1872,12 +1882,16 @@ extern const uint8_t PRIV(OP_lengths)[];
extern const uint32_t PRIV(callout_end_delims)[];
extern const uint32_t PRIV(callout_start_delims)[];
extern const pcre2_compile_context PRIV(default_compile_context);
+extern const pcre2_convert_context PRIV(default_convert_context);
extern const pcre2_match_context PRIV(default_match_context);
extern const uint8_t PRIV(default_tables)[];
extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
extern const ucd_record PRIV(ucd_records)[];
+#if PCRE2_CODE_UNIT_WIDTH == 32
+extern const ucd_record PRIV(dummy_ucd_record)[];
+#endif
extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];
diff --git a/src/3rdparty/pcre2/src/pcre2_intmodedep.h b/src/3rdparty/pcre2/src/pcre2_intmodedep.h
index 596d62cfdc..387f65eb08 100644
--- a/src/3rdparty/pcre2/src/pcre2_intmodedep.h
+++ b/src/3rdparty/pcre2/src/pcre2_intmodedep.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,7 @@ just to undefine them all. */
#undef ACROSSCHAR
#undef BACKCHAR
#undef BYTES2CU
+#undef CHMAX_255
#undef CU2BYTES
#undef FORWARDCHAR
#undef FORWARDCHARTEST
@@ -140,7 +141,7 @@ values of 3 or 4 are also supported. */
#undef LINK_SIZE
#define LINK_SIZE 1
#define PUT(a,n,d) \
- (a[n] = (d))
+ (a[n] = (PCRE2_UCHAR)(d))
#define GET(a,n) \
(a[n])
#define MAX_PATTERN_SIZE (1 << 16)
@@ -200,21 +201,26 @@ arithmetic results in a signed value. Hence the cast. */
#endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
-whether its argument is less than 256. The maximum length of a MARK name must
-fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
-is used to access elements of tables containing exactly 256 items. When code
-points can be greater than 255, a check is needed before accessing these
-tables. */
+whether its argument, which is assumed to be one code unit, is less than 256.
+The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
+name must fit in one code unit; currently it is set to 255 or 65535. The
+TABLE_GET macro is used to access elements of tables containing exactly 256
+items. When code points can be greater than 255, a check is needed before
+accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1)
#ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS
+#define CHMAX_255(c) ((c) <= 255u)
+#else
+#define CHMAX_255(c) TRUE
#endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
+#define CHMAX_255(c) ((c) <= 255u)
#define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS
@@ -566,15 +572,13 @@ typedef struct pcre2_real_compile_context {
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t parens_nest_limit;
+ uint32_t extra_options;
} pcre2_real_compile_context;
/* The real match context structure. */
typedef struct pcre2_real_match_context {
pcre2_memctl memctl;
-#ifdef HEAP_MATCH_RECURSE
- pcre2_memctl stack_memctl;
-#endif
#ifdef SUPPORT_JIT
pcre2_jit_callback jit_callback;
void *jit_callback_data;
@@ -582,10 +586,19 @@ typedef struct pcre2_real_match_context {
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
+ uint32_t heap_limit;
uint32_t match_limit;
- uint32_t recursion_limit;
+ uint32_t depth_limit;
} pcre2_real_match_context;
+/* The real convert context structure. */
+
+typedef struct pcre2_real_convert_context {
+ pcre2_memctl memctl;
+ uint32_t glob_separator;
+ uint32_t glob_escape;
+} pcre2_real_convert_context;
+
/* The real compiled code structure. The type for the blocksize field is
defined specially because it is required in pcre2_serialize_decode() when
copying the size from possibly unaligned memory into a variable of the same
@@ -611,8 +624,9 @@ typedef struct pcre2_real_code {
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */
+ uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */
- uint32_t limit_recursion; /* Limit set in the pattern */
+ uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */
@@ -625,7 +639,11 @@ typedef struct pcre2_real_code {
uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code;
-/* The real match data structure. */
+/* The real match data structure. Define ovector large so that array bound
+checkers don't grumble. Memory for this structure is obtained by calling
+pcre2_match_data_create(), which sets the size as the offset of ovector plus
+pairs of elements for each capturing group. (See also the heapframe structure
+below.) */
typedef struct pcre2_real_match_data {
pcre2_memctl memctl;
@@ -638,7 +656,7 @@ typedef struct pcre2_real_match_data {
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
- PCRE2_SIZE ovector[1]; /* The first field */
+ PCRE2_SIZE ovector[10000];/* The first field */
} pcre2_real_match_data;
@@ -648,18 +666,24 @@ typedef struct pcre2_real_match_data {
#ifndef PCRE2_PCRE2TEST
-/* Structure for checking for mutual recursion when scanning compiled code. */
+/* Structures for checking for mutual recursion when scanning compiled or
+parsed code. */
typedef struct recurse_check {
struct recurse_check *prev;
PCRE2_SPTR group;
} recurse_check;
+typedef struct parsed_recurse_check {
+ struct parsed_recurse_check *prev;
+ uint32_t *groupptr;
+} parsed_recurse_check;
+
/* Structure for building a cache when filling in recursion offsets. */
typedef struct recurse_cache {
PCRE2_SPTR group;
- int recno;
+ int groupnumber;
} recurse_cache;
/* Structure for maintaining a chain of pointers to the currently incomplete
@@ -693,9 +717,10 @@ typedef struct compile_block {
PCRE2_SPTR start_code; /* The start of the compiled code */
PCRE2_SPTR start_pattern; /* The start of the pattern */
PCRE2_SPTR end_pattern; /* The end of the pattern */
- PCRE2_SPTR nestptr[2]; /* Pointer(s) saved for string substitution */
PCRE2_UCHAR *name_table; /* The name/number table */
- size_t workspace_size; /* Size of workspace */
+ PCRE2_SIZE workspace_size; /* Size of workspace */
+ PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
+ PCRE2_SIZE erroroffset; /* Offset of error in pattern */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
open_capitem *open_caps; /* Chain of open capture items */
@@ -703,13 +728,17 @@ typedef struct compile_block {
uint32_t named_group_list_size; /* Number of entries in the list */
uint32_t external_options; /* External (initial) options */
uint32_t external_flags; /* External flag bits to be set */
- uint32_t bracount; /* Count of capturing parens as we compile */
- uint32_t final_bracount; /* Saved value after first pass */
+ uint32_t bracount; /* Count of capturing parentheses */
+ uint32_t lastcapture; /* Last capture encountered */
+ uint32_t *parsed_pattern; /* Parsed pattern buffer */
+ uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
uint32_t *groupinfo; /* Group info vector */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
+ uint32_t class_range_start; /* Overall class range start */
+ uint32_t class_range_end; /* Overall class range end */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
int parens_depth; /* Depth of nested parentheses */
@@ -718,9 +747,7 @@ typedef struct compile_block {
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL had_recurse; /* Had a recursion or subroutine call */
- BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */
- BOOL iscondassert; /* Next assert is a condition */
} compile_block;
/* Structure for keeping the properties of the in-memory stack used
@@ -731,27 +758,8 @@ typedef struct pcre2_real_jit_stack {
void* stack;
} pcre2_real_jit_stack;
-/* Structure for keeping a chain of heap blocks used for saving ovectors
-during pattern recursion when the ovector is larger than can be saved on
-the system stack. */
-
-typedef struct ovecsave_frame {
- struct ovecsave_frame *next; /* Next frame on free chain */
- PCRE2_SIZE saved_ovec[1]; /* First vector element */
-} ovecsave_frame;
-
/* Structure for items in a linked list that represents an explicit recursive
-call within the pattern; used by pcre_match(). */
-
-typedef struct recursion_info {
- struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
- unsigned int group_num; /* Number of group that was called */
- PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
- uint32_t saved_capture_last; /* Last capture number */
- PCRE2_SPTR subject_position; /* Position at start of recursion */
-} recursion_info;
-
-/* A similar structure for pcre_dfa_match(). */
+call within the pattern when running pcre_dfa_match(). */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec;
@@ -759,35 +767,75 @@ typedef struct dfa_recursion_info {
uint32_t group_num;
} dfa_recursion_info;
-/* Structure for building a chain of data for holding the values of the subject
-pointer at the start of each subpattern, so as to detect when an empty string
-has been matched by a subpattern - to break infinite loops; used by
-pcre2_match(). */
+/* Structure for "stack" frames that are used for remembering backtracking
+positions during matching. As these are used in a vector, with the ovector item
+being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
+only way to check this at compile time is to force an error by generating an
+array with a negative size. By putting this in a typedef (which is never used),
+we don't generate any code when all is well. */
+
+typedef struct heapframe {
+
+ /* The first set of fields are variables that have to be preserved over calls
+ to RRMATCH(), but which do not need to be copied to new frames. */
+
+ PCRE2_SPTR ecode; /* The current position in the pattern */
+ PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
+ PCRE2_SIZE length; /* Used for character, string, or code lengths */
+ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
+ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
+ uint32_t rdepth; /* "Recursion" depth */
+ uint32_t group_frame_type; /* Type information for group frames */
+ uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
+ uint8_t return_id; /* Where to go on in internal "return" */
+ uint8_t op; /* Processing opcode */
-typedef struct eptrblock {
- struct eptrblock *epb_prev;
- PCRE2_SPTR epb_saved_eptr;
-} eptrblock;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ PCRE2_UCHAR occu[6]; /* Used for other case code units */
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ PCRE2_UCHAR occu[2]; /* Used for other case code units */
+#else
+ PCRE2_UCHAR occu[1]; /* Used for other case code units */
+#endif
+
+ /* The rest have to be copied from the previous frame whenever a new frame
+ becomes current. The final field is specified as a large vector so that
+ runtime array bound checks don't catch references to it. However, for any
+ specific call to pcre2_match() the memory allocated for each frame structure
+ allows for exactly the right size ovector for the number of capturing
+ parentheses. */
+
+ PCRE2_SPTR eptr; /* MUST BE FIRST */
+ PCRE2_SPTR start_match; /* Can be adjusted by \K */
+ PCRE2_SPTR mark; /* Most recent mark on the success path */
+ uint32_t current_recurse; /* Current (deepest) recursion number */
+ uint32_t capture_last; /* Most recent capture */
+ PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
+ PCRE2_SIZE offset_top; /* Offset after highest capture */
+ PCRE2_SIZE ovector[10000]; /* Must be last in the structure */
+} heapframe;
+
+typedef char check_heapframe_size[
+ ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
-#ifdef HEAP_MATCH_RECURSE
- pcre2_memctl stack_memctl; /* For "stack" frames */
-#endif
- uint32_t match_call_count; /* As it says */
+ PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
+ heapframe *match_frames; /* Points to vector of frames */
+ heapframe *match_frames_top; /* Points after the end of the vector */
+ heapframe *stack_frames; /* The original vector on the stack */
+ PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
- uint32_t match_limit_recursion; /* As it says */
+ uint32_t match_limit_depth; /* As it says */
+ uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
- PCRE2_SIZE *ovector; /* Pointer to the offset vector */
- PCRE2_SIZE offset_end; /* One past the end */
- PCRE2_SIZE offset_max; /* The maximum usable for return data */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
@@ -798,30 +846,23 @@ typedef struct match_block {
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
- PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
- PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
+ PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
+ PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
+ uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
- uint32_t capture_last; /* Most recent capture number + overflow flag */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
- uint32_t match_function_type; /* Set for certain special calls of match() */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
- eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
- recursion_info *recursive; /* Linked list of recursion data */
- ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
-#ifdef HEAP_MATCH_RECURSE
- void *match_frames_base; /* For remembering malloc'd frames */
-#endif
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
@@ -836,6 +877,9 @@ typedef struct dfa_match_block {
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
+ uint32_t match_limit; /* As it says */
+ uint32_t match_limit_depth; /* As it says */
+ uint32_t match_call_count; /* Number of calls of internal function */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
index 8dea90a1c5..c7bf0b2c3e 100644
--- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -313,16 +313,25 @@ typedef struct ref_iterator_backtrack {
typedef struct recurse_entry {
struct recurse_entry *next;
- /* Contains the function entry. */
- struct sljit_label *entry;
- /* Collects the calls until the function is not created. */
- jump_list *calls;
+ /* Contains the function entry label. */
+ struct sljit_label *entry_label;
+ /* Contains the function entry label. */
+ struct sljit_label *backtrack_label;
+ /* Collects the entry calls until the function is not created. */
+ jump_list *entry_calls;
+ /* Collects the backtrack calls until the function is not created. */
+ jump_list *backtrack_calls;
/* Points to the starting opcode. */
sljit_sw start;
} recurse_entry;
typedef struct recurse_backtrack {
backtrack_common common;
+ /* Return to the matching path. */
+ struct sljit_label *matchingpath;
+ /* Recursive pattern. */
+ recurse_entry *entry;
+ /* Pattern is inlined. */
BOOL inlined_pattern;
} recurse_backtrack;
@@ -341,11 +350,26 @@ typedef struct then_trap_backtrack {
int framesize;
} then_trap_backtrack;
-#define MAX_RANGE_SIZE 4
+#define MAX_N_CHARS 12
+#define MAX_DIFF_CHARS 5
+
+typedef struct fast_forward_char_data {
+ /* Number of characters in the chars array, 255 for any character. */
+ sljit_u8 count;
+ /* Number of last UTF-8 characters in the chars array. */
+ sljit_u8 last_count;
+ /* Available characters in the current position. */
+ PCRE2_UCHAR chars[MAX_DIFF_CHARS];
+} fast_forward_char_data;
+
+#define MAX_CLASS_RANGE_SIZE 4
+#define MAX_CLASS_CHARS_SIZE 3
typedef struct compiler_common {
/* The sljit ceneric compiler. */
struct sljit_compiler *compiler;
+ /* Compiled regular expression. */
+ pcre2_real_code *re;
/* First byte code. */
PCRE2_SPTR start;
/* Maps private data offset to each opcode. */
@@ -402,10 +426,10 @@ typedef struct compiler_common {
BOOL has_then;
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
BOOL has_skip_in_assert_back;
- /* Currently in recurse or negative assert. */
- BOOL local_exit;
- /* Currently in a positive assert. */
- BOOL positive_assert;
+ /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
+ BOOL local_quit_available;
+ /* Currently in a positive assertion. */
+ BOOL in_positive_assertion;
/* Newline control. */
int nltype;
sljit_u32 nlmax;
@@ -426,7 +450,7 @@ typedef struct compiler_common {
/* Labels and jump lists. */
struct sljit_label *partialmatchlabel;
struct sljit_label *quit_label;
- struct sljit_label *forced_quit_label;
+ struct sljit_label *abort_label;
struct sljit_label *accept_label;
struct sljit_label *ff_newline_shortcut;
stub_list *stubs;
@@ -435,8 +459,9 @@ typedef struct compiler_common {
recurse_entry *currententry;
jump_list *partialmatch;
jump_list *quit;
- jump_list *positive_assert_quit;
- jump_list *forced_quit;
+ jump_list *positive_assertion_quit;
+ jump_list *abort;
+ jump_list *failed_match;
jump_list *accept;
jump_list *calllimit;
jump_list *stackalloc;
@@ -500,7 +525,7 @@ typedef struct compare_context {
#undef CMP
/* Used for accessing the elements of the stack. */
-#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
+#define STACK(i) ((i) * (int)sizeof(sljit_sw))
#define TMP1 SLJIT_R0
#define TMP2 SLJIT_R2
@@ -570,13 +595,17 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
-#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
- sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
+#define OP_FLAGS(op, dst, dstw, type) \
+ sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
+#define CMOV(type, dst_reg, src, srcw) \
+ sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
sljit_get_local_base(compiler, (dst), (dstw), (offset))
#define READ_CHAR_MAX 0x7fffffff
+#define INVALID_UTF_CHAR 888
+
static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
{
SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
@@ -606,8 +635,8 @@ return count;
set_private_data_ptrs
get_framesize
init_frame
- get_private_data_copy_length
- copy_private_data
+ get_recurse_data_length
+ copy_recurse_data
compile_matchingpath
compile_backtrackingpath
*/
@@ -675,7 +704,6 @@ switch(*cc)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@@ -806,7 +834,7 @@ switch(*cc)
default:
/* All opcodes are supported now! */
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
}
@@ -1304,7 +1332,7 @@ while (cc < ccend)
if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
break;
- if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
+ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
{
if (detect_repeat(common, cc))
{
@@ -1333,7 +1361,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
@@ -1654,11 +1681,11 @@ if (length > 0)
return stack_restore ? no_frame : no_stack;
}
-static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop, BOOL recursive)
+static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
-BOOL setsom_found = recursive;
-BOOL setmark_found = recursive;
+BOOL setsom_found = FALSE;
+BOOL setmark_found = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;
@@ -1671,7 +1698,7 @@ stackpos = STACK(stackpos);
if (ccend == NULL)
{
ccend = bracketend(cc) - (1 + LINK_SIZE);
- if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
+ if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
cc = next_opcode(common, cc);
}
@@ -1685,9 +1712,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
setsom_found = TRUE;
}
cc += 1;
@@ -1701,9 +1728,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
setmark_found = TRUE;
}
cc += 1 + 2 + cc[1];
@@ -1714,27 +1741,27 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
setmark_found = TRUE;
}
if (common->capture_last_ptr != 0 && !capture_last_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
@@ -1748,20 +1775,20 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
capture_last_found = TRUE;
}
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
- stackpos += (int)sizeof(sljit_sw);
+ stackpos -= (int)sizeof(sljit_sw);
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@@ -1776,21 +1803,127 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
-static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL needs_control_head)
+#define RECURSE_TMP_REG_COUNT 3
+
+typedef struct delayed_mem_copy_status {
+ struct sljit_compiler *compiler;
+ int store_bases[RECURSE_TMP_REG_COUNT];
+ int store_offsets[RECURSE_TMP_REG_COUNT];
+ int tmp_regs[RECURSE_TMP_REG_COUNT];
+ int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
+ int next_tmp_reg;
+} delayed_mem_copy_status;
+
+static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
-int private_data_length = needs_control_head ? 3 : 2;
+int i;
+
+for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
+ {
+ SLJIT_ASSERT(status->tmp_regs[i] >= 0);
+ SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
+
+ status->store_bases[i] = -1;
+ }
+status->next_tmp_reg = 0;
+status->compiler = common->compiler;
+}
+
+static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
+ int store_base, sljit_sw store_offset)
+{
+struct sljit_compiler *compiler = status->compiler;
+int next_tmp_reg = status->next_tmp_reg;
+int tmp_reg = status->tmp_regs[next_tmp_reg];
+
+SLJIT_ASSERT(load_base > 0 && store_base > 0);
+
+if (status->store_bases[next_tmp_reg] == -1)
+ {
+ /* Preserve virtual registers. */
+ if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
+ OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
+ }
+else
+ OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
+
+OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
+status->store_bases[next_tmp_reg] = store_base;
+status->store_offsets[next_tmp_reg] = store_offset;
+
+status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
+}
+
+static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
+{
+struct sljit_compiler *compiler = status->compiler;
+int next_tmp_reg = status->next_tmp_reg;
+int tmp_reg, saved_tmp_reg, i;
+
+for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
+ {
+ if (status->store_bases[next_tmp_reg] != -1)
+ {
+ tmp_reg = status->tmp_regs[next_tmp_reg];
+ saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
+
+ OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
+
+ /* Restore virtual registers. */
+ if (sljit_get_register_index(saved_tmp_reg) < 0)
+ OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
+ }
+
+ next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
+ }
+}
+
+#undef RECURSE_TMP_REG_COUNT
+
+static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
+ BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
+{
+int length = 1;
int size;
PCRE2_SPTR alternative;
+BOOL quit_found = FALSE;
+BOOL accept_found = FALSE;
+BOOL setsom_found = FALSE;
+BOOL setmark_found = FALSE;
+BOOL capture_last_found = FALSE;
+BOOL control_head_found = FALSE;
+
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+SLJIT_ASSERT(common->control_head_ptr != 0);
+control_head_found = TRUE;
+#endif
+
/* Calculate the sum of the private machine words. */
while (cc < ccend)
{
size = 0;
switch(*cc)
{
+ case OP_SET_SOM:
+ SLJIT_ASSERT(common->has_set_som);
+ setsom_found = TRUE;
+ cc += 1;
+ break;
+
+ case OP_RECURSE:
+ if (common->has_set_som)
+ setsom_found = TRUE;
+ if (common->mark_ptr != 0)
+ setmark_found = TRUE;
+ if (common->capture_last_ptr != 0)
+ capture_last_found = TRUE;
+ cc += 1 + LINK_SIZE;
+ break;
+
case OP_KET:
if (PRIVATE_DATA(cc) != 0)
{
- private_data_length++;
+ length++;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
@@ -1802,26 +1935,30 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRAPOS:
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCOND:
- private_data_length++;
+ length++;
SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
cc += 1 + LINK_SIZE;
break;
case OP_CBRA:
case OP_SCBRA:
+ length += 2;
+ if (common->capture_last_ptr != 0)
+ capture_last_found = TRUE;
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- private_data_length++;
+ length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
- private_data_length += 2;
+ length += 2 + 2;
+ if (common->capture_last_ptr != 0)
+ capture_last_found = TRUE;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@@ -1829,13 +1966,13 @@ while (cc < ccend)
/* Might be a hidden SCOND. */
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
- private_data_length++;
+ length++;
cc += 1 + LINK_SIZE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- private_data_length++;
+ if (PRIVATE_DATA(cc) != 0)
+ length++;
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -1843,8 +1980,8 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- private_data_length += 2;
+ if (PRIVATE_DATA(cc) != 0)
+ length += 2;
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -1852,8 +1989,8 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- private_data_length += 2;
+ if (PRIVATE_DATA(cc) != 0)
+ length += 2;
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -1861,20 +1998,20 @@ while (cc < ccend)
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- private_data_length++;
+ if (PRIVATE_DATA(cc) != 0)
+ length++;
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- private_data_length += 2;
+ if (PRIVATE_DATA(cc) != 0)
+ length += 2;
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- private_data_length += 2;
+ if (PRIVATE_DATA(cc) != 0)
+ length += 2;
cc += 1 + IMM2_SIZE;
break;
@@ -1886,11 +2023,51 @@ while (cc < ccend)
#else
size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
- if (PRIVATE_DATA(cc))
- private_data_length += get_class_iterator_size(cc + size);
+ if (PRIVATE_DATA(cc) != 0)
+ length += get_class_iterator_size(cc + size);
cc += size;
break;
+ case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_THEN_ARG:
+ SLJIT_ASSERT(common->mark_ptr != 0);
+ if (!setmark_found)
+ setmark_found = TRUE;
+ if (common->control_head_ptr != 0)
+ control_head_found = TRUE;
+ if (*cc != OP_MARK)
+ quit_found = TRUE;
+
+ cc += 1 + 2 + cc[1];
+ break;
+
+ case OP_PRUNE:
+ case OP_SKIP:
+ case OP_COMMIT:
+ quit_found = TRUE;
+ cc++;
+ break;
+
+ case OP_SKIP_ARG:
+ quit_found = TRUE;
+ cc += 1 + 2 + cc[1];
+ break;
+
+ case OP_THEN:
+ SLJIT_ASSERT(common->control_head_ptr != 0);
+ quit_found = TRUE;
+ if (!control_head_found)
+ control_head_found = TRUE;
+ cc++;
+ break;
+
+ case OP_ACCEPT:
+ case OP_ASSERT_ACCEPT:
+ accept_found = TRUE;
+ cc++;
+ break;
+
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
@@ -1898,329 +2075,446 @@ while (cc < ccend)
}
}
SLJIT_ASSERT(cc == ccend);
-return private_data_length;
+
+if (control_head_found)
+ length++;
+if (capture_last_found)
+ length++;
+if (quit_found)
+ {
+ if (setsom_found)
+ length++;
+ if (setmark_found)
+ length++;
+ }
+
+*needs_control_head = control_head_found;
+*has_quit = quit_found;
+*has_accept = accept_found;
+return length;
}
-static void copy_private_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
+enum copy_recurse_data_types {
+ recurse_copy_from_global,
+ recurse_copy_private_to_global,
+ recurse_copy_shared_to_global,
+ recurse_copy_kept_shared_to_global,
+ recurse_swap_global
+};
+
+static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
+ int type, int stackptr, int stacktop, BOOL has_quit)
{
-DEFINE_COMPILER;
-int srcw[2];
-int count, size;
-BOOL tmp1next = TRUE;
-BOOL tmp1empty = TRUE;
-BOOL tmp2empty = TRUE;
+delayed_mem_copy_status status;
PCRE2_SPTR alternative;
-enum {
- start,
- loop,
- end
-} status;
+sljit_sw private_srcw[2];
+sljit_sw shared_srcw[3];
+sljit_sw kept_shared_srcw[2];
+int private_count, shared_count, kept_shared_count;
+int from_sp, base_reg, offset, i;
+BOOL setsom_found = FALSE;
+BOOL setmark_found = FALSE;
+BOOL capture_last_found = FALSE;
+BOOL control_head_found = FALSE;
-status = save ? start : loop;
-stackptr = STACK(stackptr - 2);
-stacktop = STACK(stacktop - 1);
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+SLJIT_ASSERT(common->control_head_ptr != 0);
+control_head_found = TRUE;
+#endif
-if (!save)
+switch (type)
{
- stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
- if (stackptr < stacktop)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
- stackptr += sizeof(sljit_sw);
- tmp1empty = FALSE;
- }
- if (stackptr < stacktop)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
- stackptr += sizeof(sljit_sw);
- tmp2empty = FALSE;
- }
- /* The tmp1next must be TRUE in either way. */
+ case recurse_copy_from_global:
+ from_sp = TRUE;
+ base_reg = STACK_TOP;
+ break;
+
+ case recurse_copy_private_to_global:
+ case recurse_copy_shared_to_global:
+ case recurse_copy_kept_shared_to_global:
+ from_sp = FALSE;
+ base_reg = STACK_TOP;
+ break;
+
+ default:
+ SLJIT_ASSERT(type == recurse_swap_global);
+ from_sp = FALSE;
+ base_reg = TMP2;
+ break;
}
-do
+stackptr = STACK(stackptr);
+stacktop = STACK(stacktop);
+
+status.tmp_regs[0] = TMP1;
+status.saved_tmp_regs[0] = TMP1;
+
+if (base_reg != TMP2)
{
- count = 0;
- switch(status)
+ status.tmp_regs[1] = TMP2;
+ status.saved_tmp_regs[1] = TMP2;
+ }
+else
+ {
+ status.saved_tmp_regs[1] = RETURN_ADDR;
+ if (sljit_get_register_index (RETURN_ADDR) == -1)
+ status.tmp_regs[1] = STR_PTR;
+ else
+ status.tmp_regs[1] = RETURN_ADDR;
+ }
+
+status.saved_tmp_regs[2] = TMP3;
+if (sljit_get_register_index (TMP3) == -1)
+ status.tmp_regs[2] = STR_END;
+else
+ status.tmp_regs[2] = TMP3;
+
+delayed_mem_copy_init(&status, common);
+
+if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
+ {
+ SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
+
+ if (!from_sp)
+ delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
+
+ if (from_sp || type == recurse_swap_global)
+ delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
+ }
+
+stackptr += sizeof(sljit_sw);
+
+#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
+if (type != recurse_copy_shared_to_global)
+ {
+ if (!from_sp)
+ delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
+
+ if (from_sp || type == recurse_swap_global)
+ delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
+ }
+
+stackptr += sizeof(sljit_sw);
+#endif
+
+while (cc < ccend)
+ {
+ private_count = 0;
+ shared_count = 0;
+ kept_shared_count = 0;
+
+ switch(*cc)
{
- case start:
- SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
- count = 1;
- srcw[0] = common->recursive_head_ptr;
- if (needs_control_head)
+ case OP_SET_SOM:
+ SLJIT_ASSERT(common->has_set_som);
+ if (has_quit && !setsom_found)
{
- SLJIT_ASSERT(common->control_head_ptr != 0);
- count = 2;
- srcw[1] = common->control_head_ptr;
+ kept_shared_srcw[0] = OVECTOR(0);
+ kept_shared_count = 1;
+ setsom_found = TRUE;
}
- status = loop;
+ cc += 1;
break;
- case loop:
- if (cc >= ccend)
+ case OP_RECURSE:
+ if (has_quit)
{
- status = end;
- break;
+ if (common->has_set_som && !setsom_found)
+ {
+ kept_shared_srcw[0] = OVECTOR(0);
+ kept_shared_count = 1;
+ setsom_found = TRUE;
+ }
+ if (common->mark_ptr != 0 && !setmark_found)
+ {
+ kept_shared_srcw[kept_shared_count] = common->mark_ptr;
+ kept_shared_count++;
+ setmark_found = TRUE;
+ }
}
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ shared_srcw[0] = common->capture_last_ptr;
+ shared_count = 1;
+ capture_last_found = TRUE;
+ }
+ cc += 1 + LINK_SIZE;
+ break;
- switch(*cc)
+ case OP_KET:
+ if (PRIVATE_DATA(cc) != 0)
{
- case OP_KET:
- if (PRIVATE_DATA(cc) != 0)
- {
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
- cc += PRIVATE_DATA(cc + 1);
- }
- cc += 1 + LINK_SIZE;
- break;
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
+ cc += PRIVATE_DATA(cc + 1);
+ }
+ cc += 1 + LINK_SIZE;
+ break;
- case OP_ASSERT:
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK:
- case OP_ASSERTBACK_NOT:
- case OP_ONCE:
- case OP_ONCE_NC:
- case OP_BRAPOS:
- case OP_SBRA:
- case OP_SBRAPOS:
- case OP_SCOND:
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- SLJIT_ASSERT(srcw[0] != 0);
- cc += 1 + LINK_SIZE;
- break;
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ONCE:
+ case OP_BRAPOS:
+ case OP_SBRA:
+ case OP_SBRAPOS:
+ case OP_SCOND:
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ cc += 1 + LINK_SIZE;
+ break;
- case OP_CBRA:
- case OP_SCBRA:
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
- {
- count = 1;
- srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
- }
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
+ case OP_CBRA:
+ case OP_SCBRA:
+ offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
+ shared_srcw[0] = OVECTOR(offset);
+ shared_srcw[1] = OVECTOR(offset + 1);
+ shared_count = 2;
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
- SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
- cc += 1 + LINK_SIZE + IMM2_SIZE;
- break;
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ shared_srcw[2] = common->capture_last_ptr;
+ shared_count = 3;
+ capture_last_found = TRUE;
+ }
- case OP_COND:
- /* Might be a hidden SCOND. */
- alternative = cc + GET(cc, 1);
- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
- {
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- SLJIT_ASSERT(srcw[0] != 0);
- }
- cc += 1 + LINK_SIZE;
- break;
+ if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ {
+ private_count = 1;
+ private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+ }
+ cc += 1 + LINK_SIZE + IMM2_SIZE;
+ break;
- CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- }
- cc += 2;
+ case OP_CBRAPOS:
+ case OP_SCBRAPOS:
+ offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
+ shared_srcw[0] = OVECTOR(offset);
+ shared_srcw[1] = OVECTOR(offset + 1);
+ shared_count = 2;
+
+ if (common->capture_last_ptr != 0 && !capture_last_found)
+ {
+ shared_srcw[2] = common->capture_last_ptr;
+ shared_count = 3;
+ capture_last_found = TRUE;
+ }
+
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+ cc += 1 + LINK_SIZE + IMM2_SIZE;
+ break;
+
+ case OP_COND:
+ /* Might be a hidden SCOND. */
+ alternative = cc + GET(cc, 1);
+ if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+ {
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ }
+ cc += 1 + LINK_SIZE;
+ break;
+
+ CASE_ITERATOR_PRIVATE_DATA_1
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ }
+ cc += 2;
#ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
- break;
+ break;
- CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- {
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
- }
- cc += 2;
+ CASE_ITERATOR_PRIVATE_DATA_2A
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+ }
+ cc += 2;
#ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
- break;
+ break;
- CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- {
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
- }
- cc += 2 + IMM2_SIZE;
+ CASE_ITERATOR_PRIVATE_DATA_2B
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+ }
+ cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
- if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
- break;
+ break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- }
- cc += 1;
- break;
+ CASE_ITERATOR_TYPE_PRIVATE_DATA_1
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ }
+ cc += 1;
+ break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
- {
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = srcw[0] + sizeof(sljit_sw);
- }
- cc += 1;
- break;
+ CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ }
+ cc += 1;
+ break;
- CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
- {
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = srcw[0] + sizeof(sljit_sw);
- }
- cc += 1 + IMM2_SIZE;
- break;
+ CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
+ if (PRIVATE_DATA(cc))
+ {
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ }
+ cc += 1 + IMM2_SIZE;
+ break;
- case OP_CLASS:
- case OP_NCLASS:
+ case OP_CLASS:
+ case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
- case OP_XCLASS:
- size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
+ case OP_XCLASS:
+ i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
- size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
+ i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
- if (PRIVATE_DATA(cc))
- switch(get_class_iterator_size(cc + size))
- {
- case 1:
- count = 1;
- srcw[0] = PRIVATE_DATA(cc);
- break;
-
- case 2:
- count = 2;
- srcw[0] = PRIVATE_DATA(cc);
- srcw[1] = srcw[0] + sizeof(sljit_sw);
- break;
+ if (PRIVATE_DATA(cc) != 0)
+ switch(get_class_iterator_size(cc + i))
+ {
+ case 1:
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ break;
+
+ case 2:
+ private_count = 2;
+ private_srcw[0] = PRIVATE_DATA(cc);
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ break;
+
+ default:
+ SLJIT_UNREACHABLE();
+ break;
+ }
+ cc += i;
+ break;
- default:
- SLJIT_ASSERT_STOP();
- break;
- }
- cc += size;
- break;
+ case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_THEN_ARG:
+ SLJIT_ASSERT(common->mark_ptr != 0);
+ if (has_quit && !setmark_found)
+ {
+ kept_shared_srcw[0] = common->mark_ptr;
+ kept_shared_count = 1;
+ setmark_found = TRUE;
+ }
+ if (common->control_head_ptr != 0 && !control_head_found)
+ {
+ shared_srcw[0] = common->control_head_ptr;
+ shared_count = 1;
+ control_head_found = TRUE;
+ }
+ cc += 1 + 2 + cc[1];
+ break;
- default:
- cc = next_opcode(common, cc);
- SLJIT_ASSERT(cc != NULL);
- break;
+ case OP_THEN:
+ SLJIT_ASSERT(common->control_head_ptr != 0);
+ if (!control_head_found)
+ {
+ shared_srcw[0] = common->control_head_ptr;
+ shared_count = 1;
+ control_head_found = TRUE;
}
+ cc++;
break;
- case end:
- SLJIT_ASSERT_STOP();
+ default:
+ cc = next_opcode(common, cc);
+ SLJIT_ASSERT(cc != NULL);
break;
}
- while (count > 0)
+ if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
{
- count--;
- if (save)
- {
- if (tmp1next)
- {
- if (!tmp1empty)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
- stackptr += sizeof(sljit_sw);
- }
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
- tmp1empty = FALSE;
- tmp1next = FALSE;
- }
- else
- {
- if (!tmp2empty)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
- stackptr += sizeof(sljit_sw);
- }
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
- tmp2empty = FALSE;
- tmp1next = TRUE;
- }
- }
- else
+ SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
+
+ for (i = 0; i < private_count; i++)
{
- if (tmp1next)
- {
- SLJIT_ASSERT(!tmp1empty);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
- tmp1empty = stackptr >= stacktop;
- if (!tmp1empty)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
- stackptr += sizeof(sljit_sw);
- }
- tmp1next = FALSE;
- }
- else
- {
- SLJIT_ASSERT(!tmp2empty);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
- tmp2empty = stackptr >= stacktop;
- if (!tmp2empty)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
- stackptr += sizeof(sljit_sw);
- }
- tmp1next = TRUE;
- }
+ SLJIT_ASSERT(private_srcw[i] != 0);
+
+ if (!from_sp)
+ delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
+
+ if (from_sp || type == recurse_swap_global)
+ delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
+
+ stackptr += sizeof(sljit_sw);
}
}
- }
-while (status != end);
+ else
+ stackptr += sizeof(sljit_sw) * private_count;
-if (save)
- {
- if (tmp1next)
+ if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
{
- if (!tmp1empty)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
- stackptr += sizeof(sljit_sw);
- }
- if (!tmp2empty)
+ SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
+
+ for (i = 0; i < shared_count; i++)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
+ SLJIT_ASSERT(shared_srcw[i] != 0);
+
+ if (!from_sp)
+ delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
+
+ if (from_sp || type == recurse_swap_global)
+ delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
+
stackptr += sizeof(sljit_sw);
}
}
else
+ stackptr += sizeof(sljit_sw) * shared_count;
+
+ if (type != recurse_copy_private_to_global && type != recurse_swap_global)
{
- if (!tmp2empty)
- {
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
- stackptr += sizeof(sljit_sw);
- }
- if (!tmp1empty)
+ SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
+
+ for (i = 0; i < kept_shared_count; i++)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
+ SLJIT_ASSERT(kept_shared_srcw[i] != 0);
+
+ if (!from_sp)
+ delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
+
+ if (from_sp || type == recurse_swap_global)
+ delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
+
stackptr += sizeof(sljit_sw);
}
}
+ else
+ stackptr += sizeof(sljit_sw) * kept_shared_count;
}
-SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
+
+SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
+
+delayed_mem_copy_finish(&status);
}
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
@@ -2337,7 +2631,7 @@ static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
-OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
+OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
}
@@ -2347,7 +2641,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
-OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
@@ -2355,7 +2649,7 @@ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
-add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
+add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
}
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
@@ -2363,7 +2657,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
-OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
@@ -2407,7 +2701,7 @@ else
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
}
@@ -2445,7 +2739,7 @@ else
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
loop = LABEL();
OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
@@ -2463,22 +2757,22 @@ static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg
{
while (current != NULL)
{
- switch (current[-2])
+ switch (current[1])
{
case type_then_trap:
break;
case type_mark:
- if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[-3]) == 0)
- return current[-4];
+ if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
+ return current[3];
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
- SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
- current = (sljit_sw*)current[-1];
+ SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
+ current = (sljit_sw*)current[0];
}
return -1;
}
@@ -2518,7 +2812,7 @@ if (sizeof(PCRE2_SIZE) == 4)
OP1(SLJIT_MOVU_U32, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
else
OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R2), sizeof(PCRE2_SIZE), SLJIT_S1, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
/* Calculate the return value, which is the maximum ovector value. */
@@ -3104,8 +3398,8 @@ if (common->utf)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Skip low surrogate if necessary. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
return;
@@ -3124,6 +3418,7 @@ struct sljit_jump *jump;
if (nltype == NLTYPE_ANY)
{
add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+ sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
}
else if (nltype == NLTYPE_ANYCRLF)
@@ -3165,7 +3460,7 @@ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Searching for the first zero. */
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -3179,7 +3474,7 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
@@ -3213,15 +3508,15 @@ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Searching for the first zero. */
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
@@ -3244,7 +3539,7 @@ struct sljit_jump *compare;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@@ -3281,10 +3576,30 @@ static void do_getucd(compiler_common *common)
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2. */
DEFINE_COMPILER;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+struct sljit_jump *jump;
+#endif
+
+#if defined SLJIT_DEBUG && SLJIT_DEBUG
+/* dummy_ucd_record */
+const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
+SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
+SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
+#endif
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (!common->utf)
+ {
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
+ JUMPHERE(jump);
+ }
+#endif
+
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
@@ -3299,7 +3614,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
#endif /* SUPPORT_UNICODE */
-static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, sljit_u32 overall_options)
+static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *mainloop;
@@ -3311,6 +3626,8 @@ struct sljit_jump *end2 = NULL;
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
+sljit_u32 overall_options = common->re->overall_options;
+BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;
@@ -3318,7 +3635,7 @@ if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
&& (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
newlinecheck = TRUE;
-SLJIT_ASSERT(common->forced_quit_label == NULL);
+SLJIT_ASSERT(common->abort_label == NULL);
if ((overall_options & PCRE2_FIRSTLINE) != 0)
{
@@ -3375,7 +3692,7 @@ else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
JUMPHERE(end2);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- add_jump(compiler, &common->forced_quit, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
+ add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
JUMPHERE(end);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
}
@@ -3388,8 +3705,8 @@ if (newlinecheck)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -3426,8 +3743,8 @@ if (common->utf)
{
singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(singlechar);
@@ -3445,40 +3762,42 @@ if (newlinecheck)
return mainloop;
}
-#define MAX_N_CHARS 16
-#define MAX_DIFF_CHARS 6
-static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, PCRE2_UCHAR *chars)
+static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
{
-PCRE2_UCHAR i, len;
+sljit_u32 i, count = chars->count;
-len = chars[0];
-if (len == 255)
+if (count == 255)
return;
-if (len == 0)
+if (count == 0)
{
- chars[0] = 1;
- chars[1] = chr;
+ chars->count = 1;
+ chars->chars[0] = chr;
+
+ if (last)
+ chars->last_count = 1;
return;
}
-for (i = len; i > 0; i--)
- if (chars[i] == chr)
+for (i = 0; i < count; i++)
+ if (chars->chars[i] == chr)
return;
-if (len >= MAX_DIFF_CHARS - 1)
+if (count >= MAX_DIFF_CHARS)
{
- chars[0] = 255;
+ chars->count = 255;
return;
}
-len++;
-chars[len] = chr;
-chars[0] = len;
+chars->chars[count] = chr;
+chars->count = count + 1;
+
+if (last)
+ chars->last_count++;
}
-static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *chars, int max_chars, sljit_u32 *rec_count)
+static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
BOOL last, any, class, caseless;
@@ -3487,7 +3806,7 @@ sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
-PCRE2_UCHAR othercase[8];
+PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
@@ -3510,6 +3829,7 @@ while (TRUE)
{
case OP_CHARI:
caseless = TRUE;
+ /* Fall through */
case OP_CHAR:
last = FALSE;
cc++;
@@ -3541,6 +3861,7 @@ while (TRUE)
case OP_MINPLUSI:
case OP_POSPLUSI:
caseless = TRUE;
+ /* Fall through */
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
@@ -3549,6 +3870,7 @@ while (TRUE)
case OP_EXACTI:
caseless = TRUE;
+ /* Fall through */
case OP_EXACT:
repeat = GET2(cc, 1);
last = FALSE;
@@ -3559,6 +3881,7 @@ while (TRUE)
case OP_MINQUERYI:
case OP_POSQUERYI:
caseless = TRUE;
+ /* Fall through */
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
@@ -3582,7 +3905,6 @@ while (TRUE)
continue;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_BRAPOS:
case OP_CBRA:
@@ -3703,12 +4025,12 @@ while (TRUE)
{
do
{
- chars[0] = 255;
+ chars->count = 255;
consumed++;
if (--max_chars == 0)
return consumed;
- chars += MAX_DIFF_CHARS;
+ chars++;
}
while (--repeat > 0);
@@ -3752,8 +4074,8 @@ while (TRUE)
do
{
if (bytes[31] & 0x80)
- chars[0] = 255;
- else if (chars[0] != 255)
+ chars->count = 255;
+ else if (chars->count != 255)
{
bytes_end = bytes + 32;
chr = 0;
@@ -3768,7 +4090,7 @@ while (TRUE)
do
{
if ((byte & 0x1) != 0)
- add_prefix_char(chr, chars);
+ add_prefix_char(chr, chars, TRUE);
byte >>= 1;
chr++;
}
@@ -3776,14 +4098,14 @@ while (TRUE)
chr = (chr + 7) & ~7;
}
}
- while (chars[0] != 255 && bytes < bytes_end);
+ while (chars->count != 255 && bytes < bytes_end);
bytes = bytes_end - 32;
}
consumed++;
if (--max_chars == 0)
return consumed;
- chars += MAX_DIFF_CHARS;
+ chars++;
}
while (--repeat > 0);
@@ -3847,17 +4169,18 @@ while (TRUE)
oc = othercase;
do
{
+ len--;
+ consumed++;
+
chr = *cc;
- add_prefix_char(*cc, chars);
+ add_prefix_char(*cc, chars, len == 0);
if (caseless)
- add_prefix_char(*oc, chars);
+ add_prefix_char(*oc, chars, len == 0);
- len--;
- consumed++;
if (--max_chars == 0)
return consumed;
- chars += MAX_DIFF_CHARS;
+ chars++;
cc++;
oc++;
}
@@ -3876,7 +4199,37 @@ while (TRUE)
}
}
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
+CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
+CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
+#else
+#error "Unknown code width"
+#endif
+}
+#endif
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
+return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
+return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
+#else
+#error "Unknown code width"
+#endif
+}
+#endif
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
@@ -3895,39 +4248,143 @@ return value;
#endif
}
-static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
+static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg)
+{
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+sljit_u8 instruction[5];
+#else
+sljit_u8 instruction[4];
+#endif
+
+SLJIT_ASSERT(dst_xmm_reg < 8);
+
+/* MOVDQA xmm1, xmm2/m128 */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+if (src_general_reg < 8)
+ {
+ instruction[0] = 0x66;
+ instruction[1] = 0x0f;
+ instruction[2] = 0x6f;
+ instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+else
+ {
+ instruction[0] = 0x66;
+ instruction[1] = 0x41;
+ instruction[2] = 0x0f;
+ instruction[3] = 0x6f;
+ instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7);
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+#else
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6f;
+instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
+sljit_emit_op_custom(compiler, instruction, 4);
+#endif
+}
+
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2,
+ sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
+{
+sljit_u8 instruction[4];
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+
+if (char1 == char2 || bit != 0)
+ {
+ if (bit != 0)
+ {
+ /* POR xmm1, xmm2/m128 */
+ /* instruction[0] = 0x66; */
+ /* instruction[1] = 0x0f; */
+ instruction[2] = 0xeb;
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+
+ /* PCMPEQB/W/D xmm1, xmm2/m128 */
+ /* instruction[0] = 0x66; */
+ /* instruction[1] = 0x0f; */
+ instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+else
+ {
+ /* MOVDQA xmm1, xmm2/m128 */
+ /* instruction[0] = 0x66; */
+ /* instruction[1] = 0x0f; */
+ instruction[2] = 0x6f;
+ instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ /* PCMPEQB/W/D xmm1, xmm2/m128 */
+ /* instruction[0] = 0x66; */
+ /* instruction[1] = 0x0f; */
+ instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+
+ /* POR xmm1, xmm2/m128 */
+ /* instruction[0] = 0x66; */
+ /* instruction[1] = 0x0f; */
+ instruction[2] = 0xeb;
+ instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
+}
+
+static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
-struct sljit_jump *quit[3];
-struct sljit_jump *nomatch;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
-sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
+// sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
-BOOL load_twice = FALSE;
-PCRE2_UCHAR bit;
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_u32 bit = 0;
-bit = char1 ^ char2;
-if (!is_powerof2(bit))
- bit = 0;
+SLJIT_UNUSED_ARG(offset);
-if ((char1 != char2) && bit == 0)
- load_twice = TRUE;
+if (char1 != char2)
+ {
+ bit = char1 ^ char2;
+ if (!is_powerof2(bit))
+ bit = 0;
+ }
-quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+ add_jump(compiler, &common->failed_match, partial_quit[0]);
/* First part (unaligned start) */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
-SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
+// SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
+
+SLJIT_ASSERT(tmp1_ind < 8);
/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
-instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char1 != char2)
@@ -3935,224 +4392,521 @@ if (char1 != char2)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
/* MOVD xmm, r/m32 */
- instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+
/* PSHUFD xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
-instruction[3] = 0xc0 | (2 << 3) | 2;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);
if (char1 != char2)
{
/* PSHUFD xmm1, xmm2/m128, imm8 */
- instruction[3] = 0xc0 | (3 << 3) | 3;
- instruction[4] = 0;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
sljit_emit_op_custom(compiler, instruction, 5);
}
-OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
-/* MOVDQA xmm1, xmm2/m128 */
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
+fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
-if (str_ptr_ind < 8)
- {
- instruction[2] = 0x6f;
- instruction[3] = (0 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
-
- if (load_twice)
- {
- instruction[3] = (1 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
- }
-else
- {
- instruction[1] = 0x41;
- instruction[2] = 0x0f;
- instruction[3] = 0x6f;
- instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
- sljit_emit_op_custom(compiler, instruction, 5);
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
- if (load_twice)
- {
- instruction[4] = (1 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
- }
- instruction[1] = 0x0f;
- }
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
-#else
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+sljit_set_current_flags(compiler, SLJIT_SET_Z);
-instruction[2] = 0x6f;
-instruction[3] = (0 << 3) | str_ptr_ind;
-sljit_emit_op_custom(compiler, instruction, 4);
+quit = JUMP(SLJIT_NOT_ZERO);
-if (load_twice)
- {
- instruction[3] = (1 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
-#endif
+start = LABEL();
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
-if (bit != 0)
- {
- /* POR xmm1, xmm2/m128 */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (0 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
+partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+ add_jump(compiler, &common->failed_match, partial_quit[1]);
-/* PCMPEQB/W/D xmm1, xmm2/m128 */
-instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-instruction[3] = 0xc0 | (0 << 3) | 2;
-sljit_emit_op_custom(compiler, instruction, 4);
+/* Second part (aligned) */
-if (load_twice)
- {
- instruction[3] = 0xc0 | (1 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
- }
+load_from_mem_sse2(compiler, 0, str_ptr_ind);
+fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
-if (load_twice)
- {
- OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
- instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
- sljit_emit_op_custom(compiler, instruction, 4);
-
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
- }
-
-OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
-
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
+sljit_set_current_flags(compiler, SLJIT_SET_Z);
-nomatch = JUMP(SLJIT_ZERO);
+JUMPTO(SLJIT_ZERO, start);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+JUMPHERE(quit);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-quit[1] = JUMP(SLJIT_JUMP);
-JUMPHERE(nomatch);
+if (common->mode != PCRE2_JIT_COMPLETE)
+ {
+ JUMPHERE(partial_quit[0]);
+ JUMPHERE(partial_quit[1]);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+ CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+ }
+else
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-start = LABEL();
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
-quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf && offset > 0)
+ {
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
-/* Second part (aligned) */
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
-instruction[0] = 0x66;
-instruction[1] = 0x0f;
+ quit = jump_if_utf_char_start(compiler, TMP1);
-/* MOVDQA xmm1, xmm2/m128 */
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+ JUMPTO(SLJIT_JUMP, restart);
+
+ JUMPHERE(quit);
+ }
+#endif
+}
+
+#ifndef _WIN64
-if (str_ptr_ind < 8)
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return 15;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return 7;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+return 3;
+#else
+#error "Unsupported unit width"
+#endif
+}
+
+static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1,
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sljit_u32 bit1 = 0;
+sljit_u32 bit2 = 0;
+sljit_u32 diff = IN_UCHARS(offs1 - offs2);
+sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data1_ind = 0;
+sljit_s32 data2_ind = 1;
+sljit_s32 tmp_ind = 2;
+sljit_s32 cmp1a_ind = 3;
+sljit_s32 cmp1b_ind = 4;
+sljit_s32 cmp2a_ind = 5;
+sljit_s32 cmp2b_ind = 6;
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *jump[2];
+
+sljit_u8 instruction[8];
+
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
+SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
+
+/* Initialize. */
+if (common->match_end_ptr != 0)
{
- instruction[2] = 0x6f;
- instruction[3] = (0 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
- if (load_twice)
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+ CMOV(SLJIT_LESS, STR_END, TMP1, 0);
+ }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+/* MOVD xmm, r/m32 */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+
+if (char1a == char1b)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
+else
+ {
+ bit1 = char1a ^ char1b;
+ if (is_powerof2(bit1))
{
- instruction[3] = (1 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 4);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
+ }
+ else
+ {
+ bit1 = 0;
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
}
}
-else
+
+instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1a != char1b)
{
- instruction[1] = 0x41;
- instruction[2] = 0x0f;
- instruction[3] = 0x6f;
- instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
- sljit_emit_op_custom(compiler, instruction, 5);
+ instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
+ sljit_emit_op_custom(compiler, instruction, 4);
+ }
- if (load_twice)
+if (char2a == char2b)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
+else
+ {
+ bit2 = char2a ^ char2b;
+ if (is_powerof2(bit2))
{
- instruction[4] = (1 << 3) | str_ptr_ind;
- sljit_emit_op_custom(compiler, instruction, 5);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
+ }
+ else
+ {
+ bit2 = 0;
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
}
- instruction[1] = 0x0f;
}
-#else
-
-instruction[2] = 0x6f;
-instruction[3] = (0 << 3) | str_ptr_ind;
+instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
-if (load_twice)
+if (char2a != char2b)
{
- instruction[3] = (1 << 3) | str_ptr_ind;
+ instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
-#endif
+/* PSHUFD xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x70;
+instruction[4] = 0;
-if (bit != 0)
+instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1a != char1b)
{
- /* POR xmm1, xmm2/m128 */
- instruction[2] = 0xeb;
- instruction[3] = 0xc0 | (0 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
+ instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
+ sljit_emit_op_custom(compiler, instruction, 5);
}
-/* PCMPEQB/W/D xmm1, xmm2/m128 */
-instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
-instruction[3] = 0xc0 | (0 << 3) | 2;
-sljit_emit_op_custom(compiler, instruction, 4);
+instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
+sljit_emit_op_custom(compiler, instruction, 5);
-if (load_twice)
+if (char2a != char2b)
{
- instruction[3] = 0xc0 | (1 << 3) | 3;
- sljit_emit_op_custom(compiler, instruction, 4);
+ instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
+ sljit_emit_op_custom(compiler, instruction, 5);
}
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2));
+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);
+
+load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
+
+jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
+
+load_from_mem_sse2(compiler, data2_ind, tmp1_ind);
+
+/* MOVDQA xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x6f;
+instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x73;
+instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
+instruction[4] = diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+/* PSRLDQ xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+/* instruction[2] = 0x73; */
+instruction[3] = 0xc0 | (3 << 3) | data2_ind;
+instruction[4] = 16 - diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+/* POR xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xeb;
+instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+jump[1] = JUMP(SLJIT_JUMP);
+
+JUMPHERE(jump[0]);
+
+/* MOVDQA xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x6f;
+instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x73;
+instruction[3] = 0xc0 | (7 << 3) | data2_ind;
+instruction[4] = diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+JUMPHERE(jump[1]);
+
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
+
+fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
+fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
+
+/* PAND xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xdb;
+instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
-if (load_twice)
- {
- instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
- sljit_emit_op_custom(compiler, instruction, 4);
+/* Ignore matches before the first STR_PTR. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
- OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
- }
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+sljit_set_current_flags(compiler, SLJIT_SET_Z);
+
+jump[0] = JUMP(SLJIT_NOT_ZERO);
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+/* Main loop. */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+
+start = LABEL();
+
+load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
+
+/* PSRLDQ xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x73;
+instruction[3] = 0xc0 | (3 << 3) | data2_ind;
+instruction[4] = 16 - diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+/* MOVDQA xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x6f;
+instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x73;
+instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
+instruction[4] = diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+/* POR xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xeb;
+instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind);
+fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind);
+
+/* PAND xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xdb;
+instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
sljit_emit_op_custom(compiler, instruction, 3);
+sljit_set_current_flags(compiler, SLJIT_SET_Z);
JUMPTO(SLJIT_ZERO, start);
+JUMPHERE(jump[0]);
+
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-start = LABEL();
-SET_LABEL(quit[0], start);
-SET_LABEL(quit[1], start);
-SET_LABEL(quit[2], start);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+if (common->match_end_ptr != 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf)
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
+
+ jump[0] = jump_if_utf_char_start(compiler, TMP1);
+
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
+
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
+
+ JUMPHERE(jump[0]);
+ }
+#endif
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+
+if (common->match_end_ptr != 0)
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
+static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max)
+{
+sljit_s32 i, j, priority, count;
+sljit_u32 priorities;
+PCRE2_UCHAR a1, a2, b1, b2;
+
+priorities = 0;
+
+count = 0;
+for (i = 0; i < max; i++)
+ {
+ if (chars[i].last_count > 2)
+ {
+ SLJIT_ASSERT(chars[i].last_count <= 7);
+
+ priorities |= (1 << chars[i].last_count);
+ count++;
+ }
+ }
+
+if (count < 2)
+ return FALSE;
+
+for (priority = 7; priority > 2; priority--)
+ {
+ if ((priorities & (1 << priority)) == 0)
+ continue;
+
+ for (i = max - 1; i >= 1; i--)
+ if (chars[i].last_count >= priority)
+ {
+ SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1);
+
+ a1 = chars[i].chars[0];
+ a2 = chars[i].chars[1];
+
+ j = i - max_fast_forward_char_pair_sse2_offset();
+ if (j < 0)
+ j = 0;
+
+ while (j < i)
+ {
+ if (chars[j].last_count >= priority)
+ {
+ b1 = chars[j].chars[0];
+ b2 = chars[j].chars[1];
+
+ if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
+ {
+ fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2);
+ return TRUE;
+ }
+ }
+ j++;
+ }
+ }
+ }
+
+return FALSE;
+}
+
+#endif
+
#undef SSE2_COMPARE_TYPE_INDEX
#endif
@@ -4161,15 +4915,16 @@ static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1,
{
DEFINE_COMPILER;
struct sljit_label *start;
-struct sljit_jump *quit;
-struct sljit_jump *found;
+struct sljit_jump *match;
+struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-struct sljit_label *utf_start = NULL;
-struct sljit_jump *utf_quit = NULL;
-#endif
BOOL has_match_end = (common->match_end_ptr != 0);
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
+
+if (has_match_end)
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+
if (offset > 0)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
@@ -4177,76 +4932,21 @@ if (has_match_end)
{
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
- if (sljit_x86_is_cmov_available())
- {
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
- sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
- }
-#endif
- {
- quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
- OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- JUMPHERE(quit);
- }
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
+ CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
}
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-if (common->utf && offset > 0)
- utf_start = LABEL();
-#endif
-
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
/* SSE2 accelerated first character search. */
-if (sljit_x86_is_sse2_available())
+if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
{
- fast_forward_first_char2_sse2(common, char1, char2);
-
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
- if (common->mode == PCRE2_JIT_COMPLETE)
- {
- /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
- SLJIT_ASSERT(common->forced_quit_label == NULL);
- OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
- add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf && offset > 0)
- {
- SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
-
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
-#else
-#error "Unknown code width"
-#endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- }
-#endif
+ fast_forward_first_char2_sse2(common, char1, char2, offset);
- if (offset > 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
- }
- else if (sljit_x86_is_cmov_available())
- {
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
- }
- else
- {
- quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
- JUMPHERE(quit);
- }
+ if (offset > 0)
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
if (has_match_end)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
@@ -4255,85 +4955,56 @@ if (sljit_x86_is_sse2_available())
#endif
-quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-
start = LABEL();
+
+partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+ add_jump(compiler, &common->failed_match, partial_quit);
+
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (char1 == char2)
- found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
{
mask = char1 ^ char2;
if (is_powerof2(mask))
{
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
- found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
}
else
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- found = JUMP(SLJIT_NOT_ZERO);
+ match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
+ JUMPHERE(match);
}
}
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
-
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-if (common->utf && offset > 0)
- utf_quit = JUMP(SLJIT_JUMP);
-#endif
-
-JUMPHERE(found);
-
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
-#else
-#error "Unknown code width"
-#endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- JUMPHERE(utf_quit);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
+ jumpto_if_not_utf_char_start(compiler, TMP1, start);
}
#endif
-JUMPHERE(quit);
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+ JUMPHERE(partial_quit);
if (has_match_end)
- {
- quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
- if (offset > 0)
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
- JUMPHERE(quit);
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- }
-
-if (offset > 0)
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
-struct sljit_jump *quit;
struct sljit_jump *match;
-/* bytes[0] represent the number of characters between 0
-and MAX_N_BYTES - 1, 255 represents any character. */
-PCRE2_UCHAR chars[MAX_N_CHARS * MAX_DIFF_CHARS];
+fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
@@ -4344,7 +5015,10 @@ BOOL in_range;
sljit_u32 rec_count;
for (i = 0; i < MAX_N_CHARS; i++)
- chars[i * MAX_DIFF_CHARS] = 0;
+ {
+ chars[i].count = 0;
+ chars[i].last_count = 0;
+ }
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
@@ -4352,21 +5026,50 @@ max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
if (max < 1)
return FALSE;
+/* Convert last_count to priority. */
+for (i = 0; i < max; i++)
+ {
+ SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
+
+ if (chars[i].count == 1)
+ {
+ chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
+ /* Simplifies algorithms later. */
+ chars[i].chars[1] = chars[i].chars[0];
+ }
+ else if (chars[i].count == 2)
+ {
+ SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
+
+ if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
+ chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
+ else
+ chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
+ }
+ else
+ chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
+ }
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64)
+if (check_fast_forward_char_pair_sse2(common, chars, max))
+ return TRUE;
+#endif
+
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
{
- if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
+ if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
{
range_len = i - from;
range_right = i - 1;
}
- if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
+ if (i < max && chars[i].count < 255)
{
- SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
+ SLJIT_ASSERT(chars[i].count > 0);
if (!in_range)
{
in_range = TRUE;
@@ -4386,16 +5089,17 @@ if (range_right >= 0)
for (i = 0; i < range_len; i++)
{
- char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
- SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
- char_set_end = char_set + char_set[0];
- char_set++;
- while (char_set <= char_set_end)
+ SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
+
+ char_set = chars[range_right - i].chars;
+ char_set_end = char_set + chars[range_right - i].count;
+ do
{
if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
update_table[(*char_set) & 0xff] = IN_UCHARS(i);
char_set++;
}
+ while (char_set < char_set_end);
}
}
@@ -4403,54 +5107,38 @@ offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
{
+ if (range_right == i)
+ continue;
+
if (offset == -1)
{
- if (chars[i * MAX_DIFF_CHARS] <= 2)
+ if (chars[i].last_count >= 2)
offset = i;
}
- else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
- {
- if (chars[i * MAX_DIFF_CHARS] == 1)
- offset = i;
- else
- {
- mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
- if (!is_powerof2(mask))
- {
- mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
- if (is_powerof2(mask))
- offset = i;
- }
- }
- }
+ else if (chars[offset].last_count < chars[i].last_count)
+ offset = i;
}
+SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
+
if (range_right < 0)
{
if (offset < 0)
return FALSE;
- SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
/* Works regardless the value is 1 or 2. */
- mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
- fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
+ fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
return TRUE;
}
-if (range_right == offset)
- offset = -1;
-
-SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
+SLJIT_ASSERT(range_right != offset);
-max -= 1;
-SLJIT_ASSERT(max > 0);
if (common->match_end_ptr != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
- quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
- OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
- JUMPHERE(quit);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
+ CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
}
else
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
@@ -4462,7 +5150,7 @@ OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif
start = LABEL();
-quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
@@ -4483,20 +5171,20 @@ if (offset >= 0)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- if (chars[offset * MAX_DIFF_CHARS] == 1)
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
+ if (chars[offset].count == 1)
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
else
{
- mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
+ mask = chars[offset].chars[0] ^ chars[offset].chars[1];
if (is_powerof2(mask))
{
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
}
else
{
- match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
- CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
+ match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
JUMPHERE(match);
}
}
@@ -4512,15 +5200,9 @@ if (common->utf && offset != 0)
}
else
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
-#if PCRE2_CODE_UNIT_WIDTH == 8
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
-#else
-#error "Unknown code width"
-#endif
+
+ jumpto_if_not_utf_char_start(compiler, TMP1, start);
+
if (offset < 0)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
@@ -4529,33 +5211,20 @@ if (common->utf && offset != 0)
if (offset >= 0)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-JUMPHERE(quit);
-
if (common->match_end_ptr != 0)
- {
- if (range_right >= 0)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
- if (range_right >= 0)
- {
- quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
- OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
- JUMPHERE(quit);
- }
- }
else
OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
-#undef MAX_N_CHARS
-
-static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless)
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{
+PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
PCRE2_UCHAR oc;
oc = first_char;
-if (caseless)
+if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
{
oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
@@ -4593,8 +5262,8 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -4638,8 +5307,8 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
JUMPHERE(foundcr);
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -4654,79 +5323,75 @@ if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
-static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
+static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
-static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
+static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
+const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
-struct sljit_jump *quit;
-struct sljit_jump *found = NULL;
-jump_list *matches = NULL;
+struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
-struct sljit_jump *jump;
+struct sljit_jump *found = NULL;
#endif
+jump_list *matches = NULL;
if (common->match_end_ptr != 0)
{
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
+ CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
}
start = LABEL();
-quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+
+partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+ add_jump(compiler, &common->failed_match, partial_quit);
+
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-#ifdef SUPPORT_UNICODE
-if (common->utf)
- OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
-#endif
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
+if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
{
#if PCRE2_CODE_UNIT_WIDTH != 8
- jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
- JUMPHERE(jump);
+ if ((start_bits[31] & 0x80) != 0)
+ found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
+ else
+ CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
+#elif defined SUPPORT_UNICODE
+ if (common->utf && is_char7_bitset(start_bits, FALSE))
+ CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
- OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
- found = JUMP(SLJIT_NOT_ZERO);
+ if (sljit_get_register_index(TMP3) >= 0)
+ {
+ OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
+ }
+ else
+ {
+ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ }
+ JUMPTO(SLJIT_ZERO, start);
}
+else
+ set_jumps(matches, start);
-#ifdef SUPPORT_UNICODE
-if (common->utf)
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
-#endif
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#ifdef SUPPORT_UNICODE
-#if PCRE2_CODE_UNIT_WIDTH == 8
-if (common->utf)
- {
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- }
-#elif PCRE2_CODE_UNIT_WIDTH == 16
-if (common->utf)
- {
- CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
- }
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
-#endif /* SUPPORT_UNICODE */
-JUMPTO(SLJIT_JUMP, start);
+#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
JUMPHERE(found);
-if (matches != NULL)
- set_jumps(matches, LABEL());
-JUMPHERE(quit);
+#endif
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+ JUMPHERE(partial_quit);
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
@@ -4802,31 +5467,50 @@ struct sljit_jump *jump;
struct sljit_label *mainloop;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
-GET_LOCAL_BASE(TMP3, 0, 0);
+GET_LOCAL_BASE(TMP1, 0, 0);
/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
-OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
-jump = JUMP(SLJIT_SIG_LESS_EQUAL);
-
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
+jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
+
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+if (sljit_get_register_index (TMP3) < 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+ }
+else
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
+ GET_LOCAL_BASE(TMP1, 0, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
+ }
JUMPTO(SLJIT_JUMP, mainloop);
JUMPHERE(jump);
-jump = JUMP(SLJIT_SIG_LESS);
-/* End of dropping frames. */
+jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
+/* End of reverting values. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
-OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
-OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+if (sljit_get_register_index (TMP3) < 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+ }
+else
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
+ }
JUMPTO(SLJIT_JUMP, mainloop);
}
@@ -4859,11 +5543,11 @@ if (common->use_ucp)
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
}
@@ -4903,11 +5587,11 @@ if (common->use_ucp)
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
}
else
@@ -4935,15 +5619,15 @@ else
}
set_jumps(skipread_list, LABEL());
-OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
-static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
/* May destroy TMP1. */
DEFINE_COMPILER;
-int ranges[MAX_RANGE_SIZE];
+int ranges[MAX_CLASS_RANGE_SIZE];
sljit_u8 bit, cbit, all;
int i, byte, length = 0;
@@ -4961,7 +5645,7 @@ for (i = 0; i < 256; )
cbit = (bits[byte] >> (i & 0x7)) & 0x1;
if (cbit != bit)
{
- if (length >= MAX_RANGE_SIZE)
+ if (length >= MAX_CLASS_RANGE_SIZE)
return FALSE;
ranges[length] = i;
length++;
@@ -4974,7 +5658,7 @@ for (i = 0; i < 256; )
if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
{
- if (length >= MAX_RANGE_SIZE)
+ if (length >= MAX_CLASS_RANGE_SIZE)
return FALSE;
ranges[length] = 256;
length++;
@@ -5086,11 +5770,118 @@ switch(length)
return TRUE;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return FALSE;
}
}
+static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+{
+/* May destroy TMP1. */
+DEFINE_COMPILER;
+uint16_t char_list[MAX_CLASS_CHARS_SIZE];
+uint8_t byte;
+sljit_s32 type;
+int i, j, k, len, c;
+
+if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+ return FALSE;
+
+if (invert)
+ nclass = !nclass;
+
+len = 0;
+
+for (i = 0; i < 32; i++)
+ {
+ byte = bits[i];
+
+ if (nclass)
+ byte = ~byte;
+
+ j = 0;
+ while (byte != 0)
+ {
+ if (byte & 0x1)
+ {
+ c = i * 8 + j;
+
+ k = len;
+
+ if ((c & 0x20) != 0)
+ {
+ for (k = 0; k < len; k++)
+ if (char_list[k] == c - 0x20)
+ {
+ char_list[k] |= 0x120;
+ break;
+ }
+ }
+
+ if (k == len)
+ {
+ if (len >= MAX_CLASS_CHARS_SIZE)
+ return FALSE;
+
+ char_list[len++] = (uint16_t) c;
+ }
+ }
+
+ byte >>= 1;
+ j++;
+ }
+ }
+
+i = 0;
+j = 0;
+
+if (char_list[0] == 0)
+ {
+ i++;
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
+ }
+else
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
+
+while (i < len)
+ {
+ if ((char_list[i] & 0x100) != 0)
+ j++;
+ else
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
+ CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
+ }
+ i++;
+ }
+
+if (j != 0)
+ {
+ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
+
+ for (i = 0; i < len; i++)
+ if ((char_list[i] & 0x100) != 0)
+ {
+ j--;
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
+ CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
+ }
+ }
+
+type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
+add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
+return TRUE;
+}
+
+static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
+{
+/* May destroy TMP1. */
+if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
+ return TRUE;
+return optimize_class_chars(common, bits, nclass, invert, backtracks);
+}
+
static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
@@ -5099,22 +5890,22 @@ DEFINE_COMPILER;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5125,34 +5916,34 @@ DEFINE_COMPILER;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
-OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
+OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5165,22 +5956,22 @@ DEFINE_COMPILER;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5205,7 +5996,7 @@ label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
JUMPHERE(jump);
@@ -5249,7 +6040,7 @@ OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
-OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
+OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
JUMPHERE(jump);
@@ -5416,7 +6207,7 @@ do
#endif
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
context->ucharptr = 0;
@@ -5591,7 +6382,7 @@ while (*cc != XCL_END)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
cc += 2;
@@ -5609,13 +6400,13 @@ if ((cc[-1] & XCL_HASPROP) == 0)
if ((cc[-1] & XCL_MAP) != 0)
{
jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
- if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
+ if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
{
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
}
@@ -5636,7 +6427,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
#ifdef SUPPORT_UNICODE
charsaved = TRUE;
#endif
- if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
+ if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
{
#if PCRE2_CODE_UNIT_WIDTH == 8
jump = NULL;
@@ -5648,7 +6439,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
#if PCRE2_CODE_UNIT_WIDTH == 8
@@ -5667,6 +6458,15 @@ if (needstype || needsscript)
if (needschar && !charsaved)
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (!common->utf)
+ {
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
+ JUMPHERE(jump);
+ }
+#endif
+
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
@@ -5758,14 +6558,14 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
}
@@ -5784,14 +6584,14 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
}
@@ -5816,12 +6616,12 @@ while (*cc != XCL_END)
break;
case PT_LAMP:
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -5843,33 +6643,33 @@ while (*cc != XCL_END)
case PT_SPACE:
case PT_PXSPACE:
SET_CHAR_OFFSET(9);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
SET_TYPE_OFFSET(ucp_Zl);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_WORD:
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
/* Fall through. */
case PT_ALNUM:
SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
+ OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
SET_TYPE_OFFSET(ucp_Nd);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -5891,8 +6691,8 @@ while (*cc != XCL_END)
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
other_cases += 2;
}
else if (is_powerof2(other_cases[2] ^ other_cases[1]))
@@ -5904,63 +6704,63 @@ while (*cc != XCL_END)
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
- OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
+ OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
other_cases += 3;
}
else
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
}
while (*other_cases != NOTACHAR)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+ OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
}
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_UCNC:
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
SET_CHAR_OFFSET(0xa0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_PXGRAPH:
/* C and Z groups are the farthest two groups. */
SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
@@ -5969,21 +6769,21 @@ while (*cc != XCL_END)
case PT_PXPRINT:
/* C and Z groups are the farthest two groups. */
SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+ OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
- OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
@@ -5991,21 +6791,21 @@ while (*cc != XCL_END)
case PT_PXPUNCT:
SET_TYPE_OFFSET(ucp_Sc);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
- OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
SET_TYPE_OFFSET(ucp_Pc);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
cc += 2;
@@ -6051,6 +6851,7 @@ switch(type)
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
+ sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
@@ -6066,10 +6867,10 @@ switch(type)
else
{
jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
check_partial(common, TRUE);
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
@@ -6091,9 +6892,9 @@ switch(type)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
jump[2] = JUMP(SLJIT_GREATER);
- add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
/* Equal. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
@@ -6112,6 +6913,7 @@ switch(type)
read_char_range(common, common->nlmin, common->nlmax, TRUE);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
+ sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
}
@@ -6129,8 +6931,8 @@ switch(type)
case OP_DOLL:
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
+ OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
if (!common->endonly)
compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
@@ -6144,8 +6946,8 @@ switch(type)
case OP_DOLLM:
jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
+ OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
check_partial(common, FALSE);
jump[0] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[1]);
@@ -6182,16 +6984,16 @@ switch(type)
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
+ OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
return cc;
case OP_CIRCM:
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_E, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
- add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
+ OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));
jump[0] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[1]);
@@ -6229,7 +7031,7 @@ switch(type)
label = LABEL();
add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
skip_char_back(common);
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
@@ -6242,7 +7044,7 @@ switch(type)
check_start_used_ptr(common);
return cc + LINK_SIZE;
}
-SLJIT_ASSERT_STOP();
+SLJIT_UNREACHABLE();
return cc;
}
@@ -6273,7 +7075,7 @@ switch(type)
#endif
read_char8_type(common, type == OP_NOT_DIGIT);
/* Flip the starting bit in the negative case. */
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -6287,7 +7089,7 @@ switch(type)
else
#endif
read_char8_type(common, type == OP_NOT_WHITESPACE);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -6301,7 +7103,7 @@ switch(type)
else
#endif
read_char8_type(common, type == OP_NOT_WORDCHAR);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -6343,8 +7145,8 @@ switch(type)
#elif PCRE2_CODE_UNIT_WIDTH == 16
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
@@ -6404,6 +7206,7 @@ switch(type)
detect_partial_match(common, backtracks);
read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
+ sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
@@ -6413,6 +7216,7 @@ switch(type)
detect_partial_match(common, backtracks);
read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
+ sljit_set_current_flags(compiler, SLJIT_SET_Z);
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
@@ -6439,7 +7243,7 @@ switch(type)
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
JUMPTO(SLJIT_NOT_ZERO, label);
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
@@ -6582,7 +7386,7 @@ switch(type)
read_char_range(common, 0, 255, type == OP_NCLASS);
#endif
- if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+ if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
return cc + 32 / sizeof(PCRE2_UCHAR);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@@ -6609,7 +7413,7 @@ switch(type)
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
@@ -6626,7 +7430,7 @@ switch(type)
return cc + GET(cc, 0) - 1;
#endif
}
-SLJIT_ASSERT_STOP();
+SLJIT_UNREACHABLE();
return cc;
}
@@ -6812,9 +7616,9 @@ else
#endif /* SUPPORT_UNICODE */
{
if (ref)
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
else
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
if (withchecks)
jump = JUMP(SLJIT_ZERO);
@@ -6905,7 +7709,7 @@ switch(type)
cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
@@ -6919,7 +7723,7 @@ if (!minimize)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
/* Temporary release of STR_PTR. */
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Handles both invalid and empty cases. Since the minimum repeat,
is zero the invalid case is basically the same as an empty case. */
if (ref)
@@ -6932,7 +7736,7 @@ if (!minimize)
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
}
/* Restore if not zero length. */
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
}
else
{
@@ -7096,8 +7900,10 @@ if (entry == NULL)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
entry->next = NULL;
- entry->entry = NULL;
- entry->calls = NULL;
+ entry->entry_label = NULL;
+ entry->backtrack_label = NULL;
+ entry->entry_calls = NULL;
+ entry->backtrack_calls = NULL;
entry->start = start;
if (prev != NULL)
@@ -7106,71 +7912,73 @@ if (entry == NULL)
common->entries = entry;
}
-if (common->has_set_som && common->mark_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
- allocate_stack(common, 2);
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- }
-else if (common->has_set_som || common->mark_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
- allocate_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
- }
+BACKTRACK_AS(recurse_backtrack)->entry = entry;
-if (entry->entry == NULL)
- add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
+if (entry->entry_label == NULL)
+ add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
else
- JUMPTO(SLJIT_FAST_CALL, entry->entry);
+ JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
/* Leave if the match is failed. */
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
+BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
-PCRE2_SPTR begin = arguments->begin;
-PCRE2_SIZE *ovector = arguments->match_data->ovector;
-sljit_u32 oveccount = arguments->oveccount;
-sljit_u32 i;
+PCRE2_SPTR begin;
+PCRE2_SIZE *ovector;
+sljit_u32 oveccount, capture_top;
if (arguments->callout == NULL)
return 0;
+SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
+
+begin = arguments->begin;
+ovector = (PCRE2_SIZE*)(callout_block + 1);
+oveccount = callout_block->capture_top;
+
+SLJIT_ASSERT(oveccount >= 1);
+
callout_block->version = 1;
/* Offsets in subject. */
callout_block->subject_length = arguments->end - arguments->begin;
-callout_block->start_match = (PCRE2_SPTR)callout_block->subject - arguments->begin;
-callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - arguments->begin;
+callout_block->start_match = jit_ovector[0] - begin;
+callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
callout_block->subject = begin;
/* Convert and copy the JIT offset vector to the ovector array. */
-callout_block->capture_top = 0;
+callout_block->capture_top = 1;
callout_block->offset_vector = ovector;
-for (i = 2; i < oveccount; i += 2)
- {
- ovector[i] = jit_ovector[i] - begin;
- ovector[i + 1] = jit_ovector[i + 1] - begin;
- if (jit_ovector[i] >= begin)
- callout_block->capture_top = i;
- }
-callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
ovector[0] = PCRE2_UNSET;
ovector[1] = PCRE2_UNSET;
+ovector += 2;
+jit_ovector += 2;
+capture_top = 1;
+
+/* Convert pointers to sizes. */
+while (--oveccount != 0)
+ {
+ capture_top++;
+
+ ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
+ ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
+
+ if (ovector[0] != PCRE2_UNSET)
+ callout_block->capture_top = capture_top;
+
+ ovector += 2;
+ jit_ovector += 2;
+ }
+
return (arguments->callout)(callout_block, arguments->callout_data);
}
-/* Aligning to 8 byte. */
-#define CALLOUT_ARG_SIZE \
- (((int)sizeof(pcre2_callout_block) + 7) & ~7)
-
#define CALLOUT_ARG_OFFSET(arg) \
- (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(pcre2_callout_block, arg))
+ SLJIT_OFFSETOF(pcre2_callout_block, arg)
static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
@@ -7182,10 +7990,13 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
+sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
-allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
+
+allocate_stack(common, callout_arg_size);
SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
@@ -7193,11 +8004,10 @@ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
+OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
/* These pointer sized fields temporarly stores internal variables. */
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
if (common->mark_ptr != 0)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
@@ -7225,20 +8035,21 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_pt
/* Needed to save important temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
-OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
+/* SLJIT_R0 = arguments */
+OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
+free_stack(common, callout_arg_size);
/* Check return value. */
-OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
-if (common->forced_quit_label == NULL)
- add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS));
+if (common->abort_label == NULL)
+ add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
else
- JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label);
+ JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}
@@ -7280,6 +8091,7 @@ static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPT
DEFINE_COMPILER;
int framesize;
int extrasize;
+BOOL local_quit_available = FALSE;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
@@ -7290,13 +8102,13 @@ jump_list *tmp = NULL;
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
jump_list **found;
/* Saving previous accept variables. */
-BOOL save_local_exit = common->local_exit;
-BOOL save_positive_assert = common->positive_assert;
+BOOL save_local_quit_available = common->local_quit_available;
+BOOL save_in_positive_assertion = common->in_positive_assertion;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
-jump_list *save_positive_assert_quit = common->positive_assert_quit;
+jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;
@@ -7363,7 +8175,7 @@ else
allocate_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+ OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
if (needs_control_head)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
@@ -7378,21 +8190,21 @@ else
else
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
- init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
+ init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
}
memset(&altbacktrack, 0, sizeof(backtrack_common));
-if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
{
- /* Negative assert is stronger than positive assert. */
- common->local_exit = TRUE;
+ /* Control verbs cannot escape from these asserts. */
+ local_quit_available = TRUE;
+ common->local_quit_available = TRUE;
common->quit_label = NULL;
common->quit = NULL;
- common->positive_assert = FALSE;
}
-else
- common->positive_assert = TRUE;
-common->positive_assert_quit = NULL;
+
+common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
+common->positive_assertion_quit = NULL;
while (1)
{
@@ -7408,16 +8220,16 @@ while (1)
compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
- if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ if (local_quit_available)
{
- common->local_exit = save_local_exit;
+ common->local_quit_available = save_local_quit_available;
common->quit_label = save_quit_label;
common->quit = save_quit;
}
- common->positive_assert = save_positive_assert;
+ common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
- common->positive_assert_quit = save_positive_assert_quit;
+ common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return NULL;
}
@@ -7434,23 +8246,24 @@ while (1)
free_stack(common, extrasize);
if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
else
{
if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
else
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
}
}
@@ -7460,25 +8273,25 @@ while (1)
if (conditional)
{
if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
}
else if (bra == OP_BRAZERO)
{
if (framesize < 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
else
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else if (framesize >= 0)
{
/* For OP_BRA and OP_BRAMINZERO. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
}
}
add_jump(compiler, found, JUMP(SLJIT_JUMP));
@@ -7486,16 +8299,16 @@ while (1)
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
- if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+ if (local_quit_available)
{
- common->local_exit = save_local_exit;
+ common->local_quit_available = save_local_quit_available;
common->quit_label = save_quit_label;
common->quit = save_quit;
}
- common->positive_assert = save_positive_assert;
+ common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
- common->positive_assert_quit = save_positive_assert_quit;
+ common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return NULL;
}
@@ -7508,26 +8321,26 @@ while (1)
cc += GET(cc, 1);
}
-if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+if (local_quit_available)
{
- SLJIT_ASSERT(common->positive_assert_quit == NULL);
+ SLJIT_ASSERT(common->positive_assertion_quit == NULL);
/* Makes the check less complicated below. */
- common->positive_assert_quit = common->quit;
+ common->positive_assertion_quit = common->quit;
}
/* None of them matched. */
-if (common->positive_assert_quit != NULL)
+if (common->positive_assertion_quit != NULL)
{
jump = JUMP(SLJIT_JUMP);
- set_jumps(common->positive_assert_quit, LABEL());
+ set_jumps(common->positive_assertion_quit, LABEL());
SLJIT_ASSERT(framesize != no_stack);
if (framesize < 0)
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
else
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
}
JUMPHERE(jump);
}
@@ -7576,18 +8389,18 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
/* We know that STR_PTR was stored on the top of the stack. */
if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
/* Keep the STR_PTR on the top of the stack. */
if (bra == OP_BRAZERO)
{
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
if (extrasize == 2)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
}
else if (bra == OP_BRAMINZERO)
{
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
}
@@ -7596,13 +8409,13 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
if (bra == OP_BRA)
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
}
else
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
if (extrasize == 2)
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
@@ -7630,7 +8443,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
set_jumps(backtrack->common.topbacktracks, LABEL());
}
@@ -7683,16 +8498,16 @@ else
}
}
-if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
+if (local_quit_available)
{
- common->local_exit = save_local_exit;
+ common->local_quit_available = save_local_quit_available;
common->quit_label = save_quit_label;
common->quit = save_quit;
}
-common->positive_assert = save_positive_assert;
+common->in_positive_assertion = save_in_positive_assertion;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
-common->positive_assert_quit = save_positive_assert_quit;
+common->positive_assertion_quit = save_positive_assertion_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
@@ -7717,23 +8532,23 @@ if (framesize < 0)
}
if (needs_control_head)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
/* TMP2 which is set here used by OP_KETRMAX below. */
if (ket == OP_KETRMAX)
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
else if (ket == OP_KETRMIN)
{
/* Move the STR_PTR to the private_data_ptr. */
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
}
else
{
stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
if (needs_control_head)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
if (ket == OP_KETRMAX)
{
@@ -7820,7 +8635,6 @@ return stacksize;
(|) OP_*BRA | OP_ALT ... M A
(?()|) OP_*COND | OP_ALT M A
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A
- (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
Or nothing, if trace is unnecessary
*/
@@ -7888,8 +8702,6 @@ if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
-if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
- opcode = OP_ONCE;
if (opcode == OP_CBRA || opcode == OP_SCBRA)
{
@@ -7966,7 +8778,7 @@ if (bra == OP_BRAMINZERO)
{
/* Except when the whole stack frame must be saved. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
+ braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
}
JUMPHERE(skip);
}
@@ -8039,7 +8851,7 @@ if (opcode == OP_ONCE)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
+ OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
}
else if (ket == OP_KETRMAX || has_alternatives)
@@ -8057,7 +8869,7 @@ if (opcode == OP_ONCE)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+ OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
stacksize = needs_control_head ? 1 : 0;
if (ket != OP_KET || has_alternatives)
@@ -8072,7 +8884,7 @@ if (opcode == OP_ONCE)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
}
- init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
+ init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
}
}
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
@@ -8129,13 +8941,13 @@ if (opcode == OP_COND || opcode == OP_SCOND)
slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
slot += common->name_entry_size;
i--;
while (i-- > 0)
{
OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
- OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
+ OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
slot += common->name_entry_size;
}
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
@@ -8288,7 +9100,7 @@ if (ket == OP_KETRMAX)
{
if (has_alternatives)
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
/* Drop STR_PTR for greedy plus quantifier. */
if (opcode != OP_ONCE)
@@ -8318,7 +9130,7 @@ if (ket == OP_KETRMAX)
if (repeat_type == OP_EXACT)
{
count_match(common);
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, rmax_label);
}
else if (repeat_type == OP_UPTO)
@@ -8346,6 +9158,7 @@ if (bra == OP_BRAMINZERO)
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
}
else if (ket == OP_KETRMIN && opcode != OP_ONCE)
free_stack(common, 1);
@@ -8418,7 +9231,7 @@ switch(opcode)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
@@ -8496,7 +9309,7 @@ else
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
if (needs_control_head)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
+ OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
stack = 0;
if (!zero)
@@ -8515,7 +9328,7 @@ else
stack++;
}
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
- init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
+ init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
stack -= 1 + (offset == 0);
}
@@ -8568,7 +9381,7 @@ while (*cc != OP_KETRPOS)
{
if (offset != 0)
{
- OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
@@ -8579,10 +9392,10 @@ while (*cc != OP_KETRPOS)
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
if (opcode == OP_SBRAPOS)
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
}
/* Even if the match is empty, we need to reset the control head. */
@@ -8628,7 +9441,7 @@ while (*cc != OP_KETRPOS)
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
}
}
@@ -8645,7 +9458,7 @@ if (!zero)
if (framesize < 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
else /* TMP2 is set to [private_data_ptr] above. */
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
}
/* None of them matched. */
@@ -8868,7 +9681,7 @@ if (exact > 1)
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
@@ -8876,7 +9689,7 @@ if (exact > 1)
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
@@ -8906,7 +9719,7 @@ switch(opcode)
if (opcode == OP_UPTO)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
jump = JUMP(SLJIT_ZERO);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
}
@@ -8968,7 +9781,7 @@ switch(opcode)
label = LABEL();
if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
}
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
@@ -8988,7 +9801,7 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
}
@@ -9015,7 +9828,7 @@ switch(opcode)
if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
@@ -9044,7 +9857,7 @@ switch(opcode)
if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
else
@@ -9070,7 +9883,7 @@ switch(opcode)
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
if (opcode == OP_UPTO)
{
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
@@ -9157,7 +9970,7 @@ switch(opcode)
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
@@ -9168,7 +9981,7 @@ switch(opcode)
label = LABEL();
detect_partial_match(common, &no_match);
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_char1_match, LABEL());
@@ -9186,7 +9999,7 @@ switch(opcode)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
@@ -9207,6 +10020,9 @@ if (*cc == OP_FAIL)
return cc + 1;
}
+if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
+ add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
{
/* No need to check notempty conditions. */
@@ -9223,9 +10039,9 @@ else
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
-OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
+OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label == NULL)
add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
else
@@ -9309,7 +10125,7 @@ size = 3 + (size < 0 ? 0 : size);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, size);
if (size > 3)
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
+ OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
else
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
@@ -9318,7 +10134,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
size = BACKTRACK_AS(then_trap_backtrack)->framesize;
if (size >= 0)
- init_frame(common, cc, ccend, size - 1, 0, FALSE);
+ init_frame(common, cc, ccend, size - 1, 0);
}
static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
@@ -9540,7 +10356,6 @@ while (cc < ccend)
break;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -9615,7 +10430,7 @@ while (cc < ccend)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return;
}
if (cc == NULL)
@@ -9723,7 +10538,7 @@ switch(opcode)
case OP_MINUPTO:
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
@@ -9769,7 +10584,7 @@ switch(opcode)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
@@ -9804,27 +10619,21 @@ free_stack(common, ref ? 2 : 3);
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
+recurse_entry *entry;
-if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
- compile_backtrackingpath(common, current->top);
-set_jumps(current->topbacktracks, LABEL());
-if (CURRENT_AS(recurse_backtrack)->inlined_pattern)
- return;
-
-if (common->has_set_som && common->mark_ptr != 0)
+if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
- free_stack(common, 2);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
- }
-else if (common->has_set_som || common->mark_ptr != 0)
- {
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
- free_stack(common, 1);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0);
+ entry = CURRENT_AS(recurse_backtrack)->entry;
+ if (entry->backtrack_label == NULL)
+ add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
+ else
+ JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
+ CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
}
+else
+ compile_backtrackingpath(common, current->top);
+
+set_jumps(current->topbacktracks, LABEL());
}
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -9877,7 +10686,9 @@ if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
set_jumps(current->topbacktracks, LABEL());
}
@@ -9887,7 +10698,7 @@ else
if (bra == OP_BRAZERO)
{
/* We know there is enough place on the stack. */
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
JUMPHERE(brajump);
@@ -9947,8 +10758,6 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA)
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
opcode = OP_SCOND;
-if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
- opcode = OP_ONCE;
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
@@ -10000,7 +10809,7 @@ else if (ket == OP_KETRMIN)
else
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
+ CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
}
/* Drop STR_PTR for non-greedy plus quantifier. */
if (opcode != OP_ONCE)
@@ -10054,6 +10863,7 @@ if (SLJIT_UNLIKELY(opcode == OP_ONCE))
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
}
once = JUMP(SLJIT_JUMP);
}
@@ -10106,7 +10916,9 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
}
cond = JUMP(SLJIT_JUMP);
set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
@@ -10247,7 +11059,9 @@ if (has_alternatives)
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
}
JUMPHERE(cond);
}
@@ -10302,7 +11116,7 @@ else if (opcode == OP_ONCE)
JUMPHERE(once);
/* Restore previous private_data_ptr */
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
else if (ket == OP_KETRMIN)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
@@ -10383,6 +11197,7 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
if (current->topbacktracks)
{
@@ -10392,7 +11207,7 @@ if (current->topbacktracks)
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
JUMPHERE(jump);
}
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
}
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -10438,22 +11253,23 @@ if (opcode == OP_THEN || opcode == OP_THEN_ARG)
jump = JUMP(SLJIT_JUMP);
loop = LABEL();
- OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
JUMPHERE(jump);
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
- CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
+ CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
+ CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
return;
}
- else if (common->positive_assert)
+ else if (!common->local_quit_available && common->in_positive_assertion)
{
- add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
+ add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
return;
}
}
-if (common->local_exit)
+if (common->local_quit_available)
{
+ /* Abort match with a fail. */
if (common->quit_label == NULL)
add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
else
@@ -10504,7 +11320,10 @@ jump = JUMP(SLJIT_JUMP);
set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
/* STACK_TOP is set by THEN. */
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
+ {
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
+ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
+ }
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);
@@ -10621,7 +11440,6 @@ while (current)
break;
case OP_ONCE:
- case OP_ONCE_NC:
case OP_BRA:
case OP_CBRA:
case OP_COND:
@@ -10670,7 +11488,7 @@ while (current)
break;
case OP_COMMIT:
- if (!common->local_exit)
+ if (!common->local_quit_available)
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
if (common->quit_label == NULL)
add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
@@ -10692,7 +11510,7 @@ while (current)
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
current = current->prev;
@@ -10707,38 +11525,52 @@ PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
-int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
-int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
-int alternativesize;
-BOOL needs_frame;
+BOOL has_quit;
+BOOL has_accept;
+int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
+int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
-struct sljit_jump *jump;
+jump_list *match = NULL;
+sljit_uw *next_update_addr = NULL;
+struct sljit_jump *alt1 = NULL;
+struct sljit_jump *alt2 = NULL;
+struct sljit_jump *accept_exit = NULL;
+struct sljit_label *quit;
/* Recurse captures then. */
common->then_trap = NULL;
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
-needs_frame = framesize >= 0;
-if (!needs_frame)
- framesize = 0;
-alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
-SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
-common->currententry->entry = LABEL();
-set_jumps(common->currententry->calls, common->currententry->entry);
+alt_max = no_alternatives(cc);
+alt_count = 0;
+
+/* Matching path. */
+SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
+common->currententry->entry_label = LABEL();
+set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);
-allocate_stack(common, private_data_size + framesize + alternativesize);
-OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
-copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+
+local_size = (alt_max > 1) ? 2 : 1;
+
+/* (Reversed) stack layout:
+ [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
+
+allocate_stack(common, private_data_size + local_size);
+/* Save return address. */
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
+
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
+
+/* This variable is saved and restored all time when we enter or exit from a recursive context. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
-if (needs_frame)
- init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);
-if (alternativesize > 0)
+if (alt_max > 1)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
memset(&altbacktrack, 0, sizeof(backtrack_common));
@@ -10760,7 +11592,75 @@ while (1)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
- add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
+ allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
+
+ if (alt_max > 1 || has_accept)
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
+
+ add_jump(compiler, &match, JUMP(SLJIT_JUMP));
+
+ if (alt_count == 0)
+ {
+ /* Backtracking path entry. */
+ SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
+ common->currententry->backtrack_label = LABEL();
+ set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
+
+ sljit_emit_fast_enter(compiler, TMP1, 0);
+
+ if (has_accept)
+ accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
+
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ /* Save return address. */
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
+
+ copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
+
+ if (alt_max > 1)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ free_stack(common, 2);
+
+ if (alt_max > 4)
+ {
+ /* Table jump if alt_max is greater than 4. */
+ next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
+ if (SLJIT_UNLIKELY(next_update_addr == NULL))
+ return;
+ sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
+ add_label_addr(common, next_update_addr++);
+ }
+ else
+ {
+ if (alt_max == 4)
+ alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+ alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
+ }
+ }
+ else
+ free_stack(common, has_accept ? 2 : 1);
+ }
+ else if (alt_max > 4)
+ add_label_addr(common, next_update_addr++);
+ else
+ {
+ if (alt_count != 2 * sizeof(sljit_uw))
+ {
+ JUMPHERE(alt1);
+ if (alt_max == 3 && alt_count == sizeof(sljit_uw))
+ alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
+ }
+ else
+ {
+ JUMPHERE(alt2);
+ if (alt_max == 4)
+ alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
+ }
+ }
+
+ alt_count += sizeof(sljit_uw);
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
@@ -10774,55 +11674,65 @@ while (1)
cc += GET(cc, 1);
}
-/* None of them matched. */
-OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
-jump = JUMP(SLJIT_JUMP);
+/* No alternative is matched. */
+
+quit = LABEL();
+
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
+free_stack(common, private_data_size + local_size);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+sljit_emit_fast_return(compiler, TMP2, 0);
if (common->quit != NULL)
{
+ SLJIT_ASSERT(has_quit);
+
set_jumps(common->quit, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- if (needs_frame)
- {
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
- }
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
- common->quit = NULL;
- add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
+ JUMPTO(SLJIT_JUMP, quit);
}
-set_jumps(common->accept, LABEL());
-OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
-if (needs_frame)
+if (has_accept)
{
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
- add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
- }
-OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
+ JUMPHERE(accept_exit);
+ free_stack(common, 2);
-JUMPHERE(jump);
-if (common->quit != NULL)
- set_jumps(common->quit, LABEL());
-copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
-free_stack(common, private_data_size + framesize + alternativesize);
-if (needs_control_head)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
+ /* Save return address. */
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
+
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
+
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
+ free_stack(common, private_data_size + local_size);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
+ sljit_emit_fast_return(compiler, TMP2, 0);
}
-else
+
+if (common->accept != NULL)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
- OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
+ SLJIT_ASSERT(has_accept);
+
+ set_jumps(common->accept, LABEL());
+
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
+ OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
+
+ allocate_stack(common, 2);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
}
-sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
+
+set_jumps(match, LABEL());
+
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
+
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
+sljit_emit_fast_return(compiler, TMP2, 0);
}
#undef COMPILE_BACKTRACKINGPATH
@@ -10854,11 +11764,13 @@ struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;
+struct sljit_jump *end_anchor_failed = NULL;
SLJIT_ASSERT(tables);
memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
+common->re = re;
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
@@ -11076,7 +11988,7 @@ if (common->control_head_ptr != 0)
/* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0)
{
- mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options);
+ mainloop_label = mainloop_entry(common);
continue_match_label = LABEL();
/* Forward search if possible. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
@@ -11084,11 +11996,11 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
;
else if ((re->flags & PCRE2_FIRSTSET) != 0)
- fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0);
+ fast_forward_first_char(common);
else if ((re->flags & PCRE2_STARTLINE) != 0)
fast_forward_newline(common);
else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
- fast_forward_start_bits(common, re->start_bitmap);
+ fast_forward_start_bits(common);
}
}
else
@@ -11135,6 +12047,9 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return PCRE2_ERROR_NOMEMORY;
}
+if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
+ end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
+
if (common->might_be_empty)
{
empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
@@ -11147,15 +12062,26 @@ if (common->accept != NULL)
/* This means we have a match. Update the ovector. */
copy_ovector(common, re->top_bracket + 1);
-common->quit_label = common->forced_quit_label = LABEL();
+common->quit_label = common->abort_label = LABEL();
if (common->quit != NULL)
set_jumps(common->quit, common->quit_label);
-if (common->forced_quit != NULL)
- set_jumps(common->forced_quit, common->forced_quit_label);
+if (common->abort != NULL)
+ set_jumps(common->abort, common->abort_label);
if (minlength_check_failed != NULL)
- SET_LABEL(minlength_check_failed, common->forced_quit_label);
+ SET_LABEL(minlength_check_failed, common->abort_label);
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
+if (common->failed_match != NULL)
+ {
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+ set_jumps(common->failed_match, LABEL());
+ OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
+ JUMPTO(SLJIT_JUMP, common->abort_label);
+ }
+
+if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
+ JUMPHERE(end_anchor_failed);
+
if (mode != PCRE2_JIT_COMPLETE)
{
common->partialmatchlabel = LABEL();
@@ -11236,9 +12162,9 @@ if (common->might_be_empty)
JUMPHERE(empty_match);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
- OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
+ OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
JUMPTO(SLJIT_ZERO, empty_match_found_label);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
@@ -11249,7 +12175,7 @@ common->fast_forward_bc_ptr = NULL;
common->fast_fail_start_ptr = 0;
common->fast_fail_end_ptr = 0;
common->currententry = common->entries;
-common->local_exit = TRUE;
+common->local_quit_available = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
{
@@ -11266,7 +12192,7 @@ while (common->currententry != NULL)
flush_stubs(common);
common->currententry = common->currententry->next;
}
-common->local_exit = FALSE;
+common->local_quit_available = FALSE;
common->quit_label = quit_label;
/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
@@ -11278,7 +12204,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
-OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
+OP2(SLJIT_SUB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_match.c b/src/3rdparty/pcre2/src/pcre2_jit_match.c
index a323971ff3..4cad754c75 100644
--- a/src/3rdparty/pcre2/src/pcre2_jit_match.c
+++ b/src/3rdparty/pcre2/src/pcre2_jit_match.c
@@ -49,10 +49,10 @@ static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_f
sljit_u8 local_space[MACHINE_STACK_SIZE];
struct sljit_stack local_stack;
-local_stack.top = (sljit_sw)&local_space;
-local_stack.base = local_stack.top;
-local_stack.limit = local_stack.base + MACHINE_STACK_SIZE;
-local_stack.max_limit = local_stack.limit;
+local_stack.max_limit = local_space;
+local_stack.limit = local_space;
+local_stack.base = local_space + MACHINE_STACK_SIZE;
+local_stack.top = local_space + MACHINE_STACK_SIZE;
arguments->stack = &local_stack;
return executable_func(arguments);
}
diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c
index 0763a239e1..050b7e93ec 100644
--- a/src/3rdparty/pcre2/src/pcre2_match.c
+++ b/src/3rdparty/pcre2/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2015-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,17 +43,31 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
-#define NLBLOCK mb /* Block containing newline information */
-#define PSSTART start_subject /* Field containing processed string start */
-#define PSEND end_subject /* Field containing processed string end */
+/* These defines enables debugging code */
+
+//#define DEBUG_FRAMES_DISPLAY
+//#define DEBUG_SHOW_OPS
+//#define DEBUG_SHOW_RMATCH
+
+#ifdef DEBUG_FRAME_DISPLAY
+#include <stdarg.h>
+#endif
+
+/* These defines identify the name of the block containing "static"
+information, and fields within it. */
+
+#define NLBLOCK mb /* Block containing newline information */
+#define PSSTART start_subject /* Field containing processed string start */
+#define PSEND end_subject /* Field containing processed string end */
#include "pcre2_internal.h"
-/* Masks for identifying the public options that are permitted at match time.
-*/
+#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
+
+/* Masks for identifying the public options that are permitted at match time. */
#define PUBLIC_MATCH_OPTIONS \
- (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
+ (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
@@ -61,60 +75,252 @@ POSSIBILITY OF SUCH DAMAGE.
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
-/* The mb->capture_last field uses the lower 16 bits for the last captured
-substring (which can never be greater than 65535) and a bit in the top half
-to mean "capture vector overflowed". This odd way of doing things was
-implemented when it was realized that preserving and restoring the overflow bit
-whenever the last capture number was saved/restored made for a neater
-interface, and doing it this way saved on (a) another variable, which would
-have increased the stack frame size (a big NO-NO in PCRE) and (b) another
-separate set of save/restore instructions. The following defines are used in
-implementing this. */
-
-#define CAPLMASK 0x0000ffff /* The bits used for last_capture */
-#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
-#define OVFLBIT 0x00010000 /* The bit that is set for overflow */
-
-/* Bits for setting in mb->match_function_type to indicate two special types
-of call to match(). We do it this way to save on using another stack variable,
-as stack usage is to be discouraged. */
-
-#define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
-#define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
-
-/* Non-error returns from the match() function. Error returns are externally
-defined PCRE2_ERROR_xxx codes, which are all negative. */
+/* Non-error returns from and within the match() function. Error returns are
+externally defined PCRE2_ERROR_xxx codes, which are all negative. */
#define MATCH_MATCH 1
#define MATCH_NOMATCH 0
-/* Special internal returns from the match() function. Make them sufficiently
-negative to avoid the external error codes. */
+/* Special internal returns used in the match() function. Make them
+sufficiently negative to avoid the external error codes. */
#define MATCH_ACCEPT (-999)
#define MATCH_KETRPOS (-998)
-#define MATCH_ONCE (-997)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range. */
-#define MATCH_COMMIT (-996)
-#define MATCH_PRUNE (-995)
-#define MATCH_SKIP (-994)
-#define MATCH_SKIP_ARG (-993)
-#define MATCH_THEN (-992)
+#define MATCH_COMMIT (-997)
+#define MATCH_PRUNE (-996)
+#define MATCH_SKIP (-995)
+#define MATCH_SKIP_ARG (-994)
+#define MATCH_THEN (-993)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
-/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+/* Group frame type values. Zero means the frame is not a group frame. The
+lower 16 bits are used for data (e.g. the capture number). Group frames are
+used for most groups so that information about the start is easily available at
+the end without having to scan back through intermediate frames (backtrack
+points). */
+
+#define GF_CAPTURE 0x00010000u
+#define GF_NOCAPTURE 0x00020000u
+#define GF_CONDASSERT 0x00030000u
+#define GF_RECURSE 0x00040000u
-static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
-static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
+/* Masks for the identity and data parts of the group frame type. */
-/* Maximum number of ovector elements that can be saved on the system stack
-when processing OP_RECURSE in non-HEAP_MATCH_RECURSE mode. If the ovector is
-bigger, malloc() is used. This value should be a multiple of 3, because the
-ovector length is always a multiple of 3. */
+#define GF_IDMASK(a) ((a) & 0xffff0000u)
+#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
-#define OP_RECURSE_STACK_SAVE_MAX 45
+/* Repetition types */
+
+enum { REPTYPE_MIN, REPTYPE_MAX, REPTYPE_POS };
+
+/* Min and max values for the common repeats; a maximum of UINT32_MAX =>
+infinity. */
+
+static const uint32_t rep_min[] = {
+ 0, 0, /* * and *? */
+ 1, 1, /* + and +? */
+ 0, 0, /* ? and ?? */
+ 0, 0, /* dummy placefillers for OP_CR[MIN]RANGE */
+ 0, 1, 0 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
+
+static const uint32_t rep_max[] = {
+ UINT32_MAX, UINT32_MAX, /* * and *? */
+ UINT32_MAX, UINT32_MAX, /* + and +? */
+ 1, 1, /* ? and ?? */
+ 0, 0, /* dummy placefillers for OP_CR[MIN]RANGE */
+ UINT32_MAX, UINT32_MAX, 1 }; /* OP_CRPOS{STAR, PLUS, QUERY} */
+
+/* Repetition types - must include OP_CRPOSRANGE (not needed above) */
+
+static const uint32_t rep_typ[] = {
+ REPTYPE_MAX, REPTYPE_MIN, /* * and *? */
+ REPTYPE_MAX, REPTYPE_MIN, /* + and +? */
+ REPTYPE_MAX, REPTYPE_MIN, /* ? and ?? */
+ REPTYPE_MAX, REPTYPE_MIN, /* OP_CRRANGE and OP_CRMINRANGE */
+ REPTYPE_POS, REPTYPE_POS, /* OP_CRPOSSTAR, OP_CRPOSPLUS */
+ REPTYPE_POS, REPTYPE_POS }; /* OP_CRPOSQUERY, OP_CRPOSRANGE */
+
+/* Numbers for RMATCH calls at backtracking points. When these lists are
+changed, the code at RETURN_SWITCH below must be updated in sync. */
+
+enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
+ RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
+ RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
+ RM31, RM32, RM33, RM34, RM35 };
+
+#ifdef SUPPORT_WIDE_CHARS
+enum { RM100=100, RM101 };
+#endif
+
+#ifdef SUPPORT_UNICODE
+enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
+ RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215,
+ RM216, RM217, RM218, RM219, RM220, RM221, RM222 };
+#endif
+
+/* Define short names for general fields in the current backtrack frame, which
+is always pointed to by the F variable. Occasional references to fields in
+other frames are written out explicitly. There are also some fields in the
+current frame whose names start with "temp" that are used for short-term,
+localised backtracking memory. These are #defined with Lxxx names at the point
+of use and undefined afterwards. */
+
+#define Fback_frame F->back_frame
+#define Fcapture_last F->capture_last
+#define Fcurrent_recurse F->current_recurse
+#define Fecode F->ecode
+#define Feptr F->eptr
+#define Fgroup_frame_type F->group_frame_type
+#define Flast_group_offset F->last_group_offset
+#define Flength F->length
+#define Fmark F->mark
+#define Frdepth F->rdepth
+#define Fstart_match F->start_match
+#define Foffset_top F->offset_top
+#define Foccu F->occu
+#define Fop F->op
+#define Fovector F->ovector
+#define Freturn_id F->return_id
+
+
+#ifdef DEBUG_FRAMES_DISPLAY
+/*************************************************
+* Display current frames and contents *
+*************************************************/
+
+/* This debugging function displays the current set of frames and their
+contents. It is not called automatically from anywhere, the intention being
+that calls can be inserted where necessary when debugging frame-related
+problems.
+
+Arguments:
+ f the file to write to
+ F the current top frame
+ P a previous frame of interest
+ frame_size the frame size
+ mb points to the match block
+ s identification text
+
+Returns: nothing
+*/
+
+static void
+display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
+ match_block *mb, const char *s, ...)
+{
+uint32_t i;
+heapframe *Q;
+va_list ap;
+va_start(ap, s);
+
+fprintf(f, "FRAMES ");
+vfprintf(f, s, ap);
+va_end(ap);
+
+if (P != NULL) fprintf(f, " P=%lu",
+ ((char *)P - (char *)(mb->match_frames))/frame_size);
+fprintf(f, "\n");
+
+for (i = 0, Q = mb->match_frames;
+ Q <= F;
+ i++, Q = (heapframe *)((char *)Q + frame_size))
+ {
+ fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
+ i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
+ Q->back_frame, Q->return_id);
+
+ if (Q->last_group_offset == PCRE2_UNSET)
+ fprintf(f, " lgoffset=unset\n");
+ else
+ fprintf(f, " lgoffset=%lu\n", Q->last_group_offset/frame_size);
+ }
+}
+
+#endif
+
+
+
+/*************************************************
+* Process a callout *
+*************************************************/
+
+/* This function is called for all callouts, whether "standalone" or at the
+start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
+OP_CALLOUT_STR.
+
+Arguments:
+ F points to the current backtracking frame
+ mb points to the match block
+ lengthptr where to return the length of the callout item
+
+Returns: the return from the callout
+ or 0 if no callout function exists
+*/
+
+static int
+do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
+{
+int rc;
+PCRE2_SIZE save0, save1;
+PCRE2_SIZE *callout_ovector;
+pcre2_callout_block cb;
+
+*lengthptr = (*Fecode == OP_CALLOUT)?
+ PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
+
+if (mb->callout == NULL) return 0; /* No callout function provided */
+
+/* The original matching code (pre 10.30) worked directly with the ovector
+passed by the user, and this was passed to callouts. Now that the working
+ovector is in the backtracking frame, it no longer needs to reserve space for
+the overall match offsets (which would waste space in the frame). For backward
+compatibility, however, we pass capture_top and offset_vector to the callout as
+if for the extended ovector, and we ensure that the first two slots are unset
+by preserving and restoring their current contents. Picky compilers complain if
+references such as Fovector[-2] are use directly, so we set up a separate
+pointer. */
+
+callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
+
+cb.version = 1;
+cb.capture_top = (uint32_t)Foffset_top/2 + 1;
+cb.capture_last = Fcapture_last;
+cb.offset_vector = callout_ovector;
+cb.mark = mb->nomatch_mark;
+cb.subject = mb->start_subject;
+cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
+cb.start_match = (PCRE2_SIZE)(Fstart_match - mb->start_subject);
+cb.current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
+cb.pattern_position = GET(Fecode, 1);
+cb.next_item_length = GET(Fecode, 1 + LINK_SIZE);
+
+if (*Fecode == OP_CALLOUT) /* Numerical callout */
+ {
+ cb.callout_number = Fecode[1 + 2*LINK_SIZE];
+ cb.callout_string_offset = 0;
+ cb.callout_string = NULL;
+ cb.callout_string_length = 0;
+ }
+else /* String callout */
+ {
+ cb.callout_number = 0;
+ cb.callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
+ cb.callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
+ cb.callout_string_length =
+ *lengthptr - (1 + 4*LINK_SIZE) - 2;
+ }
+
+save0 = callout_ovector[0];
+save1 = callout_ovector[1];
+callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
+rc = mb->callout(&cb, mb->callout_data);
+callout_ovector[0] = save0;
+callout_ovector[1] = save1;
+return rc;
+}
@@ -130,10 +336,9 @@ seems unlikely.)
Arguments:
offset index into the offset vector
- offset_top top of the used offset vector
- eptr pointer into the subject
- mb points to match block
caseless TRUE if caseless
+ F the current backtracking frame pointer
+ mb points to match block
lengthptr pointer for returning the length matched
Returns: = 0 sucessful match; number of code units matched is set
@@ -142,21 +347,18 @@ Returns: = 0 sucessful match; number of code units matched is set
*/
static int
-match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
- match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
+match_ref(PCRE2_SIZE offset, BOOL caseless, heapframe *F, match_block *mb,
+ PCRE2_SIZE *lengthptr)
{
-#if defined SUPPORT_UNICODE
-BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
-#endif
-
-register PCRE2_SPTR p;
+PCRE2_SPTR p;
PCRE2_SIZE length;
-PCRE2_SPTR eptr_start = eptr;
+PCRE2_SPTR eptr;
+PCRE2_SPTR eptr_start;
/* Deal with an unset group. The default is no match, but there is an option to
match an empty string. */
-if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
+if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
{
if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
{
@@ -168,19 +370,20 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
/* Separate the caseless and UTF cases for speed. */
-p = mb->start_subject + mb->ovector[offset];
-length = mb->ovector[offset+1] - mb->ovector[offset];
+eptr = eptr_start = Feptr;
+p = mb->start_subject + Fovector[offset];
+length = Fovector[offset+1] - Fovector[offset];
if (caseless)
{
#if defined SUPPORT_UNICODE
- if (utf)
+ if ((mb->poptions & PCRE2_UTF) != 0)
{
/* Match characters up to the end of the reference. NOTE: the number of
code units matched may differ, because in UTF-8 there are some characters
- whose upper and lower case versions code have different numbers of bytes.
- For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
- (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
+ whose upper and lower case codes have different numbers of bytes. For
+ example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
+ bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
sequence of two of the latter. It is important, therefore, to check the
length along the reference, not along the subject (earlier code did this
wrong). */
@@ -226,14 +429,26 @@ if (caseless)
}
/* In the caseful case, we can just compare the code units, whether or not we
-are in UTF mode. */
+are in UTF mode. When partial matching, we have to do this unit-by-unit. */
else
{
- for (; length > 0; length--)
+ if (mb->partial != 0)
+ {
+ for (; length > 0; length--)
+ {
+ if (eptr >= mb->end_subject) return 1; /* Partial match */
+ if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /* No match */
+ }
+ }
+
+ /* Not partial matching */
+
+ else
{
- if (eptr >= mb->end_subject) return 1; /* Partial match */
- if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /*No match */
+ if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
+ if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
+ eptr += length;
}
}
@@ -243,281 +458,73 @@ return 0; /* Match */
-/***************************************************************************
-****************************************************************************
- RECURSION IN THE match() FUNCTION
-
-The match() function is highly recursive, though not every recursive call
-increases the recursion depth. Nevertheless, some regular expressions can cause
-it to recurse to a great depth. I was writing for Unix, so I just let it call
-itself recursively. This uses the stack for saving everything that has to be
-saved for a recursive call. On Unix, the stack can be large, and this works
-fine.
-
-It turns out that on some non-Unix-like systems there are problems with
-programs that use a lot of stack. (This despite the fact that every last chip
-has oodles of memory these days, and techniques for extending the stack have
-been known for decades.) So....
-
-There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
-recursive calls by keeping local variables that need to be preserved in blocks
-of memory on the heap instead instead of on the stack. Macros are used to
-achieve this so that the actual code doesn't look very different to what it
-always used to.
-
-The original heap-recursive code used longjmp(). However, it seems that this
-can be very slow on some operating systems. Following a suggestion from Stan
-Switzer, the use of longjmp() has been abolished, at the cost of having to
-provide a unique number for each call to RMATCH. There is no way of generating
-a sequence of numbers at compile time in C. I have given them names, to make
-them stand out more clearly.
-
-Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
-FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
-tests. Furthermore, not using longjmp() means that local dynamic variables
-don't have indeterminate values; this has meant that the frame size can be
-reduced because the result can be "passed back" by straight setting of the
-variable instead of being passed in the frame.
-****************************************************************************
-***************************************************************************/
-
-/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
-below must be updated in sync. */
-
-enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
- RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
- RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
- RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
- RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
- RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
-
-/* These versions of the macros use the stack, as normal. Note that the "rw"
-argument of RMATCH isn't actually used in this definition. */
-
-#ifndef HEAP_MATCH_RECURSE
-#define REGISTER register
-#define RMATCH(ra,rb,rc,rd,re,rw) \
- rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
-#define RRETURN(ra) return ra
-#else
-
-/* These versions of the macros manage a private stack on the heap. Note that
-the "rd" argument of RMATCH isn't actually used in this definition. It's the mb
-argument of match(), which never changes. */
-
-#define REGISTER
-
-#define RMATCH(ra,rb,rc,rd,re,rw)\
- {\
- heapframe *newframe = frame->Xnextframe;\
- if (newframe == NULL)\
- {\
- newframe = (heapframe *)(mb->stack_memctl.malloc)\
- (sizeof(heapframe), mb->stack_memctl.memory_data);\
- if (newframe == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);\
- newframe->Xnextframe = NULL;\
- frame->Xnextframe = newframe;\
- }\
- frame->Xwhere = rw;\
- newframe->Xeptr = ra;\
- newframe->Xecode = rb;\
- newframe->Xmstart = mstart;\
- newframe->Xoffset_top = rc;\
- newframe->Xeptrb = re;\
- newframe->Xrdepth = frame->Xrdepth + 1;\
- newframe->Xprevframe = frame;\
- frame = newframe;\
- goto HEAP_RECURSE;\
- L_##rw:;\
- }
-
-#define RRETURN(ra)\
- {\
- heapframe *oldframe = frame;\
- frame = oldframe->Xprevframe;\
- if (frame != NULL)\
- {\
- rrc = ra;\
- goto HEAP_RETURN;\
- }\
- return ra;\
- }
-
-
-/* Structure for remembering the local variables in a private frame. Arrange it
-so as to minimize the number of holes. */
-
-typedef struct heapframe {
- struct heapframe *Xprevframe;
- struct heapframe *Xnextframe;
-
-#ifdef SUPPORT_UNICODE
- PCRE2_SPTR Xcharptr;
-#endif
- PCRE2_SPTR Xeptr;
- PCRE2_SPTR Xecode;
- PCRE2_SPTR Xmstart;
- PCRE2_SPTR Xcallpat;
- PCRE2_SPTR Xdata;
- PCRE2_SPTR Xnext_ecode;
- PCRE2_SPTR Xpp;
- PCRE2_SPTR Xprev;
- PCRE2_SPTR Xsaved_eptr;
-
- eptrblock *Xeptrb;
-
- PCRE2_SIZE Xlength;
- PCRE2_SIZE Xoffset;
- PCRE2_SIZE Xoffset_top;
- PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
-
- uint32_t Xfc;
- uint32_t Xnumber;
- uint32_t Xrdepth;
- uint32_t Xop;
- uint32_t Xsave_capture_last;
-
-#ifdef SUPPORT_UNICODE
- uint32_t Xprop_value;
- int Xprop_type;
- int Xprop_fail_result;
- int Xoclength;
-#endif
-
- int Xcodelink;
- int Xctype;
- int Xfi;
- int Xmax;
- int Xmin;
- int Xwhere; /* Where to jump back to */
-
- BOOL Xcondition;
- BOOL Xcur_is_word;
- BOOL Xprev_is_word;
-
- eptrblock Xnewptrb;
- recursion_info Xnew_recursive;
-
-#ifdef SUPPORT_UNICODE
- PCRE2_UCHAR Xocchars[6];
-#endif
-} heapframe;
-
-#endif
-
-
-/***************************************************************************
-***************************************************************************/
+/******************************************************************************
+*******************************************************************************
+ "Recursion" in the match() function
+The original match() function was highly recursive, but this proved to be the
+source of a number of problems over the years, mostly because of the relatively
+small system stacks that are commonly found. As new features were added to
+patterns, various kludges were invented to reduce the amount of stack used,
+making the code hard to understand in places.
-/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
-backtrack points by calling itself recursively in all but one case. The one
-special case is when processing OP_RECURSE, which specifies recursion in the
-pattern. The entire ovector must be saved and restored while processing
-OP_RECURSE. If the ovector is small enough, instead of calling match()
-directly, op_recurse_ovecsave() is called. This function uses the system stack
-to save the ovector while calling match() to process the pattern recursion. */
+A version did exist that used individual frames on the heap instead of calling
+match() recursively, but this ran substantially slower. The current version is
+a refactoring that uses a vector of frames to remember backtracking points.
+This runs no slower, and possibly even a bit faster than the original recursive
+implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
+50 frames) is allocated on the system stack. If this is not big enough, the
+heap is used for a larger vector.
-#ifndef HEAP_MATCH_RECURSE
+*******************************************************************************
+******************************************************************************/
-/* We need a prototype for match() because it is mutually recursive with
-op_recurse_ovecsave(). */
-static int
-match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
- PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
/*************************************************
-* Process OP_RECURSE, stacking ovector *
+* Macros for the match() function *
*************************************************/
-/* When this function is called, mb->recursive has already been updated to
-point to a new recursion data block, and all its fields other than ovec_save
-have been set.
-
-This function exists so that the local vector variable ovecsave is no longer
-defined in the match() function, as it was in PCRE1. It is used only when there
-is recursion in the pattern, so it wastes a lot of stack to have it defined for
-every call of match(). We now use this function as an indirect way of calling
-match() only in the case when ovecsave is needed. (David Wheeler used to say
-"All problems in computer science can be solved by another level of
-indirection.")
-
-HOWEVER: when this file is compiled by gcc in an optimizing mode, because this
-function is called only once, and only from within match(), gcc will "inline"
-it - that is, move it inside match() - and this completely negates its reason
-for existence. Therefore, we mark it as non-inline when gcc is in use.
+/* These macros pack up tests that are used for partial matching several times
+in the code. We set the "hit end" flag if the pointer is at the end of the
+subject and also past the earliest inspected character (i.e. something has been
+matched, even if not part of the actual matched string). For hard partial
+matching, we then return immediately. The second one is used when we already
+know we are past the end of the subject. */
-Arguments:
- eptr pointer to current character in subject
- callpat the recursion point in the pattern
- mstart pointer to the current match start position (can be modified
- by encountering \K)
- offset_top current top pointer (highest ovector offset used + 1)
- mb pointer to "static" info block for the match
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- rdepth the recursion depth
-
-Returns: a match() return code
-*/
-
-static int
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-__attribute__ ((noinline))
-#endif
-op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
- PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
- uint32_t rdepth)
-{
-register int rrc;
-BOOL cbegroup = *callpat >= OP_SBRA;
-recursion_info *new_recursive = mb->recursive;
-PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
-
-/* Save the ovector */
+#define CHECK_PARTIAL()\
+ if (mb->partial != 0 && Feptr >= mb->end_subject && \
+ Feptr > mb->start_used_ptr) \
+ { \
+ mb->hitend = TRUE; \
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
+ }
-new_recursive->ovec_save = ovecsave;
-memcpy(ovecsave, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE));
+#define SCHECK_PARTIAL()\
+ if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
+ { \
+ mb->hitend = TRUE; \
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
+ }
-/* Do the recursion. After processing each alternative, restore the ovector
-data and the last captured value. */
+/* These macros are used to implement backtracking. They simulate a recursive
+call to the match() function by means of a local vector of frames which
+remember the backtracking points. */
-do
- {
- if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
- rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
- mb, eptrb, rdepth + 1);
- memcpy(mb->ovector, new_recursive->ovec_save,
- mb->offset_end * sizeof(PCRE2_SIZE));
- mb->capture_last = new_recursive->saved_capture_last;
- if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
-
- /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
- recursion; they cause a NOMATCH for the entire recursion. These codes
- are defined in a range that can be tested for. */
-
- if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
- return MATCH_NOMATCH;
-
- /* Any return code other than NOMATCH is an error. Otherwise, advance to the
- next alternative or to the end of the recursing subpattern. If there were
- nested recursions, mb->recursive might be changed, so reset it before
- looping. */
-
- if (rrc != MATCH_NOMATCH) return rrc;
- mb->recursive = new_recursive;
- callpat += GET(callpat, 1);
+#define RMATCH(ra,rb)\
+ {\
+ start_ecode = ra;\
+ Freturn_id = rb;\
+ goto MATCH_RECURSE;\
+ L_##rb:;\
}
-while (*callpat == OP_ALT); /* Loop for the alternatives */
-
-/* None of the alternatives matched. */
-return MATCH_NOMATCH;
-}
-#endif /* HEAP_MATCH_RECURSE */
+#define RRETURN(ra)\
+ {\
+ rrc = ra;\
+ goto RETURN_SWITCH;\
+ }
@@ -525,2470 +532,1270 @@ return MATCH_NOMATCH;
* Match from current position *
*************************************************/
-/* This function is called recursively in many circumstances. Whenever it
-returns a negative (error) response, the outer incarnation must also return the
-same response. */
-
-/* These macros pack up tests that are used for partial matching, and which
-appear several times in the code. We set the "hit end" flag if the pointer is
-at the end of the subject and also past the earliest inspected character (i.e.
-something has been matched, even if not part of the actual matched string). For
-hard partial matching, we then return immediately. The second one is used when
-we already know we are past the end of the subject. */
+/* This function is called to run one match attempt at a single starting point
+in the subject.
-#define CHECK_PARTIAL()\
- if (mb->partial != 0 && eptr >= mb->end_subject && \
- eptr > mb->start_used_ptr) \
- { \
- mb->hitend = TRUE; \
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL); \
- }
-
-#define SCHECK_PARTIAL()\
- if (mb->partial != 0 && eptr > mb->start_used_ptr) \
- { \
- mb->hitend = TRUE; \
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL); \
- }
-
-
-/* Performance note: It might be tempting to extract commonly used fields from
-the mb structure (e.g. utf, end_subject) into individual variables to improve
+Performance note: It might be tempting to extract commonly used fields from the
+mb structure (e.g. end_subject) into individual variables to improve
performance. Tests using gcc on a SPARC disproved this; in the first case, it
made performance worse.
Arguments:
- eptr pointer to current character in subject
- ecode pointer to current position in compiled code
- mstart pointer to the current match start position (can be modified
- by encountering \K)
- offset_top current top pointer (highest ovector offset used + 1)
- mb pointer to "static" info block for the match
- eptrb pointer to chain of blocks containing eptr at start of
- brackets - for testing for empty matches
- rdepth the recursion depth
-
-Returns: MATCH_MATCH if matched ) these values are >= 0
- MATCH_NOMATCH if failed to match )
- a negative MATCH_xxx value for PRUNE, SKIP, etc
- a negative PCRE2_ERROR_xxx value if aborted by an error condition
- (e.g. stopped by repeated call or recursion limit)
+ start_eptr starting character in subject
+ start_ecode starting position in compiled code
+ ovector pointer to the final output vector
+ oveccount number of pairs in ovector
+ top_bracket number of capturing parentheses in the pattern
+ frame_size size of each backtracking frame
+ mb pointer to "static" variables block
+
+Returns: MATCH_MATCH if matched ) these values are >= 0
+ MATCH_NOMATCH if failed to match )
+ negative MATCH_xxx value for PRUNE, SKIP, etc
+ negative PCRE2_ERROR_xxx value if aborted by an error condition
+ (e.g. stopped by repeated call or depth limit)
*/
static int
-match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
- PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
+match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
+ uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
+ match_block *mb)
{
-/* These variables do not need to be preserved over recursion in this function,
-so they can be ordinary variables in all cases. Mark some of them with
-"register" because they are used a lot in loops. */
-
-register int rrc; /* Returns from recursive calls */
-register int i; /* Used for loops not involving calls to RMATCH() */
-register uint32_t c; /* Character values not kept over RMATCH() calls */
-register BOOL utf; /* Local copy of UTF flag for speed */
+/* Frame-handling variables */
-BOOL minimize, possessive; /* Quantifier options */
-BOOL caseless;
-int condcode;
+heapframe *F; /* Current frame pointer */
+heapframe *N = NULL; /* Temporary frame pointers */
+heapframe *P = NULL;
+heapframe *assert_accept_frame; /* For passing back the frame with captures */
+PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
-/* When recursion is not being used, all "local" variables that have to be
-preserved over calls to RMATCH() are part of a "frame". We set up the top-level
-frame on the stack here; subsequent instantiations are obtained from the heap
-whenever RMATCH() does a "recursion". See the macro definitions above. Putting
-the top-level on the stack rather than malloc-ing them all gives a performance
-boost in many cases where there is not much "recursion". */
+/* Local variables that do not need to be preserved over calls to RRMATCH(). */
-#ifdef HEAP_MATCH_RECURSE
-heapframe *frame = (heapframe *)mb->match_frames_base;
+PCRE2_SPTR bracode; /* Temp pointer to start of group */
+PCRE2_SIZE offset; /* Used for group offsets */
+PCRE2_SIZE length; /* Used for various length calculations */
-/* Copy in the original argument variables */
-
-frame->Xeptr = eptr;
-frame->Xecode = ecode;
-frame->Xmstart = mstart;
-frame->Xoffset_top = offset_top;
-frame->Xeptrb = eptrb;
-frame->Xrdepth = rdepth;
-
-/* This is where control jumps back to to effect "recursion" */
-
-HEAP_RECURSE:
+int rrc; /* Return from functions & backtracking "recursions" */
+#ifdef SUPPORT_UNICODE
+int proptype; /* Type of character property */
+#endif
-/* Macros make the argument variables come from the current frame */
+uint32_t i; /* Used for local loops */
+uint32_t fc; /* Character values */
+uint32_t number; /* Used for group and other numbers */
+uint32_t reptype = 0; /* Type of repetition (0 to avoid compiler warning) */
+uint32_t group_frame_type; /* Specifies type for new group frames */
-#define eptr frame->Xeptr
-#define ecode frame->Xecode
-#define mstart frame->Xmstart
-#define offset_top frame->Xoffset_top
-#define eptrb frame->Xeptrb
-#define rdepth frame->Xrdepth
+BOOL condition; /* Used in conditional groups */
+BOOL cur_is_word; /* Used in "word" tests */
+BOOL prev_is_word; /* Used in "word" tests */
-/* Ditto for the local variables */
+/* UTF flag */
#ifdef SUPPORT_UNICODE
-#define charptr frame->Xcharptr
-#define prop_value frame->Xprop_value
-#define prop_type frame->Xprop_type
-#define prop_fail_result frame->Xprop_fail_result
-#define oclength frame->Xoclength
-#define occhars frame->Xocchars
+BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
+#else
+BOOL utf = FALSE;
#endif
+/* This is the length of the last part of a backtracking frame that must be
+copied when a new frame is created. */
-#define callpat frame->Xcallpat
-#define codelink frame->Xcodelink
-#define data frame->Xdata
-#define next_ecode frame->Xnext_ecode
-#define pp frame->Xpp
-#define prev frame->Xprev
-#define saved_eptr frame->Xsaved_eptr
-
-#define new_recursive frame->Xnew_recursive
-
-#define ctype frame->Xctype
-#define fc frame->Xfc
-#define fi frame->Xfi
-#define length frame->Xlength
-#define max frame->Xmax
-#define min frame->Xmin
-#define number frame->Xnumber
-#define offset frame->Xoffset
-#define op frame->Xop
-#define save_capture_last frame->Xsave_capture_last
-#define save_offset1 frame->Xsave_offset1
-#define save_offset2 frame->Xsave_offset2
-#define save_offset3 frame->Xsave_offset3
-
-#define condition frame->Xcondition
-#define cur_is_word frame->Xcur_is_word
-#define prev_is_word frame->Xprev_is_word
-
-#define newptrb frame->Xnewptrb
-
-/* When normal stack-based recursion is being used for match(), local variables
-are allocated on the stack and get preserved during recursion in the usual way.
-In this environment, fi and i, and fc and c, can be the same variables. */
-
-#else /* HEAP_MATCH_RECURSE not defined */
-#define fi i
-#define fc c
-
-/* Many of the following variables are used only in small blocks of the code.
-My normal style of coding would have declared them within each of those blocks.
-However, in order to accommodate the version of this code that uses an external
-"stack" implemented on the heap, it is easier to declare them all here, so the
-declarations can be cut out in a block. The only declarations within blocks
-below are for variables that do not have to be preserved over a recursive call
-to RMATCH(). */
+frame_copy_size = frame_size - offsetof(heapframe, eptr);
-#ifdef SUPPORT_UNICODE
-PCRE2_SPTR charptr;
-#endif
-PCRE2_SPTR callpat;
-PCRE2_SPTR data;
-PCRE2_SPTR next_ecode;
-PCRE2_SPTR pp;
-PCRE2_SPTR prev;
-PCRE2_SPTR saved_eptr;
-
-PCRE2_SIZE length;
-PCRE2_SIZE offset;
-PCRE2_SIZE save_offset1, save_offset2, save_offset3;
+/* Set up the first current frame at the start of the vector, and initialize
+fields that are not reset for new frames. */
-uint32_t number;
-uint32_t op;
-uint32_t save_capture_last;
+F = mb->match_frames;
+Frdepth = 0; /* "Recursion" depth */
+Fcapture_last = 0; /* Number of most recent capture */
+Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */
+Fstart_match = Feptr = start_eptr; /* Current data pointer and start match */
+Fmark = NULL; /* Most recent mark */
+Foffset_top = 0; /* End of captures within the frame */
+Flast_group_offset = PCRE2_UNSET; /* Saved frame of most recent group */
+group_frame_type = 0; /* Not a start of group frame */
+goto NEW_FRAME; /* Start processing with this frame */
-#ifdef SUPPORT_UNICODE
-uint32_t prop_value;
-int prop_type;
-int prop_fail_result;
-int oclength;
-PCRE2_UCHAR occhars[6];
-#endif
+/* Come back here when we want to create a new frame for remembering a
+backtracking point. */
-int codelink;
-int ctype;
-int max;
-int min;
+MATCH_RECURSE:
-BOOL condition;
-BOOL cur_is_word;
-BOOL prev_is_word;
+/* Set up a new backtracking frame. If the vector is full, get a new one
+on the heap, doubling the size, but constrained by the heap limit. */
-eptrblock newptrb;
-recursion_info new_recursive;
-#endif /* HEAP_MATCH_RECURSE not defined */
+N = (heapframe *)((char *)F + frame_size);
+if (N >= mb->match_frames_top)
+ {
+ PCRE2_SIZE newsize = mb->frame_vector_size * 2;
+ heapframe *new;
-/* To save space on the stack and in the heap frame, I have doubled up on some
-of the local variables that are used only in localised parts of the code, but
-still need to be preserved over recursive calls of match(). These macros define
-the alternative names that are used. */
+ if ((newsize / 1024) > mb->heap_limit)
+ {
+ PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
+ if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
+ newsize = maxsize;
+ }
-#define allow_zero cur_is_word
-#define cbegroup condition
-#define code_offset codelink
-#define condassert condition
-#define foc number
-#define matched_once prev_is_word
-#define save_mark data
+ new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
+ if (new == NULL) return PCRE2_ERROR_NOMEMORY;
+ memcpy(new, mb->match_frames, mb->frame_vector_size);
-/* These statements are here to stop the compiler complaining about unitialized
-variables. */
+ F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
+ N = (heapframe *)((char *)F + frame_size);
-#ifdef SUPPORT_UNICODE
-prop_value = 0;
-prop_fail_result = 0;
-#endif
+ if (mb->match_frames != mb->stack_frames)
+ mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
+ mb->match_frames = new;
+ mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
+ mb->frame_vector_size = newsize;
+ }
+#ifdef DEBUG_SHOW_RMATCH
+fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
+if (group_frame_type != 0)
+ {
+ fprintf(stderr, " type=%x ", group_frame_type);
+ switch (GF_IDMASK(group_frame_type))
+ {
+ case GF_CAPTURE:
+ fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
+ break;
-/* This label is used for tail recursion, which is used in a few cases even
-when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
-that is used. Thanks to Ian Taylor for noticing this possibility and sending
-the original patch. */
+ case GF_NOCAPTURE:
+ fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
+ break;
-TAIL_RECURSE:
+ case GF_CONDASSERT:
+ fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
+ break;
-/* OK, now we can get on with the real code of the function. Recursive calls
-are specified by the macro RMATCH and RRETURN is used to return. When
-HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
-match() and a "return", respectively. However, RMATCH isn't like a function
-call because it's quite a complicated macro. It has to be used in one
-particular way. This shouldn't, however, impact performance when true recursion
-is being used. */
+ case GF_RECURSE:
+ fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
+ break;
-#ifdef SUPPORT_UNICODE
-utf = (mb->poptions & PCRE2_UTF) != 0;
-#else
-utf = FALSE;
+ default:
+ fprintf(stderr, "*** unknown ***");
+ break;
+ }
+ }
+fprintf(stderr, "\n");
#endif
-/* First check that we haven't called match() too many times, or that we
-haven't exceeded the recursive call limit. */
+/* Copy those fields that must be copied into the new frame, increase the
+"recursion" depth (i.e. the new frame's index) and then make the new frame
+current. */
+
+memcpy((char *)N + offsetof(heapframe, eptr),
+ (char *)F + offsetof(heapframe, eptr),
+ frame_copy_size);
-if (mb->match_call_count++ >= mb->match_limit) RRETURN(PCRE2_ERROR_MATCHLIMIT);
-if (rdepth >= mb->match_limit_recursion) RRETURN(PCRE2_ERROR_RECURSIONLIMIT);
+N->rdepth = Frdepth + 1;
+F = N;
-/* At the start of a group with an unlimited repeat that may match an empty
-string, the variable mb->match_function_type contains the MATCH_CBEGROUP bit.
-It is done this way to save having to use another function argument, which
-would take up space on the stack. See also MATCH_CONDASSERT below.
+/* Carry on processing with a new frame. */
-When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
-such remembered pointers, to be checked when we hit the closing ket, in order
-to break infinite loops that match no characters. When match() is called in
-other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
-NOT be used with tail recursion, because the memory block that is used is on
-the stack, so a new one may be required for each match(). */
+NEW_FRAME:
+Fgroup_frame_type = group_frame_type;
+Fecode = start_ecode; /* Starting code pointer */
+Fback_frame = frame_size; /* Default is go back one frame */
-if ((mb->match_function_type & MATCH_CBEGROUP) != 0)
+/* If this is a special type of group frame, remember its offset for quick
+access at the end of the group. If this is a recursion, set a new current
+recursion value. */
+
+if (group_frame_type != 0)
{
- newptrb.epb_saved_eptr = eptr;
- newptrb.epb_prev = eptrb;
- eptrb = &newptrb;
- mb->match_function_type &= ~MATCH_CBEGROUP;
+ Flast_group_offset = (char *)F - (char *)mb->match_frames;
+ if (GF_IDMASK(group_frame_type) == GF_RECURSE)
+ Fcurrent_recurse = GF_DATAMASK(group_frame_type);
+ group_frame_type = 0;
}
-/* Now, at last, we can start processing the opcodes. */
+
+/* ========================================================================= */
+/* This is the main processing loop. First check that we haven't recorded too
+many backtracks (search tree is too large), or that we haven't exceeded the
+recursive depth limit (used too many backtracking frames). If not, process the
+opcodes. */
+
+if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
+if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
for (;;)
{
- minimize = possessive = FALSE;
- op = *ecode;
+#ifdef DEBUG_SHOW_OPS
+fprintf(stderr, "++ op=%d\n", *Fecode);
+#endif
- switch(op)
+ Fop = *Fecode;
+ switch(Fop)
{
- case OP_MARK:
- mb->nomatch_mark = ecode + 2;
- mb->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
- eptrb, RM55);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- mb->mark == NULL) mb->mark = ecode + 2;
-
- /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
- argument, and we must check whether that argument matches this MARK's
- argument. It is passed back in mb->start_match_ptr (an overloading of that
- variable). If it does match, we reset that variable to the current subject
- position and return MATCH_SKIP. Otherwise, pass back the return code
- unaltered. */
+ /* ===================================================================== */
+ /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
+ any currently open capturing brackets. Unlike reaching the end of a group,
+ where we know the starting frame is at the top of the chained frames, in
+ this case we have to search back for the relevant frame in case other types
+ of group that use chained frames have intervened. Multiple OP_CLOSEs always
+ come innermost first, which matches the chain order. We can ignore this in
+ a recursion, because captures are not passed out of recursions. */
- else if (rrc == MATCH_SKIP_ARG &&
- PRIV(strcmp)(ecode + 2, mb->start_match_ptr) == 0)
+ case OP_CLOSE:
+ if (Fcurrent_recurse == RECURSE_UNSET)
{
- mb->start_match_ptr = eptr;
- RRETURN(MATCH_SKIP);
+ number = GET2(Fecode, 1);
+ offset = Flast_group_offset;
+ for(;;)
+ {
+ if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
+ N = (heapframe *)((char *)mb->match_frames + offset);
+ P = (heapframe *)((char *)N - frame_size);
+ if (N->group_frame_type == (GF_CAPTURE | number)) break;
+ offset = P->last_group_offset;
+ }
+ offset = (number << 1) - 2;
+ Fcapture_last = number;
+ Fovector[offset] = P->eptr - mb->start_subject;
+ Fovector[offset+1] = Feptr - mb->start_subject;
+ if (offset >= Foffset_top) Foffset_top = offset + 2;
}
- RRETURN(rrc);
-
- case OP_FAIL:
- RRETURN(MATCH_NOMATCH);
-
- case OP_COMMIT:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM52);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_COMMIT);
-
- case OP_PRUNE:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM51);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
-
- case OP_PRUNE_ARG:
- mb->nomatch_mark = ecode + 2;
- mb->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
- eptrb, RM56);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- mb->mark == NULL) mb->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- RRETURN(MATCH_PRUNE);
-
- case OP_SKIP:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM53);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->start_match_ptr = eptr; /* Pass back current position */
- RRETURN(MATCH_SKIP);
-
- /* Note that, for Perl compatibility, SKIP with an argument does NOT set
- nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
- not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
- that failed and any that precede it (either they also failed, or were not
- triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
- SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
- set to the count of the one that failed. */
+ Fecode += PRIV(OP_lengths)[*Fecode];
+ break;
- case OP_SKIP_ARG:
- mb->skip_arg_count++;
- if (mb->skip_arg_count <= mb->ignore_skip_arg)
- {
- ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
- break;
- }
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, mb,
- eptrb, RM57);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- /* Pass back the current skip name by overloading mb->start_match_ptr and
- returning the special MATCH_SKIP_ARG return code. This will either be
- caught by a matching MARK, or get to the top, where it causes a rematch
- with mb->ignore_skip_arg set to the value of mb->skip_arg_count. */
+ /* ===================================================================== */
+ /* Real or forced end of the pattern, assertion, or recursion. In an
+ assertion ACCEPT, update the last used pointer and remember the current
+ frame so that the captures can be fished out of it. */
- mb->start_match_ptr = ecode + 2;
- RRETURN(MATCH_SKIP_ARG);
+ case OP_ASSERT_ACCEPT:
+ if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
+ assert_accept_frame = F;
+ RRETURN(MATCH_ACCEPT);
- /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
- the branch in which it occurs can be determined. Overload the start of
- match pointer to do this. */
+ /* If recursing, we have to find the most recent recursion. */
- case OP_THEN:
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM54);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->start_match_ptr = ecode;
- RRETURN(MATCH_THEN);
+ case OP_ACCEPT:
+ case OP_END:
- case OP_THEN_ARG:
- mb->nomatch_mark = ecode + 2;
- mb->mark = NULL; /* In case previously set by assertion */
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
- mb, eptrb, RM58);
- if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
- mb->mark == NULL) mb->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->start_match_ptr = ecode;
- RRETURN(MATCH_THEN);
+ /* Handle end of a recursion. */
- /* Handle an atomic group that does not contain any capturing parentheses.
- This can be handled like an assertion. Prior to 8.13, all atomic groups
- were handled this way. In 8.13, the code was changed as below for ONCE, so
- that backups pass through the group and thereby reset captured values.
- However, this uses a lot more stack, so in 8.20, atomic groups that do not
- contain any captures generate OP_ONCE_NC, which can be handled in the old,
- less stack intensive way.
-
- Check the alternative branches in turn - the matching won't pass the KET
- for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer, but resetting
- the start-of-match value in case it was changed by \K. */
-
- case OP_ONCE_NC:
- prev = ecode;
- saved_eptr = eptr;
- save_mark = mb->mark;
- do
+ if (Fcurrent_recurse != RECURSE_UNSET)
{
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM64);
- if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
+ offset = Flast_group_offset;
+ for(;;)
{
- mstart = mb->start_match_ptr;
- break;
+ if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
+ N = (heapframe *)((char *)mb->match_frames + offset);
+ P = (heapframe *)((char *)N - frame_size);
+ if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
+ offset = P->last_group_offset;
}
- if (rrc == MATCH_THEN)
- {
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
-
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode,1);
- mb->mark = save_mark;
- }
- while (*ecode == OP_ALT);
- /* If hit the end of the group (which could be repeated), fail */
+ /* N is now the frame of the recursion; the previous frame is at the
+ OP_RECURSE position. Go back there, copying the current subject position
+ and mark, and move on past the OP_RECURSE. */
- if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
-
- /* Continue as from after the group, updating the offsets high water
- mark, since extracts may have been taken. */
-
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
-
- offset_top = mb->end_offset_top;
- eptr = mb->end_match_ptr;
+ P->eptr = Feptr;
+ P->mark = Fmark;
+ F = P;
+ Fecode += 1 + LINK_SIZE;
+ continue;
+ }
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. */
+ /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
+ is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
+ start of the subject. In both cases, backtracking will then try other
+ alternatives, if any. */
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1+LINK_SIZE;
- break;
- }
+ if (Feptr == Fstart_match &&
+ ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
+ ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
+ Fstart_match == mb->start_subject + mb->start_offset)))
+ RRETURN(MATCH_NOMATCH);
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. The second "call" of match()
- uses tail recursion, to avoid using another stack frame. */
+ /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
+ the end of the subject. After (*ACCEPT) we fail the entire match (at this
+ position) but backtrack on reaching the end of the pattern. */
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM65);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode = prev;
- goto TAIL_RECURSE;
- }
- else /* OP_KETRMAX */
+ if (Feptr < mb->end_subject &&
+ ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
{
- RMATCH(eptr, prev, offset_top, mb, eptrb, RM66);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += 1 + LINK_SIZE;
- goto TAIL_RECURSE;
+ if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
+ return MATCH_NOMATCH;
}
- /* Control never gets here */
- /* Handle a capturing bracket, other than those that are possessive with an
- unlimited repeat. If there is space in the offset vector, save the current
- subject position in the working slot at the top of the vector. We mustn't
- change the current values of the data slot, because they may be set from a
- previous iteration of this group, and be referred to by a reference inside
- the group. A failure to match might occur after the group has succeeded,
- if something later on doesn't match. For this reason, we need to restore
- the working value and also the values of the final offsets, in case they
- were set by a previous iteration of the same bracket.
-
- If there isn't enough space in the offset vector, treat this as if it were
- a non-capturing bracket. Don't worry about setting the flag for the error
- case here; that is handled in the code for KET. */
+ /* We have a successful match of the whole pattern. Record the result and
+ then do a direct return from the function. If there is space in the offset
+ vector, set any pairs that follow the highest-numbered captured string but
+ are less than the number of capturing groups in the pattern to PCRE2_UNSET.
+ It is documented that this happens. "Gaps" are set to PCRE2_UNSET
+ dynamically. It is only those at the end that need setting here. */
- case OP_CBRA:
- case OP_SCBRA:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
+ mb->end_match_ptr = Feptr; /* Record where we ended */
+ mb->end_offset_top = Foffset_top; /* and how many extracts were taken */
+ mb->mark = Fmark; /* and the last success mark */
+ if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
- if (offset < mb->offset_max)
- {
- save_offset1 = mb->ovector[offset];
- save_offset2 = mb->ovector[offset+1];
- save_offset3 = mb->ovector[mb->offset_end - number];
- save_capture_last = mb->capture_last;
- save_mark = mb->mark;
+ ovector[0] = Fstart_match - mb->start_subject;
+ ovector[1] = Feptr - mb->start_subject;
- mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
+ /* Set i to the smaller of the sizes of the external and frame ovectors. */
- for (;;)
- {
- if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM1);
- if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
-
- /* If we backed up to a THEN, check whether it is within the current
- branch by comparing the address of the THEN that is passed back with
- the end of the branch. If it is within the current branch, and the
- branch is one of two or more alternatives (it either starts or ends
- with OP_ALT), we have reached the limit of THEN's action, so convert
- the return code to NOMATCH, which will cause normal backtracking to
- happen from now on. Otherwise, THEN is passed back to an outer
- alternative. This implements Perl's treatment of parenthesized groups,
- where a group not containing | does not affect the current alternative,
- that is, (X) is NOT the same as (X|(*F)). */
-
- if (rrc == MATCH_THEN)
- {
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
+ i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
+ memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
+ while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
+ return MATCH_MATCH; /* Note: NOT RRETURN */
- /* Anything other than NOMATCH is passed back. */
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- mb->mark = save_mark;
- if (*ecode != OP_ALT) break;
- }
-
- mb->ovector[offset] = save_offset1;
- mb->ovector[offset+1] = save_offset2;
- mb->ovector[mb->offset_end - number] = save_offset3;
-
- /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
+ /*===================================================================== */
+ /* Match any single character type except newline; have to take care with
+ CRLF newlines and partial matching. */
- RRETURN(rrc);
+ case OP_ANY:
+ if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
+ if (mb->partial != 0 &&
+ Feptr == mb->end_subject - 1 &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
+ {
+ mb->hitend = TRUE;
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
+ /* Fall through */
- /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
- as a non-capturing bracket. */
+ /* Match any single character whatsoever. */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* VVVVVVVVVVVVVVVVVVVVVVVVV */
+ case OP_ALLANY:
+ if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */
+ { /* not be updated before SCHECK_PARTIAL. */
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ Feptr++;
+#ifdef SUPPORT_UNICODE
+ if (utf) ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
+#endif
+ Fecode++;
+ break;
- /* Non-capturing or atomic group, except for possessive with unlimited
- repeat and ONCE group with no captures. Loop for all the alternatives.
- When we get to the final alternative within the brackets, we used to return
- the result of a recursive call to match() whatever happened so it was
- possible to reduce stack usage by turning this into a tail recursion,
- except in the case of a possibly empty group. However, now that there is
- the possiblity of (*THEN) occurring in the final alternative, this
- optimization is no longer always possible.
+ /* ===================================================================== */
+ /* Match a single code unit, even in UTF mode. This opcode really does
+ match any code unit, even newline. (It really should be called ANYCODEUNIT,
+ of course - the byte name is from pre-16 bit days.) */
- We can optimize if we know there are no (*THEN)s in the pattern; at present
- this is the best that can be done.
+ case OP_ANYBYTE:
+ if (Feptr >= mb->end_subject) /* DO NOT merge the Feptr++ here; it must */
+ { /* not be updated before SCHECK_PARTIAL. */
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ Feptr++;
+ Fecode++;
+ break;
- MATCH_ONCE is returned when the end of an atomic group is successfully
- reached, but subsequent matching fails. It passes back up the tree (causing
- captured values to be reset) until the original atomic group level is
- reached. This is tested by comparing mb->once_target with the start of the
- group. At this point, the return is converted into MATCH_NOMATCH so that
- previous backup points can be taken. */
- case OP_ONCE:
- case OP_BRA:
- case OP_SBRA:
+ /* ===================================================================== */
+ /* Match a single character, casefully */
- for (;;)
+ case OP_CHAR:
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- if (op >= OP_SBRA || op == OP_ONCE)
- mb->match_function_type |= MATCH_CBEGROUP;
-
- /* If this is not a possibly empty group, and there are no (*THEN)s in
- the pattern, and this is the final alternative, optimize as described
- above. */
-
- else if (!mb->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
+ Flength = 1;
+ Fecode++;
+ GETCHARLEN(fc, Fecode, Flength);
+ if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
{
- ecode += PRIV(OP_lengths)[*ecode];
- goto TAIL_RECURSE;
+ CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
}
-
- /* In all other cases, we have to make another call to match(). */
-
- save_mark = mb->mark;
- save_capture_last = mb->capture_last;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb, eptrb,
- RM2);
-
- /* See comment in the code for capturing groups above about handling
- THEN. */
-
- if (rrc == MATCH_THEN)
+ for (; Flength > 0; Flength--)
{
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
+ if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
}
-
- if (rrc != MATCH_NOMATCH)
- {
- if (rrc == MATCH_ONCE)
- {
- PCRE2_SPTR scode = ecode;
- if (*scode != OP_ONCE) /* If not at start, find it */
- {
- while (*scode == OP_ALT) scode += GET(scode, 1);
- scode -= GET(scode, 1);
- }
- if (mb->once_target == scode) rrc = MATCH_NOMATCH;
- }
- RRETURN(rrc);
- }
- ecode += GET(ecode, 1);
- mb->mark = save_mark;
- if (*ecode != OP_ALT) break;
- mb->capture_last = save_capture_last;
}
-
- RRETURN(MATCH_NOMATCH);
-
- /* Handle possessive capturing brackets with an unlimited repeat. We come
- here from BRAZERO with allow_zero set TRUE. The ovector values are
- handled similarly to the normal case above. However, the matching is
- different. The end of these brackets will always be OP_KETRPOS, which
- returns MATCH_KETRPOS without going further in the pattern. By this means
- we can handle the group by iteration rather than recursion, thereby
- reducing the amount of stack needed. If the ovector is too small for
- capturing, treat as non-capturing. */
-
- case OP_CBRAPOS:
- case OP_SCBRAPOS:
- allow_zero = FALSE;
-
- POSSESSIVE_CAPTURE:
- number = GET2(ecode, 1+LINK_SIZE);
- offset = number << 1;
- if (offset >= mb->offset_max) goto POSSESSIVE_NON_CAPTURE;
-
- matched_once = FALSE;
- code_offset = (int)(ecode - mb->start_code);
-
- save_offset1 = mb->ovector[offset];
- save_offset2 = mb->ovector[offset+1];
- save_offset3 = mb->ovector[mb->offset_end - number];
- save_capture_last = mb->capture_last;
-
- /* Each time round the loop, save the current subject position for use
- when the group matches. For MATCH_MATCH, the group has matched, so we
- restart it with a new subject starting position, remembering that we had
- at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
- usual. If we haven't matched any alternatives in any iteration, check to
- see if a previous iteration matched. If so, the group has matched;
- continue from afterwards. Otherwise it has failed; restore the previous
- capture values before returning NOMATCH. */
-
- for (;;)
+ else
+#endif
+ /* Not UTF mode */
{
- mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
- if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM63);
- if (rrc == MATCH_KETRPOS)
+ if (mb->end_subject - Feptr < 1)
{
- offset_top = mb->end_offset_top;
- ecode = mb->start_code + code_offset;
- save_capture_last = mb->capture_last;
- matched_once = TRUE;
- mstart = mb->start_match_ptr; /* In case \K changed it */
- if (eptr == mb->end_match_ptr) /* Matched an empty string */
- {
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
- break;
- }
- eptr = mb->end_match_ptr;
- continue;
+ SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
+ RRETURN(MATCH_NOMATCH);
}
+ if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
+ Fecode += 2;
+ }
+ break;
- /* See comment in the code for capturing groups above about handling
- THEN. */
-
- if (rrc == MATCH_THEN)
- {
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->capture_last = save_capture_last;
- ecode += GET(ecode, 1);
- if (*ecode != OP_ALT) break;
- }
+ /* ===================================================================== */
+ /* Match a single character, caselessly. If we are at the end of the
+ subject, give up immediately. We get here only when the pattern character
+ has at most one other case. Characters with more than two cases are coded
+ as OP_PROP with the pseudo-property PT_CLIST. */
- if (!matched_once)
+ case OP_CHARI:
+ if (Feptr >= mb->end_subject)
{
- mb->ovector[offset] = save_offset1;
- mb->ovector[offset+1] = save_offset2;
- mb->ovector[mb->offset_end - number] = save_offset3;
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- if (allow_zero || matched_once)
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- ecode += 1 + LINK_SIZE;
- break;
- }
- RRETURN(MATCH_NOMATCH);
-
- /* Non-capturing possessive bracket with unlimited repeat. We come here
- from BRAZERO with allow_zero = TRUE. The code is similar to the above,
- without the capturing complication. It is written out separately for speed
- and cleanliness. */
-
- case OP_BRAPOS:
- case OP_SBRAPOS:
- allow_zero = FALSE;
+ Flength = 1;
+ Fecode++;
+ GETCHARLEN(fc, Fecode, Flength);
- POSSESSIVE_NON_CAPTURE:
- matched_once = FALSE;
- code_offset = (int)(ecode - mb->start_code);
- save_capture_last = mb->capture_last;
+ /* If the pattern character's value is < 128, we know that its other case
+ (if any) is also < 128 (and therefore only one code unit long in all
+ code-unit widths), so we can use the fast lookup table. We checked above
+ that there is at least one character left in the subject. */
- for (;;)
- {
- if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
- RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
- eptrb, RM48);
- if (rrc == MATCH_KETRPOS)
+ if (fc < 128)
{
- offset_top = mb->end_offset_top;
- ecode = mb->start_code + code_offset;
- matched_once = TRUE;
- mstart = mb->start_match_ptr; /* In case \K reset it */
- if (eptr == mb->end_match_ptr) /* Matched an empty string */
- {
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
- break;
- }
- eptr = mb->end_match_ptr;
- continue;
+ uint32_t cc = UCHAR21(Feptr);
+ if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ Feptr++;
}
- /* See comment in the code for capturing groups above about handling
- THEN. */
+ /* Otherwise we must pick up the subject character and use Unicode
+ property support to test its other case. Note that we cannot use the
+ value of "Flength" to check for sufficient bytes left, because the other
+ case of the character may have more or fewer code units. */
- if (rrc == MATCH_THEN)
+ else
{
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
+ uint32_t dc;
+ GETCHARINC(dc, Feptr);
+ Fecode += Flength;
+ if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
}
-
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- if (*ecode != OP_ALT) break;
- mb->capture_last = save_capture_last;
}
+ else
+#endif /* SUPPORT_UNICODE */
- if (matched_once || allow_zero)
+ /* Not UTF mode; use the table for characters < 256. */
{
- ecode += 1 + LINK_SIZE;
- break;
+ if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
+ != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
+ Feptr++;
+ Fecode += 2;
}
- RRETURN(MATCH_NOMATCH);
-
- /* Control never reaches here. */
-
- /* Conditional group: compilation checked that there are no more than two
- branches. If the condition is false, skipping the first branch takes us
- past the end of the item if there is only one branch, but that's exactly
- what we want. */
-
- case OP_COND:
- case OP_SCOND:
-
- /* The variable codelink will be added to ecode when the condition is
- false, to get to the second branch. Setting it to the offset to the ALT
- or KET, then incrementing ecode achieves this effect. We now have ecode
- pointing to the condition or callout. */
+ break;
- codelink = GET(ecode, 1); /* Offset to the second branch */
- ecode += 1 + LINK_SIZE; /* From this opcode */
- /* Because of the way auto-callout works during compile, a callout item is
- inserted between OP_COND and an assertion condition. */
+ /* ===================================================================== */
+ /* Match not a single character. */
- if (*ecode == OP_CALLOUT || *ecode == OP_CALLOUT_STR)
+ case OP_NOT:
+ case OP_NOTI:
+ if (Feptr >= mb->end_subject)
{
- unsigned int callout_length = (*ecode == OP_CALLOUT)
- ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
-
- if (mb->callout != NULL)
- {
- pcre2_callout_block cb;
- cb.version = 1;
- cb.capture_top = offset_top/2;
- cb.capture_last = mb->capture_last & CAPLMASK;
- cb.offset_vector = mb->ovector;
- cb.mark = mb->nomatch_mark;
- cb.subject = mb->start_subject;
- cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
- cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
- cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
- cb.pattern_position = GET(ecode, 1);
- cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
-
- if (*ecode == OP_CALLOUT)
- {
- cb.callout_number = ecode[1 + 2*LINK_SIZE];
- cb.callout_string_offset = 0;
- cb.callout_string = NULL;
- cb.callout_string_length = 0;
- }
- else
- {
- cb.callout_number = 0;
- cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
- cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
- cb.callout_string_length =
- callout_length - (1 + 4*LINK_SIZE) - 2;
- }
-
- if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
- RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
- }
-
- /* Advance ecode past the callout, so it now points to the condition. We
- must adjust codelink so that the value of ecode+codelink is unchanged. */
-
- ecode += callout_length;
- codelink -= callout_length;
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
-
- /* Test the various possible conditions */
-
- condition = FALSE;
- switch(condcode = *ecode)
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- case OP_RREF: /* Numbered group recursion test */
- if (mb->recursive != NULL) /* Not recursing => FALSE */
- {
- uint32_t recno = GET2(ecode, 1); /* Recursion group number*/
- condition = (recno == RREF_ANY || recno == mb->recursive->group_num);
- }
- break;
-
- case OP_DNRREF: /* Duplicate named group recursion test */
- if (mb->recursive != NULL)
- {
- int count = GET2(ecode, 1 + IMM2_SIZE);
- PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
- while (count-- > 0)
- {
- uint32_t recno = GET2(slot, 0);
- condition = recno == mb->recursive->group_num;
- if (condition) break;
- slot += mb->name_entry_size;
- }
- }
- break;
-
- case OP_CREF: /* Numbered group used test */
- offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- condition = offset < offset_top &&
- mb->ovector[offset] != PCRE2_UNSET;
- break;
-
- case OP_DNCREF: /* Duplicate named group used test */
- {
- int count = GET2(ecode, 1 + IMM2_SIZE);
- PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
- while (count-- > 0)
- {
- offset = GET2(slot, 0) << 1;
- condition = offset < offset_top &&
- mb->ovector[offset] != PCRE2_UNSET;
- if (condition) break;
- slot += mb->name_entry_size;
- }
- }
- break;
-
- case OP_FALSE:
- case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
- break;
-
- case OP_TRUE:
- condition = TRUE;
- break;
-
- /* The condition is an assertion. Call match() to evaluate it - setting
- the MATCH_CONDASSERT bit in mb->match_function_type causes it to stop at
- the end of an assertion. */
-
- default:
- mb->match_function_type |= MATCH_CONDASSERT;
- RMATCH(eptr, ecode, offset_top, mb, NULL, RM3);
- if (rrc == MATCH_MATCH)
- {
- if (mb->end_offset_top > offset_top)
- offset_top = mb->end_offset_top; /* Captures may have happened */
- condition = TRUE;
-
- /* Advance ecode past the assertion to the start of the first branch,
- but adjust it so that the general choosing code below works. If the
- assertion has a quantifier that allows zero repeats we must skip over
- the BRAZERO. This is a lunatic thing to do, but somebody did! */
-
- if (*ecode == OP_BRAZERO) ecode++;
- ecode += GET(ecode, 1);
- while (*ecode == OP_ALT) ecode += GET(ecode, 1);
- ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
- }
-
- /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
- assertion; it is therefore treated as NOMATCH. Any other return is an
- error. */
-
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
+ uint32_t ch;
+ Fecode++;
+ GETCHARINC(ch, Fecode);
+ GETCHARINC(fc, Feptr);
+ if (ch == fc)
{
- RRETURN(rrc); /* Need braces because of following else */
+ RRETURN(MATCH_NOMATCH); /* Caseful match */
}
- break;
- }
-
- /* Choose branch according to the condition */
-
- ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
-
- /* We are now at the branch that is to be obeyed. As there is only one, we
- can use tail recursion to avoid using another stack frame, except when
- there is unlimited repeat of a possibly empty group. In the latter case, a
- recursive call to match() is always required, unless the second alternative
- doesn't exist, in which case we can just plough on. Note that, for
- compatibility with Perl, the | in a conditional group is NOT treated as
- creating two alternatives. If a THEN is encountered in the branch, it
- propagates out to the enclosing alternative (unless nested in a deeper set
- of alternatives, of course). */
-
- if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
- {
- if (op != OP_SCOND)
+ else if (Fop == OP_NOTI) /* If caseless */
{
- goto TAIL_RECURSE;
+ if (ch > 127)
+ ch = UCD_OTHERCASE(ch);
+ else
+ ch = TABLE_GET(ch, mb->fcc, ch);
+ if (ch == fc) RRETURN(MATCH_NOMATCH);
}
-
- mb->match_function_type |= MATCH_CBEGROUP;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM49);
- RRETURN(rrc);
}
-
- /* Condition false & no alternative; continue after the group. */
-
else
+#endif /* SUPPORT_UNICODE */
{
+ uint32_t ch = Fecode[1];
+ fc = *Feptr++;
+ if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
+ RRETURN(MATCH_NOMATCH);
+ Fecode += 2;
}
break;
- /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
- to close any currently open capturing brackets. */
-
- case OP_CLOSE:
- number = GET2(ecode, 1); /* Must be less than 65536 */
- offset = number << 1;
- mb->capture_last = (mb->capture_last & OVFLMASK) | number;
- if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
- {
- mb->ovector[offset] =
- mb->ovector[mb->offset_end - number];
- mb->ovector[offset+1] = eptr - mb->start_subject;
-
- /* If this group is at or above the current highwater mark, ensure that
- any groups between the current high water mark and this group are marked
- unset and then update the high water mark. */
-
- if (offset >= offset_top)
- {
- register PCRE2_SIZE *iptr = mb->ovector + offset_top;
- register PCRE2_SIZE *iend = mb->ovector + offset;
- while (iptr < iend) *iptr++ = PCRE2_UNSET;
- offset_top = offset + 2;
- }
- }
- ecode += 1 + IMM2_SIZE;
- break;
-
+ /* ===================================================================== */
+ /* Match a single character repeatedly. */
- /* End of the pattern, either real or forced. In an assertion ACCEPT,
- update the last used pointer. */
+#define Loclength F->temp_size
+#define Lstart_eptr F->temp_sptr[0]
+#define Lcharptr F->temp_sptr[1]
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Lc F->temp_32[2]
+#define Loc F->temp_32[3]
- case OP_ASSERT_ACCEPT:
- if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
+ case OP_EXACT:
+ case OP_EXACTI:
+ Lmin = Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATCHAR;
- case OP_ACCEPT:
- case OP_END:
+ case OP_POSUPTO:
+ case OP_POSUPTOI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATCHAR;
- /* If we have matched an empty string, fail if not in an assertion and not
- in a recursion if either PCRE2_NOTEMPTY is set, or if PCRE2_NOTEMPTY_ATSTART
- is set and we have matched at the start of the subject. In both cases,
- backtracking will then try other alternatives, if any. */
+ case OP_UPTO:
+ case OP_UPTOI:
+ reptype = REPTYPE_MAX;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATCHAR;
- if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
- mb->recursive == NULL &&
- ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
- ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
- mstart == mb->start_subject + mb->start_offset)))
- RRETURN(MATCH_NOMATCH);
+ case OP_MINUPTO:
+ case OP_MINUPTOI:
+ reptype = REPTYPE_MIN;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATCHAR;
- /* Otherwise, we have a match. */
+ case OP_POSSTAR:
+ case OP_POSSTARI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = UINT32_MAX;
+ Fecode++;
+ goto REPEATCHAR;
- mb->end_match_ptr = eptr; /* Record where we ended */
- mb->end_offset_top = offset_top; /* and how many extracts were taken */
- mb->start_match_ptr = mstart; /* and the start (\K can modify) */
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
+ reptype = REPTYPE_POS;
+ Lmin = 1;
+ Lmax = UINT32_MAX;
+ Fecode++;
+ goto REPEATCHAR;
- /* For some reason, the macros don't work properly if an expression is
- given as the argument to RRETURN when the heap is in use. */
+ case OP_POSQUERY:
+ case OP_POSQUERYI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = 1;
+ Fecode++;
+ goto REPEATCHAR;
- rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
- RRETURN(rrc);
+ case OP_STAR:
+ case OP_STARI:
+ case OP_MINSTAR:
+ case OP_MINSTARI:
+ case OP_PLUS:
+ case OP_PLUSI:
+ case OP_MINPLUS:
+ case OP_MINPLUSI:
+ case OP_QUERY:
+ case OP_QUERYI:
+ case OP_MINQUERY:
+ case OP_MINQUERYI:
+ fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
- /* Assertion brackets. Check the alternative branches in turn - the
- matching won't pass the KET for an assertion. If any one branch matches,
- the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
- start of each branch to move the current point backwards, so the code at
- this level is identical to the lookahead case. When the assertion is part
- of a condition, we want to return immediately afterwards. The caller of
- this incarnation of the match() function will have set MATCH_CONDASSERT in
- mb->match_function type, and one of these opcodes will be the first opcode
- that is processed. We use a local variable that is preserved over calls to
- match() to remember this case. */
+ /* Common code for all repeated single-character matches. We first check
+ for the minimum number of characters. If the minimum equals the maximum, we
+ are done. Otherwise, if minimizing, check the rest of the pattern for a
+ match; if there isn't one, advance up to the maximum, one character at a
+ time.
- case OP_ASSERT:
- case OP_ASSERTBACK:
- save_mark = mb->mark;
- if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
- {
- condassert = TRUE;
- mb->match_function_type &= ~MATCH_CONDASSERT;
- }
- else condassert = FALSE;
+ If maximizing, advance up to the maximum number of matching characters,
+ until Feptr is past the end of the maximum run. If possessive, we are
+ then done (no backing up). Otherwise, match at this position; anything
+ other than no match is immediately returned. For nomatch, back up one
+ character, unless we are matching \R and the last thing matched was
+ \r\n, in which case, back up two code units until we reach the first
+ optional character position.
- /* Loop for each branch */
+ The various UTF/non-UTF and caseful/caseless cases are handled separately,
+ for speed. */
- do
+ REPEATCHAR:
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM4);
+ Flength = 1;
+ Lcharptr = Fecode;
+ GETCHARLEN(fc, Fecode, Flength);
+ Fecode += Flength;
- /* A match means that the assertion is true; break out of the loop
- that matches its alternatives. */
+ /* Handle multi-code-unit character matching, caseful and caseless. */
- if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
+ if (Flength > 1)
{
- mstart = mb->start_match_ptr; /* In case \K reset it */
- break;
- }
-
- /* If not matched, restore the previous mark setting. */
-
- mb->mark = save_mark;
-
- /* See comment in the code for capturing groups above about handling
- THEN. */
+ uint32_t othercase;
- if (rrc == MATCH_THEN)
- {
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
- rrc = MATCH_NOMATCH;
- }
+ if (Fop >= OP_STARI && /* Caseless */
+ (othercase = UCD_OTHERCASE(fc)) != fc)
+ Loclength = PRIV(ord2utf)(othercase, Foccu);
+ else Loclength = 0;
- /* Anything other than NOMATCH causes the entire assertion to fail,
- passing back the return code. This includes COMMIT, SKIP, PRUNE and an
- uncaptured THEN, which means they take their normal effect. This
- consistent approach does not always have exactly the same effect as in
- Perl. */
+ for (i = 1; i <= Lmin; i++)
+ {
+ if (Feptr <= mb->end_subject - Flength &&
+ memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
+ else if (Loclength > 0 &&
+ Feptr <= mb->end_subject - Loclength &&
+ memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
+ Feptr += Loclength;
+ else
+ {
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ }
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += GET(ecode, 1);
- }
- while (*ecode == OP_ALT); /* Continue for next alternative */
+ if (Lmin == Lmax) continue;
- /* If we have tried all the alternative branches, the assertion has
- failed. If not, we broke out after a match. */
+ if (reptype == REPTYPE_MIN)
+ {
+ for (;;)
+ {
+ RMATCH(Fecode, RM202);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr <= mb->end_subject - Flength &&
+ memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
+ else if (Loclength > 0 &&
+ Feptr <= mb->end_subject - Loclength &&
+ memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
+ Feptr += Loclength;
+ else
+ {
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ }
+ /* Control never gets here */
+ }
- if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
+ else /* Maximize */
+ {
+ Lstart_eptr = Feptr;
+ for (i = Lmin; i < Lmax; i++)
+ {
+ if (Feptr <= mb->end_subject - Flength &&
+ memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
+ Feptr += Flength;
+ else if (Loclength > 0 &&
+ Feptr <= mb->end_subject - Loclength &&
+ memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
+ Feptr += Loclength;
+ else
+ {
+ CHECK_PARTIAL();
+ break;
+ }
+ }
- /* If checking an assertion for a condition, return MATCH_MATCH. */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
- if (condassert) RRETURN(MATCH_MATCH);
+ if (reptype != REPTYPE_POS) for(;;)
+ {
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM203);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr--;
+ BACKCHAR(Feptr);
+ }
+ }
+ break; /* End of repeated wide character handling */
+ }
- /* Continue from after a successful assertion, updating the offsets high
- water mark, since extracts may have been taken during the assertion. */
+ /* Length of UTF character is 1. Put it into the preserved variable and
+ fall through to the non-UTF code. */
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- ecode += 1 + LINK_SIZE;
- offset_top = mb->end_offset_top;
- continue;
+ Lc = fc;
+ }
+ else
+#endif /* SUPPORT_UNICODE */
- /* Negative assertion: all branches must fail to match for the assertion to
- succeed. */
+ /* When not in UTF mode, load a single-code-unit character. Then proceed as
+ above. */
- case OP_ASSERT_NOT:
- case OP_ASSERTBACK_NOT:
- save_mark = mb->mark;
- if ((mb->match_function_type & MATCH_CONDASSERT) != 0)
- {
- condassert = TRUE;
- mb->match_function_type &= ~MATCH_CONDASSERT;
- }
- else condassert = FALSE;
+ Lc = *Fecode++;
- /* Loop for each alternative branch. */
+ /* Caseless comparison */
- do
+ if (Fop >= OP_STARI)
{
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, NULL, RM5);
- mb->mark = save_mark; /* Always restore the mark setting */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ /* Lc must be < 128 in UTF-8 mode. */
+ Loc = mb->fcc[Lc];
+#else /* 16-bit & 32-bit */
+#ifdef SUPPORT_UNICODE
+ if (utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
+ else
+#endif /* SUPPORT_UNICODE */
+ Loc = TABLE_GET(Lc, mb->fcc, Lc);
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- switch(rrc)
+ for (i = 1; i <= Lmin; i++)
{
- case MATCH_MATCH: /* A successful match means */
- case MATCH_ACCEPT: /* the assertion has failed. */
- RRETURN(MATCH_NOMATCH);
-
- case MATCH_NOMATCH: /* Carry on with next branch */
- break;
-
- /* See comment in the code for capturing groups above about handling
- THEN. */
-
- case MATCH_THEN:
- next_ecode = ecode + GET(ecode,1);
- if (mb->start_match_ptr < next_ecode &&
- (*ecode == OP_ALT || *next_ecode == OP_ALT))
+ uint32_t cc; /* Faster than PCRE2_UCHAR */
+ if (Feptr >= mb->end_subject)
{
- rrc = MATCH_NOMATCH;
- break;
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- /* Otherwise fall through. */
-
- /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
- assertion to fail to match, without considering any more alternatives.
- Failing to match means the assertion is true. This is a consistent
- approach, but does not always have the same effect as in Perl. */
-
- case MATCH_COMMIT:
- case MATCH_SKIP:
- case MATCH_SKIP_ARG:
- case MATCH_PRUNE:
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- goto NEG_ASSERT_TRUE; /* Break out of alternation loop */
-
- /* Anything else is an error */
-
- default:
- RRETURN(rrc);
+ cc = UCHAR21TEST(Feptr);
+ if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
+ Feptr++;
}
+ if (Lmin == Lmax) continue;
- /* Continue with next branch */
-
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
-
- /* All branches in the assertion failed to match. */
-
- NEG_ASSERT_TRUE:
- if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
- ecode += 1 + LINK_SIZE; /* Continue with current branch */
- continue;
-
- /* Move the subject pointer back. This occurs only at the start of
- each branch of a lookbehind assertion. If we are too close to the start to
- move back, this match function fails. When working with UTF-8 we move
- back a number of characters, not bytes. */
-
- case OP_REVERSE:
- i = GET(ecode, 1);
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- while (i-- > 0)
+ if (reptype == REPTYPE_MIN)
{
- if (eptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
- eptr--;
- BACKCHAR(eptr);
+ for (;;)
+ {
+ uint32_t cc; /* Faster than PCRE2_UCHAR */
+ RMATCH(Fecode, RM25);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ cc = UCHAR21TEST(Feptr);
+ if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
+ Feptr++;
+ }
+ /* Control never gets here */
}
- }
- else
-#endif
- /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
-
- {
- if (i > eptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
- eptr -= i;
- }
-
- /* Save the earliest consulted character, then skip to next op code */
-
- if (eptr < mb->start_used_ptr) mb->start_used_ptr = eptr;
- ecode += 1 + LINK_SIZE;
- break;
-
- /* The callout item calls an external function, if one is provided, passing
- details of the match so far. This is mainly for debugging, though the
- function is able to force a failure. */
-
- case OP_CALLOUT:
- case OP_CALLOUT_STR:
- {
- unsigned int callout_length = (*ecode == OP_CALLOUT)
- ? PRIV(OP_lengths)[OP_CALLOUT] : GET(ecode, 1 + 2*LINK_SIZE);
-
- if (mb->callout != NULL)
+ else /* Maximize */
{
- pcre2_callout_block cb;
- cb.version = 1;
- cb.callout_number = ecode[LINK_SIZE + 1];
- cb.capture_top = offset_top/2;
- cb.capture_last = mb->capture_last & CAPLMASK;
- cb.offset_vector = mb->ovector;
- cb.mark = mb->nomatch_mark;
- cb.subject = mb->start_subject;
- cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
- cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
- cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
- cb.pattern_position = GET(ecode, 1);
- cb.next_item_length = GET(ecode, 1 + LINK_SIZE);
-
- if (*ecode == OP_CALLOUT)
+ Lstart_eptr = Feptr;
+ for (i = Lmin; i < Lmax; i++)
{
- cb.callout_number = ecode[1 + 2*LINK_SIZE];
- cb.callout_string_offset = 0;
- cb.callout_string = NULL;
- cb.callout_string_length = 0;
+ uint32_t cc; /* Faster than PCRE2_UCHAR */
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ cc = UCHAR21TEST(Feptr);
+ if (Lc != cc && Loc != cc) break;
+ Feptr++;
}
- else
+ if (reptype != REPTYPE_POS) for (;;)
{
- cb.callout_number = 0;
- cb.callout_string_offset = GET(ecode, 1 + 3*LINK_SIZE);
- cb.callout_string = ecode + (1 + 4*LINK_SIZE) + 1;
- cb.callout_string_length =
- callout_length - (1 + 4*LINK_SIZE) - 2;
+ if (Feptr == Lstart_eptr) break;
+ RMATCH(Fecode, RM26);
+ Feptr--;
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
-
- if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
- RRETURN(MATCH_NOMATCH);
- if (rrc < 0) RRETURN(rrc);
}
- ecode += callout_length;
}
- break;
-
- /* Recursion either matches the current regex, or some subexpression. The
- offset data is the offset to the starting bracket from the start of the
- whole pattern. (This is so that it works from duplicated subpatterns.)
-
- The state of the capturing groups is preserved over recursion, and
- re-instated afterwards. We don't know how many are started and not yet
- finished (offset_top records the completed total) so we just have to save
- all the potential data. There may be up to 65535 such values, which is too
- large to put on the stack, but using malloc for small numbers seems
- expensive. As a compromise, the stack is used when there are no more than
- OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
- There are also other values that have to be saved. We use a chained
- sequence of blocks that actually live on the stack. Thanks to Robin Houston
- for the original version of this logic. It has, however, been hacked around
- a lot, so he is not to blame for the current way it works. */
+ /* Caseful comparisons (includes all multi-byte characters) */
- case OP_RECURSE:
+ else
{
- ovecsave_frame *fr;
- recursion_info *ri;
- uint32_t recno;
-
- callpat = mb->start_code + GET(ecode, 1);
- recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
-
- /* Check for repeating a pattern recursion without advancing the subject
- pointer. This should catch convoluted mutual recursions. (Some simple
- cases are caught at compile time.) */
-
- for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
- if (recno == ri->group_num && eptr == ri->subject_position)
- RRETURN(PCRE2_ERROR_RECURSELOOP);
-
- /* Add to "recursing stack" */
-
- new_recursive.group_num = recno;
- new_recursive.saved_capture_last = mb->capture_last;
- new_recursive.subject_position = eptr;
- new_recursive.prevrec = mb->recursive;
- mb->recursive = &new_recursive;
-
- /* Where to continue from afterwards */
-
- ecode += 1 + LINK_SIZE;
-
- /* When we are using the system stack for match() recursion we can call a
- function that uses the system stack for preserving the ovector while
- processing the pattern recursion, but only if the ovector is small
- enough. */
-
-#ifndef HEAP_MATCH_RECURSE
- if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
+ for (i = 1; i <= Lmin; i++)
{
- rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
- eptrb, rdepth);
- mb->recursive = new_recursive.prevrec;
- if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
-
- /* Set where we got to in the subject, and reset the start, in case
- it was changed by \K. This *is* propagated back out of a recursion,
- for Perl compatibility. */
-
- eptr = mb->end_match_ptr;
- mstart = mb->start_match_ptr;
- break; /* End of processing OP_RECURSE */
- }
-#endif
- /* If the ovector is too big, or if we are using the heap for match()
- recursion, we have to use the heap for saving the ovector. Used ovecsave
- frames are kept on a chain and re-used. This makes a small improvement in
- execution time on Linux. */
-
- if (mb->ovecsave_chain != NULL)
- {
- new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
- mb->ovecsave_chain = mb->ovecsave_chain->next;
- }
- else
- {
- fr = (ovecsave_frame *)(mb->memctl.malloc(sizeof(ovecsave_frame *) +
- mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
- if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
- new_recursive.ovec_save = fr->saved_ovec;
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
}
- memcpy(new_recursive.ovec_save, mb->ovector,
- mb->offset_end * sizeof(PCRE2_SIZE));
-
- /* Do the recursion. After processing each alternative, restore the
- ovector data and the last captured value. This code has the same overall
- logic as the code in the op_recurse_ovecsave() function, but is adapted
- to use RMATCH/RRETURN and to release the heap block containing the saved
- ovector. */
+ if (Lmin == Lmax) continue;
- cbegroup = (*callpat >= OP_SBRA);
- do
+ if (reptype == REPTYPE_MIN)
{
- if (cbegroup) mb->match_function_type |= MATCH_CBEGROUP;
- RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
- mb, eptrb, RM6);
- memcpy(mb->ovector, new_recursive.ovec_save,
- mb->offset_end * sizeof(PCRE2_SIZE));
- mb->capture_last = new_recursive.saved_capture_last;
- mb->recursive = new_recursive.prevrec;
-
- if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
+ for (;;)
{
- fr = (ovecsave_frame *)
- ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
- fr->next = mb->ovecsave_chain;
- mb->ovecsave_chain = fr;
-
- /* Set where we got to in the subject, and reset the start, in case
- it was changed by \K. This *is* propagated back out of a recursion,
- for Perl compatibility. */
-
- eptr = mb->end_match_ptr;
- mstart = mb->start_match_ptr;
- goto RECURSION_MATCHED; /* Exit loop; end processing */
+ RMATCH(Fecode, RM27);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
}
+ /* Control never gets here */
+ }
+ else /* Maximize */
+ {
+ Lstart_eptr = Feptr;
+ for (i = Lmin; i < Lmax; i++)
+ {
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
- /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
- recursion; they cause a NOMATCH for the entire recursion. These codes
- are defined in a range that can be tested for. */
+ if (Lc != UCHAR21TEST(Feptr)) break;
+ Feptr++;
+ }
- if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+ if (reptype != REPTYPE_POS) for (;;)
{
- rrc = MATCH_NOMATCH;
- goto RECURSION_RETURN;
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM28);
+ Feptr--;
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}
-
- /* Any return code other than NOMATCH is an error. */
-
- if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
- mb->recursive = &new_recursive;
- callpat += GET(callpat, 1);
}
- while (*callpat == OP_ALT);
-
- RECURSION_RETURN:
- mb->recursive = new_recursive.prevrec;
- fr = (ovecsave_frame *)
- ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
- fr->next = mb->ovecsave_chain;
- mb->ovecsave_chain = fr;
- RRETURN(rrc);
}
-
- RECURSION_MATCHED:
break;
- /* An alternation is the end of a branch; scan along to find the end of the
- bracketed group and go to there. */
+#undef Loclength
+#undef Lstart_eptr
+#undef Lcharptr
+#undef Lmin
+#undef Lmax
+#undef Lc
+#undef Loc
- case OP_ALT:
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
- break;
- /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
- indicating that it may occur zero times. It may repeat infinitely, or not
- at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
- with fixed upper repeat limits are compiled as a number of copies, with the
- optional ones preceded by BRAZERO or BRAMINZERO. */
+ /* ===================================================================== */
+ /* Match a negated single one-byte character repeatedly. This is almost a
+ repeat of the code for a repeated single character, but I haven't found a
+ nice way of commoning these up that doesn't require a test of the
+ positive/negative option for each character match. Maybe that wouldn't add
+ very much to the time taken, but character matching *is* what this is all
+ about... */
- case OP_BRAZERO:
- next_ecode = ecode + 1;
- RMATCH(eptr, next_ecode, offset_top, mb, eptrb, RM10);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
- ecode = next_ecode + 1 + LINK_SIZE;
- break;
+#define Lstart_eptr F->temp_sptr[0]
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Lc F->temp_32[2]
+#define Loc F->temp_32[3]
- case OP_BRAMINZERO:
- next_ecode = ecode + 1;
- do next_ecode += GET(next_ecode, 1); while (*next_ecode == OP_ALT);
- RMATCH(eptr, next_ecode + 1+LINK_SIZE, offset_top, mb, eptrb, RM11);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode++;
- break;
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+ Lmin = Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATNOTCHAR;
- case OP_SKIPZERO:
- next_ecode = ecode+1;
- do next_ecode += GET(next_ecode,1); while (*next_ecode == OP_ALT);
- ecode = next_ecode + 1 + LINK_SIZE;
- break;
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ reptype = REPTYPE_MAX;
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATNOTCHAR;
- /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
- here; just jump to the group, with allow_zero set TRUE. */
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ reptype = REPTYPE_MIN;
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATNOTCHAR;
- case OP_BRAPOSZERO:
- op = *(++ecode);
- allow_zero = TRUE;
- if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
- goto POSSESSIVE_NON_CAPTURE;
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = UINT32_MAX;
+ Fecode++;
+ goto REPEATNOTCHAR;
- /* End of a group, repeated or non-repeating. */
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+ reptype = REPTYPE_POS;
+ Lmin = 1;
+ Lmax = UINT32_MAX;
+ Fecode++;
+ goto REPEATNOTCHAR;
- case OP_KET:
- case OP_KETRMIN:
- case OP_KETRMAX:
- case OP_KETRPOS:
- prev = ecode - GET(ecode, 1);
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = 1;
+ Fecode++;
+ goto REPEATNOTCHAR;
- /* If this was a group that remembered the subject start, in order to break
- infinite repeats of empty string matches, retrieve the subject start from
- the chain. Otherwise, set it NULL. */
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
+ goto REPEATNOTCHAR;
- if (*prev >= OP_SBRA || *prev == OP_ONCE)
- {
- saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
- eptrb = eptrb->epb_prev; /* Backup to previous group */
- }
- else saved_eptr = NULL;
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+ case OP_NOTPLUS:
+ case OP_NOTPLUSI:
+ case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+ fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
- /* If we are at the end of an assertion group or a non-capturing atomic
- group, stop matching and return MATCH_MATCH, but record the current high
- water mark for use by positive assertions. We also need to record the match
- start in case it was changed by \K. */
+ /* Common code for all repeated single-character non-matches. */
- if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
- *prev == OP_ONCE_NC)
- {
- mb->end_match_ptr = eptr; /* For ONCE_NC */
- mb->end_offset_top = offset_top;
- mb->start_match_ptr = mstart;
- if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
- RRETURN(MATCH_MATCH); /* Sets mb->mark */
- }
+ REPEATNOTCHAR:
+ GETCHARINCTEST(Lc, Fecode);
- /* For capturing groups we have to check the group number back at the start
- and if necessary complete handling an extraction by setting the offsets and
- bumping the high water mark. Whole-pattern recursion is coded as a recurse
- into group 0, so it won't be picked up here. Instead, we catch it when the
- OP_END is reached. Other recursion is handled here. We just have to record
- the current subject position and start match pointer and give a MATCH
- return. */
+ /* The code is duplicated for the caseless and caseful cases, for speed,
+ since matching characters is likely to be quite common. First, ensure the
+ minimum number of matches are present. If Lmin = Lmax, we are done.
+ Otherwise, if minimizing, keep trying the rest of the expression and
+ advancing one matching character if failing, up to the maximum.
+ Alternatively, if maximizing, find the maximum number of characters and
+ work backwards. */
- if (*prev == OP_CBRA || *prev == OP_SCBRA ||
- *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
+ if (Fop >= OP_NOTSTARI) /* Caseless */
{
- number = GET2(prev, 1+LINK_SIZE);
- offset = number << 1;
-
- /* Handle a recursively called group. */
-
- if (mb->recursive != NULL && mb->recursive->group_num == number)
- {
- mb->end_match_ptr = eptr;
- mb->start_match_ptr = mstart;
- if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
- RRETURN(MATCH_MATCH);
- }
+#ifdef SUPPORT_UNICODE
+ if (utf && Lc > 127)
+ Loc = UCD_OTHERCASE(Lc);
+ else
+#endif /* SUPPORT_UNICODE */
- /* Deal with capturing */
+ Loc = TABLE_GET(Lc, mb->fcc, Lc); /* Other case from table */
- mb->capture_last = (mb->capture_last & OVFLMASK) | number;
- if (offset >= mb->offset_max) mb->capture_last |= OVFLBIT; else
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- /* If offset is greater than offset_top, it means that we are
- "skipping" a capturing group, and that group's offsets must be marked
- unset. In earlier versions of PCRE, all the offsets were unset at the
- start of matching, but this doesn't work because atomic groups and
- assertions can cause a value to be set that should later be unset.
- Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
- part of the atomic group, but this is not on the final matching path,
- so must be unset when 2 is set. (If there is no group 2, there is no
- problem, because offset_top will then be 2, indicating no capture.) */
-
- if (offset > offset_top)
+ uint32_t d;
+ for (i = 1; i <= Lmin; i++)
{
- register PCRE2_SIZE *iptr = mb->ovector + offset_top;
- register PCRE2_SIZE *iend = mb->ovector + offset;
- while (iptr < iend) *iptr++ = PCRE2_UNSET;
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINC(d, Feptr);
+ if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
}
-
- /* Now make the extraction */
-
- mb->ovector[offset] = mb->ovector[mb->offset_end - number];
- mb->ovector[offset+1] = eptr - mb->start_subject;
- if (offset_top <= offset) offset_top = offset + 2;
- }
- }
-
- /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
- and return the MATCH_KETRPOS. This makes it possible to do the repeats one
- at a time from the outer level, thus saving stack. This must precede the
- empty string test - in this case that test is done at the outer level. */
-
- if (*ecode == OP_KETRPOS)
- {
- mb->start_match_ptr = mstart; /* In case \K reset it */
- mb->end_match_ptr = eptr;
- mb->end_offset_top = offset_top;
- if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
- RRETURN(MATCH_KETRPOS);
- }
-
- /* For an ordinary non-repeating ket, just continue at this level. This
- also happens for a repeating ket if no characters were matched in the
- group. This is the forcible breaking of infinite loops as implemented in
- Perl 5.005. For a non-repeating atomic group that includes captures,
- establish a backup point by processing the rest of the pattern at a lower
- level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
- original OP_ONCE level, thereby bypassing intermediate backup points, but
- resetting any captures that happened along the way. */
-
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- if (*prev == OP_ONCE)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM12);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
- RRETURN(MATCH_ONCE);
- }
- ecode += 1 + LINK_SIZE; /* Carry on at this level */
- break;
- }
-
- /* The normal repeating kets try the rest of the pattern or restart from
- the preceding bracket, in the appropriate order. In the second case, we can
- use tail recursion to avoid using another stack frame, unless we have an
- an atomic group or an unlimited repeat of a group that can match an empty
- string. */
-
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM7);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (*prev == OP_ONCE)
- {
- RMATCH(eptr, prev, offset_top, mb, eptrb, RM8);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
- RRETURN(MATCH_ONCE);
- }
- if (*prev >= OP_SBRA) /* Could match an empty string */
- {
- RMATCH(eptr, prev, offset_top, mb, eptrb, RM50);
- RRETURN(rrc);
- }
- ecode = prev;
- goto TAIL_RECURSE;
- }
- else /* OP_KETRMAX */
- {
- RMATCH(eptr, prev, offset_top, mb, eptrb, RM13);
- if (rrc == MATCH_ONCE && mb->once_target == prev) rrc = MATCH_NOMATCH;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (*prev == OP_ONCE)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, mb, eptrb, RM9);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- mb->once_target = prev;
- RRETURN(MATCH_ONCE);
}
- ecode += 1 + LINK_SIZE;
- goto TAIL_RECURSE;
- }
- /* Control never gets here */
-
- /* Not multiline mode: start of subject assertion, unless notbol. */
-
- case OP_CIRC:
- if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
- RRETURN(MATCH_NOMATCH);
-
- /* Start of subject assertion */
-
- case OP_SOD:
- if (eptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- /* Multiline mode: start of subject unless notbol, or after any newline
- except for one at the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
-
- case OP_CIRCM:
- if ((mb->moptions & PCRE2_NOTBOL) != 0 && eptr == mb->start_subject)
- RRETURN(MATCH_NOMATCH);
- if (eptr != mb->start_subject &&
- ((eptr == mb->end_subject &&
- (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
- !WAS_NEWLINE(eptr)))
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- /* Start of match assertion */
-
- case OP_SOM:
- if (eptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- /* Reset the start of match point */
-
- case OP_SET_SOM:
- mstart = eptr;
- ecode++;
- break;
-
- /* Multiline mode: assert before any newline, or before end of subject
- unless noteol is set. */
+ else
+#endif /* SUPPORT_UNICODE */
- case OP_DOLLM:
- if (eptr < mb->end_subject)
- {
- if (!IS_NEWLINE(eptr))
+ /* Not UTF mode */
{
- if (mb->partial != 0 &&
- eptr + 1 >= mb->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21TEST(eptr) == NLBLOCK->nl[0])
+ for (i = 1; i <= Lmin; i++)
{
- mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
+ Feptr++;
}
- RRETURN(MATCH_NOMATCH);
}
- }
- else
- {
- if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
- SCHECK_PARTIAL();
- }
- ecode++;
- break;
-
- /* Not multiline mode: assert before a terminating newline or before end of
- subject unless noteol is set. */
-
- case OP_DOLL:
- if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
- if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
-
- /* ... else fall through for endonly */
-
- /* End of subject assertion (\z) */
- case OP_EOD:
- if (eptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
- SCHECK_PARTIAL();
- ecode++;
- break;
+ if (Lmin == Lmax) continue; /* Finished for exact count */
- /* End of subject or ending \n assertion (\Z) */
-
- case OP_EODN:
- ASSERT_NL_OR_EOS:
- if (eptr < mb->end_subject &&
- (!IS_NEWLINE(eptr) || eptr != mb->end_subject - mb->nllen))
- {
- if (mb->partial != 0 &&
- eptr + 1 >= mb->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21TEST(eptr) == NLBLOCK->nl[0])
+ if (reptype == REPTYPE_MIN)
{
- mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
- }
- RRETURN(MATCH_NOMATCH);
- }
-
- /* Either at end of string or \n before end. */
-
- SCHECK_PARTIAL();
- ecode++;
- break;
-
- /* Word boundary assertions */
-
- case OP_NOT_WORD_BOUNDARY:
- case OP_WORD_BOUNDARY:
- {
-
- /* Find out if the previous and current characters are "word" characters.
- It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
- be "non-word" characters. Remember the earliest consulted character for
- partial matching. */
-
#ifdef SUPPORT_UNICODE
- if (utf)
- {
- /* Get status of previous character */
-
- if (eptr == mb->start_subject) prev_is_word = FALSE; else
+ if (utf)
{
- PCRE2_SPTR lastptr = eptr - 1;
- BACKCHAR(lastptr);
- if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
- GETCHAR(c, lastptr);
- if ((mb->poptions & PCRE2_UCP) != 0)
+ uint32_t d;
+ for (;;)
{
- if (c == '_') prev_is_word = TRUE; else
+ RMATCH(Fecode, RM204);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
- int cat = UCD_CATEGORY(c);
- prev_is_word = (cat == ucp_L || cat == ucp_N);
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
+ GETCHARINC(d, Feptr);
+ if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
}
- else
- prev_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
- }
-
- /* Get status of next character */
-
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- cur_is_word = FALSE;
}
else
+#endif /*SUPPORT_UNICODE */
+
+ /* Not UTF mode */
{
- PCRE2_SPTR nextptr = eptr + 1;
- FORWARDCHARTEST(nextptr, mb->end_subject);
- if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
- GETCHAR(c, eptr);
- if ((mb->poptions & PCRE2_UCP) != 0)
+ for (;;)
{
- if (c == '_') cur_is_word = TRUE; else
+ RMATCH(Fecode, RM29);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
- int cat = UCD_CATEGORY(c);
- cur_is_word = (cat == ucp_L || cat == ucp_N);
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
+ if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
+ Feptr++;
}
- else
- cur_is_word = c < 256 && (mb->ctypes[c] & ctype_word) != 0;
}
+ /* Control never gets here */
}
- else
-#endif /* SUPPORT UTF */
- /* Not in UTF-8 mode, but we may still have PCRE2_UCP set, and for
- consistency with the behaviour of \w we do use it in this case. */
+ /* Maximize case */
+ else
{
- /* Get status of previous character */
+ Lstart_eptr = Feptr;
- if (eptr == mb->start_subject) prev_is_word = FALSE; else
- {
- if (eptr <= mb->start_used_ptr) mb->start_used_ptr = eptr - 1;
#ifdef SUPPORT_UNICODE
- if ((mb->poptions & PCRE2_UCP) != 0)
+ if (utf)
+ {
+ uint32_t d;
+ for (i = Lmin; i < Lmax; i++)
{
- c = eptr[-1];
- if (c == '_') prev_is_word = TRUE; else
+ int len = 1;
+ if (Feptr >= mb->end_subject)
{
- int cat = UCD_CATEGORY(c);
- prev_is_word = (cat == ucp_L || cat == ucp_N);
+ SCHECK_PARTIAL();
+ break;
}
+ GETCHARLEN(d, Feptr, len);
+ if (Lc == d || Loc == d) break;
+ Feptr += len;
}
- else
-#endif
- prev_is_word = MAX_255(eptr[-1])
- && ((mb->ctypes[eptr[-1]] & ctype_word) != 0);
- }
- /* Get status of next character */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- cur_is_word = FALSE;
+ if (reptype != REPTYPE_POS) for(;;)
+ {
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM205);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr--;
+ BACKCHAR(Feptr);
+ }
}
else
+#endif /* SUPPORT_UNICODE */
+
+ /* Not UTF mode */
{
- if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
-#ifdef SUPPORT_UNICODE
- if ((mb->poptions & PCRE2_UCP) != 0)
+ for (i = Lmin; i < Lmax; i++)
{
- c = *eptr;
- if (c == '_') cur_is_word = TRUE; else
+ if (Feptr >= mb->end_subject)
{
- int cat = UCD_CATEGORY(c);
- cur_is_word = (cat == ucp_L || cat == ucp_N);
+ SCHECK_PARTIAL();
+ break;
}
+ if (Lc == *Feptr || Loc == *Feptr) break;
+ Feptr++;
+ }
+ if (reptype != REPTYPE_POS) for (;;)
+ {
+ if (Feptr == Lstart_eptr) break;
+ RMATCH(Fecode, RM30);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr--;
}
- else
-#endif
- cur_is_word = MAX_255(*eptr)
- && ((mb->ctypes[*eptr] & ctype_word) != 0);
}
}
-
- /* Now see if the situation is what we want */
-
- if ((*ecode++ == OP_WORD_BOUNDARY)?
- cur_is_word == prev_is_word : cur_is_word != prev_is_word)
- RRETURN(MATCH_NOMATCH);
- }
- break;
-
- /* Match any single character type except newline; have to take care with
- CRLF newlines and partial matching. */
-
- case OP_ANY:
- if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
- if (mb->partial != 0 &&
- eptr + 1 >= mb->end_subject &&
- NLBLOCK->nltype == NLTYPE_FIXED &&
- NLBLOCK->nllen == 2 &&
- UCHAR21TEST(eptr) == NLBLOCK->nl[0])
- {
- mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
- }
-
- /* Fall through */
-
- /* Match any single character whatsoever. */
-
- case OP_ALLANY:
- if (eptr >= mb->end_subject) /* DO NOT merge the eptr++ here; it must */
- { /* not be updated before SCHECK_PARTIAL. */
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- eptr++;
-#ifdef SUPPORT_UNICODE
- if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
-#endif
- ecode++;
- break;
-
- /* Match a single code unit, even in UTF-8 mode. This opcode really does
- match any code unit, even newline. (It really should be called ANYCODEUNIT,
- of course - the byte name is from pre-16 bit days.) */
-
- case OP_ANYBYTE:
- if (eptr >= mb->end_subject) /* DO NOT merge the eptr++ here; it must */
- { /* not be updated before SCHECK_PARTIAL. */
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- eptr++;
- ecode++;
- break;
-
- case OP_NOT_DIGIT:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c < 256 &&
-#endif
- (mb->ctypes[c] & ctype_digit) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_DIGIT:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c > 255 ||
-#endif
- (mb->ctypes[c] & ctype_digit) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_NOT_WHITESPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c < 256 &&
-#endif
- (mb->ctypes[c] & ctype_space) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_WHITESPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c > 255 ||
-#endif
- (mb->ctypes[c] & ctype_space) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_NOT_WORDCHAR:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c < 256 &&
-#endif
- (mb->ctypes[c] & ctype_word) != 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_WORDCHAR:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- if (
-#ifdef SUPPORT_WIDE_CHARS
- c > 255 ||
-#endif
- (mb->ctypes[c] & ctype_word) == 0
- )
- RRETURN(MATCH_NOMATCH);
- ecode++;
- break;
-
- case OP_ANYNL:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
- default: RRETURN(MATCH_NOMATCH);
-
- case CHAR_CR:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- }
- else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
- break;
-
- case CHAR_LF:
- break;
-
- case CHAR_VT:
- case CHAR_FF:
- case CHAR_NEL:
-#ifndef EBCDIC
- case 0x2028:
- case 0x2029:
-#endif /* Not EBCDIC */
- if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
- break;
}
- ecode++;
- break;
- case OP_NOT_HSPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
- HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
- default: break;
- }
- ecode++;
- break;
-
- case OP_HSPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
- HSPACE_CASES: break; /* Byte and multibyte cases */
- default: RRETURN(MATCH_NOMATCH);
- }
- ecode++;
- break;
-
- case OP_NOT_VSPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- switch(c)
- {
- VSPACE_CASES: RRETURN(MATCH_NOMATCH);
- default: break;
- }
- ecode++;
- break;
+ /* Caseful comparisons */
- case OP_VSPACE:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- switch(c)
+ else
{
- VSPACE_CASES: break;
- default: RRETURN(MATCH_NOMATCH);
- }
- ecode++;
- break;
-
#ifdef SUPPORT_UNICODE
- /* Check the next character by Unicode property. We will get here only
- if the support is in the binary; otherwise a compile-time error occurs. */
-
- case OP_PROP:
- case OP_NOTPROP:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINCTEST(c, eptr);
- {
- const uint32_t *cp;
- const ucd_record *prop = GET_UCD(c);
-
- switch(ecode[1])
+ if (utf)
{
- case PT_ANY:
- if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_LAMP:
- if ((prop->chartype == ucp_Lu ||
- prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_GC:
- if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_PC:
- if ((ecode[2] != prop->chartype) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_SC:
- if ((ecode[2] != prop->script) == (op == OP_PROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- /* These are specials */
-
- case PT_ALNUM:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- /* Perl space used to exclude VT, but from Perl 5.18 it is included,
- which means that Perl space and POSIX space are now identical. PCRE
- was changed at release 8.34. */
-
- case PT_SPACE: /* Perl space */
- case PT_PXSPACE: /* POSIX space */
- switch(c)
+ uint32_t d;
+ for (i = 1; i <= Lmin; i++)
{
- HSPACE_CASES:
- VSPACE_CASES:
- if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
- break;
-
- default:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
- (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
- break;
- }
- break;
-
- case PT_WORD:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
- c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- case PT_CLIST:
- cp = PRIV(ucd_caseless_sets) + ecode[2];
- for (;;)
- {
- if (c < *cp)
- { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
- if (c == *cp++)
- { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINC(d, Feptr);
+ if (Lc == d) RRETURN(MATCH_NOMATCH);
}
- break;
-
- case PT_UCNC:
- if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
- c >= 0xe000) == (op == OP_NOTPROP))
- RRETURN(MATCH_NOMATCH);
- break;
-
- /* This should never occur */
-
- default:
- RRETURN(PCRE2_ERROR_INTERNAL);
}
-
- ecode += 3;
- }
- break;
-
- /* Match an extended Unicode sequence. We will get here only if the support
- is in the binary; otherwise a compile-time error occurs. */
-
- case OP_EXTUNI:
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- else
- {
- int lgb, rgb;
- GETCHARINCTEST(c, eptr);
- lgb = UCD_GRAPHBREAK(c);
- while (eptr < mb->end_subject)
- {
- int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- rgb = UCD_GRAPHBREAK(c);
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- lgb = rgb;
- eptr += len;
- }
- }
- CHECK_PARTIAL();
- ecode++;
- break;
-#endif /* SUPPORT_UNICODE */
-
-
- /* Match a back reference, possibly repeatedly. Look past the end of the
- item to see if there is repeat information following.
-
- The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
- or to a non-duplicated named group. For a duplicated named group, OP_DNREF
- and OP_DNREFI are used. In this case we must scan the list of groups to
- which the name refers, and use the first one that is set. */
-
- case OP_DNREF:
- case OP_DNREFI:
- caseless = op == OP_DNREFI;
- {
- int count = GET2(ecode, 1+IMM2_SIZE);
- PCRE2_SPTR slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
- ecode += 1 + 2*IMM2_SIZE;
-
- /* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
- code. */
-
- offset = 0;
- while (count-- > 0)
- {
- offset = GET2(slot, 0) << 1;
- if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) break;
- slot += mb->name_entry_size;
- }
- }
- goto REF_REPEAT;
-
- case OP_REF:
- case OP_REFI:
- caseless = op == OP_REFI;
- offset = GET2(ecode, 1) << 1; /* Doubled ref number */
- ecode += 1 + IMM2_SIZE;
-
- /* Set up for repetition, or handle the non-repeated case */
-
- REF_REPEAT:
- switch (*ecode)
- {
- case OP_CRSTAR:
- case OP_CRMINSTAR:
- case OP_CRPLUS:
- case OP_CRMINPLUS:
- case OP_CRQUERY:
- case OP_CRMINQUERY:
- c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
- break;
-
- case OP_CRRANGE:
- case OP_CRMINRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
- min = GET2(ecode, 1);
- max = GET2(ecode, 1 + IMM2_SIZE);
- if (max == 0) max = INT_MAX;
- ecode += 1 + 2 * IMM2_SIZE;
- break;
-
- default: /* No repeat follows */
+ else
+#endif
+ /* Not UTF mode */
{
- int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
- if (rc != 0)
+ for (i = 1; i <= Lmin; i++)
{
- if (rc > 0) eptr = mb->end_subject; /* Partial match */
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
}
}
- eptr += length;
- continue; /* With the main loop */
- }
- /* Handle repeated back references. If a set group has length zero, just
- continue with the main loop, because it matches however many times. For an
- unset reference, if the minimum is zero, we can also just continue. We an
- also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
- group be have as a zero-length group. For any other unset cases, carrying
- on will result in NOMATCH. */
-
- if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
- {
- if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
- }
- else /* Group is not set */
- {
- if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
- continue;
- }
+ if (Lmin == Lmax) continue;
- /* First, ensure the minimum number of matches are present. We get back
- the length of the reference string explicitly rather than passing the
- address of eptr, so that eptr can be a register variable. */
-
- for (i = 1; i <= min; i++)
- {
- PCRE2_SIZE slength;
- int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
- if (rc != 0)
- {
- if (rc > 0) eptr = mb->end_subject; /* Partial match */
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- eptr += slength;
- }
-
- /* If min = max, continue at the same level without recursion.
- They are not both allowed to be zero. */
-
- if (min == max) continue;
-
- /* If minimizing, keep trying and advancing the pointer */
-
- if (minimize)
- {
- for (fi = min;; fi++)
+ if (reptype == REPTYPE_MIN)
{
- int rc;
- PCRE2_SIZE slength;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
- if (rc != 0)
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- if (rc > 0) eptr = mb->end_subject; /* Partial match */
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ uint32_t d;
+ for (;;)
+ {
+ RMATCH(Fecode, RM206);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINC(d, Feptr);
+ if (Lc == d) RRETURN(MATCH_NOMATCH);
+ }
}
- eptr += slength;
- }
- /* Control never gets here */
- }
-
- /* If maximizing, find the longest string and work backwards, as long as
- the matched lengths for each iteration are the same. */
-
- else
- {
- BOOL samelengths = TRUE;
- pp = eptr;
- length = mb->ovector[offset+1] - mb->ovector[offset];
-
- for (i = min; i < max; i++)
- {
- PCRE2_SIZE slength;
- int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
-
- if (rc != 0)
+ else
+#endif
+ /* Not UTF mode */
{
- /* Can't use CHECK_PARTIAL because we don't want to update eptr in
- the soft partial matching case. */
-
- if (rc > 0 && mb->partial != 0 &&
- mb->end_subject > mb->start_used_ptr)
+ for (;;)
{
- mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ RMATCH(Fecode, RM31);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
}
- break;
}
-
- if (slength != length) samelengths = FALSE;
- eptr += slength;
+ /* Control never gets here */
}
- /* If the length matched for each repetition is the same as the length of
- the captured group, we can easily work backwards. This is the normal
- case. However, in caseless UTF-8 mode there are pairs of case-equivalent
- characters whose lengths (in terms of code units) differ. However, this
- is very rare, so we handle it by re-matching fewer and fewer times. */
+ /* Maximize case */
- if (samelengths)
+ else
{
- while (eptr >= pp)
+ Lstart_eptr = Feptr;
+
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr -= length;
- }
- }
+ uint32_t d;
+ for (i = Lmin; i < Lmax; i++)
+ {
+ int len = 1;
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLEN(d, Feptr, len);
+ if (Lc == d) break;
+ Feptr += len;
+ }
- /* The rare case of non-matching lengths. Re-scan the repetition for each
- iteration. We know that match_ref() will succeed every time. */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
- else
- {
- max = i;
- for (;;)
+ if (reptype != REPTYPE_POS) for(;;)
+ {
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM207);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr--;
+ BACKCHAR(Feptr);
+ }
+ }
+ else
+#endif
+ /* Not UTF mode */
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr == pp) break; /* Failed after minimal repetition */
- eptr = pp;
- max--;
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- PCRE2_SIZE slength;
- (void)match_ref(offset, offset_top, eptr, mb, caseless, &slength);
- eptr += slength;
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (Lc == *Feptr) break;
+ Feptr++;
+ }
+ if (reptype != REPTYPE_POS) for (;;)
+ {
+ if (Feptr == Lstart_eptr) break;
+ RMATCH(Fecode, RM32);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr--;
}
}
}
-
- RRETURN(MATCH_NOMATCH);
}
- /* Control never gets here */
+ break;
+
+#undef Lstart_eptr
+#undef Lmin
+#undef Lmax
+#undef Lc
+#undef Loc
+
- /* Match a bit-mapped character class, possibly repeatedly. This op code is
- used when all the characters in the class have values in the range 0-255,
- and either the matching is caseful, or the characters are in the range
- 0-127 when UTF-8 processing is enabled. The only difference between
+ /* ===================================================================== */
+ /* Match a bit-mapped character class, possibly repeatedly. These op codes
+ are used when all the characters in the class have values in the range
+ 0-255, and either the matching is caseful, or the characters are in the
+ range 0-127 when UTF processing is enabled. The only difference between
OP_CLASS and OP_NCLASS occurs when a data character outside the range is
- encountered.
+ encountered. */
- First, look past the end of the item to see if there is repeat information
- following. Then obey similar code to character type repeats - written out
- again for speed. */
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Lstart_eptr F->temp_sptr[0]
+#define Lbyte_map_address F->temp_sptr[1]
+#define Lbyte_map ((unsigned char *)Lbyte_map_address)
case OP_NCLASS:
case OP_CLASS:
{
- /* The data variable is saved across frames, so the byte map needs to
- be stored there. */
-#define BYTE_MAP ((uint8_t *)data)
- data = ecode + 1; /* Save for matching */
- ecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
+ Lbyte_map_address = Fecode + 1; /* Save for matching */
+ Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
- switch (*ecode)
+ /* Look past the end of the item to see if there is repeat information
+ following. Then obey similar code to character type repeats. */
+
+ switch (*Fecode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
@@ -2999,27 +1806,24 @@ for (;;)
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
- c = *ecode++ - OP_CRSTAR;
- if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
- else possessive = TRUE;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
+ fc = *Fecode++ - OP_CRSTAR;
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
- possessive = (*ecode == OP_CRPOSRANGE);
- min = GET2(ecode, 1);
- max = GET2(ecode, 1 + IMM2_SIZE);
- if (max == 0) max = INT_MAX;
- ecode += 1 + 2 * IMM2_SIZE;
+ Lmin = GET2(Fecode, 1);
+ Lmax = GET2(Fecode, 1 + IMM2_SIZE);
+ if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
+ reptype = rep_typ[*Fecode - OP_CRSTAR];
+ Fecode += 1 + 2 * IMM2_SIZE;
break;
default: /* No repeat follows */
- min = max = 1;
+ Lmin = Lmax = 1;
break;
}
@@ -3028,100 +1832,99 @@ for (;;)
#ifdef SUPPORT_UNICODE
if (utf)
{
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c > 255)
+ GETCHARINC(fc, Feptr);
+ if (fc > 255)
{
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
else
#endif
/* Not UTF mode */
{
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- c = *eptr++;
+ fc = *Feptr++;
#if PCRE2_CODE_UNIT_WIDTH != 8
- if (c > 255)
+ if (fc > 255)
{
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
#endif
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
- /* If max == min we can continue with the main loop without the
- need to recurse. */
+ /* If Lmax == Lmin we are done. Continue with main loop. */
- if (min == max) continue;
+ if (Lmin == Lmax) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (minimize)
+ if (reptype == REPTYPE_MIN)
{
#ifdef SUPPORT_UNICODE
if (utf)
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM16);
+ RMATCH(Fecode, RM200);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c > 255)
+ GETCHARINC(fc, Feptr);
+ if (fc > 255)
{
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
else
#endif
/* Not UTF mode */
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM17);
+ RMATCH(Fecode, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- c = *eptr++;
+ fc = *Feptr++;
#if PCRE2_CODE_UNIT_WIDTH != 8
- if (c > 255)
+ if (fc > 255)
{
- if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+ if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
#endif
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
@@ -3131,91 +1934,102 @@ for (;;)
else
{
- pp = eptr;
+ Lstart_eptr = Feptr;
#ifdef SUPPORT_UNICODE
if (utf)
{
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c > 255)
+ GETCHARLEN(fc, Feptr, len);
+ if (fc > 255)
{
- if (op == OP_CLASS) break;
+ if (Fop == OP_CLASS) break;
}
else
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
- eptr += len;
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
+ Feptr += len;
}
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM18);
+ RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
- BACKCHAR(eptr);
+ if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ BACKCHAR(Feptr);
}
}
else
#endif
/* Not UTF mode */
{
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- c = *eptr;
+ fc = *Feptr;
#if PCRE2_CODE_UNIT_WIDTH != 8
- if (c > 255)
+ if (fc > 255)
{
- if (op == OP_CLASS) break;
+ if (Fop == OP_CLASS) break;
}
else
#endif
- if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
- eptr++;
+ if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
+ Feptr++;
}
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
- while (eptr >= pp)
+ while (Feptr >= Lstart_eptr)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM19);
+ RMATCH(Fecode, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
+ Feptr--;
}
}
RRETURN(MATCH_NOMATCH);
}
-#undef BYTE_MAP
}
/* Control never gets here */
+#undef Lbyte_map_address
+#undef Lbyte_map
+#undef Lstart_eptr
+#undef Lmin
+#undef Lmax
+
+ /* ===================================================================== */
/* Match an extended character class. In the 8-bit library, this opcode is
encountered only when UTF-8 mode mode is supported. In the 16-bit and
32-bit libraries, codepoints greater than 255 may be encountered even when
UTF is not supported. */
+#define Lstart_eptr F->temp_sptr[0]
+#define Lxclass_data F->temp_sptr[1]
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
{
- data = ecode + 1 + LINK_SIZE; /* Save for matching */
- ecode += GET(ecode, 1); /* Advance past the item */
+ Lxclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */
+ Fecode += GET(Fecode, 1); /* Advance past the item */
- switch (*ecode)
+ switch (*Fecode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
@@ -3226,65 +2040,61 @@ for (;;)
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
case OP_CRPOSQUERY:
- c = *ecode++ - OP_CRSTAR;
- if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
- else possessive = TRUE;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
+ fc = *Fecode++ - OP_CRSTAR;
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
- minimize = (*ecode == OP_CRMINRANGE);
- possessive = (*ecode == OP_CRPOSRANGE);
- min = GET2(ecode, 1);
- max = GET2(ecode, 1 + IMM2_SIZE);
- if (max == 0) max = INT_MAX;
- ecode += 1 + 2 * IMM2_SIZE;
+ Lmin = GET2(Fecode, 1);
+ Lmax = GET2(Fecode, 1 + IMM2_SIZE);
+ if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
+ reptype = rep_typ[*Fecode - OP_CRSTAR];
+ Fecode += 1 + 2 * IMM2_SIZE;
break;
default: /* No repeat follows */
- min = max = 1;
+ Lmin = Lmax = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
+ GETCHARINCTEST(fc, Feptr);
+ if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
}
- /* If max == min we can continue with the main loop without the
- need to recurse. */
+ /* If Lmax == Lmin we can just continue with the main loop. */
- if (min == max) continue;
+ if (Lmin == Lmax) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
- if (minimize)
+ if (reptype == REPTYPE_MIN)
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM20);
+ RMATCH(Fecode, RM100);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
+ GETCHARINCTEST(fc, Feptr);
+ if (!PRIV(xclass)(fc, Lxclass_data, utf)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
@@ -3293,33 +2103,33 @@ for (;;)
else
{
- pp = eptr;
- for (i = min; i < max; i++)
+ Lstart_eptr = Feptr;
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
#ifdef SUPPORT_UNICODE
- GETCHARLENTEST(c, eptr, len);
+ GETCHARLENTEST(fc, Feptr, len);
#else
- c = *eptr;
+ fc = *Feptr;
#endif
- if (!PRIV(xclass)(c, data, utf)) break;
- eptr += len;
+ if (!PRIV(xclass)(fc, Lxclass_data, utf)) break;
+ Feptr += len;
}
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
for(;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM21);
+ RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE
- if (utf) BACKCHAR(eptr);
+ if (utf) BACKCHAR(Feptr);
#endif
}
RRETURN(MATCH_NOMATCH);
@@ -3327,887 +2137,416 @@ for (;;)
/* Control never gets here */
}
-#endif /* End of XCLASS */
+#endif /* SUPPORT_WIDE_CHARS: end of XCLASS */
- /* Match a single character, casefully */
+#undef Lstart_eptr
+#undef Lxclass_data
+#undef Lmin
+#undef Lmax
- case OP_CHAR:
-#ifdef SUPPORT_UNICODE
- if (utf)
+
+ /* ===================================================================== */
+ /* Match various character types when PCRE2_UCP is not set. These opcodes
+ are not generated when PCRE2_UCP is set - instead appropriate property
+ tests are compiled. */
+
+ case OP_NOT_DIGIT:
+ if (Feptr >= mb->end_subject)
{
- length = 1;
- ecode++;
- GETCHARLEN(fc, ecode, length);
- if (length > (PCRE2_SIZE)(mb->end_subject - eptr))
- {
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
- for (; length > 0; length--)
- {
- if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
- }
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- else
-#endif
- /* Not UTF mode */
+ GETCHARINCTEST(fc, Feptr);
+ if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
+
+ case OP_DIGIT:
+ if (Feptr >= mb->end_subject)
{
- if (mb->end_subject - eptr < 1)
- {
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
- if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
- ecode += 2;
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
+ GETCHARINCTEST(fc, Feptr);
+ if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
break;
- /* Match a single character, caselessly. If we are at the end of the
- subject, give up immediately. */
-
- case OP_CHARI:
- if (eptr >= mb->end_subject)
+ case OP_NOT_WHITESPACE:
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
+ GETCHARINCTEST(fc, Feptr);
+ if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
-#ifdef SUPPORT_UNICODE
- if (utf)
+ case OP_WHITESPACE:
+ if (Feptr >= mb->end_subject)
{
- length = 1;
- ecode++;
- GETCHARLEN(fc, ecode, length);
-
- /* If the pattern character's value is < 128, we have only one byte, and
- we know that its other case must also be one byte long, so we can use the
- fast lookup table. We know that there is at least one byte left in the
- subject. */
-
- if (fc < 128)
- {
- uint32_t cc = UCHAR21(eptr);
- if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
- ecode++;
- eptr++;
- }
-
- /* Otherwise we must pick up the subject character. Note that we cannot
- use the value of "length" to check for sufficient bytes left, because the
- other case of the character may have more or fewer bytes. */
-
- else
- {
- uint32_t dc;
- GETCHARINC(dc, eptr);
- ecode += length;
-
- /* If we have Unicode property support, we can use it to test the other
- case of the character, if there is one. */
-
- if (fc != dc)
- {
-#ifdef SUPPORT_UNICODE
- if (dc != UCD_OTHERCASE(fc))
-#endif
- RRETURN(MATCH_NOMATCH);
- }
- }
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- else
-#endif /* SUPPORT_UNICODE */
+ GETCHARINCTEST(fc, Feptr);
+ if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
- /* Not UTF mode */
+ case OP_NOT_WORDCHAR:
+ if (Feptr >= mb->end_subject)
{
- if (TABLE_GET(ecode[1], mb->lcc, ecode[1])
- != TABLE_GET(*eptr, mb->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
- eptr++;
- ecode += 2;
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
+ GETCHARINCTEST(fc, Feptr);
+ if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
break;
- /* Match a single character repeatedly. */
-
- case OP_EXACT:
- case OP_EXACTI:
- min = max = GET2(ecode, 1);
- ecode += 1 + IMM2_SIZE;
- goto REPEATCHAR;
-
- case OP_POSUPTO:
- case OP_POSUPTOI:
- possessive = TRUE;
- /* Fall through */
-
- case OP_UPTO:
- case OP_UPTOI:
- case OP_MINUPTO:
- case OP_MINUPTOI:
- min = 0;
- max = GET2(ecode, 1);
- minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
- ecode += 1 + IMM2_SIZE;
- goto REPEATCHAR;
-
- case OP_POSSTAR:
- case OP_POSSTARI:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
- goto REPEATCHAR;
-
- case OP_POSPLUS:
- case OP_POSPLUSI:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
- goto REPEATCHAR;
-
- case OP_POSQUERY:
- case OP_POSQUERYI:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
- goto REPEATCHAR;
-
- case OP_STAR:
- case OP_STARI:
- case OP_MINSTAR:
- case OP_MINSTARI:
- case OP_PLUS:
- case OP_PLUSI:
- case OP_MINPLUS:
- case OP_MINPLUSI:
- case OP_QUERY:
- case OP_QUERYI:
- case OP_MINQUERY:
- case OP_MINQUERYI:
- c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
-
- /* Common code for all repeated single-character matches. We first check
- for the minimum number of characters. If the minimum equals the maximum, we
- are done. Otherwise, if minimizing, check the rest of the pattern for a
- match; if there isn't one, advance up to the maximum, one character at a
- time.
-
- If maximizing, advance up to the maximum number of matching characters,
- until eptr is past the end of the maximum run. If possessive, we are
- then done (no backing up). Otherwise, match at this position; anything
- other than no match is immediately returned. For nomatch, back up one
- character, unless we are matching \R and the last thing matched was
- \r\n, in which case, back up two bytes. When we reach the first optional
- character position, we can save stack by doing a tail recurse.
-
- The various UTF/non-UTF and caseful/caseless cases are handled separately,
- for speed. */
-
- REPEATCHAR:
-#ifdef SUPPORT_UNICODE
- if (utf)
+ case OP_WORDCHAR:
+ if (Feptr >= mb->end_subject)
{
- length = 1;
- charptr = ecode;
- GETCHARLEN(fc, ecode, length);
- ecode += length;
-
- /* Handle multibyte character matching specially here. There is
- support for caseless matching if UCP support is present. */
-
- if (length > 1)
- {
- uint32_t othercase;
- if (op >= OP_STARI && /* Caseless */
- (othercase = UCD_OTHERCASE(fc)) != fc)
- oclength = PRIV(ord2utf)(othercase, occhars);
- else oclength = 0;
-
- for (i = 1; i <= min; i++)
- {
- if (eptr <= mb->end_subject - length &&
- memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
- else if (oclength > 0 &&
- eptr <= mb->end_subject - oclength &&
- memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
- else
- {
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- }
-
- if (min == max) continue;
-
- if (minimize)
- {
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM22);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr <= mb->end_subject - length &&
- memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
- else if (oclength > 0 &&
- eptr <= mb->end_subject - oclength &&
- memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
- else
- {
- CHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- }
- /* Control never gets here */
- }
-
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
- if (eptr <= mb->end_subject - length &&
- memcmp(eptr, charptr, CU2BYTES(length)) == 0) eptr += length;
- else if (oclength > 0 &&
- eptr <= mb->end_subject - oclength &&
- memcmp(eptr, occhars, CU2BYTES(oclength)) == 0) eptr += oclength;
- else
- {
- CHECK_PARTIAL();
- break;
- }
- }
-
- if (possessive) continue; /* No backtracking */
-
- /* After \C in UTF mode, pp might be in the middle of a Unicode
- character. Use <= pp to ensure backtracking doesn't go too far. */
-
- for(;;)
- {
- if (eptr <= pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- BACKCHAR(eptr);
- }
- }
- /* Control never gets here */
- }
-
- /* If the length of a UTF-8 character is 1, we fall through here, and
- obey the code as for non-UTF-8 characters below, though in this case the
- value of fc will always be < 128. */
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- else
-#endif /* SUPPORT_UNICODE */
-
- /* When not in UTF-8 mode, load a single-byte character. */
- fc = *ecode++;
-
- /* The value of fc at this point is always one character, though we may
- or may not be in UTF mode. The code is duplicated for the caseless and
- caseful cases, for speed, since matching characters is likely to be quite
- common. First, ensure the minimum number of matches are present. If min =
- max, continue at the same level without recursing. Otherwise, if
- minimizing, keep trying the rest of the expression and advancing one
- matching character if failing, up to the maximum. Alternatively, if
- maximizing, find the maximum number of characters and work backwards. */
+ GETCHARINCTEST(fc, Feptr);
+ if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
- if (op >= OP_STARI) /* Caseless */
+ case OP_ANYNL:
+ if (Feptr >= mb->end_subject)
{
-#if PCRE2_CODE_UNIT_WIDTH == 8
- /* fc must be < 128 if UTF is enabled. */
- foc = mb->fcc[fc];
-#else
-#ifdef SUPPORT_UNICODE
- if (utf && fc > 127)
- foc = UCD_OTHERCASE(fc);
- else
-#endif /* SUPPORT_UNICODE */
- foc = TABLE_GET(fc, mb->fcc, fc);
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
-
- for (i = 1; i <= min; i++)
- {
- uint32_t cc; /* Faster than PCRE2_UCHAR */
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- cc = UCHAR21TEST(eptr);
- if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
- eptr++;
- }
- if (min == max) continue;
- if (minimize)
- {
- for (fi = min;; fi++)
- {
- uint32_t cc; /* Faster than PCRE2_UCHAR */
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM24);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- cc = UCHAR21TEST(eptr);
- if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
- eptr++;
- }
- /* Control never gets here */
- }
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
- uint32_t cc; /* Faster than PCRE2_UCHAR */
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- cc = UCHAR21TEST(eptr);
- if (fc != cc && foc != cc) break;
- eptr++;
- }
- if (possessive) continue; /* No backtracking */
- for (;;)
- {
- if (eptr == pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM25);
- eptr--;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- /* Control never gets here */
- }
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
-
- /* Caseful comparisons (includes all multi-byte characters) */
-
- else
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
{
- for (i = 1; i <= min; i++)
+ default: RRETURN(MATCH_NOMATCH);
+
+ case CHAR_CR:
+ if (Feptr >= mb->end_subject)
{
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
+ SCHECK_PARTIAL();
}
+ else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
+ break;
- if (min == max) continue;
+ case CHAR_LF:
+ break;
- if (minimize)
- {
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM26);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
- }
- /* Control never gets here */
- }
- else /* Maximize */
- {
- pp = eptr;
- for (i = min; i < max; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc != UCHAR21TEST(eptr)) break;
- eptr++;
- }
- if (possessive) continue; /* No backtracking */
- for (;;)
- {
- if (eptr == pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM27);
- eptr--;
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- }
- /* Control never gets here */
- }
+ case CHAR_VT:
+ case CHAR_FF:
+ case CHAR_NEL:
+#ifndef EBCDIC
+ case 0x2028:
+ case 0x2029:
+#endif /* Not EBCDIC */
+ if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
+ break;
}
- /* Control never gets here */
+ Fecode++;
+ break;
- /* Match a negated single one-byte character. The character we are
- checking can be multibyte. */
+ case OP_NOT_HSPACE:
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
+ {
+ HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
+ default: break;
+ }
+ Fecode++;
+ break;
- case OP_NOT:
- case OP_NOTI:
- if (eptr >= mb->end_subject)
+ case OP_HSPACE:
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
-#ifdef SUPPORT_UNICODE
- if (utf)
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
{
- register uint32_t ch, och;
+ HSPACE_CASES: break; /* Byte and multibyte cases */
+ default: RRETURN(MATCH_NOMATCH);
+ }
+ Fecode++;
+ break;
- ecode++;
- GETCHARINC(ch, ecode);
- GETCHARINC(c, eptr);
+ case OP_NOT_VSPACE:
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
+ {
+ VSPACE_CASES: RRETURN(MATCH_NOMATCH);
+ default: break;
+ }
+ Fecode++;
+ break;
- if (op == OP_NOT)
- {
- if (ch == c) RRETURN(MATCH_NOMATCH);
- }
- else
- {
- if (ch > 127)
- och = UCD_OTHERCASE(ch);
- else
- och = TABLE_GET(ch, mb->fcc, ch);
- if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
- }
+ case OP_VSPACE:
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
- else
-#endif /* SUPPORT_UNICODE */
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
{
- register uint32_t ch = ecode[1];
- c = *eptr++;
- if (ch == c || (op == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == c))
- RRETURN(MATCH_NOMATCH);
- ecode += 2;
+ VSPACE_CASES: break;
+ default: RRETURN(MATCH_NOMATCH);
}
+ Fecode++;
break;
- /* Match a negated single one-byte character repeatedly. This is almost a
- repeat of the code for a repeated single character, but I haven't found a
- nice way of commoning these up that doesn't require a test of the
- positive/negative option for each character match. Maybe that wouldn't add
- very much to the time taken, but character matching *is* what this is all
- about... */
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- min = max = GET2(ecode, 1);
- ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
+#ifdef SUPPORT_UNICODE
- case OP_NOTUPTO:
- case OP_NOTUPTOI:
- case OP_NOTMINUPTO:
- case OP_NOTMINUPTOI:
- min = 0;
- max = GET2(ecode, 1);
- minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
- ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
+ /* ===================================================================== */
+ /* Check the next character by Unicode property. We will get here only
+ if the support is in the binary; otherwise a compile-time error occurs. */
- case OP_NOTPOSSTAR:
- case OP_NOTPOSSTARI:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
- goto REPEATNOTCHAR;
+ case OP_PROP:
+ case OP_NOTPROP:
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(fc, Feptr);
+ {
+ const uint32_t *cp;
+ const ucd_record *prop = GET_UCD(fc);
- case OP_NOTPOSPLUS:
- case OP_NOTPOSPLUSI:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
- goto REPEATNOTCHAR;
+ switch(Fecode[1])
+ {
+ case PT_ANY:
+ if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
- case OP_NOTPOSQUERY:
- case OP_NOTPOSQUERYI:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
- goto REPEATNOTCHAR;
+ case PT_LAMP:
+ if ((prop->chartype == ucp_Lu ||
+ prop->chartype == ucp_Ll ||
+ prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- case OP_NOTPOSUPTO:
- case OP_NOTPOSUPTOI:
- possessive = TRUE;
- min = 0;
- max = GET2(ecode, 1);
- ecode += 1 + IMM2_SIZE;
- goto REPEATNOTCHAR;
+ case PT_GC:
+ if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- case OP_NOTSTAR:
- case OP_NOTSTARI:
- case OP_NOTMINSTAR:
- case OP_NOTMINSTARI:
- case OP_NOTPLUS:
- case OP_NOTPLUSI:
- case OP_NOTMINPLUS:
- case OP_NOTMINPLUSI:
- case OP_NOTQUERY:
- case OP_NOTQUERYI:
- case OP_NOTMINQUERY:
- case OP_NOTMINQUERYI:
- c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
+ case PT_PC:
+ if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- /* Common code for all repeated single-byte matches. */
+ case PT_SC:
+ if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- REPEATNOTCHAR:
- GETCHARINCTEST(fc, ecode);
+ /* These are specials */
- /* The code is duplicated for the caseless and caseful cases, for speed,
- since matching characters is likely to be quite common. First, ensure the
- minimum number of matches are present. If min = max, continue at the same
- level without recursing. Otherwise, if minimizing, keep trying the rest of
- the expression and advancing one matching character if failing, up to the
- maximum. Alternatively, if maximizing, find the maximum number of
- characters and work backwards. */
+ case PT_ALNUM:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- if (op >= OP_NOTSTARI) /* Caseless */
- {
-#ifdef SUPPORT_UNICODE
- if (utf && fc > 127)
- foc = UCD_OTHERCASE(fc);
- else
-#endif /* SUPPORT_UNICODE */
- foc = TABLE_GET(fc, mb->fcc, fc);
+ /* Perl space used to exclude VT, but from Perl 5.18 it is included,
+ which means that Perl space and POSIX space are now identical. PCRE
+ was changed at release 8.34. */
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- register uint32_t d;
- for (i = 1; i <= min; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINC(d, eptr);
- if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif /* SUPPORT_UNICODE */
- /* Not UTF mode */
- {
- for (i = 1; i <= min; i++)
+ case PT_SPACE: /* Perl space */
+ case PT_PXSPACE: /* POSIX space */
+ switch(fc)
{
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
- eptr++;
+ HSPACE_CASES:
+ VSPACE_CASES:
+ if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+
+ default:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
+ (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
+ break;
}
- }
+ break;
- if (min == max) continue;
+ case PT_WORD:
+ if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+ PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
+ fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- if (minimize)
- {
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- register uint32_t d;
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM28);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINC(d, eptr);
- if (fc == d || (uint32_t)foc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif /*SUPPORT_UNICODE */
- /* Not UTF mode */
+ case PT_CLIST:
+ cp = PRIV(ucd_caseless_sets) + Fecode[2];
+ for (;;)
{
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM29);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
- eptr++;
- }
+ if (fc < *cp)
+ { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
+ if (fc == *cp++)
+ { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
}
- /* Control never gets here */
- }
-
- /* Maximize case */
-
- else
- {
- pp = eptr;
+ break;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- register uint32_t d;
- for (i = min; i < max; i++)
- {
- int len = 1;
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- GETCHARLEN(d, eptr, len);
- if (fc == d || (uint32_t)foc == d) break;
- eptr += len;
- }
- if (possessive) continue; /* No backtracking */
+ case PT_UCNC:
+ if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
+ fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
+ fc >= 0xe000) == (Fop == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
- /* After \C in UTF mode, pp might be in the middle of a Unicode
- character. Use <= pp to ensure backtracking doesn't go too far. */
+ /* This should never occur */
- for(;;)
- {
- if (eptr <= pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- BACKCHAR(eptr);
- }
- }
- else
-#endif /* SUPPORT_UNICODE */
- /* Not UTF mode */
- {
- for (i = min; i < max; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc == *eptr || foc == *eptr) break;
- eptr++;
- }
- if (possessive) continue; /* No backtracking */
- for (;;)
- {
- if (eptr == pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM31);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- }
- }
- /* Control never gets here */
+ default:
+ return PCRE2_ERROR_INTERNAL;
}
+
+ Fecode += 3;
}
+ break;
- /* Caseful comparisons */
+ /* ===================================================================== */
+ /* Match an extended Unicode sequence. We will get here only if the support
+ is in the binary; otherwise a compile-time error occurs. */
+
+ case OP_EXTUNI:
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
else
{
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- register uint32_t d;
- for (i = 1; i <= min; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINC(d, eptr);
- if (fc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
-#endif
- /* Not UTF mode */
+ int lgb, rgb;
+ GETCHARINCTEST(fc, Feptr);
+ lgb = UCD_GRAPHBREAK(fc);
+ while (Feptr < mb->end_subject)
{
- for (i = 1; i <= min; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
- }
- }
+ int len = 1;
+ if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
+ rgb = UCD_GRAPHBREAK(fc);
+ if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- if (min == max) continue;
+ /* Not breaking between Regional Indicators is allowed only if there
+ are an even number of preceding RIs. */
- if (minimize)
- {
-#ifdef SUPPORT_UNICODE
- if (utf)
+ if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
{
- register uint32_t d;
- for (fi = min;; fi++)
- {
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM32);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- GETCHARINC(d, eptr);
- if (fc == d) RRETURN(MATCH_NOMATCH);
- }
- }
- else
+ int ricount = 0;
+ PCRE2_SPTR bptr = Feptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
#endif
- /* Not UTF mode */
- {
- for (fi = min;; fi++)
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM33);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
{
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
+ BACKCHAR(bptr);
+ GETCHAR(fc, bptr);
}
- if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
+ else
+#endif
+ fc = *bptr;
+ if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
+ ricount++;
}
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
}
- /* Control never gets here */
- }
- /* Maximize case */
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
- else
- {
- pp = eptr;
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
-#ifdef SUPPORT_UNICODE
- if (utf)
- {
- register uint32_t d;
- for (i = min; i < max; i++)
- {
- int len = 1;
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- GETCHARLEN(d, eptr, len);
- if (fc == d) break;
- eptr += len;
- }
- if (possessive) continue; /* No backtracking */
-
- /* After \C in UTF mode, pp might be in the middle of a Unicode
- character. Use <= pp to ensure backtracking doesn't go too far. */
-
- for(;;)
- {
- if (eptr <= pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- BACKCHAR(eptr);
- }
- }
- else
-#endif
- /* Not UTF mode */
- {
- for (i = min; i < max; i++)
- {
- if (eptr >= mb->end_subject)
- {
- SCHECK_PARTIAL();
- break;
- }
- if (fc == *eptr) break;
- eptr++;
- }
- if (possessive) continue; /* No backtracking */
- for (;;)
- {
- if (eptr == pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM35);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- }
- }
- /* Control never gets here */
+ Feptr += len;
}
}
- /* Control never gets here */
+ CHECK_PARTIAL();
+ Fecode++;
+ break;
- /* Match a single character type repeatedly; several different opcodes
- share code. This is very similar to the code for single characters, but we
- repeat it in the interests of efficiency. */
+#endif /* SUPPORT_UNICODE */
+
+
+ /* ===================================================================== */
+ /* Match a single character type repeatedly. Note that the property type
+ does not need to be in a stack frame as it not used within an RMATCH()
+ loop. */
+
+#define Lstart_eptr F->temp_sptr[0]
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Lctype F->temp_32[2]
+#define Lpropvalue F->temp_32[3]
case OP_TYPEEXACT:
- min = max = GET2(ecode, 1);
- minimize = TRUE;
- ecode += 1 + IMM2_SIZE;
+ Lmin = Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
goto REPEATTYPE;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
- min = 0;
- max = GET2(ecode, 1);
- minimize = *ecode == OP_TYPEMINUPTO;
- ecode += 1 + IMM2_SIZE;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
+ Fecode += 1 + IMM2_SIZE;
goto REPEATTYPE;
case OP_TYPEPOSSTAR:
- possessive = TRUE;
- min = 0;
- max = INT_MAX;
- ecode++;
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = UINT32_MAX;
+ Fecode++;
goto REPEATTYPE;
case OP_TYPEPOSPLUS:
- possessive = TRUE;
- min = 1;
- max = INT_MAX;
- ecode++;
+ reptype = REPTYPE_POS;
+ Lmin = 1;
+ Lmax = UINT32_MAX;
+ Fecode++;
goto REPEATTYPE;
case OP_TYPEPOSQUERY:
- possessive = TRUE;
- min = 0;
- max = 1;
- ecode++;
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = 1;
+ Fecode++;
goto REPEATTYPE;
case OP_TYPEPOSUPTO:
- possessive = TRUE;
- min = 0;
- max = GET2(ecode, 1);
- ecode += 1 + IMM2_SIZE;
+ reptype = REPTYPE_POS;
+ Lmin = 0;
+ Lmax = GET2(Fecode, 1);
+ Fecode += 1 + IMM2_SIZE;
goto REPEATTYPE;
case OP_TYPESTAR:
@@ -4216,127 +2555,122 @@ for (;;)
case OP_TYPEMINPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
- c = *ecode++ - OP_TYPESTAR;
- minimize = (c & 1) != 0;
- min = rep_min[c]; /* Pick up values from tables; */
- max = rep_max[c]; /* zero for max => infinity */
- if (max == 0) max = INT_MAX;
+ fc = *Fecode++ - OP_TYPESTAR;
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
- /* Common code for all repeated single character type matches. Note that
- in UTF-8 mode, '.' matches a character of any length, but for the other
- character types, the valid characters are all one-byte long. */
+ /* Common code for all repeated character type matches. */
REPEATTYPE:
- ctype = *ecode++; /* Code for the character type */
+ Lctype = *Fecode++; /* Code for the character type */
#ifdef SUPPORT_UNICODE
- if (ctype == OP_PROP || ctype == OP_NOTPROP)
+ if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
{
- prop_fail_result = ctype == OP_NOTPROP;
- prop_type = *ecode++;
- prop_value = *ecode++;
+ proptype = *Fecode++;
+ Lpropvalue = *Fecode++;
}
- else prop_type = -1;
+ else proptype = -1;
#endif
/* First, ensure the minimum number of matches are present. Use inline
code for maximizing the speed, and do the type test once at the start
- (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
- is tidier. Also separate the UCP code, which can be the same for both UTF-8
- and single-bytes. */
+ (i.e. keep it out of the loop). The code for UTF mode is separated out for
+ tidiness, except for Unicode property tests. */
- if (min > 0)
+ if (Lmin > 0)
{
#ifdef SUPPORT_UNICODE
- if (prop_type >= 0)
+ if (proptype >= 0) /* Property tests in all modes */
{
- switch(prop_type)
+ switch(proptype)
{
case PT_ANY:
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
- for (i = 1; i <= min; i++)
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
+ GETCHARINCTEST(fc, Feptr);
}
break;
case PT_LAMP:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
int chartype;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- chartype = UCD_CHARTYPE(c);
+ GETCHARINCTEST(fc, Feptr);
+ chartype = UCD_CHARTYPE(fc);
if ((chartype == ucp_Lu ||
chartype == ucp_Ll ||
- chartype == ucp_Lt) == prop_fail_result)
+ chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_GC:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_PC:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_SC:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_ALNUM:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
int category;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- category = UCD_CATEGORY(c);
- if ((category == ucp_L || category == ucp_N) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ category = UCD_CATEGORY(fc);
+ if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
@@ -4347,23 +2681,23 @@ for (;;)
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
{
HSPACE_CASES:
VSPACE_CASES:
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
break;
default:
- if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
}
@@ -4371,55 +2705,61 @@ for (;;)
break;
case PT_WORD:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
int category;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- category = UCD_CATEGORY(c);
- if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
- == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ category = UCD_CATEGORY(fc);
+ if ((category == ucp_L || category == ucp_N ||
+ fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_CLIST:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
const uint32_t *cp;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- cp = PRIV(ucd_caseless_sets) + prop_value;
+ GETCHARINCTEST(fc, Feptr);
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
- if (c < *cp)
- { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
- if (c == *cp++)
- { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
+ if (fc < *cp)
+ {
+ if (Lctype == OP_NOTPROP) break;
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (fc == *cp++)
+ {
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+ }
}
}
break;
case PT_UCNC:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
- c >= 0xe000) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
+ fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
+ fc >= 0xe000) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
@@ -4427,18 +2767,18 @@ for (;;)
/* This should not occur */
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
}
/* Match extended Unicode sequences. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */
- else if (ctype == OP_EXTUNI)
+ else if (Lctype == OP_EXTUNI)
{
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
@@ -4446,16 +2786,54 @@ for (;;)
else
{
int lgb, rgb;
- GETCHARINCTEST(c, eptr);
- lgb = UCD_GRAPHBREAK(c);
- while (eptr < mb->end_subject)
+ GETCHARINCTEST(fc, Feptr);
+ lgb = UCD_GRAPHBREAK(fc);
+ while (Feptr < mb->end_subject)
{
int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- rgb = UCD_GRAPHBREAK(c);
+ if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
+ rgb = UCD_GRAPHBREAK(fc);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- lgb = rgb;
- eptr += len;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = Feptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(fc, bptr);
+ }
+ else
+#endif
+ fc = *bptr;
+ if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
+ Feptr += len;
}
}
CHECK_PARTIAL();
@@ -4465,67 +2843,67 @@ for (;;)
else
#endif /* SUPPORT_UNICODE */
-/* Handle all other cases when the coding is UTF-8 */
+/* Handle all other cases in UTF mode */
#ifdef SUPPORT_UNICODE
- if (utf) switch(ctype)
+ if (utf) switch(Lctype)
{
case OP_ANY:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
+ if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
if (mb->partial != 0 &&
- eptr + 1 >= mb->end_subject &&
+ Feptr + 1 >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
+ UCHAR21(Feptr) == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
break;
case OP_ALLANY:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
break;
case OP_ANYBYTE:
- if (eptr > mb->end_subject - min) RRETURN(MATCH_NOMATCH);
- eptr += min;
+ if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
+ Feptr += Lmin;
break;
case OP_ANYNL:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
+ GETCHARINC(fc, Feptr);
+ switch(fc)
{
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
+ if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
break;
case CHAR_LF:
@@ -4545,49 +2923,49 @@ for (;;)
break;
case OP_NOT_HSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
+ GETCHARINC(fc, Feptr);
+ switch(fc)
{
- HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
+ HSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
}
}
break;
case OP_HSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
+ GETCHARINC(fc, Feptr);
+ switch(fc)
{
- HSPACE_CASES: break; /* Byte and multibyte cases */
+ HSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
}
}
break;
case OP_NOT_VSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
+ GETCHARINC(fc, Feptr);
+ switch(fc)
{
VSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
@@ -4596,15 +2974,15 @@ for (;;)
break;
case OP_VSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- switch(c)
+ GETCHARINC(fc, Feptr);
+ switch(fc)
{
VSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
@@ -4613,170 +2991,174 @@ for (;;)
break;
case OP_NOT_DIGIT:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINC(c, eptr);
- if (c < 128 && (mb->ctypes[c] & ctype_digit) != 0)
+ GETCHARINC(fc, Feptr);
+ if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
RRETURN(MATCH_NOMATCH);
}
break;
case OP_DIGIT:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
uint32_t cc;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = UCHAR21(eptr);
+ cc = UCHAR21(Feptr);
if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
+ Feptr++;
+ /* No need to skip more code units - we know it has only one. */
}
break;
case OP_NOT_WHITESPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
uint32_t cc;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = UCHAR21(eptr);
+ cc = UCHAR21(Feptr);
if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
break;
case OP_WHITESPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
uint32_t cc;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = UCHAR21(eptr);
+ cc = UCHAR21(Feptr);
if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
+ Feptr++;
+ /* No need to skip more code units - we know it has only one. */
}
break;
case OP_NOT_WORDCHAR:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
uint32_t cc;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = UCHAR21(eptr);
+ cc = UCHAR21(Feptr);
if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
break;
case OP_WORDCHAR:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
uint32_t cc;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- cc = UCHAR21(eptr);
+ cc = UCHAR21(Feptr);
if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
- /* No need to skip more bytes - we know it's a 1-byte character */
+ Feptr++;
+ /* No need to skip more code units - we know it has only one. */
}
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
- } /* End switch(ctype) */
+ return PCRE2_ERROR_INTERNAL;
+ } /* End switch(Lctype) */
else
#endif /* SUPPORT_UNICODE */
- /* Code for the non-UTF-8 case for minimum matching of operators other
+ /* Code for the non-UTF case for minimum matching of operators other
than OP_PROP and OP_NOTPROP. */
- switch(ctype)
+ switch(Lctype)
{
case OP_ANY:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
+ if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
if (mb->partial != 0 &&
- eptr + 1 >= mb->end_subject &&
+ Feptr + 1 >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ *Feptr == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
- eptr++;
+ Feptr++;
}
break;
case OP_ALLANY:
- if (eptr > mb->end_subject - min)
+ if (Feptr > mb->end_subject - Lmin)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- eptr += min;
- break;
-
- case OP_ANYBYTE:
- if (eptr > mb->end_subject - min)
- {
- SCHECK_PARTIAL();
- RRETURN(MATCH_NOMATCH);
- }
- eptr += min;
+ Feptr += Lmin;
break;
+ /* This OP_ANYBYTE case will never be reached because \C gets turned
+ into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
+ reports don't complain about it's never being used. */
+
+/* case OP_ANYBYTE:
+* if (Feptr > mb->end_subject - Lmin)
+* {
+* SCHECK_PARTIAL();
+* RRETURN(MATCH_NOMATCH);
+* }
+* Feptr += Lmin;
+* break;
+*/
case OP_ANYNL:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
+ switch(*Feptr++)
{
default: RRETURN(MATCH_NOMATCH);
case CHAR_CR:
- if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
+ if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
break;
case CHAR_LF:
@@ -4796,14 +3178,14 @@ for (;;)
break;
case OP_NOT_HSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
+ switch(*Feptr++)
{
default: break;
HSPACE_BYTE_CASES:
@@ -4816,14 +3198,14 @@ for (;;)
break;
case OP_HSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
+ switch(*Feptr++)
{
default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
@@ -4836,14 +3218,14 @@ for (;;)
break;
case OP_NOT_VSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
+ switch(*Feptr++)
{
VSPACE_BYTE_CASES:
#if PCRE2_CODE_UNIT_WIDTH != 8
@@ -4856,14 +3238,14 @@ for (;;)
break;
case OP_VSPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- switch(*eptr++)
+ switch(*Feptr++)
{
default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
@@ -4876,212 +3258,212 @@ for (;;)
break;
case OP_NOT_DIGIT:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0)
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
case OP_DIGIT:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0)
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
case OP_NOT_WHITESPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0)
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
case OP_WHITESPACE:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0)
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
case OP_NOT_WORDCHAR:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0)
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
case OP_WORDCHAR:
- for (i = 1; i <= min; i++)
+ for (i = 1; i <= Lmin; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0)
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
- eptr++;
+ Feptr++;
}
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
}
- /* If min = max, continue at the same level without recursing */
+ /* If Lmin = Lmax we are done. Continue with the main loop. */
- if (min == max) continue;
+ if (Lmin == Lmax) continue;
/* If minimizing, we have to test the rest of the pattern before each
- subsequent match. Again, separate the UTF-8 case for speed, and also
- separate the UCP cases. */
+ subsequent match. */
- if (minimize)
+ if (reptype == REPTYPE_MIN)
{
#ifdef SUPPORT_UNICODE
- if (prop_type >= 0)
+ if (proptype >= 0)
{
- switch(prop_type)
+ switch(proptype)
{
case PT_ANY:
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM36);
+ RMATCH(Fecode, RM208);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ GETCHARINCTEST(fc, Feptr);
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_LAMP:
- for (fi = min;; fi++)
+ for (;;)
{
int chartype;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM37);
+ RMATCH(Fecode, RM209);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- chartype = UCD_CHARTYPE(c);
+ GETCHARINCTEST(fc, Feptr);
+ chartype = UCD_CHARTYPE(fc);
if ((chartype == ucp_Lu ||
chartype == ucp_Ll ||
- chartype == ucp_Lt) == prop_fail_result)
+ chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_GC:
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM38);
+ RMATCH(Fecode, RM210);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_PC:
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM39);
+ RMATCH(Fecode, RM211);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_SC:
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM40);
+ RMATCH(Fecode, RM212);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_ALNUM:
- for (fi = min;; fi++)
+ for (;;)
{
int category;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM59);
+ RMATCH(Fecode, RM213);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- category = UCD_CATEGORY(c);
- if ((category == ucp_L || category == ucp_N) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ category = UCD_CATEGORY(fc);
+ if ((category == ucp_L || category == ucp_N) ==
+ (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
@@ -5092,26 +3474,26 @@ for (;;)
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM61);
+ RMATCH(Fecode, RM214);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- switch(c)
+ GETCHARINCTEST(fc, Feptr);
+ switch(fc)
{
HSPACE_CASES:
VSPACE_CASES:
- if (prop_fail_result) RRETURN(MATCH_NOMATCH);
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
break;
default:
- if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
}
@@ -5119,87 +3501,92 @@ for (;;)
/* Control never gets here */
case PT_WORD:
- for (fi = min;; fi++)
+ for (;;)
{
int category;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM62);
+ RMATCH(Fecode, RM215);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- category = UCD_CATEGORY(c);
+ GETCHARINCTEST(fc, Feptr);
+ category = UCD_CATEGORY(fc);
if ((category == ucp_L ||
category == ucp_N ||
- c == CHAR_UNDERSCORE)
- == prop_fail_result)
+ fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_CLIST:
- for (fi = min;; fi++)
+ for (;;)
{
const uint32_t *cp;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM67);
+ RMATCH(Fecode, RM216);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- cp = PRIV(ucd_caseless_sets) + prop_value;
+ GETCHARINCTEST(fc, Feptr);
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
- if (c < *cp)
- { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
- if (c == *cp++)
- { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
+ if (fc < *cp)
+ {
+ if (Lctype == OP_NOTPROP) break;
+ RRETURN(MATCH_NOMATCH);
+ }
+ if (fc == *cp++)
+ {
+ if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+ break;
+ }
}
}
/* Control never gets here */
case PT_UCNC:
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM60);
+ RMATCH(Fecode, RM217);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- GETCHARINCTEST(c, eptr);
- if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
- c >= 0xe000) == prop_fail_result)
+ GETCHARINCTEST(fc, Feptr);
+ if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
+ fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
+ fc >= 0xe000) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
/* This should never occur */
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
}
/* Match extended Unicode sequences. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */
- else if (ctype == OP_EXTUNI)
+ else if (Lctype == OP_EXTUNI)
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM41);
+ RMATCH(Fecode, RM218);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
@@ -5207,16 +3594,54 @@ for (;;)
else
{
int lgb, rgb;
- GETCHARINCTEST(c, eptr);
- lgb = UCD_GRAPHBREAK(c);
- while (eptr < mb->end_subject)
+ GETCHARINCTEST(fc, Feptr);
+ lgb = UCD_GRAPHBREAK(fc);
+ while (Feptr < mb->end_subject)
{
int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- rgb = UCD_GRAPHBREAK(c);
+ if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
+ rgb = UCD_GRAPHBREAK(fc);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- lgb = rgb;
- eptr += len;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = Feptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(fc, bptr);
+ }
+ else
+#endif
+ fc = *bptr;
+ if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
+ Feptr += len;
}
}
CHECK_PARTIAL();
@@ -5225,33 +3650,34 @@ for (;;)
else
#endif /* SUPPORT_UNICODE */
+ /* UTF mode for non-property testing character types. */
+
#ifdef SUPPORT_UNICODE
if (utf)
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM42);
+ RMATCH(Fecode, RM219);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (ctype == OP_ANY && IS_NEWLINE(eptr))
- RRETURN(MATCH_NOMATCH);
- GETCHARINC(c, eptr);
- switch(ctype)
+ if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
+ GETCHARINC(fc, Feptr);
+ switch(Lctype)
{
case OP_ANY: /* This is the non-NL case */
if (mb->partial != 0 && /* Take care with CRLF partial */
- eptr >= mb->end_subject &&
+ Feptr >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- c == NLBLOCK->nl[0])
+ fc == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
break;
@@ -5260,11 +3686,12 @@ for (;;)
break;
case OP_ANYNL:
- switch(c)
+ switch(fc)
{
default: RRETURN(MATCH_NOMATCH);
+
case CHAR_CR:
- if (eptr < mb->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
+ if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
break;
case CHAR_LF:
@@ -5277,13 +3704,14 @@ for (;;)
case 0x2028:
case 0x2029:
#endif /* Not EBCDIC */
- if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
+ if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
+ RRETURN(MATCH_NOMATCH);
break;
}
break;
case OP_NOT_HSPACE:
- switch(c)
+ switch(fc)
{
HSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
@@ -5291,7 +3719,7 @@ for (;;)
break;
case OP_HSPACE:
- switch(c)
+ switch(fc)
{
HSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
@@ -5299,7 +3727,7 @@ for (;;)
break;
case OP_NOT_VSPACE:
- switch(c)
+ switch(fc)
{
VSPACE_CASES: RRETURN(MATCH_NOMATCH);
default: break;
@@ -5307,7 +3735,7 @@ for (;;)
break;
case OP_VSPACE:
- switch(c)
+ switch(fc)
{
VSPACE_CASES: break;
default: RRETURN(MATCH_NOMATCH);
@@ -5315,68 +3743,69 @@ for (;;)
break;
case OP_NOT_DIGIT:
- if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0)
+ if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
RRETURN(MATCH_NOMATCH);
break;
case OP_DIGIT:
- if (c >= 256 || (mb->ctypes[c] & ctype_digit) == 0)
+ if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_WHITESPACE:
- if (c < 256 && (mb->ctypes[c] & ctype_space) != 0)
+ if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
RRETURN(MATCH_NOMATCH);
break;
case OP_WHITESPACE:
- if (c >= 256 || (mb->ctypes[c] & ctype_space) == 0)
+ if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_WORDCHAR:
- if (c < 256 && (mb->ctypes[c] & ctype_word) != 0)
+ if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
RRETURN(MATCH_NOMATCH);
break;
case OP_WORDCHAR:
- if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0)
+ if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
RRETURN(MATCH_NOMATCH);
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
}
}
else
-#endif
+#endif /* SUPPORT_UNICODE */
+
/* Not UTF mode */
{
- for (fi = min;; fi++)
+ for (;;)
{
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM43);
+ RMATCH(Fecode, RM33);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (fi >= max) RRETURN(MATCH_NOMATCH);
- if (eptr >= mb->end_subject)
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
- if (ctype == OP_ANY && IS_NEWLINE(eptr))
+ if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
RRETURN(MATCH_NOMATCH);
- c = *eptr++;
- switch(ctype)
+ fc = *Feptr++;
+ switch(Lctype)
{
case OP_ANY: /* This is the non-NL case */
if (mb->partial != 0 && /* Take care with CRLF partial */
- eptr >= mb->end_subject &&
+ Feptr >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- c == NLBLOCK->nl[0])
+ fc == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
break;
@@ -5385,11 +3814,12 @@ for (;;)
break;
case OP_ANYNL:
- switch(c)
+ switch(fc)
{
default: RRETURN(MATCH_NOMATCH);
+
case CHAR_CR:
- if (eptr < mb->end_subject && *eptr == CHAR_LF) eptr++;
+ if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
break;
case CHAR_LF:
@@ -5402,13 +3832,14 @@ for (;;)
case 0x2028:
case 0x2029:
#endif
- if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
+ if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
+ RRETURN(MATCH_NOMATCH);
break;
}
break;
case OP_NOT_HSPACE:
- switch(c)
+ switch(fc)
{
default: break;
HSPACE_BYTE_CASES:
@@ -5420,7 +3851,7 @@ for (;;)
break;
case OP_HSPACE:
- switch(c)
+ switch(fc)
{
default: RRETURN(MATCH_NOMATCH);
HSPACE_BYTE_CASES:
@@ -5432,7 +3863,7 @@ for (;;)
break;
case OP_NOT_VSPACE:
- switch(c)
+ switch(fc)
{
default: break;
VSPACE_BYTE_CASES:
@@ -5444,7 +3875,7 @@ for (;;)
break;
case OP_VSPACE:
- switch(c)
+ switch(fc)
{
default: RRETURN(MATCH_NOMATCH);
VSPACE_BYTE_CASES:
@@ -5456,31 +3887,37 @@ for (;;)
break;
case OP_NOT_DIGIT:
- if (MAX_255(c) && (mb->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
+ if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
+ RRETURN(MATCH_NOMATCH);
break;
case OP_DIGIT:
- if (!MAX_255(c) || (mb->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
+ if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
+ RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_WHITESPACE:
- if (MAX_255(c) && (mb->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
+ if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
+ RRETURN(MATCH_NOMATCH);
break;
case OP_WHITESPACE:
- if (!MAX_255(c) || (mb->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
+ if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
+ RRETURN(MATCH_NOMATCH);
break;
case OP_NOT_WORDCHAR:
- if (MAX_255(c) && (mb->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
+ if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
+ RRETURN(MATCH_NOMATCH);
break;
case OP_WORDCHAR:
- if (!MAX_255(c) || (mb->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
+ if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
+ RRETURN(MATCH_NOMATCH);
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
}
}
@@ -5488,113 +3925,116 @@ for (;;)
}
/* If maximizing, it is worth using inline code for speed, doing the type
- test once at the start (i.e. keep it out of the loop). Again, keep the
- UTF-8 and UCP stuff separate. */
+ test once at the start (i.e. keep it out of the loop). */
else
{
- pp = eptr; /* Remember where we started */
+ Lstart_eptr = Feptr; /* Remember where we started */
#ifdef SUPPORT_UNICODE
- if (prop_type >= 0)
+ if (proptype >= 0)
{
- switch(prop_type)
+ switch(proptype)
{
case PT_ANY:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- if (prop_fail_result) break;
- eptr+= len;
+ GETCHARLENTEST(fc, Feptr, len);
+ if (Lctype == OP_NOTPROP) break;
+ Feptr+= len;
}
break;
case PT_LAMP:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int chartype;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- chartype = UCD_CHARTYPE(c);
+ GETCHARLENTEST(fc, Feptr, len);
+ chartype = UCD_CHARTYPE(fc);
if ((chartype == ucp_Lu ||
chartype == ucp_Ll ||
- chartype == ucp_Lt) == prop_fail_result)
+ chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
break;
- eptr+= len;
+ Feptr+= len;
}
break;
case PT_GC:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
+ GETCHARLENTEST(fc, Feptr, len);
+ if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+ break;
+ Feptr+= len;
}
break;
case PT_PC:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
+ GETCHARLENTEST(fc, Feptr, len);
+ if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+ break;
+ Feptr+= len;
}
break;
case PT_SC:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
- eptr+= len;
+ GETCHARLENTEST(fc, Feptr, len);
+ if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+ break;
+ Feptr+= len;
}
break;
case PT_ALNUM:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int category;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- category = UCD_CATEGORY(c);
- if ((category == ucp_L || category == ucp_N) == prop_fail_result)
+ GETCHARLENTEST(fc, Feptr, len);
+ category = UCD_CATEGORY(fc);
+ if ((category == ucp_L || category == ucp_N) ==
+ (Lctype == OP_NOTPROP))
break;
- eptr+= len;
+ Feptr+= len;
}
break;
@@ -5604,122 +4044,123 @@ for (;;)
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- switch(c)
+ GETCHARLENTEST(fc, Feptr, len);
+ switch(fc)
{
HSPACE_CASES:
VSPACE_CASES:
- if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
+ if (Lctype == OP_NOTPROP) goto ENDLOOP99; /* Break the loop */
break;
default:
- if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
+ if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
goto ENDLOOP99; /* Break the loop */
break;
}
- eptr+= len;
+ Feptr+= len;
}
ENDLOOP99:
break;
case PT_WORD:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int category;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- category = UCD_CATEGORY(c);
+ GETCHARLENTEST(fc, Feptr, len);
+ category = UCD_CATEGORY(fc);
if ((category == ucp_L || category == ucp_N ||
- c == CHAR_UNDERSCORE) == prop_fail_result)
+ fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
break;
- eptr+= len;
+ Feptr+= len;
}
break;
case PT_CLIST:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
const uint32_t *cp;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- cp = PRIV(ucd_caseless_sets) + prop_value;
+ GETCHARLENTEST(fc, Feptr, len);
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
- if (c < *cp)
- { if (prop_fail_result) break; else goto GOT_MAX; }
- if (c == *cp++)
- { if (prop_fail_result) goto GOT_MAX; else break; }
+ if (fc < *cp)
+ { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
+ if (fc == *cp++)
+ { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
}
- eptr += len;
+ Feptr += len;
}
GOT_MAX:
break;
case PT_UCNC:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLENTEST(c, eptr, len);
- if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
- c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
- c >= 0xe000) == prop_fail_result)
+ GETCHARLENTEST(fc, Feptr, len);
+ if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
+ fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
+ fc >= 0xe000) == (Lctype == OP_NOTPROP))
break;
- eptr += len;
+ Feptr += len;
}
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
- /* eptr is now past the end of the maximum run */
+ /* Feptr is now past the end of the maximum run */
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
- /* After \C in UTF mode, pp might be in the middle of a Unicode
- character. Use <= pp to ensure backtracking doesn't go too far. */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= pp to ensure backtracking doesn't go too far.
+ */
for(;;)
{
- if (eptr <= pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44);
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM222);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- if (utf) BACKCHAR(eptr);
+ Feptr--;
+ if (utf) BACKCHAR(Feptr);
}
}
/* Match extended Unicode grapheme clusters. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */
- else if (ctype == OP_EXTUNI)
+ else if (Lctype == OP_EXTUNI)
{
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
@@ -5727,63 +4168,101 @@ for (;;)
else
{
int lgb, rgb;
- GETCHARINCTEST(c, eptr);
- lgb = UCD_GRAPHBREAK(c);
- while (eptr < mb->end_subject)
+ GETCHARINCTEST(fc, Feptr);
+ lgb = UCD_GRAPHBREAK(fc);
+ while (Feptr < mb->end_subject)
{
int len = 1;
- if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
- rgb = UCD_GRAPHBREAK(c);
+ if (!utf) fc = *Feptr; else { GETCHARLEN(fc, Feptr, len); }
+ rgb = UCD_GRAPHBREAK(fc);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- lgb = rgb;
- eptr += len;
+
+ /* Not breaking between Regional Indicators is allowed only if
+ there are an even number of preceding RIs. */
+
+ if (lgb == ucp_gbRegionalIndicator &&
+ rgb == ucp_gbRegionalIndicator)
+ {
+ int ricount = 0;
+ PCRE2_SPTR bptr = Feptr - 1;
+#ifdef SUPPORT_UNICODE
+ if (utf) BACKCHAR(bptr);
+#endif
+ /* bptr is pointing to the left-hand character */
+
+ while (bptr > mb->start_subject)
+ {
+ bptr--;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ BACKCHAR(bptr);
+ GETCHAR(fc, bptr);
+ }
+ else
+#endif
+ fc = *bptr;
+ if (UCD_GRAPHBREAK(fc) != ucp_gbRegionalIndicator) break;
+ ricount++;
+ }
+ if ((ricount & 1) != 0) break; /* Grapheme break required */
+ }
+
+ /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
+ any number of Extend before a following E_Modifier. */
+
+ if (rgb != ucp_gbExtend ||
+ (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+ lgb = rgb;
+
+ Feptr += len;
}
}
CHECK_PARTIAL();
}
- /* eptr is now past the end of the maximum run */
+ /* Feptr is now past the end of the maximum run */
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
- /* We use <= pp rather than == pp to detect the start of the run while
- backtracking because the use of \C in UTF mode can cause BACKCHAR to
- move back past pp. This is just palliative; the use of \C in UTF mode
- is fraught with danger. */
+ /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
+ of the run while backtracking because the use of \C in UTF mode can
+ cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
+ the use of \C in UTF mode is fraught with danger. */
for(;;)
{
int lgb, rgb;
PCRE2_SPTR fptr;
- if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM45);
+ if (Feptr <= Lstart_eptr) break; /* At start of char run */
+ RMATCH(Fecode, RM220);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
/* Backtracking over an extended grapheme cluster involves inspecting
the previous two characters (if present) to see if a break is
permitted between them. */
- eptr--;
- if (!utf) c = *eptr; else
+ Feptr--;
+ if (!utf) fc = *Feptr; else
{
- BACKCHAR(eptr);
- GETCHAR(c, eptr);
+ BACKCHAR(Feptr);
+ GETCHAR(fc, Feptr);
}
- rgb = UCD_GRAPHBREAK(c);
+ rgb = UCD_GRAPHBREAK(fc);
for (;;)
{
- if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
- fptr = eptr - 1;
- if (!utf) c = *fptr; else
+ if (Feptr <= Lstart_eptr) break; /* At start of char run */
+ fptr = Feptr - 1;
+ if (!utf) fc = *fptr; else
{
BACKCHAR(fptr);
- GETCHAR(c, fptr);
+ GETCHAR(fc, fptr);
}
- lgb = UCD_GRAPHBREAK(c);
+ lgb = UCD_GRAPHBREAK(fc);
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
- eptr = fptr;
+ Feptr = fptr;
rgb = lgb;
}
}
@@ -5795,325 +4274,328 @@ for (;;)
#ifdef SUPPORT_UNICODE
if (utf)
{
- switch(ctype)
+ switch(Lctype)
{
case OP_ANY:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (IS_NEWLINE(eptr)) break;
+ if (IS_NEWLINE(Feptr)) break;
if (mb->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= mb->end_subject &&
+ Feptr + 1 >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- UCHAR21(eptr) == NLBLOCK->nl[0])
+ UCHAR21(Feptr) == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
break;
case OP_ALLANY:
- if (max < INT_MAX)
+ if (Lmax < UINT32_MAX)
{
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- eptr++;
- ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
+ Feptr++;
+ ACROSSCHAR(Feptr < mb->end_subject, *Feptr, Feptr++);
}
}
else
{
- eptr = mb->end_subject; /* Unlimited UTF-8 repeat */
+ Feptr = mb->end_subject; /* Unlimited UTF-8 repeat */
SCHECK_PARTIAL();
}
break;
- /* The byte case is the same as non-UTF8 */
+ /* The "byte" (i.e. "code unit") case is the same as non-UTF */
case OP_ANYBYTE:
- c = max - min;
- if (c > (uint32_t)(mb->end_subject - eptr))
+ fc = Lmax - Lmin;
+ if (fc > (uint32_t)(mb->end_subject - Feptr))
{
- eptr = mb->end_subject;
+ Feptr = mb->end_subject;
SCHECK_PARTIAL();
}
- else eptr += c;
+ else Feptr += fc;
break;
case OP_ANYNL:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c == CHAR_CR)
+ GETCHARLEN(fc, Feptr, len);
+ if (fc == CHAR_CR)
{
- if (++eptr >= mb->end_subject) break;
- if (UCHAR21(eptr) == CHAR_LF) eptr++;
+ if (++Feptr >= mb->end_subject) break;
+ if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
}
else
{
- if (c != CHAR_LF &&
+ if (fc != CHAR_LF &&
(mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
- (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
+ (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
#ifndef EBCDIC
- && c != 0x2028 && c != 0x2029
+ && fc != 0x2028 && fc != 0x2029
#endif /* Not EBCDIC */
)))
break;
- eptr += len;
+ Feptr += len;
}
}
break;
case OP_NOT_HSPACE:
case OP_HSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
BOOL gotspace;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- switch(c)
+ GETCHARLEN(fc, Feptr, len);
+ switch(fc)
{
HSPACE_CASES: gotspace = TRUE; break;
default: gotspace = FALSE; break;
}
- if (gotspace == (ctype == OP_NOT_HSPACE)) break;
- eptr += len;
+ if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
+ Feptr += len;
}
break;
case OP_NOT_VSPACE:
case OP_VSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
BOOL gotspace;
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- switch(c)
+ GETCHARLEN(fc, Feptr, len);
+ switch(fc)
{
VSPACE_CASES: gotspace = TRUE; break;
default: gotspace = FALSE; break;
}
- if (gotspace == (ctype == OP_NOT_VSPACE)) break;
- eptr += len;
+ if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
+ Feptr += len;
}
break;
case OP_NOT_DIGIT:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (mb->ctypes[c] & ctype_digit) != 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
+ Feptr+= len;
}
break;
case OP_DIGIT:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 ||(mb->ctypes[c] & ctype_digit) == 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
+ Feptr+= len;
}
break;
case OP_NOT_WHITESPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (mb->ctypes[c] & ctype_space) != 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
+ Feptr+= len;
}
break;
case OP_WHITESPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 ||(mb->ctypes[c] & ctype_space) == 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
+ Feptr+= len;
}
break;
case OP_NOT_WORDCHAR:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c < 256 && (mb->ctypes[c] & ctype_word) != 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
+ Feptr+= len;
}
break;
case OP_WORDCHAR:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
int len = 1;
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- GETCHARLEN(c, eptr, len);
- if (c >= 256 || (mb->ctypes[c] & ctype_word) == 0) break;
- eptr+= len;
+ GETCHARLEN(fc, Feptr, len);
+ if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
+ Feptr+= len;
}
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
- /* After \C in UTF mode, pp might be in the middle of a Unicode
- character. Use <= pp to ensure backtracking doesn't go too far. */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
+ too far. */
for(;;)
{
- if (eptr <= pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46);
+ if (Feptr <= Lstart_eptr) break;
+ RMATCH(Fecode, RM221);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- BACKCHAR(eptr);
- if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
- UCHAR21(eptr - 1) == CHAR_CR) eptr--;
+ Feptr--;
+ BACKCHAR(Feptr);
+ if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
+ UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
+ Feptr--;
}
}
else
#endif /* SUPPORT_UNICODE */
+
/* Not UTF mode */
{
- switch(ctype)
+ switch(Lctype)
{
case OP_ANY:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (IS_NEWLINE(eptr)) break;
+ if (IS_NEWLINE(Feptr)) break;
if (mb->partial != 0 && /* Take care with CRLF partial */
- eptr + 1 >= mb->end_subject &&
+ Feptr + 1 >= mb->end_subject &&
NLBLOCK->nltype == NLTYPE_FIXED &&
NLBLOCK->nllen == 2 &&
- *eptr == NLBLOCK->nl[0])
+ *Feptr == NLBLOCK->nl[0])
{
mb->hitend = TRUE;
- if (mb->partial > 1) RRETURN(PCRE2_ERROR_PARTIAL);
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
- eptr++;
+ Feptr++;
}
break;
case OP_ALLANY:
case OP_ANYBYTE:
- c = max - min;
- if (c > (uint32_t)(mb->end_subject - eptr))
+ fc = Lmax - Lmin;
+ if (fc > (uint32_t)(mb->end_subject - Feptr))
{
- eptr = mb->end_subject;
+ Feptr = mb->end_subject;
SCHECK_PARTIAL();
}
- else eptr += c;
+ else Feptr += fc;
break;
case OP_ANYNL:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- c = *eptr;
- if (c == CHAR_CR)
+ fc = *Feptr;
+ if (fc == CHAR_CR)
{
- if (++eptr >= mb->end_subject) break;
- if (*eptr == CHAR_LF) eptr++;
+ if (++Feptr >= mb->end_subject) break;
+ if (*Feptr == CHAR_LF) Feptr++;
}
else
{
- if (c != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
- (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
+ if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
+ (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
#if PCRE2_CODE_UNIT_WIDTH != 8
- && c != 0x2028 && c != 0x2029
+ && fc != 0x2028 && fc != 0x2029
#endif
))) break;
- eptr++;
+ Feptr++;
}
}
break;
case OP_NOT_HSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- switch(*eptr)
+ switch(*Feptr)
{
- default: eptr++; break;
+ default: Feptr++; break;
HSPACE_BYTE_CASES:
#if PCRE2_CODE_UNIT_WIDTH != 8
HSPACE_MULTIBYTE_CASES:
@@ -6125,37 +4607,37 @@ for (;;)
break;
case OP_HSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- switch(*eptr)
+ switch(*Feptr)
{
default: goto ENDLOOP01;
HSPACE_BYTE_CASES:
#if PCRE2_CODE_UNIT_WIDTH != 8
HSPACE_MULTIBYTE_CASES:
#endif
- eptr++; break;
+ Feptr++; break;
}
}
ENDLOOP01:
break;
case OP_NOT_VSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- switch(*eptr)
+ switch(*Feptr)
{
- default: eptr++; break;
+ default: Feptr++; break;
VSPACE_BYTE_CASES:
#if PCRE2_CODE_UNIT_WIDTH != 8
VSPACE_MULTIBYTE_CASES:
@@ -6167,251 +4649,1493 @@ for (;;)
break;
case OP_VSPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- switch(*eptr)
+ switch(*Feptr)
{
default: goto ENDLOOP03;
VSPACE_BYTE_CASES:
#if PCRE2_CODE_UNIT_WIDTH != 8
VSPACE_MULTIBYTE_CASES:
#endif
- eptr++; break;
+ Feptr++; break;
}
}
ENDLOOP03:
break;
case OP_NOT_DIGIT:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_digit) != 0) break;
- eptr++;
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
+ break;
+ Feptr++;
}
break;
case OP_DIGIT:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_digit) == 0) break;
- eptr++;
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
+ break;
+ Feptr++;
}
break;
case OP_NOT_WHITESPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_space) != 0) break;
- eptr++;
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
+ break;
+ Feptr++;
}
break;
case OP_WHITESPACE:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_space) == 0) break;
- eptr++;
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
+ break;
+ Feptr++;
}
break;
case OP_NOT_WORDCHAR:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (MAX_255(*eptr) && (mb->ctypes[*eptr] & ctype_word) != 0) break;
- eptr++;
+ if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
+ break;
+ Feptr++;
}
break;
case OP_WORDCHAR:
- for (i = min; i < max; i++)
+ for (i = Lmin; i < Lmax; i++)
{
- if (eptr >= mb->end_subject)
+ if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
- if (!MAX_255(*eptr) || (mb->ctypes[*eptr] & ctype_word) == 0) break;
- eptr++;
+ if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
+ break;
+ Feptr++;
}
break;
default:
- RRETURN(PCRE2_ERROR_INTERNAL);
+ return PCRE2_ERROR_INTERNAL;
}
- if (possessive) continue; /* No backtracking */
+ if (reptype == REPTYPE_POS) continue; /* No backtracking */
+
for (;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
- RMATCH(eptr, ecode, offset_top, mb, eptrb, RM47);
+ if (Feptr == Lstart_eptr) break;
+ RMATCH(Fecode, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- eptr--;
- if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
- eptr[-1] == CHAR_CR) eptr--;
+ Feptr--;
+ if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
+ Feptr[-1] == CHAR_CR) Feptr--;
+ }
+ }
+ }
+ break; /* End of repeat character type processing */
+
+#undef Lstart_eptr
+#undef Lmin
+#undef Lmax
+#undef Lctype
+#undef Lpropvalue
+
+
+ /* ===================================================================== */
+ /* Match a back reference, possibly repeatedly. Look past the end of the
+ item to see if there is repeat information following. The OP_REF and
+ OP_REFI opcodes are used for a reference to a numbered group or to a
+ non-duplicated named group. For a duplicated named group, OP_DNREF and
+ OP_DNREFI are used. In this case we must scan the list of groups to which
+ the name refers, and use the first one that is set. */
+
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Lcaseless F->temp_32[2]
+#define Lstart F->temp_sptr[0]
+#define Loffset F->temp_size
+
+ case OP_DNREF:
+ case OP_DNREFI:
+ Lcaseless = (Fop == OP_DNREFI);
+ {
+ int count = GET2(Fecode, 1+IMM2_SIZE);
+ PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
+ Fecode += 1 + 2*IMM2_SIZE;
+
+ while (count-- > 0)
+ {
+ Loffset = (GET2(slot, 0) << 1) - 2;
+ if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
+ slot += mb->name_entry_size;
+ }
+ }
+ goto REF_REPEAT;
+
+ case OP_REF:
+ case OP_REFI:
+ Lcaseless = (Fop == OP_REFI);
+ Loffset = (GET2(Fecode, 1) << 1) - 2;
+ Fecode += 1 + IMM2_SIZE;
+
+ /* Set up for repetition, or handle the non-repeated case. The maximum and
+ minimum must be in the heap frame, but as they are short-term values, we
+ use temporary fields. */
+
+ REF_REPEAT:
+ switch (*Fecode)
+ {
+ case OP_CRSTAR:
+ case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRQUERY:
+ case OP_CRMINQUERY:
+ fc = *Fecode++ - OP_CRSTAR;
+ Lmin = rep_min[fc];
+ Lmax = rep_max[fc];
+ reptype = rep_typ[fc];
+ break;
+
+ case OP_CRRANGE:
+ case OP_CRMINRANGE:
+ Lmin = GET2(Fecode, 1);
+ Lmax = GET2(Fecode, 1 + IMM2_SIZE);
+ reptype = rep_typ[*Fecode - OP_CRSTAR];
+ if (Lmax == 0) Lmax = UINT32_MAX; /* Max 0 => infinity */
+ Fecode += 1 + 2 * IMM2_SIZE;
+ break;
+
+ default: /* No repeat follows */
+ {
+ rrc = match_ref(Loffset, Lcaseless, F, mb, &length);
+ if (rrc != 0)
+ {
+ if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
}
}
+ Feptr += length;
+ continue; /* With the main loop */
+ }
+
+ /* Handle repeated back references. If a set group has length zero, just
+ continue with the main loop, because it matches however many times. For an
+ unset reference, if the minimum is zero, we can also just continue. We can
+ also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
+ group behave as a zero-length group. For any other unset cases, carrying
+ on will result in NOMATCH. */
+ if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
+ {
+ if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
+ }
+ else /* Group is not set */
+ {
+ if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
+ continue;
+ }
+
+ /* First, ensure the minimum number of matches are present. */
+
+ for (i = 1; i <= Lmin; i++)
+ {
+ PCRE2_SIZE slength;
+ rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
+ if (rrc != 0)
+ {
+ if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ Feptr += slength;
+ }
+
+ /* If min = max, we are done. They are not both allowed to be zero. */
+
+ if (Lmin == Lmax) continue;
+
+ /* If minimizing, keep trying and advancing the pointer. */
+
+ if (reptype == REPTYPE_MIN)
+ {
+ for (;;)
+ {
+ PCRE2_SIZE slength;
+ RMATCH(Fecode, RM20);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+ rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
+ if (rrc != 0)
+ {
+ if (rrc > 0) Feptr = mb->end_subject; /* Partial match */
+ CHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ Feptr += slength;
+ }
/* Control never gets here */
}
- /* There's been some horrible disaster. Arrival here can only mean there is
- something seriously wrong in the code above or the OP_xxx definitions. */
+ /* If maximizing, find the longest string and work backwards, as long as
+ the matched lengths for each iteration are the same. */
- default:
- RRETURN(PCRE2_ERROR_INTERNAL);
- }
+ else
+ {
+ BOOL samelengths = TRUE;
+ Lstart = Feptr; /* Starting position */
+ Flength = Fovector[Loffset+1] - Fovector[Loffset];
- /* Do not stick any code in here without much thought; it is assumed
- that "continue" in the code above comes out to here to repeat the main
- loop. */
+ for (i = Lmin; i < Lmax; i++)
+ {
+ PCRE2_SIZE slength;
+ rrc = match_ref(Loffset, Lcaseless, F, mb, &slength);
+ if (rrc != 0)
+ {
+ /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
+ the soft partial matching case. */
- } /* End of main loop */
-/* Control never reaches here */
+ if (rrc > 0 && mb->partial != 0 &&
+ mb->end_subject > mb->start_used_ptr)
+ {
+ mb->hitend = TRUE;
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
+ }
+ break;
+ }
+ if (slength != Flength) samelengths = FALSE;
+ Feptr += slength;
+ }
-/* When compiling to use the heap rather than the stack for recursive calls to
-match(), the RRETURN() macro jumps here. The number that is saved in
-frame->Xwhere indicates which label we actually want to return to. */
+ /* If the length matched for each repetition is the same as the length of
+ the captured group, we can easily work backwards. This is the normal
+ case. However, in caseless UTF-8 mode there are pairs of case-equivalent
+ characters whose lengths (in terms of code units) differ. However, this
+ is very rare, so we handle it by re-matching fewer and fewer times. */
-#ifdef HEAP_MATCH_RECURSE
-#define LBL(val) case val: goto L_RM##val;
-HEAP_RETURN:
-switch (frame->Xwhere)
- {
- LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
- LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
- LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
- LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
- LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
- LBL(65) LBL(66) LBL(68)
-#ifdef SUPPORT_WIDE_CHARS
- LBL(20) LBL(21)
+ if (samelengths)
+ {
+ while (Feptr >= Lstart)
+ {
+ RMATCH(Fecode, RM21);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Feptr -= Flength;
+ }
+ }
+
+ /* The rare case of non-matching lengths. Re-scan the repetition for each
+ iteration. We know that match_ref() will succeed every time. */
+
+ else
+ {
+ Lmax = i;
+ for (;;)
+ {
+ RMATCH(Fecode, RM22);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Feptr == Lstart) break; /* Failed after minimal repetition */
+ Feptr = Lstart;
+ Lmax--;
+ for (i = Lmin; i < Lmax; i++)
+ {
+ PCRE2_SIZE slength;
+ (void)match_ref(Loffset, Lcaseless, F, mb, &slength);
+ Feptr += slength;
+ }
+ }
+ }
+
+ RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
+
+#undef Lcaseless
+#undef Lmin
+#undef Lmax
+#undef Lstart
+#undef Loffset
+
+
+
+/* ========================================================================= */
+/* Opcodes for the start of various parenthesized items */
+/* ========================================================================= */
+
+ /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
+ (*THEN) is within the current branch by comparing the address of OP_THEN
+ that is passed back with the end of the branch. If (*THEN) is within the
+ current branch, and the branch is one of two or more alternatives (it
+ either starts or ends with OP_ALT), we have reached the limit of THEN's
+ action, so convert the return code to NOMATCH, which will cause normal
+ backtracking to happen from now on. Otherwise, THEN is passed back to an
+ outer alternative. This implements Perl's treatment of parenthesized
+ groups, where a group not containing | does not affect the current
+ alternative, that is, (X) is NOT the same as (X|(*F)). */
+
+
+ /* ===================================================================== */
+ /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
+ bracket group, indicating that it may occur zero times. It may repeat
+ infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
+ the pattern. Brackets with fixed upper repeat limits are compiled as a
+ number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
+ Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
+
+#define Lnext_ecode F->temp_sptr[0]
+
+ case OP_BRAZERO:
+ Lnext_ecode = Fecode + 1;
+ RMATCH(Lnext_ecode, RM9);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
+ Fecode = Lnext_ecode + 1 + LINK_SIZE;
+ break;
+
+ case OP_BRAMINZERO:
+ Lnext_ecode = Fecode + 1;
+ do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
+ RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Fecode++;
+ break;
+
+#undef Lnext_ecode
+
+ case OP_SKIPZERO:
+ Fecode++;
+ do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
+ Fecode += 1 + LINK_SIZE;
+ break;
+
+
+ /* ===================================================================== */
+ /* Handle possessive brackets with an unlimited repeat. The end of these
+ brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
+ going further in the pattern. */
+
+#define Lframe_type F->temp_32[0]
+#define Lmatched_once F->temp_32[1]
+#define Lzero_allowed F->temp_32[2]
+#define Lstart_eptr F->temp_sptr[0]
+#define Lstart_group F->temp_sptr[1]
+
+ case OP_BRAPOSZERO:
+ Lzero_allowed = TRUE; /* Zero repeat is allowed */
+ Fecode += 1;
+ if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
+ goto POSSESSIVE_CAPTURE;
+ goto POSSESSIVE_NON_CAPTURE;
+
+ case OP_BRAPOS:
+ case OP_SBRAPOS:
+ Lzero_allowed = FALSE; /* Zero repeat not allowed */
+
+ POSSESSIVE_NON_CAPTURE:
+ Lframe_type = GF_NOCAPTURE; /* Remembered frame type */
+ goto POSSESSIVE_GROUP;
+
+ case OP_CBRAPOS:
+ case OP_SCBRAPOS:
+ Lzero_allowed = FALSE; /* Zero repeat not allowed */
+
+ POSSESSIVE_CAPTURE:
+ number = GET2(Fecode, 1+LINK_SIZE);
+ Lframe_type = GF_CAPTURE | number; /* Remembered frame type */
+
+ POSSESSIVE_GROUP:
+ Lmatched_once = FALSE; /* Never matched */
+ Lstart_group = Fecode; /* Start of this group */
+
+ for (;;)
+ {
+ Lstart_eptr = Feptr; /* Position at group start */
+ group_frame_type = Lframe_type;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
+ if (rrc == MATCH_KETRPOS)
+ {
+ Lmatched_once = TRUE; /* Matched at least once */
+ if (Feptr == Lstart_eptr) /* Empty match; skip to end */
+ {
+ do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
+ break;
+ }
+
+ Fecode = Lstart_group;
+ continue;
+ }
+
+ /* See comment above about handling THEN. */
+
+ if (rrc == MATCH_THEN)
+ {
+ PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
+ if (mb->verb_ecode_ptr < next_ecode &&
+ (*Fecode == OP_ALT || *next_ecode == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Fecode += GET(Fecode, 1);
+ if (*Fecode != OP_ALT) break;
+ }
+
+ /* Success if matched something or zero repeat allowed */
+
+ if (Lmatched_once || Lzero_allowed)
+ {
+ Fecode += 1 + LINK_SIZE;
+ break;
+ }
+
+ RRETURN(MATCH_NOMATCH);
+
+#undef Lmatched_once
+#undef Lzero_allowed
+#undef Lframe_type
+#undef Lstart_eptr
+#undef Lstart_group
+
+
+ /* ===================================================================== */
+ /* Handle non-capturing brackets that cannot match an empty string. When we
+ get to the final alternative within the brackets, as long as there are no
+ THEN's in the pattern, we can optimize by not recording a new backtracking
+ point. (Ideally we should test for a THEN within this group, but we don't
+ have that information.) Don't do this if we are at the very top level,
+ however, because that would make handling assertions and once-only brackets
+ messier when there is nothing to go back to. */
+
+#define Lframe_type F->temp_32[0] /* Set for all that use GROUPLOOP */
+#define Lnext_branch F->temp_sptr[0] /* Used only in OP_BRA handling */
+
+ case OP_BRA:
+ if (mb->hasthen || Frdepth == 0)
+ {
+ Lframe_type = 0;
+ goto GROUPLOOP;
+ }
+
+ for (;;)
+ {
+ Lnext_branch = Fecode + GET(Fecode, 1);
+ if (*Lnext_branch != OP_ALT) break;
+
+ /* This is never the final branch. We do not need to test for MATCH_THEN
+ here because this code is not used when there is a THEN in the pattern. */
+
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Fecode = Lnext_branch;
+ }
+
+ /* Hit the start of the final branch. Continue at this level. */
+
+ Fecode += PRIV(OP_lengths)[*Fecode];
+ break;
+
+#undef Lnext_branch
+
+
+ /* ===================================================================== */
+ /* Handle a capturing bracket, other than those that are possessive with an
+ unlimited repeat. */
+
+ case OP_CBRA:
+ case OP_SCBRA:
+ Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
+ goto GROUPLOOP;
+
+
+ /* ===================================================================== */
+ /* Atomic groups and non-capturing brackets that can match an empty string
+ must record a backtracking point and also set up a chained frame. */
+
+ case OP_ONCE:
+ case OP_SBRA:
+ Lframe_type = GF_NOCAPTURE | Fop;
+
+ GROUPLOOP:
+ for (;;)
+ {
+ group_frame_type = Lframe_type;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
+ if (rrc == MATCH_THEN)
+ {
+ PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
+ if (mb->verb_ecode_ptr < next_ecode &&
+ (*Fecode == OP_ALT || *next_ecode == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Fecode += GET(Fecode, 1);
+ if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never reaches here. */
+
+#undef Lframe_type
+
+
+ /* ===================================================================== */
+ /* Recursion either matches the current regex, or some subexpression. The
+ offset data is the offset to the starting bracket from the start of the
+ whole pattern. (This is so that it works from duplicated subpatterns.) */
+
+#define Lframe_type F->temp_32[0]
+#define Lstart_branch F->temp_sptr[0]
+
+ case OP_RECURSE:
+ bracode = mb->start_code + GET(Fecode, 1);
+ number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
+
+ /* If we are already in a recursion, check for repeating the same one
+ without advancing the subject pointer. This should catch convoluted mutual
+ recursions. (Some simple cases are caught at compile time.) */
+
+ if (Fcurrent_recurse != RECURSE_UNSET)
+ {
+ offset = Flast_group_offset;
+ while (offset != PCRE2_UNSET)
+ {
+ N = (heapframe *)((char *)mb->match_frames + offset);
+ P = (heapframe *)((char *)N - frame_size);
+ if (N->group_frame_type == (GF_RECURSE | number))
+ {
+ if (Feptr == P->eptr) RRETURN(PCRE2_ERROR_RECURSELOOP);
+ break;
+ }
+ offset = P->last_group_offset;
+ }
+ }
+
+ /* Now run the recursion, branch by branch. */
+
+ Lstart_branch = bracode;
+ Lframe_type = GF_RECURSE | number;
+
+ for (;;)
+ {
+ PCRE2_SPTR next_ecode;
+
+ group_frame_type = Lframe_type;
+ RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
+ next_ecode = Lstart_branch + GET(Lstart_branch,1);
+
+ /* Handle backtracking verbs, which are defined in a range that can
+ easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
+ escape beyond a recursion; they cause a NOMATCH for the entire recursion.
+
+ When one of these verbs triggers, the current recursion group number is
+ recorded. If it matches the recursion we are processing, the verb
+ happened within the recursion and we must deal with it. Otherwise it must
+ have happened after the recursion completed, and so has to be passed
+ back. See comment above about handling THEN. */
+
+ if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
+ mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
+ {
+ if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
+ (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ else RRETURN(MATCH_NOMATCH);
+ }
+
+ /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
+ OP_ACCEPT code. Nothing needs to be done here. */
+
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Lstart_branch = next_ecode;
+ if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never reaches here. */
+
+#undef Lframe_type
+#undef Lstart_branch
+
+
+ /* ===================================================================== */
+ /* Positive assertions are like other groups except that PCRE doesn't allow
+ the effect of (*THEN) to escape beyond an assertion; it is therefore
+ treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
+ captures retained. Any other return is an error. */
+
+#define Lframe_type F->temp_32[0]
+
+ case OP_ASSERT:
+ case OP_ASSERTBACK:
+ Lframe_type = GF_NOCAPTURE | Fop;
+ for (;;)
+ {
+ group_frame_type = Lframe_type;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
+ if (rrc == MATCH_ACCEPT)
+ {
+ memcpy(Fovector,
+ (char *)assert_accept_frame + offsetof(heapframe, ovector),
+ assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
+ Foffset_top = assert_accept_frame->offset_top;
+ break;
+ }
+ if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ Fecode += GET(Fecode, 1);
+ if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
+ }
+
+ do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
+ Fecode += 1 + LINK_SIZE;
+ break;
+
+#undef Lframe_type
+
+
+ /* ===================================================================== */
+ /* Handle negative assertions. Loop for each non-matching branch as for
+ positive assertions. */
+
+#define Lframe_type F->temp_32[0]
+
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK_NOT:
+ Lframe_type = GF_NOCAPTURE | Fop;
+
+ for (;;)
+ {
+ group_frame_type = Lframe_type;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
+ switch(rrc)
+ {
+ case MATCH_ACCEPT: /* Assertion matched, therefore it fails. */
+ case MATCH_MATCH:
+ RRETURN (MATCH_NOMATCH);
+
+ case MATCH_NOMATCH: /* Branch failed, try next if present. */
+ case MATCH_THEN:
+ Fecode += GET(Fecode, 1);
+ if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
+ break;
+
+ case MATCH_COMMIT: /* Assertion forced to fail, therefore continue. */
+ case MATCH_SKIP:
+ case MATCH_PRUNE:
+ do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
+ goto ASSERT_NOT_FAILED;
+
+ default: /* Pass back any other return */
+ RRETURN(rrc);
+ }
+ }
+
+ /* None of the branches have matched or there was a backtrack to (*COMMIT),
+ (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
+ negative assertion, so carry on. */
+
+ ASSERT_NOT_FAILED:
+ Fecode += 1 + LINK_SIZE;
+ break;
+
+#undef Lframe_type
+
+
+ /* ===================================================================== */
+ /* The callout item calls an external function, if one is provided, passing
+ details of the match so far. This is mainly for debugging, though the
+ function is able to force a failure. */
+
+ case OP_CALLOUT:
+ case OP_CALLOUT_STR:
+ rrc = do_callout(F, mb, &length);
+ if (rrc > 0) RRETURN(MATCH_NOMATCH);
+ if (rrc < 0) RRETURN(rrc);
+ Fecode += length;
+ break;
+
+
+ /* ===================================================================== */
+ /* Conditional group: compilation checked that there are no more than two
+ branches. If the condition is false, skipping the first branch takes us
+ past the end of the item if there is only one branch, but that's exactly
+ what we want. */
+
+ case OP_COND:
+ case OP_SCOND:
+
+ /* The variable Flength will be added to Fecode when the condition is
+ false, to get to the second branch. Setting it to the offset to the ALT or
+ KET, then incrementing Fecode achieves this effect. However, if the second
+ branch is non-existent, we must point to the KET so that the end of the
+ group is correctly processed. We now have Fecode pointing to the condition
+ or callout. */
+
+ Flength = GET(Fecode, 1); /* Offset to the second branch */
+ if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
+ Fecode += 1 + LINK_SIZE; /* From this opcode */
+
+ /* Because of the way auto-callout works during compile, a callout item is
+ inserted between OP_COND and an assertion condition. Such a callout can
+ also be inserted manually. */
+
+ if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
+ {
+ rrc = do_callout(F, mb, &length);
+ if (rrc > 0) RRETURN(MATCH_NOMATCH);
+ if (rrc < 0) RRETURN(rrc);
+
+ /* Advance Fecode past the callout, so it now points to the condition. We
+ must adjust Flength so that the value of Fecode+Flength is unchanged. */
+
+ Fecode += length;
+ Flength -= length;
+ }
+
+ /* Test the various possible conditions */
+
+ condition = FALSE;
+ switch(*Fecode)
+ {
+ case OP_RREF: /* Group recursion test */
+ if (Fcurrent_recurse != RECURSE_UNSET)
+ {
+ number = GET2(Fecode, 1);
+ condition = (number == RREF_ANY || number == Fcurrent_recurse);
+ }
+ break;
+
+ case OP_DNRREF: /* Duplicate named group recursion test */
+ if (Fcurrent_recurse != RECURSE_UNSET)
+ {
+ int count = GET2(Fecode, 1 + IMM2_SIZE);
+ PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
+ while (count-- > 0)
+ {
+ number = GET2(slot, 0);
+ condition = number == Fcurrent_recurse;
+ if (condition) break;
+ slot += mb->name_entry_size;
+ }
+ }
+ break;
+
+ case OP_CREF: /* Numbered group used test */
+ offset = (GET2(Fecode, 1) << 1) - 2; /* Doubled ref number */
+ condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
+ break;
+
+ case OP_DNCREF: /* Duplicate named group used test */
+ {
+ int count = GET2(Fecode, 1 + IMM2_SIZE);
+ PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
+ while (count-- > 0)
+ {
+ offset = (GET2(slot, 0) << 1) - 2;
+ condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
+ if (condition) break;
+ slot += mb->name_entry_size;
+ }
+ }
+ break;
+
+ case OP_FALSE:
+ case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */
+ break;
+
+ case OP_TRUE:
+ condition = TRUE;
+ break;
+
+ /* The condition is an assertion. Run code similar to the assertion code
+ above. */
+
+#define Lpositive F->temp_32[0]
+#define Lstart_branch F->temp_sptr[0]
+
+ default:
+ Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
+ Lstart_branch = Fecode;
+
+ for (;;)
+ {
+ group_frame_type = GF_CONDASSERT | *Fecode;
+ RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
+
+ switch(rrc)
+ {
+ case MATCH_ACCEPT: /* Save captures */
+ memcpy(Fovector,
+ (char *)assert_accept_frame + offsetof(heapframe, ovector),
+ assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
+ Foffset_top = assert_accept_frame->offset_top;
+
+ /* Fall through */
+ /* In the case of a match, the captures have already been put into
+ the current frame. */
+
+ case MATCH_MATCH:
+ condition = Lpositive; /* TRUE for positive assertion */
+ break;
+
+ /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
+ assertion; it is therefore always treated as NOMATCH. */
+
+ case MATCH_NOMATCH:
+ case MATCH_THEN:
+ Lstart_branch += GET(Lstart_branch, 1);
+ if (*Lstart_branch == OP_ALT) continue; /* Try next branch */
+ condition = !Lpositive; /* TRUE for negative assertion */
+ break;
+
+ /* These force no match without checking other branches. */
+
+ case MATCH_COMMIT:
+ case MATCH_SKIP:
+ case MATCH_PRUNE:
+ condition = !Lpositive;
+ break;
+
+ default:
+ RRETURN(rrc);
+ }
+ break; /* Out of the branch loop */
+ }
+
+ /* If the condition is true, find the end of the assertion so that
+ advancing past it gets us to the start of the first branch. */
+
+ if (condition)
+ {
+ do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
+ }
+ break; /* End of assertion condition */
+ }
+
+#undef Lpositive
+#undef Lstart_branch
+
+ /* Choose branch according to the condition. */
+
+ Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
+
+ /* If the opcode is OP_SCOND it means we are at a repeated conditional
+ group that might match an empty string. We must therefore descend a level
+ so that the start is remembered for checking. For OP_COND we can just
+ continue at this level. */
+
+ if (Fop == OP_SCOND)
+ {
+ group_frame_type = GF_NOCAPTURE | Fop;
+ RMATCH(Fecode, RM35);
+ RRETURN(rrc);
+ }
+ break;
+
+
+
+/* ========================================================================= */
+/* End of start of parenthesis opcodes */
+/* ========================================================================= */
+
+
+ /* ===================================================================== */
+ /* Move the subject pointer back. This occurs only at the start of each
+ branch of a lookbehind assertion. If we are too close to the start to move
+ back, fail. When working with UTF-8 we move back a number of characters,
+ not bytes. */
+
+ case OP_REVERSE:
+ number = GET(Fecode, 1);
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ while (number-- > 0)
+ {
+ if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
+ Feptr--;
+ BACKCHAR(Feptr);
+ }
+ }
+ else
#endif
+
+ /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
+
+ {
+ if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
+ Feptr -= number;
+ }
+
+ /* Save the earliest consulted character, then skip to next op code */
+
+ if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
+ Fecode += 1 + LINK_SIZE;
+ break;
+
+
+ /* ===================================================================== */
+ /* An alternation is the end of a branch; scan along to find the end of the
+ bracketed group. */
+
+ case OP_ALT:
+ do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
+ break;
+
+
+ /* ===================================================================== */
+ /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
+ starting frame was added to the chained frames in order to remember the
+ starting subject position for the group. */
+
+ case OP_KET:
+ case OP_KETRMIN:
+ case OP_KETRMAX:
+ case OP_KETRPOS:
+
+ bracode = Fecode - GET(Fecode, 1);
+
+ /* Point N to the frame at the start of the most recent group.
+ Remember the subject pointer at the start of the group. */
+
+ if (*bracode != OP_BRA && *bracode != OP_COND)
+ {
+ N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
+ P = (heapframe *)((char *)N - frame_size);
+ Flast_group_offset = P->last_group_offset;
+
+#ifdef DEBUG_SHOW_RMATCH
+ fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
+ N->rdepth, N->group_frame_type,
+ (char *)P->eptr - (char *)mb->start_subject);
+#endif
+
+ /* If we are at the end of an assertion that is a condition, return a
+ match, discarding any intermediate backtracking points. Copy back the
+ captures into the frame before N so that they are set on return. Doing
+ this for all assertions, both positive and negative, seems to match what
+ Perl does. */
+
+ if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
+ {
+ memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
+ Foffset_top * sizeof(PCRE2_SIZE));
+ P->offset_top = Foffset_top;
+ Fback_frame = (char *)F - (char *)P;
+ RRETURN(MATCH_MATCH);
+ }
+ }
+ else P = NULL; /* Indicates starting frame not recorded */
+
+ /* The group was not a conditional assertion. */
+
+ switch (*bracode)
+ {
+ case OP_BRA: /* No need to do anything for these */
+ case OP_COND:
+ case OP_SCOND:
+ break;
+
+ /* Positive assertions are like OP_ONCE, except that in addition the
+ subject pointer must be put back to where it was at the start of the
+ assertion. */
+
+ case OP_ASSERT:
+ case OP_ASSERTBACK:
+ if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
+ Feptr = P->eptr;
+ /* Fall through */
+
+ /* For an atomic group, discard internal backtracking points. We must
+ also ensure that any remaining branches within the top-level of the group
+ are not tried. Do this by adjusting the code pointer within the backtrack
+ frame so that it points to the final branch. */
+
+ case OP_ONCE:
+ Fback_frame = ((char *)F - (char *)P) + frame_size;
+ for (;;)
+ {
+ uint32_t y = GET(P->ecode,1);
+ if ((P->ecode)[y] != OP_ALT) break;
+ P->ecode += y;
+ }
+ break;
+
+ /* A matching negative assertion returns MATCH, which is turned into
+ NOMATCH at the assertion level. */
+
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK_NOT:
+ RRETURN(MATCH_MATCH);
+
+ /* Whole-pattern recursion is coded as a recurse into group 0, so it
+ won't be picked up here. Instead, we catch it when the OP_END is reached.
+ Other recursion is handled here. */
+
+ case OP_CBRA:
+ case OP_CBRAPOS:
+ case OP_SCBRA:
+ case OP_SCBRAPOS:
+ number = GET2(bracode, 1+LINK_SIZE);
+
+ /* Handle a recursively called group. We reinstate the previous set of
+ captures and then carry on after the recursion call. */
+
+ if (Fcurrent_recurse == number)
+ {
+ P = (heapframe *)((char *)N - frame_size);
+ memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
+ P->offset_top * sizeof(PCRE2_SIZE));
+ Foffset_top = P->offset_top;
+ Fcapture_last = P->capture_last;
+ Fcurrent_recurse = P->current_recurse;
+ Fecode = P->ecode + 1 + LINK_SIZE;
+ continue; /* With next opcode */
+ }
+
+ /* Deal with actual capturing. */
+
+ offset = (number << 1) - 2;
+ Fcapture_last = number;
+ Fovector[offset] = P->eptr - mb->start_subject;
+ Fovector[offset+1] = Feptr - mb->start_subject;
+ if (offset >= Foffset_top) Foffset_top = offset + 2;
+ break;
+ } /* End actions relating to the starting opcode */
+
+ /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+ and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+ at a time from the outer level. This must precede the empty string test -
+ in this case that test is done at the outer level. */
+
+ if (*Fecode == OP_KETRPOS)
+ {
+ memcpy((char *)P + offsetof(heapframe, eptr),
+ (char *)F + offsetof(heapframe, eptr),
+ frame_copy_size);
+ RRETURN(MATCH_KETRPOS);
+ }
+
+ /* Handle the different kinds of closing brackets. A non-repeating ket
+ needs no special action, just continuing at this level. This also happens
+ for the repeating kets if the group matched no characters, in order to
+ forcibly break infinite loops. Otherwise, the repeating kets try the rest
+ of the pattern or restart from the preceding bracket, in the appropriate
+ order. */
+
+ if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
+ {
+ if (Fop == OP_KETRMIN)
+ {
+ RMATCH(Fecode + 1 + LINK_SIZE, RM6);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ Fecode -= GET(Fecode, 1);
+ break; /* End of ket processing */
+ }
+
+ /* Repeat the maximum number of times (KETRMAX) */
+
+ RMATCH(bracode, RM7);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ }
+
+ /* Carry on at this level for a non-repeating ket, or after matching an
+ empty string, or after repeating for a maximum number of times. */
+
+ Fecode += 1 + LINK_SIZE;
+ break;
+
+
+ /* ===================================================================== */
+ /* Start and end of line assertions, not multiline mode. */
+
+ case OP_CIRC: /* Start of line, unless PCRE2_NOTBOL is set. */
+ if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
+
+ case OP_SOD: /* Unconditional start of subject */
+ if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
+
+ /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
+ terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
+
+ case OP_DOLL:
+ if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
+ if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
+
+ /* Fall through */
+ /* Unconditional end of subject assertion (\z) */
+
+ case OP_EOD:
+ if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+ SCHECK_PARTIAL();
+ Fecode++;
+ break;
+
+ /* End of subject or ending \n assertion (\Z) */
+
+ case OP_EODN:
+ ASSERT_NL_OR_EOS:
+ if (Feptr < mb->end_subject &&
+ (!IS_NEWLINE(Feptr) || Feptr != mb->end_subject - mb->nllen))
+ {
+ if (mb->partial != 0 &&
+ Feptr + 1 >= mb->end_subject &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
+ {
+ mb->hitend = TRUE;
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
+ }
+ RRETURN(MATCH_NOMATCH);
+ }
+
+ /* Either at end of string or \n before end. */
+
+ SCHECK_PARTIAL();
+ Fecode++;
+ break;
+
+
+ /* ===================================================================== */
+ /* Start and end of line assertions, multiline mode. */
+
+ /* Start of subject unless notbol, or after any newline except for one at
+ the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
+
+ case OP_CIRCM:
+ if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
+ RRETURN(MATCH_NOMATCH);
+ if (Feptr != mb->start_subject &&
+ ((Feptr == mb->end_subject &&
+ (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
+ !WAS_NEWLINE(Feptr)))
+ RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
+
+ /* Assert before any newline, or before end of subject unless noteol is
+ set. */
+
+ case OP_DOLLM:
+ if (Feptr < mb->end_subject)
+ {
+ if (!IS_NEWLINE(Feptr))
+ {
+ if (mb->partial != 0 &&
+ Feptr + 1 >= mb->end_subject &&
+ NLBLOCK->nltype == NLTYPE_FIXED &&
+ NLBLOCK->nllen == 2 &&
+ UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
+ {
+ mb->hitend = TRUE;
+ if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
+ }
+ RRETURN(MATCH_NOMATCH);
+ }
+ }
+ else
+ {
+ if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
+ SCHECK_PARTIAL();
+ }
+ Fecode++;
+ break;
+
+
+ /* ===================================================================== */
+ /* Start of match assertion */
+
+ case OP_SOM:
+ if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
+ Fecode++;
+ break;
+
+
+ /* ===================================================================== */
+ /* Reset the start of match point */
+
+ case OP_SET_SOM:
+ Fstart_match = Feptr;
+ Fecode++;
+ break;
+
+
+ /* ===================================================================== */
+ /* Word boundary assertions. Find out if the previous and current
+ characters are "word" characters. It takes a bit more work in UTF mode.
+ Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
+ not set. When it is set, use Unicode properties if available, even when not
+ in UTF mode. Remember the earliest and latest consulted characters. */
+
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_WORD_BOUNDARY:
+ if (Feptr == mb->start_subject) prev_is_word = FALSE; else
+ {
+ PCRE2_SPTR lastptr = Feptr - 1;
#ifdef SUPPORT_UNICODE
- LBL(16) LBL(18)
- LBL(22) LBL(23) LBL(28) LBL(30)
- LBL(32) LBL(34) LBL(42) LBL(46)
- LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
- LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
+ if (utf)
+ {
+ BACKCHAR(lastptr);
+ GETCHAR(fc, lastptr);
+ }
+ else
#endif /* SUPPORT_UNICODE */
- default:
- return PCRE2_ERROR_INTERNAL;
- }
-#undef LBL
-#endif /* HEAP_MATCH_RECURSE */
-}
+ fc = *lastptr;
+ if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
+#ifdef SUPPORT_UNICODE
+ if ((mb->poptions & PCRE2_UCP) != 0)
+ {
+ if (fc == '_') prev_is_word = TRUE; else
+ {
+ int cat = UCD_CATEGORY(fc);
+ prev_is_word = (cat == ucp_L || cat == ucp_N);
+ }
+ }
+ else
+#endif /* SUPPORT_UNICODE */
+ prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
+ }
+ /* Get status of next character */
-/***************************************************************************
-****************************************************************************
- RECURSION IN THE match() FUNCTION
+ if (Feptr >= mb->end_subject)
+ {
+ SCHECK_PARTIAL();
+ cur_is_word = FALSE;
+ }
+ else
+ {
+ PCRE2_SPTR nextptr = Feptr + 1;
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ FORWARDCHARTEST(nextptr, mb->end_subject);
+ GETCHAR(fc, Feptr);
+ }
+ else
+#endif /* SUPPORT_UNICODE */
+ fc = *Feptr;
+ if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
+#ifdef SUPPORT_UNICODE
+ if ((mb->poptions & PCRE2_UCP) != 0)
+ {
+ if (fc == '_') cur_is_word = TRUE; else
+ {
+ int cat = UCD_CATEGORY(fc);
+ cur_is_word = (cat == ucp_L || cat == ucp_N);
+ }
+ }
+ else
+#endif /* SUPPORT_UNICODE */
+ cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
+ }
-Undefine all the macros that were defined above to handle this. */
+ /* Now see if the situation is what we want */
-#ifdef HEAP_MATCH_RECURSE
-#undef eptr
-#undef ecode
-#undef mstart
-#undef offset_top
-#undef eptrb
-#undef flags
+ if ((*Fecode++ == OP_WORD_BOUNDARY)?
+ cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+ RRETURN(MATCH_NOMATCH);
+ break;
-#undef callpat
-#undef charptr
-#undef data
-#undef next_ecode
-#undef pp
-#undef prev
-#undef saved_eptr
-#undef new_recursive
+ /* ===================================================================== */
+ /* Backtracking (*VERB)s, with and without arguments. Note that if the
+ pattern is successfully matched, we do not come back from RMATCH. */
-#undef cur_is_word
-#undef condition
-#undef prev_is_word
+ case OP_MARK:
+ Fmark = mb->nomatch_mark = Fecode + 2;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
-#undef ctype
-#undef length
-#undef max
-#undef min
-#undef number
-#undef offset
-#undef op
-#undef save_capture_last
-#undef save_offset1
-#undef save_offset2
-#undef save_offset3
+ /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
+ argument, and we must check whether that argument matches this MARK's
+ argument. It is passed back in mb->verb_skip_ptr. If it does match, we
+ return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
+ position that corresponds to this mark. Otherwise, pass back the return
+ code unaltered. */
-#undef newptrb
-#endif /* HEAP_MATCH_RECURSE */
+ if (rrc == MATCH_SKIP_ARG &&
+ PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
+ {
+ mb->verb_skip_ptr = Feptr; /* Pass back current position */
+ RRETURN(MATCH_SKIP);
+ }
+ RRETURN(rrc);
-/* These two are defined as macros in both cases */
+ case OP_FAIL:
+ RRETURN(MATCH_NOMATCH);
-#undef fc
-#undef fi
+ /* Record the current recursing group number in mb->verb_current_recurse
+ when a backtracking return such as MATCH_COMMIT is given. This enables the
+ recurse processing to catch verbs from within the recursion. */
-/***************************************************************************
-***************************************************************************/
+ case OP_COMMIT:
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_COMMIT);
+ case OP_PRUNE:
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_PRUNE);
-#ifdef HEAP_MATCH_RECURSE
-/*************************************************
-* Release allocated heap frames *
-*************************************************/
+ case OP_PRUNE_ARG:
+ Fmark = mb->nomatch_mark = Fecode + 2;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_PRUNE);
-/* This function releases all the allocated frames. The base frame is on the
-machine stack, and so must not be freed.
+ case OP_SKIP:
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_skip_ptr = Feptr; /* Pass back current position */
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_SKIP);
-Argument:
- frame_base the address of the base frame
- mb the match block
+ /* Note that, for Perl compatibility, SKIP with an argument does NOT set
+ nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
+ not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+ that failed and any that precede it (either they also failed, or were not
+ triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
+ SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
+ set to the count of the one that failed. */
-Returns: nothing
-*/
+ case OP_SKIP_ARG:
+ mb->skip_arg_count++;
+ if (mb->skip_arg_count <= mb->ignore_skip_arg)
+ {
+ Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
+ break;
+ }
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-static void
-release_match_heapframes (heapframe *frame_base, match_block *mb)
-{
-heapframe *nextframe = frame_base->Xnextframe;
-while (nextframe != NULL)
+ /* Pass back the current skip name and return the special MATCH_SKIP_ARG
+ return code. This will either be caught by a matching MARK, or get to the
+ top, where it causes a rematch with mb->ignore_skip_arg set to the value of
+ mb->skip_arg_count. */
+
+ mb->verb_skip_ptr = Fecode + 2;
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_SKIP_ARG);
+
+ /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
+ the branch in which it occurs can be determined. */
+
+ case OP_THEN:
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_ecode_ptr = Fecode;
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_THEN);
+
+ case OP_THEN_ARG:
+ Fmark = mb->nomatch_mark = Fecode + 2;
+ RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ mb->verb_ecode_ptr = Fecode;
+ mb->verb_current_recurse = Fcurrent_recurse;
+ RRETURN(MATCH_THEN);
+
+
+ /* ===================================================================== */
+ /* There's been some horrible disaster. Arrival here can only mean there is
+ something seriously wrong in the code above or the OP_xxx definitions. */
+
+ default:
+ return PCRE2_ERROR_INTERNAL;
+ }
+
+ /* Do not insert any code in here without much thought; it is assumed
+ that "continue" in the code above comes out to here to repeat the main
+ loop. */
+
+ } /* End of main loop */
+/* Control never reaches here */
+
+
+/* ========================================================================= */
+/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
+indicates which label we actually want to return to. The value in Frdepth is
+the index number of the frame in the vector. The return value has been placed
+in rrc. */
+
+#define LBL(val) case val: goto L_RM##val;
+
+RETURN_SWITCH:
+if (Frdepth == 0) return rrc; /* Exit from the top level */
+F = (heapframe *)((char *)F - Fback_frame); /* Back track */
+
+#ifdef DEBUG_SHOW_RMATCH
+fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
+#endif
+
+switch (Freturn_id)
{
- heapframe *oldframe = nextframe;
- nextframe = nextframe->Xnextframe;
- mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
+ LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
+ LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
+ LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
+ LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
+ LBL(33) LBL(34) LBL(35)
+
+#ifdef SUPPORT_WIDE_CHARS
+ LBL(100) LBL(101)
+#endif
+
+#ifdef SUPPORT_UNICODE
+ LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
+ LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
+ LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
+ LBL(221) LBL(222)
+#endif
+
+ default:
+ return PCRE2_ERROR_INTERNAL;
}
+#undef LBL
}
-#endif /* HEAP_MATCH_RECURSE */
-
/*************************************************
@@ -6444,8 +6168,6 @@ pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
pcre2_match_context *mcontext)
{
int rc;
-int ocount;
-
const uint8_t *start_bits = NULL;
const pcre2_real_code *re = (const pcre2_real_code *)code;
@@ -6455,7 +6177,6 @@ BOOL firstline;
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
BOOL startline;
-BOOL using_temporary_offsets = FALSE;
BOOL utf;
PCRE2_UCHAR first_cu = 0;
@@ -6470,18 +6191,21 @@ PCRE2_SPTR req_cu_ptr = start_match - 1;
PCRE2_SPTR start_partial = NULL;
PCRE2_SPTR match_partial = NULL;
-/* We need to have mb pointing to a match block, because the IS_NEWLINE macro
-is used below, and it expects NLBLOCK to be defined as a pointer. */
+PCRE2_SIZE frame_size;
+
+/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
+macro is used below, and it expects NLBLOCK to be defined as a pointer. */
match_block actual_match_block;
match_block *mb = &actual_match_block;
-#ifdef HEAP_MATCH_RECURSE
-heapframe frame_zero;
-frame_zero.Xprevframe = NULL; /* Marks the top level */
-frame_zero.Xnextframe = NULL; /* None are allocated yet */
-mb->match_frames_base = &frame_zero;
-#endif
+/* Allocate an initial vector of backtracking frames on the stack. If this
+proves to be too small, it is replaced by a larger one on the heap. To get a
+vector of the size required that is aligned for pointers, allocate it as a
+vector of pointers. */
+
+PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)];
+mb->stack_frames = (heapframe *)stack_frames_vector;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
@@ -6510,8 +6234,8 @@ options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
-(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
-transferred to the options for this function. The bits are guaranteed to be
+(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which we now
+transfer to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
@@ -6523,20 +6247,22 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
-/* A NULL match context means "use a default context" */
-
-if (mcontext == NULL)
- mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
-
/* These two settings are used in the code for checking a UTF string that
follows immediately afterwards. Other values in the mb block are used only
-during interpretive pcre_match() processing, not when the JIT support is in
-use, so they are set up later. */
+during interpretive processing, not when the JIT support is in use, so they are
+set up later. */
utf = (re->overall_options & PCRE2_UTF) != 0;
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
+/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
+time. */
+
+if (mb->partial != 0 &&
+ ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
+ return PCRE2_ERROR_BADOPTION;
+
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
we must also check that a starting offset does not point into the middle of a
multiunit character. We check only the portion of the subject that is going to
@@ -6595,7 +6321,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
/* It is an error to set an offset limit without setting the flag at compile
time. */
-if (mcontext->offset_limit != PCRE2_UNSET &&
+if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
(re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
@@ -6614,7 +6340,15 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
}
#endif
-/* Carry on with non-JIT matching. */
+/* Carry on with non-JIT matching. A NULL match context means "use a default
+context", but we take the memory control functions from the pattern. */
+
+if (mcontext == NULL)
+ {
+ mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
+ mb->memctl = re->memctl;
+ }
+else mb->memctl = mcontext->memctl;
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
@@ -6622,14 +6356,10 @@ startline = (re->flags & PCRE2_STARTLINE) != 0;
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
end_subject : subject + mcontext->offset_limit;
-/* Fill in the fields in the match block. */
+/* Fill in the remaining fields in the match block. */
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
-mb->memctl = mcontext->memctl;
-#ifdef HEAP_MATCH_RECURSE
-mb->stack_memctl = mcontext->stack_memctl;
-#endif
mb->start_subject = subject;
mb->start_offset = start_offset;
@@ -6641,8 +6371,6 @@ mb->poptions = re->overall_options; /* Pattern options */
mb->ignore_skip_arg = 0;
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
-mb->recursive = NULL; /* No recursion at top level */
-mb->ovecsave_chain = NULL; /* No ovecsave blocks yet */
mb->hitend = FALSE;
/* The name table is needed for finding all the numbers associated with a
@@ -6653,20 +6381,6 @@ mb->name_count = re->name_count;
mb->name_entry_size = re->name_entry_size;
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
-/* Limits set in the pattern override the match context only if they are
-smaller. */
-
-mb->match_limit = (mcontext->match_limit < re->limit_match)?
- mcontext->match_limit : re->limit_match;
-mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
- mcontext->recursion_limit : re->limit_recursion;
-
-/* Pointers to the individual character tables */
-
-mb->lcc = re->tables + lcc_offset;
-mb->fcc = re->tables + fcc_offset;
-mb->ctypes = re->tables + ctypes_offset;
-
/* Process the \R and newline settings. */
mb->bsr_convention = re->bsr_convention;
@@ -6683,6 +6397,11 @@ switch(re->newline_convention)
mb->nl[0] = CHAR_NL;
break;
+ case PCRE2_NEWLINE_NUL:
+ mb->nllen = 1;
+ mb->nl[0] = CHAR_NUL;
+ break;
+
case PCRE2_NEWLINE_CRLF:
mb->nllen = 2;
mb->nl[0] = CHAR_CR;
@@ -6700,71 +6419,91 @@ switch(re->newline_convention)
default: return PCRE2_ERROR_INTERNAL;
}
-/* If the expression has got more back references than the offsets supplied can
-hold, we get a temporary chunk of memory to use during the matching. Otherwise,
-we can use the vector supplied. The size of the ovector is three times the
-value in the oveccount field. Two-thirds of it is pairs for storing matching
-offsets, and the top third is working space. */
+/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
+vector at the end, whose size depends on the number of capturing parentheses in
+the pattern. It is not used at all if there are no capturing parentheses.
+
+ frame_size is the total size of each frame
+ mb->frame_vector_size is the total usable size of the vector (rounded down
+ to a whole number of frames)
+
+The last of these is changed within the match() function if the frame vector
+has to be expanded. We therefore put it into the match block so that it is
+correct when calling match() more than once for non-anchored patterns. */
+
+frame_size = offsetof(heapframe, ovector) +
+ re->top_bracket * 2 * sizeof(PCRE2_SIZE);
+
+/* Limits set in the pattern override the match context only if they are
+smaller. */
+
+mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
+ mcontext->heap_limit : re->limit_heap;
+
+mb->match_limit = (mcontext->match_limit < re->limit_match)?
+ mcontext->match_limit : re->limit_match;
+
+mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
+ mcontext->depth_limit : re->limit_depth;
+
+/* If a pattern has very many capturing parentheses, the frame size may be very
+large. Ensure that there are at least 10 available frames by getting an initial
+vector on the heap if necessary, except when the heap limit prevents this. Get
+fewer if possible. (The heap limit is in kilobytes.) */
-if (re->top_backref >= match_data->oveccount)
+if (frame_size <= START_FRAMES_SIZE/10)
{
- ocount = re->top_backref * 3 + 3;
- mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
- mb->memctl.memory_data));
- if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
- using_temporary_offsets = TRUE;
+ mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
+ mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
}
else
{
- ocount = 3 * match_data->oveccount;
- mb->ovector = match_data->ovector;
+ mb->frame_vector_size = frame_size * 10;
+ if ((mb->frame_vector_size / 1024) > mb->heap_limit)
+ {
+ if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
+ mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
+ }
+ mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
+ mb->memctl.memory_data);
+ if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
}
-mb->offset_end = ocount;
-mb->offset_max = (2*ocount)/3;
+mb->match_frames_top =
+ (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
-/* Reset the working variable associated with each extraction. These should
-never be used unless previously set, but they get saved and restored, and so we
-initialize them to avoid reading uninitialized locations. Also, unset the
-offsets for the matched string. This is really just for tidiness with callouts,
-in case they inspect these fields. */
+/* Write to the ovector within the first frame to mark every capture unset and
+to avoid uninitialized memory read errors when it is copied to a new frame. */
-if (ocount > 0)
- {
- register PCRE2_SIZE *iptr = mb->ovector + ocount;
- register PCRE2_SIZE *iend = iptr - re->top_bracket;
- if (iend < mb->ovector + 2) iend = mb->ovector + 2;
- while (--iptr >= iend) *iptr = PCRE2_UNSET;
- mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
- }
+memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
+ re->top_bracket * 2 * sizeof(PCRE2_SIZE));
+
+/* Pointers to the individual character tables */
+
+mb->lcc = re->tables + lcc_offset;
+mb->fcc = re->tables + fcc_offset;
+mb->ctypes = re->tables + ctypes_offset;
-/* Set up the first code unit to match, if available. The first_codeunit value
-is never set for an anchored regular expression, but the anchoring may be
-forced at run time, so we have to test for anchoring. The first code unit may
-be unset for an unanchored pattern, of course. If there's no first code unit
-there may be a bitmap of possible first characters. */
+/* Set up the first code unit to match, if available. If there's no first code
+unit there may be a bitmap of possible first characters. */
-if (!anchored)
+if ((re->flags & PCRE2_FIRSTSET) != 0)
{
- if ((re->flags & PCRE2_FIRSTSET) != 0)
+ has_first_cu = TRUE;
+ first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
+ if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
{
- has_first_cu = TRUE;
- first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
- if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
- {
- first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
+ first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
+ if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
#endif
- }
}
- else
- if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
- start_bits = re->start_bitmap;
}
+else
+ if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
+ start_bits = re->start_bitmap;
-/* For anchored or unanchored matches, there may be a "last known required
-character" set. */
+/* There may also be a "last known required character" set. */
if ((re->flags & PCRE2_LASTSET) != 0)
{
@@ -6788,7 +6527,6 @@ the loop runs just once. */
for(;;)
{
PCRE2_SPTR new_start_match;
- mb->capture_last = 0;
/* ----------------- Start of match optimizations ---------------- */
@@ -6804,8 +6542,8 @@ for(;;)
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the
first newline. Implement this by temporarily adjusting end_subject so that
- we stop the optimization scans at a newline. If the match fails at the
- newline, later code breaks this loop. */
+ we stop the optimization scans for a first code unit at a newline. If the
+ match fails at the newline, later code breaks this loop. */
if (firstline)
{
@@ -6825,90 +6563,156 @@ for(;;)
end_subject = t;
}
- /* Advance to a unique first code unit if there is one. In 8-bit mode, the
- use of memchr() gives a big speed up. */
+ /* Anchored: check the first code unit if one is recorded. This may seem
+ pointless but it can help in detecting a no match case without scanning for
+ the required code unit. */
- if (has_first_cu)
+ if (anchored)
{
- PCRE2_UCHAR smc;
- if (first_cu != first_cu2)
- while (start_match < end_subject &&
- (smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
- start_match++;
- else
+ if (has_first_cu || start_bits != NULL)
{
+ BOOL ok = start_match < end_subject;
+ if (ok)
+ {
+ PCRE2_UCHAR c = UCHAR21TEST(start_match);
+ ok = has_first_cu && (c == first_cu || c == first_cu2);
+ if (!ok && start_bits != NULL)
+ {
#if PCRE2_CODE_UNIT_WIDTH != 8
- while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
- start_match++;
-#else
- start_match = memchr(start_match, first_cu, end_subject - start_match);
- if (start_match == NULL) start_match = end_subject;
+ if (c > 255) c = 255;
#endif
+ ok = (start_bits[c/8] & (1 << (c&7))) != 0;
+ }
+ }
+ if (!ok)
+ {
+ rc = MATCH_NOMATCH;
+ break;
+ }
}
}
- /* Or to just after a linebreak for a multiline match */
+ /* Not anchored. Advance to a unique first code unit if there is one. In
+ 8-bit mode, the use of memchr() gives a big speed up, even though we have
+ to call it twice in caseless mode, in order to find the earliest occurrence
+ of the character in either of its cases. */
- else if (startline)
+ else
{
- if (start_match > mb->start_subject + start_offset)
+ if (has_first_cu)
{
-#ifdef SUPPORT_UNICODE
- if (utf)
+ if (first_cu != first_cu2) /* Caseless */
{
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
- {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ PCRE2_UCHAR smc;
+ while (start_match < end_subject &&
+ (smc = UCHAR21TEST(start_match)) != first_cu &&
+ smc != first_cu2)
start_match++;
- ACROSSCHAR(start_match < end_subject, *start_match,
- start_match++);
- }
+#else /* 8-bit code units */
+ PCRE2_SPTR pp1 =
+ memchr(start_match, first_cu, end_subject-start_match);
+ PCRE2_SPTR pp2 =
+ memchr(start_match, first_cu2, end_subject-start_match);
+ if (pp1 == NULL)
+ start_match = (pp2 == NULL)? end_subject : pp2;
+ else
+ start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
+#endif
}
+
+ /* The caseful case */
+
else
+ {
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ while (start_match < end_subject && UCHAR21TEST(start_match) !=
+ first_cu)
+ start_match++;
+#else
+ start_match = memchr(start_match, first_cu, end_subject - start_match);
+ if (start_match == NULL) start_match = end_subject;
#endif
- while (start_match < end_subject && !WAS_NEWLINE(start_match))
- start_match++;
+ }
- /* If we have just passed a CR and the newline option is ANY or
- ANYCRLF, and we are now at a LF, advance the match position by one more
- code unit. */
+ /* If we can't find the required code unit, break the bumpalong loop,
+ to force a match failure, except when doing partial matching, when we
+ let the next cycle run at the end of the subject. To see why, consider
+ the pattern /(?<=abc)def/, which partially matches "abc", even though
+ the string does not contain the starting character "d". */
- if (start_match[-1] == CHAR_CR &&
- (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
- start_match < end_subject &&
- UCHAR21TEST(start_match) == CHAR_NL)
- start_match++;
+ if (!mb->partial && start_match >= end_subject)
+ {
+ rc = MATCH_NOMATCH;
+ break;
+ }
}
- }
- /* Or to a non-unique first code unit if any have been identified. The
- bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
- code units greater than 254 set the 255 bit. */
+ /* If there's no first code unit, advance to just after a linebreak for a
+ multiline match if required. */
- else if (start_bits != NULL)
- {
- while (start_match < end_subject)
+ else if (startline)
{
- register uint32_t c = UCHAR21TEST(start_match);
+ if (start_match > mb->start_subject + start_offset)
+ {
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ {
+ start_match++;
+ ACROSSCHAR(start_match < end_subject, *start_match,
+ start_match++);
+ }
+ }
+ else
+#endif
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ start_match++;
+
+ /* If we have just passed a CR and the newline option is ANY or
+ ANYCRLF, and we are now at a LF, advance the match position by one
+ more code unit. */
+
+ if (start_match[-1] == CHAR_CR &&
+ (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
+ start_match < end_subject &&
+ UCHAR21TEST(start_match) == CHAR_NL)
+ start_match++;
+ }
+ }
+
+ /* If there's no first code unit or a requirement for a multiline line
+ start, advance to a non-unique first code unit if any have been
+ identified. The bitmap contains only 256 bits. When code units are 16 or
+ 32 bits wide, all code units greater than 254 set the 255 bit. */
+
+ else if (start_bits != NULL)
+ {
+ while (start_match < end_subject)
+ {
+ uint32_t c = UCHAR21TEST(start_match);
#if PCRE2_CODE_UNIT_WIDTH != 8
- if (c > 255) c = 255;
+ if (c > 255) c = 255;
#endif
- if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
- start_match++;
+ if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+ start_match++;
+ }
}
- }
+ } /* End first code unit handling */
/* Restore fudged end_subject */
end_subject = save_end_subject;
- /* The following two optimizations are disabled for partial matching. */
+ /* The following two optimizations must be disabled for partial matching. */
if (!mb->partial)
{
- /* The minimum matching length is a lower bound; no actual string of that
- length may actually match the pattern. Although the value is, strictly,
- in characters, we treat it as code units to avoid spending too much time
- in this optimization. */
+ /* The minimum matching length is a lower bound; no string of that length
+ may actually match the pattern. Although the value is, strictly, in
+ characters, we treat it as code units to avoid spending too much time in
+ this optimization. */
if (end_subject - start_match < re->minlength)
{
@@ -6917,12 +6721,16 @@ for(;;)
}
/* If req_cu is set, we know that that code unit must appear in the
- subject for the match to succeed. If the first code unit is set, req_cu
- must be later in the subject; otherwise the test starts at the match
- point. This optimization can save a huge amount of backtracking in
- patterns with nested unlimited repeats that aren't going to match.
- Writing separate code for cased/caseless versions makes it go faster, as
- does using an autoincrement and backing off on a match.
+ subject for the (non-partial) match to succeed. If the first code unit is
+ set, req_cu must be later in the subject; otherwise the test starts at
+ the match point. This optimization can save a huge amount of backtracking
+ in patterns with nested unlimited repeats that aren't going to match.
+ Writing separate code for caseful/caseless versions makes it go faster,
+ as does using an autoincrement and backing off on a match. As in the case
+ of the first code unit, using memchr() in the 8-bit library gives a big
+ speed up. Unlike the first_cu check above, we do not need to call
+ memchr() twice in the caseless case because we only need to check for the
+ presence of the character in either case, not find the first occurrence.
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
@@ -6932,30 +6740,55 @@ for(;;)
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
{
- register PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
+ PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
- place we found it at last time. */
+ place we found it last time round the bumpalong loop. */
if (p > req_cu_ptr)
{
- if (req_cu != req_cu2)
+ if (p < end_subject)
{
- while (p < end_subject)
+ if (req_cu != req_cu2) /* Caseless */
{
- register uint32_t pp = UCHAR21INCTEST(p);
- if (pp == req_cu || pp == req_cu2) { p--; break; }
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ do
+ {
+ uint32_t pp = UCHAR21INCTEST(p);
+ if (pp == req_cu || pp == req_cu2) { p--; break; }
+ }
+ while (p < end_subject);
+
+#else /* 8-bit code units */
+ PCRE2_SPTR pp = p;
+ p = memchr(pp, req_cu, end_subject - pp);
+ if (p == NULL)
+ {
+ p = memchr(pp, req_cu2, end_subject - pp);
+ if (p == NULL) p = end_subject;
+ }
+#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
}
- }
- else
- {
- while (p < end_subject)
+
+ /* The caseful case */
+
+ else
{
- if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ do
+ {
+ if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
+ }
+ while (p < end_subject);
+
+#else /* 8-bit code units */
+ p = memchr(p, req_cu, end_subject - p);
+ if (p == NULL) p = end_subject;
+#endif
}
}
- /* If we can't find the required code unit, break the matching loop,
+ /* If we can't find the required code unit, break the bumpalong loop,
forcing a match failure. */
if (p >= end_subject)
@@ -6965,8 +6798,8 @@ for(;;)
}
/* If we have found the required code unit, save the point where we
- found it, so that we don't search again next time round the loop if
- the start hasn't passed this code unit yet. */
+ found it, so that we don't search again next time round the bumpalong
+ loop if the start hasn't yet passed this code unit. */
req_cu_ptr = p;
}
@@ -6987,14 +6820,14 @@ for(;;)
/* OK, we can now run the match. If "hitend" is set afterwards, remember the
first starting point for which a partial match was found. */
- mb->start_match_ptr = start_match;
mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match;
mb->match_call_count = 0;
- mb->match_function_type = 0;
mb->end_offset_top = 0;
mb->skip_arg_count = 0;
- rc = match(start_match, mb->start_code, start_match, 2, mb, NULL, 0);
+
+ rc = match(start_match, mb->start_code, match_data->ovector,
+ match_data->oveccount, re->top_bracket, frame_size, mb);
if (mb->hitend && start_partial == NULL)
{
@@ -7020,9 +6853,9 @@ for(;;)
greater than the match we have just done, treat it as NOMATCH. */
case MATCH_SKIP:
- if (mb->start_match_ptr > start_match)
+ if (mb->verb_skip_ptr > start_match)
{
- new_start_match = mb->start_match_ptr;
+ new_start_match = mb->verb_skip_ptr;
break;
}
/* Fall through */
@@ -7096,11 +6929,11 @@ for(;;)
/* ==========================================================================*/
-/* When we reach here, one of the stopping conditions is true:
+/* When we reach here, one of the following stopping conditions is true:
(1) The match succeeded, either completely, or partially;
-(2) The pattern is anchored or the match was failed by (*COMMIT);
+(2) The pattern is anchored or the match was failed after (*COMMIT);
(3) We are past the end of the subject or the bumpalong limit;
@@ -7114,18 +6947,10 @@ for(;;)
ENDLOOP:
-#ifdef HEAP_MATCH_RECURSE
-release_match_heapframes(&frame_zero, mb);
-#endif
-
-/* Release any frames that were saved from recursions. */
+/* Release an enlarged frame vector that is on the heap. */
-while (mb->ovecsave_chain != NULL)
- {
- ovecsave_frame *this = mb->ovecsave_chain;
- mb->ovecsave_chain = this->next;
- mb->memctl.free(this, mb->memctl.memory_data);
- }
+if (mb->match_frames != mb->stack_frames)
+ mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
/* Fill in fields that are always returned in the match data. */
@@ -7134,68 +6959,14 @@ match_data->subject = subject;
match_data->mark = mb->mark;
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
-/* Handle a fully successful match. */
+/* Handle a fully successful match. Set the return code to the number of
+captured strings, or 0 if there were too many to fit into the ovector, and then
+set the remaining returned values before returning. */
-if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
+if (rc == MATCH_MATCH)
{
- uint32_t arg_offset_max = 2 * match_data->oveccount;
-
- /* When the offset vector is big enough to deal with any backreferences,
- captured substring offsets will already be set up. In the case where we had
- to get some local memory to hold offsets for backreference processing, copy
- those that we can. In this case there need not be overflow if certain parts
- of the pattern were not used, even though there are more capturing
- parentheses than vector slots. */
-
- if (using_temporary_offsets)
- {
- if (arg_offset_max >= 4)
- {
- memcpy(match_data->ovector + 2, mb->ovector + 2,
- (arg_offset_max - 2) * sizeof(PCRE2_SIZE));
- }
- if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
- mb->memctl.free(mb->ovector, mb->memctl.memory_data);
- }
-
- /* Set the return code to the number of captured strings, or 0 if there were
- too many to fit into the ovector. */
-
- match_data->rc = ((mb->capture_last & OVFLBIT) != 0)?
- 0 : mb->end_offset_top/2;
-
- /* If there is space in the offset vector, set any pairs that follow the
- highest-numbered captured string but are less than the number of capturing
- groups in the pattern (and are within the ovector) to PCRE2_UNSET. It is
- documented that this happens. In earlier versions, the whole set of potential
- capturing offsets was initialized each time round the loop, but this is
- handled differently now. "Gaps" are set to PCRE2_UNSET dynamically instead
- (this fixed a bug). Thus, it is only those at the end that need setting here.
- We can't just mark them all unset at the start of the whole thing because
- they may get set in one branch that is not the final matching branch. */
-
- if (mb->end_offset_top/2 <= re->top_bracket)
- {
- register PCRE2_SIZE *iptr, *iend;
- int resetcount = re->top_bracket + 1;
- if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
- iptr = match_data->ovector + mb->end_offset_top;
- iend = match_data->ovector + 2 * resetcount;
- while (iptr < iend) *iptr++ = PCRE2_UNSET;
- }
-
- /* If there is space, set up the whole thing as substring 0. The value of
- mb->start_match_ptr might be modified if \K was encountered on the success
- matching path. */
-
- if (match_data->oveccount < 1) rc = 0; else
- {
- match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
- match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
- }
-
- /* Set the remaining returned values */
-
+ match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
+ 0 : (int)mb->end_offset_top/2 + 1;
match_data->startchar = start_match - subject;
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
@@ -7211,18 +6982,14 @@ match_data->mark = mb->nomatch_mark;
/* For anything other than nomatch or partial match, just return the code. */
-if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL)
- match_data->rc = rc;
+if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
-/* Else handle a partial match. */
+/* Handle a partial match. */
else if (match_partial != NULL)
{
- if (match_data->oveccount > 0)
- {
- match_data->ovector[0] = match_partial - subject;
- match_data->ovector[1] = end_subject - subject;
- }
+ match_data->ovector[0] = match_partial - subject;
+ match_data->ovector[1] = end_subject - subject;
match_data->startchar = match_partial - subject;
match_data->leftchar = start_partial - subject;
match_data->rightchar = end_subject - subject;
@@ -7233,10 +7000,6 @@ else if (match_partial != NULL)
else match_data->rc = PCRE2_ERROR_NOMATCH;
-/* Free any temporary offsets. */
-
-if (using_temporary_offsets)
- mb->memctl.free(mb->ovector, mb->memctl.memory_data);
return match_data->rc;
}
diff --git a/src/3rdparty/pcre2/src/pcre2_match_data.c b/src/3rdparty/pcre2/src/pcre2_match_data.c
index 85ac998348..b297f326b5 100644
--- a/src/3rdparty/pcre2/src/pcre2_match_data.c
+++ b/src/3rdparty/pcre2/src/pcre2_match_data.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -51,7 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
* Create a match data block given ovector size *
*************************************************/
-/* A minimum of 1 is imposed on the number of ovector triplets. */
+/* A minimum of 1 is imposed on the number of ovector pairs. */
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
@@ -59,7 +59,7 @@ pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
pcre2_match_data *yield;
if (oveccount < 1) oveccount = 1;
yield = PRIV(memctl_malloc)(
- sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
+ offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
if (yield == NULL) return NULL;
yield->oveccount = oveccount;
diff --git a/src/3rdparty/pcre2/src/pcre2_ord2utf.c b/src/3rdparty/pcre2/src/pcre2_ord2utf.c
index 75252b763a..1403730996 100644
--- a/src/3rdparty/pcre2/src/pcre2_ord2utf.c
+++ b/src/3rdparty/pcre2/src/pcre2_ord2utf.c
@@ -83,7 +83,7 @@ PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
/* Convert to UTF-8 */
#if PCRE2_CODE_UNIT_WIDTH == 8
-register int i, j;
+int i, j;
for (i = 0; i < PRIV(utf8_table1_size); i++)
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
diff --git a/src/3rdparty/pcre2/src/pcre2_pattern_info.c b/src/3rdparty/pcre2/src/pcre2_pattern_info.c
index 5b32a905b0..540707b225 100644
--- a/src/3rdparty/pcre2/src/pcre2_pattern_info.c
+++ b/src/3rdparty/pcre2/src/pcre2_pattern_info.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -75,10 +75,12 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_BACKREFMAX:
case PCRE2_INFO_BSR:
case PCRE2_INFO_CAPTURECOUNT:
+ case PCRE2_INFO_DEPTHLIMIT:
case PCRE2_INFO_FIRSTCODETYPE:
case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF:
+ case PCRE2_INFO_HEAPLIMIT:
case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE:
case PCRE2_INFO_LASTCODEUNIT:
@@ -89,7 +91,6 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_NAMEENTRYSIZE:
case PCRE2_INFO_NAMECOUNT:
case PCRE2_INFO_NEWLINE:
- case PCRE2_INFO_RECURSIONLIMIT:
return sizeof(uint32_t);
case PCRE2_INFO_FIRSTBITMAP:
@@ -97,6 +98,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_JITSIZE:
case PCRE2_INFO_SIZE:
+ case PCRE2_INFO_FRAMESIZE:
return sizeof(size_t);
case PCRE2_INFO_NAMETABLE:
@@ -137,6 +139,11 @@ switch(what)
*((uint32_t *)where) = re->top_bracket;
break;
+ case PCRE2_INFO_DEPTHLIMIT:
+ *((uint32_t *)where) = re->limit_depth;
+ if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
+ break;
+
case PCRE2_INFO_FIRSTCODETYPE:
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
@@ -152,6 +159,11 @@ switch(what)
&(re->start_bitmap[0]) : NULL;
break;
+ case PCRE2_INFO_FRAMESIZE:
+ *((size_t *)where) = offsetof(heapframe, ovector) +
+ re->top_bracket * 2 * sizeof(PCRE2_SIZE);
+ break;
+
case PCRE2_INFO_HASBACKSLASHC:
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
break;
@@ -160,6 +172,11 @@ switch(what)
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break;
+ case PCRE2_INFO_HEAPLIMIT:
+ *((uint32_t *)where) = re->limit_heap;
+ if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
+ break;
+
case PCRE2_INFO_JCHANGED:
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
break;
@@ -215,11 +232,6 @@ switch(what)
*((uint32_t *)where) = re->newline_convention;
break;
- case PCRE2_INFO_RECURSIONLIMIT:
- *((uint32_t *)where) = re->limit_recursion;
- if (re->limit_recursion == UINT32_MAX) return PCRE2_ERROR_UNSET;
- break;
-
case PCRE2_INFO_SIZE:
*((size_t *)where) = re->blocksize;
break;
@@ -255,11 +267,15 @@ pcre2_real_code *re = (pcre2_real_code *)code;
pcre2_callout_enumerate_block cb;
PCRE2_SPTR cc;
#ifdef SUPPORT_UNICODE
-BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
+BOOL utf;
#endif
if (re == NULL) return PCRE2_ERROR_NULL;
+#ifdef SUPPORT_UNICODE
+utf = (re->overall_options & PCRE2_UTF) != 0;
+#endif
+
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
diff --git a/src/3rdparty/pcre2/src/pcre2_serialize.c b/src/3rdparty/pcre2/src/pcre2_serialize.c
index 0af26d8fc3..d2cc603cbb 100644
--- a/src/3rdparty/pcre2/src/pcre2_serialize.c
+++ b/src/3rdparty/pcre2/src/pcre2_serialize.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -214,7 +214,10 @@ for (i = 0; i < number_of_codes; i++)
if (dst_re->magic_number != MAGIC_NUMBER ||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
dst_re->name_count > MAX_NAME_COUNT)
+ {
+ memctl->free(dst_re, memctl->memory_data);
return PCRE2_ERROR_BADSERIALIZEDDATA;
+ }
/* At the moment only one table is supported. */
diff --git a/src/3rdparty/pcre2/src/pcre2_study.c b/src/3rdparty/pcre2/src/pcre2_study.c
index db08266745..b92686759d 100644
--- a/src/3rdparty/pcre2/src/pcre2_study.c
+++ b/src/3rdparty/pcre2/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -46,9 +46,11 @@ collecting data (e.g. minimum matching length). */
#include "config.h"
#endif
-
#include "pcre2_internal.h"
+/* The maximum remembered capturing brackets minimum. */
+
+#define MAX_CACHE_BACKREF 128
/* Set a bit in the starting code unit bit map. */
@@ -71,6 +73,12 @@ length is 16-bits long (on the grounds that anything longer than that is
pathological), so we give up when we reach that amount. This also means that
integer overflow for really crazy patterns cannot happen.
+Backreference minimum lengths are cached to speed up multiple references. This
+function is called only when the highest back reference in the pattern is less
+than or equal to MAX_CACHE_BACKREF, which is one less than the size of the
+caching vector. The zeroth element contains the number of the highest set
+value.
+
Arguments:
re compiled pattern block
code pointer to start of group (the bracket)
@@ -78,6 +86,7 @@ Arguments:
utf UTF flag
recurses chain of recurse_check to catch mutual recursion
countptr pointer to call count (to catch over complexity)
+ backref_cache vector for caching back references.
Returns: the minimum length
-1 \C in UTF-8 mode
@@ -90,7 +99,8 @@ Returns: the minimum length
static int
find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
- PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr)
+ PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr,
+ int *backref_cache)
{
int length = -1;
int prev_cap_recno = -1;
@@ -101,8 +111,8 @@ uint32_t once_fudge = 0;
BOOL had_recurse = FALSE;
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
recurse_check this_recurse;
-register int branchlength = 0;
-register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
+int branchlength = 0;
+PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
/* If this is a "could be empty" group, its minimum length is 0. */
@@ -124,7 +134,7 @@ for (;;)
{
int d, min, recno;
PCRE2_UCHAR *cs, *ce;
- register PCRE2_UCHAR op = *cc;
+ PCRE2_UCHAR op = *cc;
if (branchlength >= UINT16_MAX) return UINT16_MAX;
@@ -146,12 +156,12 @@ for (;;)
}
goto PROCESS_NON_CAPTURE;
- /* There's a special case of OP_ONCE, when it is wrapped round an
+ case OP_BRA:
+ /* There's a special case of OP_BRA, when it is wrapped round a repeated
OP_RECURSE. We'd like to process the latter at this level so that
remembering the value works for repeated cases. So we do nothing, but
set a fudge value to skip over the OP_KET after the recurse. */
- case OP_ONCE:
if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
{
once_fudge = 1 + LINK_SIZE;
@@ -160,13 +170,13 @@ for (;;)
}
/* Fall through */
- case OP_ONCE_NC:
- case OP_BRA:
+ case OP_ONCE:
case OP_SBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
PROCESS_NON_CAPTURE:
- d = find_minlength(re, cc, startcode, utf, recurses, countptr);
+ d = find_minlength(re, cc, startcode, utf, recurses, countptr,
+ backref_cache);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -182,11 +192,12 @@ for (;;)
case OP_SCBRA:
case OP_CBRAPOS:
case OP_SCBRAPOS:
- recno = dupcapused? prev_cap_recno - 1 : (int)GET2(cc, 1+LINK_SIZE);
- if (recno != prev_cap_recno)
+ recno = (int)GET2(cc, 1+LINK_SIZE);
+ if (dupcapused || recno != prev_cap_recno)
{
prev_cap_recno = recno;
- prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr);
+ prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr,
+ backref_cache);
if (prev_cap_d < 0) return prev_cap_d;
}
branchlength += prev_cap_d;
@@ -456,38 +467,52 @@ for (;;)
d = INT_MAX;
- /* Scan all groups with the same name */
+ /* Scan all groups with the same name; find the shortest. */
while (count-- > 0)
{
- ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
- if (cs == NULL) return -2;
- do ce += GET(ce, 1); while (*ce == OP_ALT);
- if (cc > cs && cc < ce) /* Simple recursion */
- {
- d = 0;
- had_recurse = TRUE;
- break;
- }
+ int dd, i;
+ recno = GET2(slot, 0);
+
+ if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
+ dd = backref_cache[recno];
else
{
- recurse_check *r = recurses;
- for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
- if (r != NULL) /* Mutual recursion */
+ ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+ if (cs == NULL) return -2;
+ do ce += GET(ce, 1); while (*ce == OP_ALT);
+ if (cc > cs && cc < ce) /* Simple recursion */
{
- d = 0;
+ dd = 0;
had_recurse = TRUE;
- break;
}
else
{
- int dd;
- this_recurse.prev = recurses;
- this_recurse.group = cs;
- dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
- if (dd < d) d = dd;
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev)
+ if (r->group == cs) break;
+ if (r != NULL) /* Mutual recursion */
+ {
+ dd = 0;
+ had_recurse = TRUE;
+ }
+ else
+ {
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ dd = find_minlength(re, cs, startcode, utf, &this_recurse,
+ countptr, backref_cache);
+ if (dd < 0) return dd;
+ }
}
+
+ backref_cache[recno] = dd;
+ for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
+ backref_cache[0] = recno;
}
+
+ if (dd < d) d = dd;
+ if (d <= 0) break; /* No point looking at any more */
slot += re->name_entry_size;
}
}
@@ -501,34 +526,48 @@ for (;;)
case OP_REF:
case OP_REFI:
if (dupcapused) return -1;
- if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+ recno = GET2(cc, 1);
+ if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
+ d = backref_cache[recno];
+ else
{
- ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
- if (cs == NULL) return -2;
- do ce += GET(ce, 1); while (*ce == OP_ALT);
- if (cc > cs && cc < ce) /* Simple recursion */
- {
- d = 0;
- had_recurse = TRUE;
- }
- else
+ int i;
+ if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
- recurse_check *r = recurses;
- for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
- if (r != NULL) /* Mutual recursion */
+ ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+ if (cs == NULL) return -2;
+ do ce += GET(ce, 1); while (*ce == OP_ALT);
+ if (cc > cs && cc < ce) /* Simple recursion */
{
d = 0;
had_recurse = TRUE;
}
else
{
- this_recurse.prev = recurses;
- this_recurse.group = cs;
- d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+ if (r != NULL) /* Mutual recursion */
+ {
+ d = 0;
+ had_recurse = TRUE;
+ }
+ else
+ {
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
+ backref_cache);
+ if (d < 0) return d;
+ }
}
}
+ else d = 0;
+
+ backref_cache[recno] = d;
+ for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
+ backref_cache[0] = recno;
}
- else d = 0;
+
cc += 1 + IMM2_SIZE;
/* Handle repeated back references */
@@ -601,7 +640,7 @@ for (;;)
this_recurse.prev = recurses;
this_recurse.group = cs;
prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
- countptr);
+ countptr, backref_cache);
if (prev_recurse_d < 0) return prev_recurse_d;
prev_recurse_recno = recno;
branchlength += prev_recurse_d;
@@ -747,6 +786,7 @@ if (utf)
if (caseless)
{
+#ifdef SUPPORT_UNICODE
if (utf)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
@@ -759,10 +799,12 @@ if (caseless)
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
#endif
}
+ else
+#endif /* SUPPORT_UNICODE */
/* Not UTF */
- else if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
+ if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
}
return p;
@@ -792,7 +834,7 @@ Returns: nothing
static void
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
-register uint32_t c;
+uint32_t c;
for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@@ -833,7 +875,7 @@ Returns: nothing
static void
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
-register uint32_t c;
+uint32_t c;
for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@@ -873,7 +915,7 @@ Returns: SSB_FAIL => Failed to find any starting code units
static int
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
{
-register uint32_t c;
+uint32_t c;
int yield = SSB_DONE;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@@ -911,7 +953,6 @@ do
case OP_ALLANY:
case OP_ANY:
case OP_ANYBYTE:
- case OP_CIRC:
case OP_CIRCM:
case OP_CLOSE:
case OP_COMMIT:
@@ -979,6 +1020,13 @@ do
case OP_THEN_ARG:
return SSB_FAIL;
+ /* OP_CIRC happens only at the start of an anchored branch (multiline ^
+ uses OP_CIRCM). Skip over it. */
+
+ case OP_CIRC:
+ tcode += PRIV(OP_lengths)[OP_CIRC];
+ break;
+
/* A "real" property test implies no starting bits, but the fake property
PT_CLIST identifies a list of characters. These lists are short, as they
are used for characters with more than one "other case", so there is no
@@ -1025,7 +1073,6 @@ do
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_ONCE:
- case OP_ONCE_NC:
case OP_ASSERT:
rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
@@ -1407,6 +1454,10 @@ do
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
#endif
+ /* It seems that the fall through comment must be outside the #ifdef if
+ it is to avoid the gcc compiler warning. */
+
+ /* Fall through */
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
@@ -1534,24 +1585,31 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count;
-/* For an anchored pattern, or an unanchored pattern that has a first code
-unit, or a multiline pattern that matches only at "line start", there is no
-point in seeking a list of starting code units. */
+/* For a pattern that has a first code unit, or a multiline pattern that
+matches only at "line start", there is no point in seeking a list of starting
+code units. */
-if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
- (re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
+if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
int rc = set_start_bits(re, code, utf);
if (rc == SSB_UNKNOWN) return 1;
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
}
-/* Find the minimum length of subject string. If it can match an empty string,
-the minimum length is already known. */
+/* Find the minimum length of subject string. If the pattern can match an empty
+string, the minimum length is already known. If there are more back references
+than the size of the vector we are going to cache them in, do nothing. A
+pattern that complicated will probably take a long time to analyze and may in
+any case turn out to be too complicated. Note that back reference minima are
+held as 16-bit numbers. */
-if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
+if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
+ re->top_backref <= MAX_CACHE_BACKREF)
{
- switch(min = find_minlength(re, code, code, utf, NULL, &count))
+ int backref_cache[MAX_CACHE_BACKREF+1];
+ backref_cache[0] = 0; /* Highest one that is set */
+ min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
+ switch(min)
{
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
diff --git a/src/3rdparty/pcre2/src/pcre2_substitute.c b/src/3rdparty/pcre2/src/pcre2_substitute.c
index 0bf781efc1..8da951fc6e 100644
--- a/src/3rdparty/pcre2/src/pcre2_substitute.c
+++ b/src/3rdparty/pcre2/src/pcre2_substitute.c
@@ -114,7 +114,7 @@ for (; ptr < ptrend; ptr++)
else if (*ptr == CHAR_BACKSLASH)
{
int erc;
- int errorcode = 0;
+ int errorcode;
uint32_t ch;
if (ptr < ptrend - 1) switch (ptr[1])
@@ -127,8 +127,10 @@ for (; ptr < ptrend; ptr++)
continue;
}
+ ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
+ ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
rc = errorcode;
@@ -287,6 +289,12 @@ options &= ~SUBSTITUTE_OPTIONS;
/* Copy up to the start offset */
+if (start_offset > length)
+ {
+ match_data->leftchar = 0;
+ rc = PCRE2_ERROR_BADOFFSET;
+ goto EXIT;
+ }
CHECKMEMCPY(subject, start_offset);
/* Loop for global substituting. */
@@ -698,7 +706,7 @@ do
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
*ptr == CHAR_BACKSLASH)
{
- int errorcode = 0;
+ int errorcode;
if (ptr < repend - 1) switch (ptr[1])
{
@@ -728,10 +736,10 @@ do
break;
}
+ ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
code->overall_options, FALSE, NULL);
if (errorcode != 0) goto BADESCAPE;
- ptr++;
switch(rc)
{
diff --git a/src/3rdparty/pcre2/src/pcre2_tables.c b/src/3rdparty/pcre2/src/pcre2_tables.c
index b945ed7a7f..9f8dc293aa 100644
--- a/src/3rdparty/pcre2/src/pcre2_tables.c
+++ b/src/3rdparty/pcre2/src/pcre2_tables.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains some fixed tables that are used by more than one of the
-PCRE code modules. The tables are also #included by the pcre2test program,
+PCRE2 code modules. The tables are also #included by the pcre2test program,
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
defined. */
@@ -148,7 +148,7 @@ two code points. The breaking rules are as follows:
1. Break at the start and end of text (pretty obviously).
-2. Do not break between a CR and LF; otherwise, break before and after
+2. Do not break between a CR and LF; otherwise, break before and after
controls.
3. Do not break Hangul syllable sequences, the rules for which are:
@@ -157,44 +157,62 @@ two code points. The breaking rules are as follows:
LV or V may be followed by V or T
LVT or T may be followed by T
-4. Do not break before extending characters.
+4. Do not break before extending characters or zero-width-joiner (ZWJ).
-The next two rules are only for extended grapheme clusters (but that's what we
+The following rules are only for extended grapheme clusters (but that's what we
are implementing).
5. Do not break before SpacingMarks.
6. Do not break after Prepend characters.
-7. Otherwise, break everywhere.
+7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
+ by E_Modifier). Extend characters are allowed before the modifier; this
+ cannot be represented in this table, the code has to deal with it.
+
+8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
+ E_Base_GAZ).
+
+9. Do not break within emoji flag sequences. That is, do not break between
+ regional indicator (RI) symbols if there are an odd number of RI characters
+ before the break point. This table encodes "join RI characters"; the code
+ has to deal with checking for previous adjoining RIs.
+
+10. Otherwise, break everywhere.
*/
+#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
+
const uint32_t PRIV(ucp_gbtable)[] = {
(1<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */
- (1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */
- (1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
- (1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
- (1<<ucp_gbLVT)|(1<<ucp_gbOther),
-
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
- (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
-
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
- (1<<ucp_gbT),
-
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */
- (1<<ucp_gbT),
-
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */
+ ESZ, /* 3 Extend */
+ ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
+ (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
+ (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
+ (1<<ucp_gbRegionalIndicator)|
+ (1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
+ (1<<ucp_gbE_Base_GAZ)|
+ (1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
+ ESZ, /* 5 SpacingMark */
+ ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
+ (1<<ucp_gbLVT),
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
+ ESZ|(1<<ucp_gbT), /* 8 T */
+ ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
+ ESZ|(1<<ucp_gbT), /* 10 LVT */
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
- (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */
+ ESZ, /* 12 Other */
+ ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
+ ESZ, /* 14 E_Modifier */
+ ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
+ ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
+ ESZ /* 12 Glue_After_Zwj */
};
+#undef ESZ
+
#ifdef SUPPORT_JIT
/* This table reverses PRIV(ucp_gentype). We can save the cost
of a memory load. */
@@ -227,6 +245,7 @@ version. Like all other character and string literals that are compared against
the regular expression pattern, we must use STR_ macros instead of literal
strings to make sure that UTF-8 support works on EBCDIC platforms. */
+#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0"
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Any0 STR_A STR_n STR_y "\0"
@@ -238,6 +257,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
+#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
@@ -313,6 +333,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
+#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0"
+#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
@@ -330,9 +352,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
+#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
#define STRING_Nko0 STR_N STR_k STR_o "\0"
#define STRING_Nl0 STR_N STR_l "\0"
#define STRING_No0 STR_N STR_o "\0"
+#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
@@ -343,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
+#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0"
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
#define STRING_P0 STR_P "\0"
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
@@ -373,6 +398,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
+#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
@@ -383,6 +409,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
+#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0"
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
@@ -399,11 +426,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
+#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
#define STRING_Zl0 STR_Z STR_l "\0"
#define STRING_Zp0 STR_Z STR_p "\0"
#define STRING_Zs0 STR_Z STR_s "\0"
const char PRIV(utt_names)[] =
+ STRING_Adlam0
STRING_Ahom0
STRING_Anatolian_Hieroglyphs0
STRING_Any0
@@ -415,6 +444,7 @@ const char PRIV(utt_names)[] =
STRING_Bassa_Vah0
STRING_Batak0
STRING_Bengali0
+ STRING_Bhaiksuki0
STRING_Bopomofo0
STRING_Brahmi0
STRING_Braille0
@@ -490,6 +520,8 @@ const char PRIV(utt_names)[] =
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
+ STRING_Marchen0
+ STRING_Masaram_Gondi0
STRING_Mc0
STRING_Me0
STRING_Meetei_Mayek0
@@ -507,9 +539,11 @@ const char PRIV(utt_names)[] =
STRING_Nabataean0
STRING_Nd0
STRING_New_Tai_Lue0
+ STRING_Newa0
STRING_Nko0
STRING_Nl0
STRING_No0
+ STRING_Nushu0
STRING_Ogham0
STRING_Ol_Chiki0
STRING_Old_Hungarian0
@@ -520,6 +554,7 @@ const char PRIV(utt_names)[] =
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
+ STRING_Osage0
STRING_Osmanya0
STRING_P0
STRING_Pahawh_Hmong0
@@ -550,6 +585,7 @@ const char PRIV(utt_names)[] =
STRING_Sm0
STRING_So0
STRING_Sora_Sompeng0
+ STRING_Soyombo0
STRING_Sundanese0
STRING_Syloti_Nagri0
STRING_Syriac0
@@ -560,6 +596,7 @@ const char PRIV(utt_names)[] =
STRING_Tai_Viet0
STRING_Takri0
STRING_Tamil0
+ STRING_Tangut0
STRING_Telugu0
STRING_Thaana0
STRING_Thai0
@@ -576,186 +613,197 @@ const char PRIV(utt_names)[] =
STRING_Xwd0
STRING_Yi0
STRING_Z0
+ STRING_Zanabazar_Square0
STRING_Zl0
STRING_Zp0
STRING_Zs0;
const ucp_type_table PRIV(utt)[] = {
- { 0, PT_SC, ucp_Ahom },
- { 5, PT_SC, ucp_Anatolian_Hieroglyphs },
- { 27, PT_ANY, 0 },
- { 31, PT_SC, ucp_Arabic },
- { 38, PT_SC, ucp_Armenian },
- { 47, PT_SC, ucp_Avestan },
- { 55, PT_SC, ucp_Balinese },
- { 64, PT_SC, ucp_Bamum },
- { 70, PT_SC, ucp_Bassa_Vah },
- { 80, PT_SC, ucp_Batak },
- { 86, PT_SC, ucp_Bengali },
- { 94, PT_SC, ucp_Bopomofo },
- { 103, PT_SC, ucp_Brahmi },
- { 110, PT_SC, ucp_Braille },
- { 118, PT_SC, ucp_Buginese },
- { 127, PT_SC, ucp_Buhid },
- { 133, PT_GC, ucp_C },
- { 135, PT_SC, ucp_Canadian_Aboriginal },
- { 155, PT_SC, ucp_Carian },
- { 162, PT_SC, ucp_Caucasian_Albanian },
- { 181, PT_PC, ucp_Cc },
- { 184, PT_PC, ucp_Cf },
- { 187, PT_SC, ucp_Chakma },
- { 194, PT_SC, ucp_Cham },
- { 199, PT_SC, ucp_Cherokee },
- { 208, PT_PC, ucp_Cn },
- { 211, PT_PC, ucp_Co },
- { 214, PT_SC, ucp_Common },
- { 221, PT_SC, ucp_Coptic },
- { 228, PT_PC, ucp_Cs },
- { 231, PT_SC, ucp_Cuneiform },
- { 241, PT_SC, ucp_Cypriot },
- { 249, PT_SC, ucp_Cyrillic },
- { 258, PT_SC, ucp_Deseret },
- { 266, PT_SC, ucp_Devanagari },
- { 277, PT_SC, ucp_Duployan },
- { 286, PT_SC, ucp_Egyptian_Hieroglyphs },
- { 307, PT_SC, ucp_Elbasan },
- { 315, PT_SC, ucp_Ethiopic },
- { 324, PT_SC, ucp_Georgian },
- { 333, PT_SC, ucp_Glagolitic },
- { 344, PT_SC, ucp_Gothic },
- { 351, PT_SC, ucp_Grantha },
- { 359, PT_SC, ucp_Greek },
- { 365, PT_SC, ucp_Gujarati },
- { 374, PT_SC, ucp_Gurmukhi },
- { 383, PT_SC, ucp_Han },
- { 387, PT_SC, ucp_Hangul },
- { 394, PT_SC, ucp_Hanunoo },
- { 402, PT_SC, ucp_Hatran },
- { 409, PT_SC, ucp_Hebrew },
- { 416, PT_SC, ucp_Hiragana },
- { 425, PT_SC, ucp_Imperial_Aramaic },
- { 442, PT_SC, ucp_Inherited },
- { 452, PT_SC, ucp_Inscriptional_Pahlavi },
- { 474, PT_SC, ucp_Inscriptional_Parthian },
- { 497, PT_SC, ucp_Javanese },
- { 506, PT_SC, ucp_Kaithi },
- { 513, PT_SC, ucp_Kannada },
- { 521, PT_SC, ucp_Katakana },
- { 530, PT_SC, ucp_Kayah_Li },
- { 539, PT_SC, ucp_Kharoshthi },
- { 550, PT_SC, ucp_Khmer },
- { 556, PT_SC, ucp_Khojki },
- { 563, PT_SC, ucp_Khudawadi },
- { 573, PT_GC, ucp_L },
- { 575, PT_LAMP, 0 },
- { 578, PT_SC, ucp_Lao },
- { 582, PT_SC, ucp_Latin },
- { 588, PT_SC, ucp_Lepcha },
- { 595, PT_SC, ucp_Limbu },
- { 601, PT_SC, ucp_Linear_A },
- { 610, PT_SC, ucp_Linear_B },
- { 619, PT_SC, ucp_Lisu },
- { 624, PT_PC, ucp_Ll },
- { 627, PT_PC, ucp_Lm },
- { 630, PT_PC, ucp_Lo },
- { 633, PT_PC, ucp_Lt },
- { 636, PT_PC, ucp_Lu },
- { 639, PT_SC, ucp_Lycian },
- { 646, PT_SC, ucp_Lydian },
- { 653, PT_GC, ucp_M },
- { 655, PT_SC, ucp_Mahajani },
- { 664, PT_SC, ucp_Malayalam },
- { 674, PT_SC, ucp_Mandaic },
- { 682, PT_SC, ucp_Manichaean },
- { 693, PT_PC, ucp_Mc },
- { 696, PT_PC, ucp_Me },
- { 699, PT_SC, ucp_Meetei_Mayek },
- { 712, PT_SC, ucp_Mende_Kikakui },
- { 726, PT_SC, ucp_Meroitic_Cursive },
- { 743, PT_SC, ucp_Meroitic_Hieroglyphs },
- { 764, PT_SC, ucp_Miao },
- { 769, PT_PC, ucp_Mn },
- { 772, PT_SC, ucp_Modi },
- { 777, PT_SC, ucp_Mongolian },
- { 787, PT_SC, ucp_Mro },
- { 791, PT_SC, ucp_Multani },
- { 799, PT_SC, ucp_Myanmar },
- { 807, PT_GC, ucp_N },
- { 809, PT_SC, ucp_Nabataean },
- { 819, PT_PC, ucp_Nd },
- { 822, PT_SC, ucp_New_Tai_Lue },
- { 834, PT_SC, ucp_Nko },
- { 838, PT_PC, ucp_Nl },
- { 841, PT_PC, ucp_No },
- { 844, PT_SC, ucp_Ogham },
- { 850, PT_SC, ucp_Ol_Chiki },
- { 859, PT_SC, ucp_Old_Hungarian },
- { 873, PT_SC, ucp_Old_Italic },
- { 884, PT_SC, ucp_Old_North_Arabian },
- { 902, PT_SC, ucp_Old_Permic },
- { 913, PT_SC, ucp_Old_Persian },
- { 925, PT_SC, ucp_Old_South_Arabian },
- { 943, PT_SC, ucp_Old_Turkic },
- { 954, PT_SC, ucp_Oriya },
- { 960, PT_SC, ucp_Osmanya },
- { 968, PT_GC, ucp_P },
- { 970, PT_SC, ucp_Pahawh_Hmong },
- { 983, PT_SC, ucp_Palmyrene },
- { 993, PT_SC, ucp_Pau_Cin_Hau },
- { 1005, PT_PC, ucp_Pc },
- { 1008, PT_PC, ucp_Pd },
- { 1011, PT_PC, ucp_Pe },
- { 1014, PT_PC, ucp_Pf },
- { 1017, PT_SC, ucp_Phags_Pa },
- { 1026, PT_SC, ucp_Phoenician },
- { 1037, PT_PC, ucp_Pi },
- { 1040, PT_PC, ucp_Po },
- { 1043, PT_PC, ucp_Ps },
- { 1046, PT_SC, ucp_Psalter_Pahlavi },
- { 1062, PT_SC, ucp_Rejang },
- { 1069, PT_SC, ucp_Runic },
- { 1075, PT_GC, ucp_S },
- { 1077, PT_SC, ucp_Samaritan },
- { 1087, PT_SC, ucp_Saurashtra },
- { 1098, PT_PC, ucp_Sc },
- { 1101, PT_SC, ucp_Sharada },
- { 1109, PT_SC, ucp_Shavian },
- { 1117, PT_SC, ucp_Siddham },
- { 1125, PT_SC, ucp_SignWriting },
- { 1137, PT_SC, ucp_Sinhala },
- { 1145, PT_PC, ucp_Sk },
- { 1148, PT_PC, ucp_Sm },
- { 1151, PT_PC, ucp_So },
- { 1154, PT_SC, ucp_Sora_Sompeng },
- { 1167, PT_SC, ucp_Sundanese },
- { 1177, PT_SC, ucp_Syloti_Nagri },
- { 1190, PT_SC, ucp_Syriac },
- { 1197, PT_SC, ucp_Tagalog },
- { 1205, PT_SC, ucp_Tagbanwa },
- { 1214, PT_SC, ucp_Tai_Le },
- { 1221, PT_SC, ucp_Tai_Tham },
- { 1230, PT_SC, ucp_Tai_Viet },
- { 1239, PT_SC, ucp_Takri },
- { 1245, PT_SC, ucp_Tamil },
- { 1251, PT_SC, ucp_Telugu },
- { 1258, PT_SC, ucp_Thaana },
- { 1265, PT_SC, ucp_Thai },
- { 1270, PT_SC, ucp_Tibetan },
- { 1278, PT_SC, ucp_Tifinagh },
- { 1287, PT_SC, ucp_Tirhuta },
- { 1295, PT_SC, ucp_Ugaritic },
- { 1304, PT_SC, ucp_Vai },
- { 1308, PT_SC, ucp_Warang_Citi },
- { 1320, PT_ALNUM, 0 },
- { 1324, PT_PXSPACE, 0 },
- { 1328, PT_SPACE, 0 },
- { 1332, PT_UCNC, 0 },
- { 1336, PT_WORD, 0 },
- { 1340, PT_SC, ucp_Yi },
- { 1343, PT_GC, ucp_Z },
- { 1345, PT_PC, ucp_Zl },
- { 1348, PT_PC, ucp_Zp },
- { 1351, PT_PC, ucp_Zs }
+ { 0, PT_SC, ucp_Adlam },
+ { 6, PT_SC, ucp_Ahom },
+ { 11, PT_SC, ucp_Anatolian_Hieroglyphs },
+ { 33, PT_ANY, 0 },
+ { 37, PT_SC, ucp_Arabic },
+ { 44, PT_SC, ucp_Armenian },
+ { 53, PT_SC, ucp_Avestan },
+ { 61, PT_SC, ucp_Balinese },
+ { 70, PT_SC, ucp_Bamum },
+ { 76, PT_SC, ucp_Bassa_Vah },
+ { 86, PT_SC, ucp_Batak },
+ { 92, PT_SC, ucp_Bengali },
+ { 100, PT_SC, ucp_Bhaiksuki },
+ { 110, PT_SC, ucp_Bopomofo },
+ { 119, PT_SC, ucp_Brahmi },
+ { 126, PT_SC, ucp_Braille },
+ { 134, PT_SC, ucp_Buginese },
+ { 143, PT_SC, ucp_Buhid },
+ { 149, PT_GC, ucp_C },
+ { 151, PT_SC, ucp_Canadian_Aboriginal },
+ { 171, PT_SC, ucp_Carian },
+ { 178, PT_SC, ucp_Caucasian_Albanian },
+ { 197, PT_PC, ucp_Cc },
+ { 200, PT_PC, ucp_Cf },
+ { 203, PT_SC, ucp_Chakma },
+ { 210, PT_SC, ucp_Cham },
+ { 215, PT_SC, ucp_Cherokee },
+ { 224, PT_PC, ucp_Cn },
+ { 227, PT_PC, ucp_Co },
+ { 230, PT_SC, ucp_Common },
+ { 237, PT_SC, ucp_Coptic },
+ { 244, PT_PC, ucp_Cs },
+ { 247, PT_SC, ucp_Cuneiform },
+ { 257, PT_SC, ucp_Cypriot },
+ { 265, PT_SC, ucp_Cyrillic },
+ { 274, PT_SC, ucp_Deseret },
+ { 282, PT_SC, ucp_Devanagari },
+ { 293, PT_SC, ucp_Duployan },
+ { 302, PT_SC, ucp_Egyptian_Hieroglyphs },
+ { 323, PT_SC, ucp_Elbasan },
+ { 331, PT_SC, ucp_Ethiopic },
+ { 340, PT_SC, ucp_Georgian },
+ { 349, PT_SC, ucp_Glagolitic },
+ { 360, PT_SC, ucp_Gothic },
+ { 367, PT_SC, ucp_Grantha },
+ { 375, PT_SC, ucp_Greek },
+ { 381, PT_SC, ucp_Gujarati },
+ { 390, PT_SC, ucp_Gurmukhi },
+ { 399, PT_SC, ucp_Han },
+ { 403, PT_SC, ucp_Hangul },
+ { 410, PT_SC, ucp_Hanunoo },
+ { 418, PT_SC, ucp_Hatran },
+ { 425, PT_SC, ucp_Hebrew },
+ { 432, PT_SC, ucp_Hiragana },
+ { 441, PT_SC, ucp_Imperial_Aramaic },
+ { 458, PT_SC, ucp_Inherited },
+ { 468, PT_SC, ucp_Inscriptional_Pahlavi },
+ { 490, PT_SC, ucp_Inscriptional_Parthian },
+ { 513, PT_SC, ucp_Javanese },
+ { 522, PT_SC, ucp_Kaithi },
+ { 529, PT_SC, ucp_Kannada },
+ { 537, PT_SC, ucp_Katakana },
+ { 546, PT_SC, ucp_Kayah_Li },
+ { 555, PT_SC, ucp_Kharoshthi },
+ { 566, PT_SC, ucp_Khmer },
+ { 572, PT_SC, ucp_Khojki },
+ { 579, PT_SC, ucp_Khudawadi },
+ { 589, PT_GC, ucp_L },
+ { 591, PT_LAMP, 0 },
+ { 594, PT_SC, ucp_Lao },
+ { 598, PT_SC, ucp_Latin },
+ { 604, PT_SC, ucp_Lepcha },
+ { 611, PT_SC, ucp_Limbu },
+ { 617, PT_SC, ucp_Linear_A },
+ { 626, PT_SC, ucp_Linear_B },
+ { 635, PT_SC, ucp_Lisu },
+ { 640, PT_PC, ucp_Ll },
+ { 643, PT_PC, ucp_Lm },
+ { 646, PT_PC, ucp_Lo },
+ { 649, PT_PC, ucp_Lt },
+ { 652, PT_PC, ucp_Lu },
+ { 655, PT_SC, ucp_Lycian },
+ { 662, PT_SC, ucp_Lydian },
+ { 669, PT_GC, ucp_M },
+ { 671, PT_SC, ucp_Mahajani },
+ { 680, PT_SC, ucp_Malayalam },
+ { 690, PT_SC, ucp_Mandaic },
+ { 698, PT_SC, ucp_Manichaean },
+ { 709, PT_SC, ucp_Marchen },
+ { 717, PT_SC, ucp_Masaram_Gondi },
+ { 731, PT_PC, ucp_Mc },
+ { 734, PT_PC, ucp_Me },
+ { 737, PT_SC, ucp_Meetei_Mayek },
+ { 750, PT_SC, ucp_Mende_Kikakui },
+ { 764, PT_SC, ucp_Meroitic_Cursive },
+ { 781, PT_SC, ucp_Meroitic_Hieroglyphs },
+ { 802, PT_SC, ucp_Miao },
+ { 807, PT_PC, ucp_Mn },
+ { 810, PT_SC, ucp_Modi },
+ { 815, PT_SC, ucp_Mongolian },
+ { 825, PT_SC, ucp_Mro },
+ { 829, PT_SC, ucp_Multani },
+ { 837, PT_SC, ucp_Myanmar },
+ { 845, PT_GC, ucp_N },
+ { 847, PT_SC, ucp_Nabataean },
+ { 857, PT_PC, ucp_Nd },
+ { 860, PT_SC, ucp_New_Tai_Lue },
+ { 872, PT_SC, ucp_Newa },
+ { 877, PT_SC, ucp_Nko },
+ { 881, PT_PC, ucp_Nl },
+ { 884, PT_PC, ucp_No },
+ { 887, PT_SC, ucp_Nushu },
+ { 893, PT_SC, ucp_Ogham },
+ { 899, PT_SC, ucp_Ol_Chiki },
+ { 908, PT_SC, ucp_Old_Hungarian },
+ { 922, PT_SC, ucp_Old_Italic },
+ { 933, PT_SC, ucp_Old_North_Arabian },
+ { 951, PT_SC, ucp_Old_Permic },
+ { 962, PT_SC, ucp_Old_Persian },
+ { 974, PT_SC, ucp_Old_South_Arabian },
+ { 992, PT_SC, ucp_Old_Turkic },
+ { 1003, PT_SC, ucp_Oriya },
+ { 1009, PT_SC, ucp_Osage },
+ { 1015, PT_SC, ucp_Osmanya },
+ { 1023, PT_GC, ucp_P },
+ { 1025, PT_SC, ucp_Pahawh_Hmong },
+ { 1038, PT_SC, ucp_Palmyrene },
+ { 1048, PT_SC, ucp_Pau_Cin_Hau },
+ { 1060, PT_PC, ucp_Pc },
+ { 1063, PT_PC, ucp_Pd },
+ { 1066, PT_PC, ucp_Pe },
+ { 1069, PT_PC, ucp_Pf },
+ { 1072, PT_SC, ucp_Phags_Pa },
+ { 1081, PT_SC, ucp_Phoenician },
+ { 1092, PT_PC, ucp_Pi },
+ { 1095, PT_PC, ucp_Po },
+ { 1098, PT_PC, ucp_Ps },
+ { 1101, PT_SC, ucp_Psalter_Pahlavi },
+ { 1117, PT_SC, ucp_Rejang },
+ { 1124, PT_SC, ucp_Runic },
+ { 1130, PT_GC, ucp_S },
+ { 1132, PT_SC, ucp_Samaritan },
+ { 1142, PT_SC, ucp_Saurashtra },
+ { 1153, PT_PC, ucp_Sc },
+ { 1156, PT_SC, ucp_Sharada },
+ { 1164, PT_SC, ucp_Shavian },
+ { 1172, PT_SC, ucp_Siddham },
+ { 1180, PT_SC, ucp_SignWriting },
+ { 1192, PT_SC, ucp_Sinhala },
+ { 1200, PT_PC, ucp_Sk },
+ { 1203, PT_PC, ucp_Sm },
+ { 1206, PT_PC, ucp_So },
+ { 1209, PT_SC, ucp_Sora_Sompeng },
+ { 1222, PT_SC, ucp_Soyombo },
+ { 1230, PT_SC, ucp_Sundanese },
+ { 1240, PT_SC, ucp_Syloti_Nagri },
+ { 1253, PT_SC, ucp_Syriac },
+ { 1260, PT_SC, ucp_Tagalog },
+ { 1268, PT_SC, ucp_Tagbanwa },
+ { 1277, PT_SC, ucp_Tai_Le },
+ { 1284, PT_SC, ucp_Tai_Tham },
+ { 1293, PT_SC, ucp_Tai_Viet },
+ { 1302, PT_SC, ucp_Takri },
+ { 1308, PT_SC, ucp_Tamil },
+ { 1314, PT_SC, ucp_Tangut },
+ { 1321, PT_SC, ucp_Telugu },
+ { 1328, PT_SC, ucp_Thaana },
+ { 1335, PT_SC, ucp_Thai },
+ { 1340, PT_SC, ucp_Tibetan },
+ { 1348, PT_SC, ucp_Tifinagh },
+ { 1357, PT_SC, ucp_Tirhuta },
+ { 1365, PT_SC, ucp_Ugaritic },
+ { 1374, PT_SC, ucp_Vai },
+ { 1378, PT_SC, ucp_Warang_Citi },
+ { 1390, PT_ALNUM, 0 },
+ { 1394, PT_PXSPACE, 0 },
+ { 1398, PT_SPACE, 0 },
+ { 1402, PT_UCNC, 0 },
+ { 1406, PT_WORD, 0 },
+ { 1410, PT_SC, ucp_Yi },
+ { 1413, PT_GC, ucp_Z },
+ { 1415, PT_SC, ucp_Zanabazar_Square },
+ { 1432, PT_PC, ucp_Zl },
+ { 1435, PT_PC, ucp_Zp },
+ { 1438, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/src/3rdparty/pcre2/src/pcre2_ucd.c b/src/3rdparty/pcre2/src/pcre2_ucd.c
index 116f537b38..ac7649b99e 100644
--- a/src/3rdparty/pcre2/src/pcre2_ucd.c
+++ b/src/3rdparty/pcre2/src/pcre2_ucd.c
@@ -20,7 +20,7 @@ needed. */
/* Unicode character database. */
/* This file was autogenerated by the MultiStage2.py script. */
-/* Total size: 75072 bytes, block size: 128. */
+/* Total size: 80808 bytes, block size: 128. */
/* The tables herein are needed only when UCP support is built,
and in PCRE2 that happens automatically with UTF support.
@@ -39,7 +39,21 @@ const uint16_t PRIV(ucd_stage2)[] = {0};
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
#else
-const char *PRIV(unicode_version) = "8.0.0";
+const char *PRIV(unicode_version) = "10.0.0";
+
+/* If the 32-bit library is run in non-32-bit mode, character values
+greater than 0x10ffff may be encountered. For these we set up a
+special record. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+ ucp_Common, /* script */
+ ucp_Cn, /* type unassigned */
+ ucp_gbOther, /* grapheme break property */
+ 0, /* case set */
+ 0, /* other case */
+ }};
+#endif
/* When recompiling tables with a new Unicode version, please check the
types in this structure definition from pcre2_internal.h (the actual
@@ -72,17 +86,25 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {
0x039a, 0x03ba, 0x03f0, NOTACHAR,
0x03a1, 0x03c1, 0x03f1, NOTACHAR,
0x0395, 0x03b5, 0x03f5, NOTACHAR,
+ 0x0412, 0x0432, 0x1c80, NOTACHAR,
+ 0x0414, 0x0434, 0x1c81, NOTACHAR,
+ 0x041e, 0x043e, 0x1c82, NOTACHAR,
+ 0x0421, 0x0441, 0x1c83, NOTACHAR,
+ 0x0422, 0x0442, 0x1c84, 0x1c85, NOTACHAR,
+ 0x042a, 0x044a, 0x1c86, NOTACHAR,
+ 0x0462, 0x0463, 0x1c87, NOTACHAR,
0x1e60, 0x1e61, 0x1e9b, NOTACHAR,
0x03a9, 0x03c9, 0x2126, NOTACHAR,
0x004b, 0x006b, 0x212a, NOTACHAR,
0x00c5, 0x00e5, 0x212b, NOTACHAR,
+ 0x1c88, 0xa64a, 0xa64b, NOTACHAR,
};
/* When #included in pcre2test, we don't need this large table. */
#ifndef PCRE2_PCRE2TEST
-const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
+const ucd_record PRIV(ucd_records)[] = { /* 6568 bytes, record size 8 */
{ 9, 0, 2, 0, 0, }, /* 0 */
{ 9, 0, 1, 0, 0, }, /* 1 */
{ 9, 0, 0, 0, 0, }, /* 2 */
@@ -95,12 +117,12 @@ const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
{ 9, 17, 12, 0, 0, }, /* 9 */
{ 9, 13, 12, 0, 0, }, /* 10 */
{ 33, 9, 12, 0, 32, }, /* 11 */
- { 33, 9, 12, 71, 32, }, /* 12 */
+ { 33, 9, 12, 100, 32, }, /* 12 */
{ 33, 9, 12, 1, 32, }, /* 13 */
{ 9, 24, 12, 0, 0, }, /* 14 */
{ 9, 16, 12, 0, 0, }, /* 15 */
{ 33, 5, 12, 0, -32, }, /* 16 */
- { 33, 5, 12, 71, -32, }, /* 17 */
+ { 33, 5, 12, 100, -32, }, /* 17 */
{ 33, 5, 12, 1, -32, }, /* 18 */
{ 9, 26, 12, 0, 0, }, /* 19 */
{ 33, 7, 12, 0, 0, }, /* 20 */
@@ -109,9 +131,9 @@ const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
{ 9, 15, 12, 0, 0, }, /* 23 */
{ 9, 5, 12, 26, 775, }, /* 24 */
{ 9, 19, 12, 0, 0, }, /* 25 */
- { 33, 9, 12, 75, 32, }, /* 26 */
+ { 33, 9, 12, 104, 32, }, /* 26 */
{ 33, 5, 12, 0, 7615, }, /* 27 */
- { 33, 5, 12, 75, -32, }, /* 28 */
+ { 33, 5, 12, 104, -32, }, /* 28 */
{ 33, 5, 12, 0, 121, }, /* 29 */
{ 33, 9, 12, 0, 1, }, /* 30 */
{ 33, 5, 12, 0, -1, }, /* 31 */
@@ -218,7 +240,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
{ 19, 9, 12, 55, 32, }, /* 132 */
{ 19, 9, 12, 30, 32, }, /* 133 */
{ 19, 9, 12, 43, 32, }, /* 134 */
- { 19, 9, 12, 67, 32, }, /* 135 */
+ { 19, 9, 12, 96, 32, }, /* 135 */
{ 19, 5, 12, 0, -38, }, /* 136 */
{ 19, 5, 12, 0, -37, }, /* 137 */
{ 19, 5, 12, 0, -32, }, /* 138 */
@@ -233,7 +255,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
{ 19, 5, 12, 30, 1, }, /* 147 */
{ 19, 5, 12, 30, -32, }, /* 148 */
{ 19, 5, 12, 43, -32, }, /* 149 */
- { 19, 5, 12, 67, -32, }, /* 150 */
+ { 19, 5, 12, 96, -32, }, /* 150 */
{ 19, 5, 12, 0, -64, }, /* 151 */
{ 19, 5, 12, 0, -63, }, /* 152 */
{ 19, 9, 12, 0, 8, }, /* 153 */
@@ -256,577 +278,654 @@ const ucd_record PRIV(ucd_records)[] = { /* 5952 bytes, record size 8 */
{ 19, 9, 12, 0, -130, }, /* 170 */
{ 12, 9, 12, 0, 80, }, /* 171 */
{ 12, 9, 12, 0, 32, }, /* 172 */
- { 12, 5, 12, 0, -32, }, /* 173 */
- { 12, 5, 12, 0, -80, }, /* 174 */
- { 12, 9, 12, 0, 1, }, /* 175 */
- { 12, 5, 12, 0, -1, }, /* 176 */
- { 12, 26, 12, 0, 0, }, /* 177 */
- { 12, 12, 3, 0, 0, }, /* 178 */
- { 12, 11, 3, 0, 0, }, /* 179 */
- { 12, 9, 12, 0, 15, }, /* 180 */
- { 12, 5, 12, 0, -15, }, /* 181 */
- { 1, 9, 12, 0, 48, }, /* 182 */
- { 1, 6, 12, 0, 0, }, /* 183 */
- { 1, 21, 12, 0, 0, }, /* 184 */
- { 1, 5, 12, 0, -48, }, /* 185 */
- { 1, 5, 12, 0, 0, }, /* 186 */
- { 1, 17, 12, 0, 0, }, /* 187 */
- { 1, 26, 12, 0, 0, }, /* 188 */
- { 1, 23, 12, 0, 0, }, /* 189 */
- { 25, 12, 3, 0, 0, }, /* 190 */
- { 25, 17, 12, 0, 0, }, /* 191 */
- { 25, 21, 12, 0, 0, }, /* 192 */
- { 25, 7, 12, 0, 0, }, /* 193 */
- { 0, 1, 2, 0, 0, }, /* 194 */
- { 0, 25, 12, 0, 0, }, /* 195 */
- { 0, 21, 12, 0, 0, }, /* 196 */
- { 0, 23, 12, 0, 0, }, /* 197 */
- { 0, 26, 12, 0, 0, }, /* 198 */
- { 0, 12, 3, 0, 0, }, /* 199 */
- { 0, 7, 12, 0, 0, }, /* 200 */
- { 0, 13, 12, 0, 0, }, /* 201 */
- { 0, 6, 12, 0, 0, }, /* 202 */
- { 49, 21, 12, 0, 0, }, /* 203 */
- { 49, 1, 2, 0, 0, }, /* 204 */
- { 49, 7, 12, 0, 0, }, /* 205 */
- { 49, 12, 3, 0, 0, }, /* 206 */
- { 55, 7, 12, 0, 0, }, /* 207 */
- { 55, 12, 3, 0, 0, }, /* 208 */
- { 63, 13, 12, 0, 0, }, /* 209 */
- { 63, 7, 12, 0, 0, }, /* 210 */
- { 63, 12, 3, 0, 0, }, /* 211 */
- { 63, 6, 12, 0, 0, }, /* 212 */
- { 63, 26, 12, 0, 0, }, /* 213 */
- { 63, 21, 12, 0, 0, }, /* 214 */
- { 89, 7, 12, 0, 0, }, /* 215 */
- { 89, 12, 3, 0, 0, }, /* 216 */
- { 89, 6, 12, 0, 0, }, /* 217 */
- { 89, 21, 12, 0, 0, }, /* 218 */
- { 94, 7, 12, 0, 0, }, /* 219 */
- { 94, 12, 3, 0, 0, }, /* 220 */
- { 94, 21, 12, 0, 0, }, /* 221 */
- { 14, 12, 3, 0, 0, }, /* 222 */
- { 14, 10, 5, 0, 0, }, /* 223 */
- { 14, 7, 12, 0, 0, }, /* 224 */
- { 14, 13, 12, 0, 0, }, /* 225 */
- { 14, 21, 12, 0, 0, }, /* 226 */
- { 14, 6, 12, 0, 0, }, /* 227 */
- { 2, 7, 12, 0, 0, }, /* 228 */
- { 2, 12, 3, 0, 0, }, /* 229 */
- { 2, 10, 5, 0, 0, }, /* 230 */
- { 2, 10, 3, 0, 0, }, /* 231 */
- { 2, 13, 12, 0, 0, }, /* 232 */
- { 2, 23, 12, 0, 0, }, /* 233 */
- { 2, 15, 12, 0, 0, }, /* 234 */
- { 2, 26, 12, 0, 0, }, /* 235 */
- { 21, 12, 3, 0, 0, }, /* 236 */
- { 21, 10, 5, 0, 0, }, /* 237 */
- { 21, 7, 12, 0, 0, }, /* 238 */
- { 21, 13, 12, 0, 0, }, /* 239 */
- { 20, 12, 3, 0, 0, }, /* 240 */
- { 20, 10, 5, 0, 0, }, /* 241 */
- { 20, 7, 12, 0, 0, }, /* 242 */
- { 20, 13, 12, 0, 0, }, /* 243 */
- { 20, 21, 12, 0, 0, }, /* 244 */
- { 20, 23, 12, 0, 0, }, /* 245 */
- { 43, 12, 3, 0, 0, }, /* 246 */
- { 43, 10, 5, 0, 0, }, /* 247 */
- { 43, 7, 12, 0, 0, }, /* 248 */
- { 43, 10, 3, 0, 0, }, /* 249 */
- { 43, 13, 12, 0, 0, }, /* 250 */
- { 43, 26, 12, 0, 0, }, /* 251 */
- { 43, 15, 12, 0, 0, }, /* 252 */
- { 53, 12, 3, 0, 0, }, /* 253 */
- { 53, 7, 12, 0, 0, }, /* 254 */
- { 53, 10, 3, 0, 0, }, /* 255 */
- { 53, 10, 5, 0, 0, }, /* 256 */
- { 53, 13, 12, 0, 0, }, /* 257 */
- { 53, 15, 12, 0, 0, }, /* 258 */
- { 53, 26, 12, 0, 0, }, /* 259 */
- { 53, 23, 12, 0, 0, }, /* 260 */
- { 54, 12, 3, 0, 0, }, /* 261 */
- { 54, 10, 5, 0, 0, }, /* 262 */
- { 54, 7, 12, 0, 0, }, /* 263 */
- { 54, 13, 12, 0, 0, }, /* 264 */
- { 54, 15, 12, 0, 0, }, /* 265 */
- { 54, 26, 12, 0, 0, }, /* 266 */
- { 28, 12, 3, 0, 0, }, /* 267 */
- { 28, 10, 5, 0, 0, }, /* 268 */
- { 28, 7, 12, 0, 0, }, /* 269 */
- { 28, 10, 3, 0, 0, }, /* 270 */
- { 28, 13, 12, 0, 0, }, /* 271 */
- { 36, 12, 3, 0, 0, }, /* 272 */
- { 36, 10, 5, 0, 0, }, /* 273 */
- { 36, 7, 12, 0, 0, }, /* 274 */
- { 36, 10, 3, 0, 0, }, /* 275 */
- { 36, 13, 12, 0, 0, }, /* 276 */
- { 36, 15, 12, 0, 0, }, /* 277 */
- { 36, 26, 12, 0, 0, }, /* 278 */
- { 47, 10, 5, 0, 0, }, /* 279 */
- { 47, 7, 12, 0, 0, }, /* 280 */
- { 47, 12, 3, 0, 0, }, /* 281 */
- { 47, 10, 3, 0, 0, }, /* 282 */
- { 47, 13, 12, 0, 0, }, /* 283 */
- { 47, 21, 12, 0, 0, }, /* 284 */
- { 56, 7, 12, 0, 0, }, /* 285 */
- { 56, 12, 3, 0, 0, }, /* 286 */
- { 56, 7, 5, 0, 0, }, /* 287 */
- { 56, 6, 12, 0, 0, }, /* 288 */
- { 56, 21, 12, 0, 0, }, /* 289 */
- { 56, 13, 12, 0, 0, }, /* 290 */
- { 32, 7, 12, 0, 0, }, /* 291 */
- { 32, 12, 3, 0, 0, }, /* 292 */
- { 32, 7, 5, 0, 0, }, /* 293 */
- { 32, 6, 12, 0, 0, }, /* 294 */
- { 32, 13, 12, 0, 0, }, /* 295 */
- { 57, 7, 12, 0, 0, }, /* 296 */
- { 57, 26, 12, 0, 0, }, /* 297 */
- { 57, 21, 12, 0, 0, }, /* 298 */
- { 57, 12, 3, 0, 0, }, /* 299 */
- { 57, 13, 12, 0, 0, }, /* 300 */
- { 57, 15, 12, 0, 0, }, /* 301 */
- { 57, 22, 12, 0, 0, }, /* 302 */
- { 57, 18, 12, 0, 0, }, /* 303 */
- { 57, 10, 5, 0, 0, }, /* 304 */
- { 38, 7, 12, 0, 0, }, /* 305 */
- { 38, 10, 12, 0, 0, }, /* 306 */
- { 38, 12, 3, 0, 0, }, /* 307 */
- { 38, 10, 5, 0, 0, }, /* 308 */
- { 38, 13, 12, 0, 0, }, /* 309 */
- { 38, 21, 12, 0, 0, }, /* 310 */
- { 38, 26, 12, 0, 0, }, /* 311 */
- { 16, 9, 12, 0, 7264, }, /* 312 */
- { 16, 7, 12, 0, 0, }, /* 313 */
- { 16, 6, 12, 0, 0, }, /* 314 */
- { 23, 7, 6, 0, 0, }, /* 315 */
- { 23, 7, 7, 0, 0, }, /* 316 */
- { 23, 7, 8, 0, 0, }, /* 317 */
- { 15, 7, 12, 0, 0, }, /* 318 */
- { 15, 12, 3, 0, 0, }, /* 319 */
- { 15, 21, 12, 0, 0, }, /* 320 */
- { 15, 15, 12, 0, 0, }, /* 321 */
- { 15, 26, 12, 0, 0, }, /* 322 */
- { 8, 9, 12, 0, 38864, }, /* 323 */
- { 8, 9, 12, 0, 8, }, /* 324 */
- { 8, 5, 12, 0, -8, }, /* 325 */
- { 7, 17, 12, 0, 0, }, /* 326 */
- { 7, 7, 12, 0, 0, }, /* 327 */
- { 7, 21, 12, 0, 0, }, /* 328 */
- { 40, 29, 12, 0, 0, }, /* 329 */
- { 40, 7, 12, 0, 0, }, /* 330 */
- { 40, 22, 12, 0, 0, }, /* 331 */
- { 40, 18, 12, 0, 0, }, /* 332 */
- { 45, 7, 12, 0, 0, }, /* 333 */
- { 45, 14, 12, 0, 0, }, /* 334 */
- { 50, 7, 12, 0, 0, }, /* 335 */
- { 50, 12, 3, 0, 0, }, /* 336 */
- { 24, 7, 12, 0, 0, }, /* 337 */
- { 24, 12, 3, 0, 0, }, /* 338 */
- { 6, 7, 12, 0, 0, }, /* 339 */
- { 6, 12, 3, 0, 0, }, /* 340 */
- { 51, 7, 12, 0, 0, }, /* 341 */
- { 51, 12, 3, 0, 0, }, /* 342 */
- { 31, 7, 12, 0, 0, }, /* 343 */
- { 31, 12, 3, 0, 0, }, /* 344 */
- { 31, 10, 5, 0, 0, }, /* 345 */
- { 31, 21, 12, 0, 0, }, /* 346 */
- { 31, 6, 12, 0, 0, }, /* 347 */
- { 31, 23, 12, 0, 0, }, /* 348 */
- { 31, 13, 12, 0, 0, }, /* 349 */
- { 31, 15, 12, 0, 0, }, /* 350 */
- { 37, 21, 12, 0, 0, }, /* 351 */
- { 37, 17, 12, 0, 0, }, /* 352 */
- { 37, 12, 3, 0, 0, }, /* 353 */
- { 37, 1, 2, 0, 0, }, /* 354 */
- { 37, 13, 12, 0, 0, }, /* 355 */
- { 37, 7, 12, 0, 0, }, /* 356 */
- { 37, 6, 12, 0, 0, }, /* 357 */
- { 34, 7, 12, 0, 0, }, /* 358 */
- { 34, 12, 3, 0, 0, }, /* 359 */
- { 34, 10, 5, 0, 0, }, /* 360 */
- { 34, 26, 12, 0, 0, }, /* 361 */
- { 34, 21, 12, 0, 0, }, /* 362 */
- { 34, 13, 12, 0, 0, }, /* 363 */
- { 52, 7, 12, 0, 0, }, /* 364 */
- { 39, 7, 12, 0, 0, }, /* 365 */
- { 39, 13, 12, 0, 0, }, /* 366 */
- { 39, 15, 12, 0, 0, }, /* 367 */
- { 39, 26, 12, 0, 0, }, /* 368 */
- { 31, 26, 12, 0, 0, }, /* 369 */
- { 5, 7, 12, 0, 0, }, /* 370 */
- { 5, 12, 3, 0, 0, }, /* 371 */
- { 5, 10, 5, 0, 0, }, /* 372 */
- { 5, 21, 12, 0, 0, }, /* 373 */
- { 90, 7, 12, 0, 0, }, /* 374 */
- { 90, 10, 5, 0, 0, }, /* 375 */
- { 90, 12, 3, 0, 0, }, /* 376 */
- { 90, 10, 12, 0, 0, }, /* 377 */
- { 90, 13, 12, 0, 0, }, /* 378 */
- { 90, 21, 12, 0, 0, }, /* 379 */
- { 90, 6, 12, 0, 0, }, /* 380 */
- { 27, 11, 3, 0, 0, }, /* 381 */
- { 61, 12, 3, 0, 0, }, /* 382 */
- { 61, 10, 5, 0, 0, }, /* 383 */
- { 61, 7, 12, 0, 0, }, /* 384 */
- { 61, 13, 12, 0, 0, }, /* 385 */
- { 61, 21, 12, 0, 0, }, /* 386 */
- { 61, 26, 12, 0, 0, }, /* 387 */
- { 75, 12, 3, 0, 0, }, /* 388 */
- { 75, 10, 5, 0, 0, }, /* 389 */
- { 75, 7, 12, 0, 0, }, /* 390 */
- { 75, 13, 12, 0, 0, }, /* 391 */
- { 92, 7, 12, 0, 0, }, /* 392 */
- { 92, 12, 3, 0, 0, }, /* 393 */
- { 92, 10, 5, 0, 0, }, /* 394 */
- { 92, 21, 12, 0, 0, }, /* 395 */
- { 69, 7, 12, 0, 0, }, /* 396 */
- { 69, 10, 5, 0, 0, }, /* 397 */
- { 69, 12, 3, 0, 0, }, /* 398 */
- { 69, 21, 12, 0, 0, }, /* 399 */
- { 69, 13, 12, 0, 0, }, /* 400 */
- { 72, 13, 12, 0, 0, }, /* 401 */
- { 72, 7, 12, 0, 0, }, /* 402 */
- { 72, 6, 12, 0, 0, }, /* 403 */
- { 72, 21, 12, 0, 0, }, /* 404 */
- { 75, 21, 12, 0, 0, }, /* 405 */
- { 9, 10, 5, 0, 0, }, /* 406 */
- { 9, 7, 12, 0, 0, }, /* 407 */
- { 12, 5, 12, 0, 0, }, /* 408 */
- { 12, 6, 12, 0, 0, }, /* 409 */
- { 33, 5, 12, 0, 35332, }, /* 410 */
- { 33, 5, 12, 0, 3814, }, /* 411 */
- { 33, 9, 12, 63, 1, }, /* 412 */
- { 33, 5, 12, 63, -1, }, /* 413 */
- { 33, 5, 12, 63, -58, }, /* 414 */
- { 33, 9, 12, 0, -7615, }, /* 415 */
- { 19, 5, 12, 0, 8, }, /* 416 */
- { 19, 9, 12, 0, -8, }, /* 417 */
- { 19, 5, 12, 0, 74, }, /* 418 */
- { 19, 5, 12, 0, 86, }, /* 419 */
- { 19, 5, 12, 0, 100, }, /* 420 */
- { 19, 5, 12, 0, 128, }, /* 421 */
- { 19, 5, 12, 0, 112, }, /* 422 */
- { 19, 5, 12, 0, 126, }, /* 423 */
- { 19, 8, 12, 0, -8, }, /* 424 */
- { 19, 5, 12, 0, 9, }, /* 425 */
- { 19, 9, 12, 0, -74, }, /* 426 */
- { 19, 8, 12, 0, -9, }, /* 427 */
- { 19, 5, 12, 21, -7173, }, /* 428 */
- { 19, 9, 12, 0, -86, }, /* 429 */
- { 19, 9, 12, 0, -100, }, /* 430 */
- { 19, 9, 12, 0, -112, }, /* 431 */
- { 19, 9, 12, 0, -128, }, /* 432 */
- { 19, 9, 12, 0, -126, }, /* 433 */
- { 27, 1, 3, 0, 0, }, /* 434 */
- { 9, 27, 2, 0, 0, }, /* 435 */
- { 9, 28, 2, 0, 0, }, /* 436 */
- { 9, 2, 2, 0, 0, }, /* 437 */
- { 9, 9, 12, 0, 0, }, /* 438 */
- { 9, 5, 12, 0, 0, }, /* 439 */
- { 19, 9, 12, 67, -7517, }, /* 440 */
- { 33, 9, 12, 71, -8383, }, /* 441 */
- { 33, 9, 12, 75, -8262, }, /* 442 */
- { 33, 9, 12, 0, 28, }, /* 443 */
- { 33, 5, 12, 0, -28, }, /* 444 */
- { 33, 14, 12, 0, 16, }, /* 445 */
- { 33, 14, 12, 0, -16, }, /* 446 */
- { 33, 14, 12, 0, 0, }, /* 447 */
- { 9, 26, 12, 0, 26, }, /* 448 */
- { 9, 26, 12, 0, -26, }, /* 449 */
- { 4, 26, 12, 0, 0, }, /* 450 */
- { 17, 9, 12, 0, 48, }, /* 451 */
- { 17, 5, 12, 0, -48, }, /* 452 */
- { 33, 9, 12, 0, -10743, }, /* 453 */
- { 33, 9, 12, 0, -3814, }, /* 454 */
- { 33, 9, 12, 0, -10727, }, /* 455 */
- { 33, 5, 12, 0, -10795, }, /* 456 */
- { 33, 5, 12, 0, -10792, }, /* 457 */
- { 33, 9, 12, 0, -10780, }, /* 458 */
- { 33, 9, 12, 0, -10749, }, /* 459 */
- { 33, 9, 12, 0, -10783, }, /* 460 */
- { 33, 9, 12, 0, -10782, }, /* 461 */
- { 33, 9, 12, 0, -10815, }, /* 462 */
- { 10, 5, 12, 0, 0, }, /* 463 */
- { 10, 26, 12, 0, 0, }, /* 464 */
- { 10, 12, 3, 0, 0, }, /* 465 */
- { 10, 21, 12, 0, 0, }, /* 466 */
- { 10, 15, 12, 0, 0, }, /* 467 */
- { 16, 5, 12, 0, -7264, }, /* 468 */
- { 58, 7, 12, 0, 0, }, /* 469 */
- { 58, 6, 12, 0, 0, }, /* 470 */
- { 58, 21, 12, 0, 0, }, /* 471 */
- { 58, 12, 3, 0, 0, }, /* 472 */
- { 22, 26, 12, 0, 0, }, /* 473 */
- { 22, 6, 12, 0, 0, }, /* 474 */
- { 22, 14, 12, 0, 0, }, /* 475 */
- { 23, 10, 3, 0, 0, }, /* 476 */
- { 26, 7, 12, 0, 0, }, /* 477 */
- { 26, 6, 12, 0, 0, }, /* 478 */
- { 29, 7, 12, 0, 0, }, /* 479 */
- { 29, 6, 12, 0, 0, }, /* 480 */
- { 3, 7, 12, 0, 0, }, /* 481 */
- { 23, 7, 12, 0, 0, }, /* 482 */
- { 23, 26, 12, 0, 0, }, /* 483 */
- { 29, 26, 12, 0, 0, }, /* 484 */
- { 22, 7, 12, 0, 0, }, /* 485 */
- { 60, 7, 12, 0, 0, }, /* 486 */
- { 60, 6, 12, 0, 0, }, /* 487 */
- { 60, 26, 12, 0, 0, }, /* 488 */
- { 85, 7, 12, 0, 0, }, /* 489 */
- { 85, 6, 12, 0, 0, }, /* 490 */
- { 85, 21, 12, 0, 0, }, /* 491 */
- { 76, 7, 12, 0, 0, }, /* 492 */
- { 76, 6, 12, 0, 0, }, /* 493 */
- { 76, 21, 12, 0, 0, }, /* 494 */
- { 76, 13, 12, 0, 0, }, /* 495 */
- { 12, 7, 12, 0, 0, }, /* 496 */
- { 12, 21, 12, 0, 0, }, /* 497 */
- { 78, 7, 12, 0, 0, }, /* 498 */
- { 78, 14, 12, 0, 0, }, /* 499 */
- { 78, 12, 3, 0, 0, }, /* 500 */
- { 78, 21, 12, 0, 0, }, /* 501 */
- { 33, 9, 12, 0, -35332, }, /* 502 */
- { 33, 9, 12, 0, -42280, }, /* 503 */
- { 33, 9, 12, 0, -42308, }, /* 504 */
- { 33, 9, 12, 0, -42319, }, /* 505 */
- { 33, 9, 12, 0, -42315, }, /* 506 */
- { 33, 9, 12, 0, -42305, }, /* 507 */
- { 33, 9, 12, 0, -42258, }, /* 508 */
- { 33, 9, 12, 0, -42282, }, /* 509 */
- { 33, 9, 12, 0, -42261, }, /* 510 */
- { 33, 9, 12, 0, 928, }, /* 511 */
- { 48, 7, 12, 0, 0, }, /* 512 */
- { 48, 12, 3, 0, 0, }, /* 513 */
- { 48, 10, 5, 0, 0, }, /* 514 */
- { 48, 26, 12, 0, 0, }, /* 515 */
- { 64, 7, 12, 0, 0, }, /* 516 */
- { 64, 21, 12, 0, 0, }, /* 517 */
- { 74, 10, 5, 0, 0, }, /* 518 */
- { 74, 7, 12, 0, 0, }, /* 519 */
- { 74, 12, 3, 0, 0, }, /* 520 */
- { 74, 21, 12, 0, 0, }, /* 521 */
- { 74, 13, 12, 0, 0, }, /* 522 */
- { 68, 13, 12, 0, 0, }, /* 523 */
- { 68, 7, 12, 0, 0, }, /* 524 */
- { 68, 12, 3, 0, 0, }, /* 525 */
- { 68, 21, 12, 0, 0, }, /* 526 */
- { 73, 7, 12, 0, 0, }, /* 527 */
- { 73, 12, 3, 0, 0, }, /* 528 */
- { 73, 10, 5, 0, 0, }, /* 529 */
- { 73, 21, 12, 0, 0, }, /* 530 */
- { 83, 12, 3, 0, 0, }, /* 531 */
- { 83, 10, 5, 0, 0, }, /* 532 */
- { 83, 7, 12, 0, 0, }, /* 533 */
- { 83, 21, 12, 0, 0, }, /* 534 */
- { 83, 13, 12, 0, 0, }, /* 535 */
- { 38, 6, 12, 0, 0, }, /* 536 */
- { 67, 7, 12, 0, 0, }, /* 537 */
- { 67, 12, 3, 0, 0, }, /* 538 */
- { 67, 10, 5, 0, 0, }, /* 539 */
- { 67, 13, 12, 0, 0, }, /* 540 */
- { 67, 21, 12, 0, 0, }, /* 541 */
- { 91, 7, 12, 0, 0, }, /* 542 */
- { 91, 12, 3, 0, 0, }, /* 543 */
- { 91, 6, 12, 0, 0, }, /* 544 */
- { 91, 21, 12, 0, 0, }, /* 545 */
- { 86, 7, 12, 0, 0, }, /* 546 */
- { 86, 10, 5, 0, 0, }, /* 547 */
- { 86, 12, 3, 0, 0, }, /* 548 */
- { 86, 21, 12, 0, 0, }, /* 549 */
- { 86, 6, 12, 0, 0, }, /* 550 */
- { 33, 5, 12, 0, -928, }, /* 551 */
- { 8, 5, 12, 0, -38864, }, /* 552 */
- { 86, 13, 12, 0, 0, }, /* 553 */
- { 23, 7, 9, 0, 0, }, /* 554 */
- { 23, 7, 10, 0, 0, }, /* 555 */
- { 9, 4, 2, 0, 0, }, /* 556 */
- { 9, 3, 12, 0, 0, }, /* 557 */
- { 25, 25, 12, 0, 0, }, /* 558 */
- { 0, 24, 12, 0, 0, }, /* 559 */
- { 9, 6, 3, 0, 0, }, /* 560 */
- { 35, 7, 12, 0, 0, }, /* 561 */
- { 19, 14, 12, 0, 0, }, /* 562 */
- { 19, 15, 12, 0, 0, }, /* 563 */
- { 19, 26, 12, 0, 0, }, /* 564 */
- { 70, 7, 12, 0, 0, }, /* 565 */
- { 66, 7, 12, 0, 0, }, /* 566 */
- { 41, 7, 12, 0, 0, }, /* 567 */
- { 41, 15, 12, 0, 0, }, /* 568 */
- { 18, 7, 12, 0, 0, }, /* 569 */
- { 18, 14, 12, 0, 0, }, /* 570 */
- { 117, 7, 12, 0, 0, }, /* 571 */
- { 117, 12, 3, 0, 0, }, /* 572 */
- { 59, 7, 12, 0, 0, }, /* 573 */
- { 59, 21, 12, 0, 0, }, /* 574 */
- { 42, 7, 12, 0, 0, }, /* 575 */
- { 42, 21, 12, 0, 0, }, /* 576 */
- { 42, 14, 12, 0, 0, }, /* 577 */
- { 13, 9, 12, 0, 40, }, /* 578 */
- { 13, 5, 12, 0, -40, }, /* 579 */
- { 46, 7, 12, 0, 0, }, /* 580 */
- { 44, 7, 12, 0, 0, }, /* 581 */
- { 44, 13, 12, 0, 0, }, /* 582 */
- { 105, 7, 12, 0, 0, }, /* 583 */
- { 103, 7, 12, 0, 0, }, /* 584 */
- { 103, 21, 12, 0, 0, }, /* 585 */
- { 109, 7, 12, 0, 0, }, /* 586 */
- { 11, 7, 12, 0, 0, }, /* 587 */
- { 80, 7, 12, 0, 0, }, /* 588 */
- { 80, 21, 12, 0, 0, }, /* 589 */
- { 80, 15, 12, 0, 0, }, /* 590 */
- { 119, 7, 12, 0, 0, }, /* 591 */
- { 119, 26, 12, 0, 0, }, /* 592 */
- { 119, 15, 12, 0, 0, }, /* 593 */
- { 115, 7, 12, 0, 0, }, /* 594 */
- { 115, 15, 12, 0, 0, }, /* 595 */
- { 127, 7, 12, 0, 0, }, /* 596 */
- { 127, 15, 12, 0, 0, }, /* 597 */
- { 65, 7, 12, 0, 0, }, /* 598 */
- { 65, 15, 12, 0, 0, }, /* 599 */
- { 65, 21, 12, 0, 0, }, /* 600 */
- { 71, 7, 12, 0, 0, }, /* 601 */
- { 71, 21, 12, 0, 0, }, /* 602 */
- { 97, 7, 12, 0, 0, }, /* 603 */
- { 96, 7, 12, 0, 0, }, /* 604 */
- { 96, 15, 12, 0, 0, }, /* 605 */
- { 30, 7, 12, 0, 0, }, /* 606 */
- { 30, 12, 3, 0, 0, }, /* 607 */
- { 30, 15, 12, 0, 0, }, /* 608 */
- { 30, 21, 12, 0, 0, }, /* 609 */
- { 87, 7, 12, 0, 0, }, /* 610 */
- { 87, 15, 12, 0, 0, }, /* 611 */
- { 87, 21, 12, 0, 0, }, /* 612 */
- { 116, 7, 12, 0, 0, }, /* 613 */
- { 116, 15, 12, 0, 0, }, /* 614 */
- { 111, 7, 12, 0, 0, }, /* 615 */
- { 111, 26, 12, 0, 0, }, /* 616 */
- { 111, 12, 3, 0, 0, }, /* 617 */
- { 111, 15, 12, 0, 0, }, /* 618 */
- { 111, 21, 12, 0, 0, }, /* 619 */
- { 77, 7, 12, 0, 0, }, /* 620 */
- { 77, 21, 12, 0, 0, }, /* 621 */
- { 82, 7, 12, 0, 0, }, /* 622 */
- { 82, 15, 12, 0, 0, }, /* 623 */
- { 81, 7, 12, 0, 0, }, /* 624 */
- { 81, 15, 12, 0, 0, }, /* 625 */
- { 120, 7, 12, 0, 0, }, /* 626 */
- { 120, 21, 12, 0, 0, }, /* 627 */
- { 120, 15, 12, 0, 0, }, /* 628 */
- { 88, 7, 12, 0, 0, }, /* 629 */
- { 129, 9, 12, 0, 64, }, /* 630 */
- { 129, 5, 12, 0, -64, }, /* 631 */
- { 129, 15, 12, 0, 0, }, /* 632 */
- { 0, 15, 12, 0, 0, }, /* 633 */
- { 93, 10, 5, 0, 0, }, /* 634 */
- { 93, 12, 3, 0, 0, }, /* 635 */
- { 93, 7, 12, 0, 0, }, /* 636 */
- { 93, 21, 12, 0, 0, }, /* 637 */
- { 93, 15, 12, 0, 0, }, /* 638 */
- { 93, 13, 12, 0, 0, }, /* 639 */
- { 84, 12, 3, 0, 0, }, /* 640 */
- { 84, 10, 5, 0, 0, }, /* 641 */
- { 84, 7, 12, 0, 0, }, /* 642 */
- { 84, 21, 12, 0, 0, }, /* 643 */
- { 84, 1, 2, 0, 0, }, /* 644 */
- { 100, 7, 12, 0, 0, }, /* 645 */
- { 100, 13, 12, 0, 0, }, /* 646 */
- { 95, 12, 3, 0, 0, }, /* 647 */
- { 95, 7, 12, 0, 0, }, /* 648 */
- { 95, 10, 5, 0, 0, }, /* 649 */
- { 95, 13, 12, 0, 0, }, /* 650 */
- { 95, 21, 12, 0, 0, }, /* 651 */
- { 110, 7, 12, 0, 0, }, /* 652 */
- { 110, 12, 3, 0, 0, }, /* 653 */
- { 110, 21, 12, 0, 0, }, /* 654 */
- { 99, 12, 3, 0, 0, }, /* 655 */
- { 99, 10, 5, 0, 0, }, /* 656 */
- { 99, 7, 12, 0, 0, }, /* 657 */
- { 99, 21, 12, 0, 0, }, /* 658 */
- { 99, 13, 12, 0, 0, }, /* 659 */
- { 47, 15, 12, 0, 0, }, /* 660 */
- { 107, 7, 12, 0, 0, }, /* 661 */
- { 107, 10, 5, 0, 0, }, /* 662 */
- { 107, 12, 3, 0, 0, }, /* 663 */
- { 107, 21, 12, 0, 0, }, /* 664 */
- { 128, 7, 12, 0, 0, }, /* 665 */
- { 128, 21, 12, 0, 0, }, /* 666 */
- { 108, 7, 12, 0, 0, }, /* 667 */
- { 108, 12, 3, 0, 0, }, /* 668 */
- { 108, 10, 5, 0, 0, }, /* 669 */
- { 108, 13, 12, 0, 0, }, /* 670 */
- { 106, 12, 3, 0, 0, }, /* 671 */
- { 106, 10, 5, 0, 0, }, /* 672 */
- { 106, 7, 12, 0, 0, }, /* 673 */
- { 106, 10, 3, 0, 0, }, /* 674 */
- { 123, 7, 12, 0, 0, }, /* 675 */
- { 123, 10, 3, 0, 0, }, /* 676 */
- { 123, 10, 5, 0, 0, }, /* 677 */
- { 123, 12, 3, 0, 0, }, /* 678 */
- { 123, 21, 12, 0, 0, }, /* 679 */
- { 123, 13, 12, 0, 0, }, /* 680 */
- { 122, 7, 12, 0, 0, }, /* 681 */
- { 122, 10, 3, 0, 0, }, /* 682 */
- { 122, 10, 5, 0, 0, }, /* 683 */
- { 122, 12, 3, 0, 0, }, /* 684 */
- { 122, 21, 12, 0, 0, }, /* 685 */
- { 113, 7, 12, 0, 0, }, /* 686 */
- { 113, 10, 5, 0, 0, }, /* 687 */
- { 113, 12, 3, 0, 0, }, /* 688 */
- { 113, 21, 12, 0, 0, }, /* 689 */
- { 113, 13, 12, 0, 0, }, /* 690 */
- { 101, 7, 12, 0, 0, }, /* 691 */
- { 101, 12, 3, 0, 0, }, /* 692 */
- { 101, 10, 5, 0, 0, }, /* 693 */
- { 101, 13, 12, 0, 0, }, /* 694 */
- { 125, 7, 12, 0, 0, }, /* 695 */
- { 125, 12, 3, 0, 0, }, /* 696 */
- { 125, 10, 5, 0, 0, }, /* 697 */
- { 125, 13, 12, 0, 0, }, /* 698 */
- { 125, 15, 12, 0, 0, }, /* 699 */
- { 125, 21, 12, 0, 0, }, /* 700 */
- { 125, 26, 12, 0, 0, }, /* 701 */
- { 124, 9, 12, 0, 32, }, /* 702 */
- { 124, 5, 12, 0, -32, }, /* 703 */
- { 124, 13, 12, 0, 0, }, /* 704 */
- { 124, 15, 12, 0, 0, }, /* 705 */
- { 124, 7, 12, 0, 0, }, /* 706 */
- { 121, 7, 12, 0, 0, }, /* 707 */
- { 62, 7, 12, 0, 0, }, /* 708 */
- { 62, 14, 12, 0, 0, }, /* 709 */
- { 62, 21, 12, 0, 0, }, /* 710 */
- { 79, 7, 12, 0, 0, }, /* 711 */
- { 126, 7, 12, 0, 0, }, /* 712 */
- { 114, 7, 12, 0, 0, }, /* 713 */
- { 114, 13, 12, 0, 0, }, /* 714 */
- { 114, 21, 12, 0, 0, }, /* 715 */
- { 102, 7, 12, 0, 0, }, /* 716 */
- { 102, 12, 3, 0, 0, }, /* 717 */
- { 102, 21, 12, 0, 0, }, /* 718 */
- { 118, 7, 12, 0, 0, }, /* 719 */
- { 118, 12, 3, 0, 0, }, /* 720 */
- { 118, 21, 12, 0, 0, }, /* 721 */
- { 118, 26, 12, 0, 0, }, /* 722 */
- { 118, 6, 12, 0, 0, }, /* 723 */
- { 118, 13, 12, 0, 0, }, /* 724 */
- { 118, 15, 12, 0, 0, }, /* 725 */
- { 98, 7, 12, 0, 0, }, /* 726 */
- { 98, 10, 5, 0, 0, }, /* 727 */
- { 98, 12, 3, 0, 0, }, /* 728 */
- { 98, 6, 12, 0, 0, }, /* 729 */
- { 104, 7, 12, 0, 0, }, /* 730 */
- { 104, 26, 12, 0, 0, }, /* 731 */
- { 104, 12, 3, 0, 0, }, /* 732 */
- { 104, 21, 12, 0, 0, }, /* 733 */
- { 9, 10, 3, 0, 0, }, /* 734 */
- { 19, 12, 3, 0, 0, }, /* 735 */
- { 130, 26, 12, 0, 0, }, /* 736 */
- { 130, 12, 3, 0, 0, }, /* 737 */
- { 130, 21, 12, 0, 0, }, /* 738 */
- { 112, 7, 12, 0, 0, }, /* 739 */
- { 112, 15, 12, 0, 0, }, /* 740 */
- { 112, 12, 3, 0, 0, }, /* 741 */
- { 9, 26, 11, 0, 0, }, /* 742 */
- { 26, 26, 12, 0, 0, }, /* 743 */
+ { 12, 9, 12, 63, 32, }, /* 173 */
+ { 12, 9, 12, 67, 32, }, /* 174 */
+ { 12, 9, 12, 71, 32, }, /* 175 */
+ { 12, 9, 12, 75, 32, }, /* 176 */
+ { 12, 9, 12, 79, 32, }, /* 177 */
+ { 12, 9, 12, 84, 32, }, /* 178 */
+ { 12, 5, 12, 0, -32, }, /* 179 */
+ { 12, 5, 12, 63, -32, }, /* 180 */
+ { 12, 5, 12, 67, -32, }, /* 181 */
+ { 12, 5, 12, 71, -32, }, /* 182 */
+ { 12, 5, 12, 75, -32, }, /* 183 */
+ { 12, 5, 12, 79, -32, }, /* 184 */
+ { 12, 5, 12, 84, -32, }, /* 185 */
+ { 12, 5, 12, 0, -80, }, /* 186 */
+ { 12, 9, 12, 0, 1, }, /* 187 */
+ { 12, 5, 12, 0, -1, }, /* 188 */
+ { 12, 9, 12, 88, 1, }, /* 189 */
+ { 12, 5, 12, 88, -1, }, /* 190 */
+ { 12, 26, 12, 0, 0, }, /* 191 */
+ { 12, 12, 3, 0, 0, }, /* 192 */
+ { 12, 11, 3, 0, 0, }, /* 193 */
+ { 12, 9, 12, 0, 15, }, /* 194 */
+ { 12, 5, 12, 0, -15, }, /* 195 */
+ { 1, 9, 12, 0, 48, }, /* 196 */
+ { 1, 6, 12, 0, 0, }, /* 197 */
+ { 1, 21, 12, 0, 0, }, /* 198 */
+ { 1, 5, 12, 0, -48, }, /* 199 */
+ { 1, 5, 12, 0, 0, }, /* 200 */
+ { 1, 17, 12, 0, 0, }, /* 201 */
+ { 1, 26, 12, 0, 0, }, /* 202 */
+ { 1, 23, 12, 0, 0, }, /* 203 */
+ { 25, 12, 3, 0, 0, }, /* 204 */
+ { 25, 17, 12, 0, 0, }, /* 205 */
+ { 25, 21, 12, 0, 0, }, /* 206 */
+ { 25, 7, 12, 0, 0, }, /* 207 */
+ { 0, 1, 4, 0, 0, }, /* 208 */
+ { 9, 1, 4, 0, 0, }, /* 209 */
+ { 0, 25, 12, 0, 0, }, /* 210 */
+ { 0, 21, 12, 0, 0, }, /* 211 */
+ { 0, 23, 12, 0, 0, }, /* 212 */
+ { 0, 26, 12, 0, 0, }, /* 213 */
+ { 0, 12, 3, 0, 0, }, /* 214 */
+ { 0, 1, 2, 0, 0, }, /* 215 */
+ { 0, 7, 12, 0, 0, }, /* 216 */
+ { 0, 13, 12, 0, 0, }, /* 217 */
+ { 0, 6, 12, 0, 0, }, /* 218 */
+ { 49, 21, 12, 0, 0, }, /* 219 */
+ { 49, 1, 4, 0, 0, }, /* 220 */
+ { 49, 7, 12, 0, 0, }, /* 221 */
+ { 49, 12, 3, 0, 0, }, /* 222 */
+ { 55, 7, 12, 0, 0, }, /* 223 */
+ { 55, 12, 3, 0, 0, }, /* 224 */
+ { 63, 13, 12, 0, 0, }, /* 225 */
+ { 63, 7, 12, 0, 0, }, /* 226 */
+ { 63, 12, 3, 0, 0, }, /* 227 */
+ { 63, 6, 12, 0, 0, }, /* 228 */
+ { 63, 26, 12, 0, 0, }, /* 229 */
+ { 63, 21, 12, 0, 0, }, /* 230 */
+ { 89, 7, 12, 0, 0, }, /* 231 */
+ { 89, 12, 3, 0, 0, }, /* 232 */
+ { 89, 6, 12, 0, 0, }, /* 233 */
+ { 89, 21, 12, 0, 0, }, /* 234 */
+ { 94, 7, 12, 0, 0, }, /* 235 */
+ { 94, 12, 3, 0, 0, }, /* 236 */
+ { 94, 21, 12, 0, 0, }, /* 237 */
+ { 14, 12, 3, 0, 0, }, /* 238 */
+ { 14, 10, 5, 0, 0, }, /* 239 */
+ { 14, 7, 12, 0, 0, }, /* 240 */
+ { 14, 13, 12, 0, 0, }, /* 241 */
+ { 14, 21, 12, 0, 0, }, /* 242 */
+ { 14, 6, 12, 0, 0, }, /* 243 */
+ { 2, 7, 12, 0, 0, }, /* 244 */
+ { 2, 12, 3, 0, 0, }, /* 245 */
+ { 2, 10, 5, 0, 0, }, /* 246 */
+ { 2, 10, 3, 0, 0, }, /* 247 */
+ { 2, 13, 12, 0, 0, }, /* 248 */
+ { 2, 23, 12, 0, 0, }, /* 249 */
+ { 2, 15, 12, 0, 0, }, /* 250 */
+ { 2, 26, 12, 0, 0, }, /* 251 */
+ { 2, 21, 12, 0, 0, }, /* 252 */
+ { 21, 12, 3, 0, 0, }, /* 253 */
+ { 21, 10, 5, 0, 0, }, /* 254 */
+ { 21, 7, 12, 0, 0, }, /* 255 */
+ { 21, 13, 12, 0, 0, }, /* 256 */
+ { 20, 12, 3, 0, 0, }, /* 257 */
+ { 20, 10, 5, 0, 0, }, /* 258 */
+ { 20, 7, 12, 0, 0, }, /* 259 */
+ { 20, 13, 12, 0, 0, }, /* 260 */
+ { 20, 21, 12, 0, 0, }, /* 261 */
+ { 20, 23, 12, 0, 0, }, /* 262 */
+ { 43, 12, 3, 0, 0, }, /* 263 */
+ { 43, 10, 5, 0, 0, }, /* 264 */
+ { 43, 7, 12, 0, 0, }, /* 265 */
+ { 43, 10, 3, 0, 0, }, /* 266 */
+ { 43, 13, 12, 0, 0, }, /* 267 */
+ { 43, 26, 12, 0, 0, }, /* 268 */
+ { 43, 15, 12, 0, 0, }, /* 269 */
+ { 53, 12, 3, 0, 0, }, /* 270 */
+ { 53, 7, 12, 0, 0, }, /* 271 */
+ { 53, 10, 3, 0, 0, }, /* 272 */
+ { 53, 10, 5, 0, 0, }, /* 273 */
+ { 53, 13, 12, 0, 0, }, /* 274 */
+ { 53, 15, 12, 0, 0, }, /* 275 */
+ { 53, 26, 12, 0, 0, }, /* 276 */
+ { 53, 23, 12, 0, 0, }, /* 277 */
+ { 54, 12, 3, 0, 0, }, /* 278 */
+ { 54, 10, 5, 0, 0, }, /* 279 */
+ { 54, 7, 12, 0, 0, }, /* 280 */
+ { 54, 13, 12, 0, 0, }, /* 281 */
+ { 54, 15, 12, 0, 0, }, /* 282 */
+ { 54, 26, 12, 0, 0, }, /* 283 */
+ { 28, 7, 12, 0, 0, }, /* 284 */
+ { 28, 12, 3, 0, 0, }, /* 285 */
+ { 28, 10, 5, 0, 0, }, /* 286 */
+ { 28, 10, 3, 0, 0, }, /* 287 */
+ { 28, 13, 12, 0, 0, }, /* 288 */
+ { 36, 12, 3, 0, 0, }, /* 289 */
+ { 36, 10, 5, 0, 0, }, /* 290 */
+ { 36, 7, 12, 0, 0, }, /* 291 */
+ { 36, 10, 3, 0, 0, }, /* 292 */
+ { 36, 7, 4, 0, 0, }, /* 293 */
+ { 36, 26, 12, 0, 0, }, /* 294 */
+ { 36, 15, 12, 0, 0, }, /* 295 */
+ { 36, 13, 12, 0, 0, }, /* 296 */
+ { 47, 10, 5, 0, 0, }, /* 297 */
+ { 47, 7, 12, 0, 0, }, /* 298 */
+ { 47, 12, 3, 0, 0, }, /* 299 */
+ { 47, 10, 3, 0, 0, }, /* 300 */
+ { 47, 13, 12, 0, 0, }, /* 301 */
+ { 47, 21, 12, 0, 0, }, /* 302 */
+ { 56, 7, 12, 0, 0, }, /* 303 */
+ { 56, 12, 3, 0, 0, }, /* 304 */
+ { 56, 7, 5, 0, 0, }, /* 305 */
+ { 56, 6, 12, 0, 0, }, /* 306 */
+ { 56, 21, 12, 0, 0, }, /* 307 */
+ { 56, 13, 12, 0, 0, }, /* 308 */
+ { 32, 7, 12, 0, 0, }, /* 309 */
+ { 32, 12, 3, 0, 0, }, /* 310 */
+ { 32, 7, 5, 0, 0, }, /* 311 */
+ { 32, 6, 12, 0, 0, }, /* 312 */
+ { 32, 13, 12, 0, 0, }, /* 313 */
+ { 57, 7, 12, 0, 0, }, /* 314 */
+ { 57, 26, 12, 0, 0, }, /* 315 */
+ { 57, 21, 12, 0, 0, }, /* 316 */
+ { 57, 12, 3, 0, 0, }, /* 317 */
+ { 57, 13, 12, 0, 0, }, /* 318 */
+ { 57, 15, 12, 0, 0, }, /* 319 */
+ { 57, 22, 12, 0, 0, }, /* 320 */
+ { 57, 18, 12, 0, 0, }, /* 321 */
+ { 57, 10, 5, 0, 0, }, /* 322 */
+ { 38, 7, 12, 0, 0, }, /* 323 */
+ { 38, 10, 12, 0, 0, }, /* 324 */
+ { 38, 12, 3, 0, 0, }, /* 325 */
+ { 38, 10, 5, 0, 0, }, /* 326 */
+ { 38, 13, 12, 0, 0, }, /* 327 */
+ { 38, 21, 12, 0, 0, }, /* 328 */
+ { 38, 26, 12, 0, 0, }, /* 329 */
+ { 16, 9, 12, 0, 7264, }, /* 330 */
+ { 16, 7, 12, 0, 0, }, /* 331 */
+ { 16, 6, 12, 0, 0, }, /* 332 */
+ { 23, 7, 6, 0, 0, }, /* 333 */
+ { 23, 7, 7, 0, 0, }, /* 334 */
+ { 23, 7, 8, 0, 0, }, /* 335 */
+ { 15, 7, 12, 0, 0, }, /* 336 */
+ { 15, 12, 3, 0, 0, }, /* 337 */
+ { 15, 21, 12, 0, 0, }, /* 338 */
+ { 15, 15, 12, 0, 0, }, /* 339 */
+ { 15, 26, 12, 0, 0, }, /* 340 */
+ { 8, 9, 12, 0, 38864, }, /* 341 */
+ { 8, 9, 12, 0, 8, }, /* 342 */
+ { 8, 5, 12, 0, -8, }, /* 343 */
+ { 7, 17, 12, 0, 0, }, /* 344 */
+ { 7, 7, 12, 0, 0, }, /* 345 */
+ { 7, 21, 12, 0, 0, }, /* 346 */
+ { 40, 29, 12, 0, 0, }, /* 347 */
+ { 40, 7, 12, 0, 0, }, /* 348 */
+ { 40, 22, 12, 0, 0, }, /* 349 */
+ { 40, 18, 12, 0, 0, }, /* 350 */
+ { 45, 7, 12, 0, 0, }, /* 351 */
+ { 45, 14, 12, 0, 0, }, /* 352 */
+ { 50, 7, 12, 0, 0, }, /* 353 */
+ { 50, 12, 3, 0, 0, }, /* 354 */
+ { 24, 7, 12, 0, 0, }, /* 355 */
+ { 24, 12, 3, 0, 0, }, /* 356 */
+ { 6, 7, 12, 0, 0, }, /* 357 */
+ { 6, 12, 3, 0, 0, }, /* 358 */
+ { 51, 7, 12, 0, 0, }, /* 359 */
+ { 51, 12, 3, 0, 0, }, /* 360 */
+ { 31, 7, 12, 0, 0, }, /* 361 */
+ { 31, 12, 3, 0, 0, }, /* 362 */
+ { 31, 10, 5, 0, 0, }, /* 363 */
+ { 31, 21, 12, 0, 0, }, /* 364 */
+ { 31, 6, 12, 0, 0, }, /* 365 */
+ { 31, 23, 12, 0, 0, }, /* 366 */
+ { 31, 13, 12, 0, 0, }, /* 367 */
+ { 31, 15, 12, 0, 0, }, /* 368 */
+ { 37, 21, 12, 0, 0, }, /* 369 */
+ { 37, 17, 12, 0, 0, }, /* 370 */
+ { 37, 12, 3, 0, 0, }, /* 371 */
+ { 37, 1, 2, 0, 0, }, /* 372 */
+ { 37, 13, 12, 0, 0, }, /* 373 */
+ { 37, 7, 12, 0, 0, }, /* 374 */
+ { 37, 6, 12, 0, 0, }, /* 375 */
+ { 34, 7, 12, 0, 0, }, /* 376 */
+ { 34, 12, 3, 0, 0, }, /* 377 */
+ { 34, 10, 5, 0, 0, }, /* 378 */
+ { 34, 26, 12, 0, 0, }, /* 379 */
+ { 34, 21, 12, 0, 0, }, /* 380 */
+ { 34, 13, 12, 0, 0, }, /* 381 */
+ { 52, 7, 12, 0, 0, }, /* 382 */
+ { 39, 7, 12, 0, 0, }, /* 383 */
+ { 39, 13, 12, 0, 0, }, /* 384 */
+ { 39, 15, 12, 0, 0, }, /* 385 */
+ { 39, 26, 12, 0, 0, }, /* 386 */
+ { 31, 26, 12, 0, 0, }, /* 387 */
+ { 5, 7, 12, 0, 0, }, /* 388 */
+ { 5, 12, 3, 0, 0, }, /* 389 */
+ { 5, 10, 5, 0, 0, }, /* 390 */
+ { 5, 21, 12, 0, 0, }, /* 391 */
+ { 90, 7, 12, 0, 0, }, /* 392 */
+ { 90, 10, 5, 0, 0, }, /* 393 */
+ { 90, 12, 3, 0, 0, }, /* 394 */
+ { 90, 10, 12, 0, 0, }, /* 395 */
+ { 90, 13, 12, 0, 0, }, /* 396 */
+ { 90, 21, 12, 0, 0, }, /* 397 */
+ { 90, 6, 12, 0, 0, }, /* 398 */
+ { 27, 11, 3, 0, 0, }, /* 399 */
+ { 61, 12, 3, 0, 0, }, /* 400 */
+ { 61, 10, 5, 0, 0, }, /* 401 */
+ { 61, 7, 12, 0, 0, }, /* 402 */
+ { 61, 13, 12, 0, 0, }, /* 403 */
+ { 61, 21, 12, 0, 0, }, /* 404 */
+ { 61, 26, 12, 0, 0, }, /* 405 */
+ { 75, 12, 3, 0, 0, }, /* 406 */
+ { 75, 10, 5, 0, 0, }, /* 407 */
+ { 75, 7, 12, 0, 0, }, /* 408 */
+ { 75, 13, 12, 0, 0, }, /* 409 */
+ { 92, 7, 12, 0, 0, }, /* 410 */
+ { 92, 12, 3, 0, 0, }, /* 411 */
+ { 92, 10, 5, 0, 0, }, /* 412 */
+ { 92, 21, 12, 0, 0, }, /* 413 */
+ { 69, 7, 12, 0, 0, }, /* 414 */
+ { 69, 10, 5, 0, 0, }, /* 415 */
+ { 69, 12, 3, 0, 0, }, /* 416 */
+ { 69, 21, 12, 0, 0, }, /* 417 */
+ { 69, 13, 12, 0, 0, }, /* 418 */
+ { 72, 13, 12, 0, 0, }, /* 419 */
+ { 72, 7, 12, 0, 0, }, /* 420 */
+ { 72, 6, 12, 0, 0, }, /* 421 */
+ { 72, 21, 12, 0, 0, }, /* 422 */
+ { 12, 5, 12, 63, -6222, }, /* 423 */
+ { 12, 5, 12, 67, -6221, }, /* 424 */
+ { 12, 5, 12, 71, -6212, }, /* 425 */
+ { 12, 5, 12, 75, -6210, }, /* 426 */
+ { 12, 5, 12, 79, -6210, }, /* 427 */
+ { 12, 5, 12, 79, -6211, }, /* 428 */
+ { 12, 5, 12, 84, -6204, }, /* 429 */
+ { 12, 5, 12, 88, -6180, }, /* 430 */
+ { 12, 5, 12, 108, 35267, }, /* 431 */
+ { 75, 21, 12, 0, 0, }, /* 432 */
+ { 9, 10, 5, 0, 0, }, /* 433 */
+ { 9, 7, 12, 0, 0, }, /* 434 */
+ { 12, 5, 12, 0, 0, }, /* 435 */
+ { 12, 6, 12, 0, 0, }, /* 436 */
+ { 33, 5, 12, 0, 35332, }, /* 437 */
+ { 33, 5, 12, 0, 3814, }, /* 438 */
+ { 33, 9, 12, 92, 1, }, /* 439 */
+ { 33, 5, 12, 92, -1, }, /* 440 */
+ { 33, 5, 12, 92, -58, }, /* 441 */
+ { 33, 9, 12, 0, -7615, }, /* 442 */
+ { 19, 5, 12, 0, 8, }, /* 443 */
+ { 19, 9, 12, 0, -8, }, /* 444 */
+ { 19, 5, 12, 0, 74, }, /* 445 */
+ { 19, 5, 12, 0, 86, }, /* 446 */
+ { 19, 5, 12, 0, 100, }, /* 447 */
+ { 19, 5, 12, 0, 128, }, /* 448 */
+ { 19, 5, 12, 0, 112, }, /* 449 */
+ { 19, 5, 12, 0, 126, }, /* 450 */
+ { 19, 8, 12, 0, -8, }, /* 451 */
+ { 19, 5, 12, 0, 9, }, /* 452 */
+ { 19, 9, 12, 0, -74, }, /* 453 */
+ { 19, 8, 12, 0, -9, }, /* 454 */
+ { 19, 5, 12, 21, -7173, }, /* 455 */
+ { 19, 9, 12, 0, -86, }, /* 456 */
+ { 19, 9, 12, 0, -100, }, /* 457 */
+ { 19, 9, 12, 0, -112, }, /* 458 */
+ { 19, 9, 12, 0, -128, }, /* 459 */
+ { 19, 9, 12, 0, -126, }, /* 460 */
+ { 27, 1, 3, 0, 0, }, /* 461 */
+ { 27, 1, 16, 0, 0, }, /* 462 */
+ { 9, 27, 2, 0, 0, }, /* 463 */
+ { 9, 28, 2, 0, 0, }, /* 464 */
+ { 9, 2, 2, 0, 0, }, /* 465 */
+ { 9, 9, 12, 0, 0, }, /* 466 */
+ { 9, 5, 12, 0, 0, }, /* 467 */
+ { 19, 9, 12, 96, -7517, }, /* 468 */
+ { 33, 9, 12, 100, -8383, }, /* 469 */
+ { 33, 9, 12, 104, -8262, }, /* 470 */
+ { 33, 9, 12, 0, 28, }, /* 471 */
+ { 33, 5, 12, 0, -28, }, /* 472 */
+ { 33, 14, 12, 0, 16, }, /* 473 */
+ { 33, 14, 12, 0, -16, }, /* 474 */
+ { 33, 14, 12, 0, 0, }, /* 475 */
+ { 9, 26, 12, 0, 26, }, /* 476 */
+ { 9, 26, 12, 0, -26, }, /* 477 */
+ { 9, 26, 13, 0, 0, }, /* 478 */
+ { 9, 26, 17, 0, 0, }, /* 479 */
+ { 4, 26, 12, 0, 0, }, /* 480 */
+ { 17, 9, 12, 0, 48, }, /* 481 */
+ { 17, 5, 12, 0, -48, }, /* 482 */
+ { 33, 9, 12, 0, -10743, }, /* 483 */
+ { 33, 9, 12, 0, -3814, }, /* 484 */
+ { 33, 9, 12, 0, -10727, }, /* 485 */
+ { 33, 5, 12, 0, -10795, }, /* 486 */
+ { 33, 5, 12, 0, -10792, }, /* 487 */
+ { 33, 9, 12, 0, -10780, }, /* 488 */
+ { 33, 9, 12, 0, -10749, }, /* 489 */
+ { 33, 9, 12, 0, -10783, }, /* 490 */
+ { 33, 9, 12, 0, -10782, }, /* 491 */
+ { 33, 9, 12, 0, -10815, }, /* 492 */
+ { 10, 5, 12, 0, 0, }, /* 493 */
+ { 10, 26, 12, 0, 0, }, /* 494 */
+ { 10, 12, 3, 0, 0, }, /* 495 */
+ { 10, 21, 12, 0, 0, }, /* 496 */
+ { 10, 15, 12, 0, 0, }, /* 497 */
+ { 16, 5, 12, 0, -7264, }, /* 498 */
+ { 58, 7, 12, 0, 0, }, /* 499 */
+ { 58, 6, 12, 0, 0, }, /* 500 */
+ { 58, 21, 12, 0, 0, }, /* 501 */
+ { 58, 12, 3, 0, 0, }, /* 502 */
+ { 22, 26, 12, 0, 0, }, /* 503 */
+ { 22, 6, 12, 0, 0, }, /* 504 */
+ { 22, 14, 12, 0, 0, }, /* 505 */
+ { 23, 10, 3, 0, 0, }, /* 506 */
+ { 26, 7, 12, 0, 0, }, /* 507 */
+ { 26, 6, 12, 0, 0, }, /* 508 */
+ { 29, 7, 12, 0, 0, }, /* 509 */
+ { 29, 6, 12, 0, 0, }, /* 510 */
+ { 3, 7, 12, 0, 0, }, /* 511 */
+ { 23, 7, 12, 0, 0, }, /* 512 */
+ { 23, 26, 12, 0, 0, }, /* 513 */
+ { 29, 26, 12, 0, 0, }, /* 514 */
+ { 22, 7, 12, 0, 0, }, /* 515 */
+ { 60, 7, 12, 0, 0, }, /* 516 */
+ { 60, 6, 12, 0, 0, }, /* 517 */
+ { 60, 26, 12, 0, 0, }, /* 518 */
+ { 85, 7, 12, 0, 0, }, /* 519 */
+ { 85, 6, 12, 0, 0, }, /* 520 */
+ { 85, 21, 12, 0, 0, }, /* 521 */
+ { 76, 7, 12, 0, 0, }, /* 522 */
+ { 76, 6, 12, 0, 0, }, /* 523 */
+ { 76, 21, 12, 0, 0, }, /* 524 */
+ { 76, 13, 12, 0, 0, }, /* 525 */
+ { 12, 9, 12, 108, 1, }, /* 526 */
+ { 12, 5, 12, 108, -35267, }, /* 527 */
+ { 12, 7, 12, 0, 0, }, /* 528 */
+ { 12, 21, 12, 0, 0, }, /* 529 */
+ { 78, 7, 12, 0, 0, }, /* 530 */
+ { 78, 14, 12, 0, 0, }, /* 531 */
+ { 78, 12, 3, 0, 0, }, /* 532 */
+ { 78, 21, 12, 0, 0, }, /* 533 */
+ { 33, 9, 12, 0, -35332, }, /* 534 */
+ { 33, 9, 12, 0, -42280, }, /* 535 */
+ { 33, 9, 12, 0, -42308, }, /* 536 */
+ { 33, 9, 12, 0, -42319, }, /* 537 */
+ { 33, 9, 12, 0, -42315, }, /* 538 */
+ { 33, 9, 12, 0, -42305, }, /* 539 */
+ { 33, 9, 12, 0, -42258, }, /* 540 */
+ { 33, 9, 12, 0, -42282, }, /* 541 */
+ { 33, 9, 12, 0, -42261, }, /* 542 */
+ { 33, 9, 12, 0, 928, }, /* 543 */
+ { 48, 7, 12, 0, 0, }, /* 544 */
+ { 48, 12, 3, 0, 0, }, /* 545 */
+ { 48, 10, 5, 0, 0, }, /* 546 */
+ { 48, 26, 12, 0, 0, }, /* 547 */
+ { 64, 7, 12, 0, 0, }, /* 548 */
+ { 64, 21, 12, 0, 0, }, /* 549 */
+ { 74, 10, 5, 0, 0, }, /* 550 */
+ { 74, 7, 12, 0, 0, }, /* 551 */
+ { 74, 12, 3, 0, 0, }, /* 552 */
+ { 74, 21, 12, 0, 0, }, /* 553 */
+ { 74, 13, 12, 0, 0, }, /* 554 */
+ { 68, 13, 12, 0, 0, }, /* 555 */
+ { 68, 7, 12, 0, 0, }, /* 556 */
+ { 68, 12, 3, 0, 0, }, /* 557 */
+ { 68, 21, 12, 0, 0, }, /* 558 */
+ { 73, 7, 12, 0, 0, }, /* 559 */
+ { 73, 12, 3, 0, 0, }, /* 560 */
+ { 73, 10, 5, 0, 0, }, /* 561 */
+ { 73, 21, 12, 0, 0, }, /* 562 */
+ { 83, 12, 3, 0, 0, }, /* 563 */
+ { 83, 10, 5, 0, 0, }, /* 564 */
+ { 83, 7, 12, 0, 0, }, /* 565 */
+ { 83, 21, 12, 0, 0, }, /* 566 */
+ { 83, 13, 12, 0, 0, }, /* 567 */
+ { 38, 6, 12, 0, 0, }, /* 568 */
+ { 67, 7, 12, 0, 0, }, /* 569 */
+ { 67, 12, 3, 0, 0, }, /* 570 */
+ { 67, 10, 5, 0, 0, }, /* 571 */
+ { 67, 13, 12, 0, 0, }, /* 572 */
+ { 67, 21, 12, 0, 0, }, /* 573 */
+ { 91, 7, 12, 0, 0, }, /* 574 */
+ { 91, 12, 3, 0, 0, }, /* 575 */
+ { 91, 6, 12, 0, 0, }, /* 576 */
+ { 91, 21, 12, 0, 0, }, /* 577 */
+ { 86, 7, 12, 0, 0, }, /* 578 */
+ { 86, 10, 5, 0, 0, }, /* 579 */
+ { 86, 12, 3, 0, 0, }, /* 580 */
+ { 86, 21, 12, 0, 0, }, /* 581 */
+ { 86, 6, 12, 0, 0, }, /* 582 */
+ { 33, 5, 12, 0, -928, }, /* 583 */
+ { 8, 5, 12, 0, -38864, }, /* 584 */
+ { 86, 13, 12, 0, 0, }, /* 585 */
+ { 23, 7, 9, 0, 0, }, /* 586 */
+ { 23, 7, 10, 0, 0, }, /* 587 */
+ { 9, 4, 2, 0, 0, }, /* 588 */
+ { 9, 3, 12, 0, 0, }, /* 589 */
+ { 25, 25, 12, 0, 0, }, /* 590 */
+ { 0, 24, 12, 0, 0, }, /* 591 */
+ { 9, 6, 3, 0, 0, }, /* 592 */
+ { 35, 7, 12, 0, 0, }, /* 593 */
+ { 19, 14, 12, 0, 0, }, /* 594 */
+ { 19, 15, 12, 0, 0, }, /* 595 */
+ { 19, 26, 12, 0, 0, }, /* 596 */
+ { 70, 7, 12, 0, 0, }, /* 597 */
+ { 66, 7, 12, 0, 0, }, /* 598 */
+ { 41, 7, 12, 0, 0, }, /* 599 */
+ { 41, 15, 12, 0, 0, }, /* 600 */
+ { 18, 7, 12, 0, 0, }, /* 601 */
+ { 18, 14, 12, 0, 0, }, /* 602 */
+ { 117, 7, 12, 0, 0, }, /* 603 */
+ { 117, 12, 3, 0, 0, }, /* 604 */
+ { 59, 7, 12, 0, 0, }, /* 605 */
+ { 59, 21, 12, 0, 0, }, /* 606 */
+ { 42, 7, 12, 0, 0, }, /* 607 */
+ { 42, 21, 12, 0, 0, }, /* 608 */
+ { 42, 14, 12, 0, 0, }, /* 609 */
+ { 13, 9, 12, 0, 40, }, /* 610 */
+ { 13, 5, 12, 0, -40, }, /* 611 */
+ { 46, 7, 12, 0, 0, }, /* 612 */
+ { 44, 7, 12, 0, 0, }, /* 613 */
+ { 44, 13, 12, 0, 0, }, /* 614 */
+ { 135, 9, 12, 0, 40, }, /* 615 */
+ { 135, 5, 12, 0, -40, }, /* 616 */
+ { 105, 7, 12, 0, 0, }, /* 617 */
+ { 103, 7, 12, 0, 0, }, /* 618 */
+ { 103, 21, 12, 0, 0, }, /* 619 */
+ { 109, 7, 12, 0, 0, }, /* 620 */
+ { 11, 7, 12, 0, 0, }, /* 621 */
+ { 80, 7, 12, 0, 0, }, /* 622 */
+ { 80, 21, 12, 0, 0, }, /* 623 */
+ { 80, 15, 12, 0, 0, }, /* 624 */
+ { 119, 7, 12, 0, 0, }, /* 625 */
+ { 119, 26, 12, 0, 0, }, /* 626 */
+ { 119, 15, 12, 0, 0, }, /* 627 */
+ { 115, 7, 12, 0, 0, }, /* 628 */
+ { 115, 15, 12, 0, 0, }, /* 629 */
+ { 127, 7, 12, 0, 0, }, /* 630 */
+ { 127, 15, 12, 0, 0, }, /* 631 */
+ { 65, 7, 12, 0, 0, }, /* 632 */
+ { 65, 15, 12, 0, 0, }, /* 633 */
+ { 65, 21, 12, 0, 0, }, /* 634 */
+ { 71, 7, 12, 0, 0, }, /* 635 */
+ { 71, 21, 12, 0, 0, }, /* 636 */
+ { 97, 7, 12, 0, 0, }, /* 637 */
+ { 96, 7, 12, 0, 0, }, /* 638 */
+ { 96, 15, 12, 0, 0, }, /* 639 */
+ { 30, 7, 12, 0, 0, }, /* 640 */
+ { 30, 12, 3, 0, 0, }, /* 641 */
+ { 30, 15, 12, 0, 0, }, /* 642 */
+ { 30, 21, 12, 0, 0, }, /* 643 */
+ { 87, 7, 12, 0, 0, }, /* 644 */
+ { 87, 15, 12, 0, 0, }, /* 645 */
+ { 87, 21, 12, 0, 0, }, /* 646 */
+ { 116, 7, 12, 0, 0, }, /* 647 */
+ { 116, 15, 12, 0, 0, }, /* 648 */
+ { 111, 7, 12, 0, 0, }, /* 649 */
+ { 111, 26, 12, 0, 0, }, /* 650 */
+ { 111, 12, 3, 0, 0, }, /* 651 */
+ { 111, 15, 12, 0, 0, }, /* 652 */
+ { 111, 21, 12, 0, 0, }, /* 653 */
+ { 77, 7, 12, 0, 0, }, /* 654 */
+ { 77, 21, 12, 0, 0, }, /* 655 */
+ { 82, 7, 12, 0, 0, }, /* 656 */
+ { 82, 15, 12, 0, 0, }, /* 657 */
+ { 81, 7, 12, 0, 0, }, /* 658 */
+ { 81, 15, 12, 0, 0, }, /* 659 */
+ { 120, 7, 12, 0, 0, }, /* 660 */
+ { 120, 21, 12, 0, 0, }, /* 661 */
+ { 120, 15, 12, 0, 0, }, /* 662 */
+ { 88, 7, 12, 0, 0, }, /* 663 */
+ { 129, 9, 12, 0, 64, }, /* 664 */
+ { 129, 5, 12, 0, -64, }, /* 665 */
+ { 129, 15, 12, 0, 0, }, /* 666 */
+ { 0, 15, 12, 0, 0, }, /* 667 */
+ { 93, 10, 5, 0, 0, }, /* 668 */
+ { 93, 12, 3, 0, 0, }, /* 669 */
+ { 93, 7, 12, 0, 0, }, /* 670 */
+ { 93, 21, 12, 0, 0, }, /* 671 */
+ { 93, 15, 12, 0, 0, }, /* 672 */
+ { 93, 13, 12, 0, 0, }, /* 673 */
+ { 84, 12, 3, 0, 0, }, /* 674 */
+ { 84, 10, 5, 0, 0, }, /* 675 */
+ { 84, 7, 12, 0, 0, }, /* 676 */
+ { 84, 21, 12, 0, 0, }, /* 677 */
+ { 84, 1, 4, 0, 0, }, /* 678 */
+ { 100, 7, 12, 0, 0, }, /* 679 */
+ { 100, 13, 12, 0, 0, }, /* 680 */
+ { 95, 12, 3, 0, 0, }, /* 681 */
+ { 95, 7, 12, 0, 0, }, /* 682 */
+ { 95, 10, 5, 0, 0, }, /* 683 */
+ { 95, 13, 12, 0, 0, }, /* 684 */
+ { 95, 21, 12, 0, 0, }, /* 685 */
+ { 110, 7, 12, 0, 0, }, /* 686 */
+ { 110, 12, 3, 0, 0, }, /* 687 */
+ { 110, 21, 12, 0, 0, }, /* 688 */
+ { 99, 12, 3, 0, 0, }, /* 689 */
+ { 99, 10, 5, 0, 0, }, /* 690 */
+ { 99, 7, 12, 0, 0, }, /* 691 */
+ { 99, 7, 4, 0, 0, }, /* 692 */
+ { 99, 21, 12, 0, 0, }, /* 693 */
+ { 99, 13, 12, 0, 0, }, /* 694 */
+ { 47, 15, 12, 0, 0, }, /* 695 */
+ { 107, 7, 12, 0, 0, }, /* 696 */
+ { 107, 10, 5, 0, 0, }, /* 697 */
+ { 107, 12, 3, 0, 0, }, /* 698 */
+ { 107, 21, 12, 0, 0, }, /* 699 */
+ { 128, 7, 12, 0, 0, }, /* 700 */
+ { 128, 21, 12, 0, 0, }, /* 701 */
+ { 108, 7, 12, 0, 0, }, /* 702 */
+ { 108, 12, 3, 0, 0, }, /* 703 */
+ { 108, 10, 5, 0, 0, }, /* 704 */
+ { 108, 13, 12, 0, 0, }, /* 705 */
+ { 106, 12, 3, 0, 0, }, /* 706 */
+ { 106, 10, 5, 0, 0, }, /* 707 */
+ { 106, 7, 12, 0, 0, }, /* 708 */
+ { 106, 10, 3, 0, 0, }, /* 709 */
+ { 134, 7, 12, 0, 0, }, /* 710 */
+ { 134, 10, 5, 0, 0, }, /* 711 */
+ { 134, 12, 3, 0, 0, }, /* 712 */
+ { 134, 21, 12, 0, 0, }, /* 713 */
+ { 134, 13, 12, 0, 0, }, /* 714 */
+ { 123, 7, 12, 0, 0, }, /* 715 */
+ { 123, 10, 3, 0, 0, }, /* 716 */
+ { 123, 10, 5, 0, 0, }, /* 717 */
+ { 123, 12, 3, 0, 0, }, /* 718 */
+ { 123, 21, 12, 0, 0, }, /* 719 */
+ { 123, 13, 12, 0, 0, }, /* 720 */
+ { 122, 7, 12, 0, 0, }, /* 721 */
+ { 122, 10, 3, 0, 0, }, /* 722 */
+ { 122, 10, 5, 0, 0, }, /* 723 */
+ { 122, 12, 3, 0, 0, }, /* 724 */
+ { 122, 21, 12, 0, 0, }, /* 725 */
+ { 113, 7, 12, 0, 0, }, /* 726 */
+ { 113, 10, 5, 0, 0, }, /* 727 */
+ { 113, 12, 3, 0, 0, }, /* 728 */
+ { 113, 21, 12, 0, 0, }, /* 729 */
+ { 113, 13, 12, 0, 0, }, /* 730 */
+ { 101, 7, 12, 0, 0, }, /* 731 */
+ { 101, 12, 3, 0, 0, }, /* 732 */
+ { 101, 10, 5, 0, 0, }, /* 733 */
+ { 101, 13, 12, 0, 0, }, /* 734 */
+ { 125, 7, 12, 0, 0, }, /* 735 */
+ { 125, 12, 3, 0, 0, }, /* 736 */
+ { 125, 10, 5, 0, 0, }, /* 737 */
+ { 125, 13, 12, 0, 0, }, /* 738 */
+ { 125, 15, 12, 0, 0, }, /* 739 */
+ { 125, 21, 12, 0, 0, }, /* 740 */
+ { 125, 26, 12, 0, 0, }, /* 741 */
+ { 124, 9, 12, 0, 32, }, /* 742 */
+ { 124, 5, 12, 0, -32, }, /* 743 */
+ { 124, 13, 12, 0, 0, }, /* 744 */
+ { 124, 15, 12, 0, 0, }, /* 745 */
+ { 124, 7, 12, 0, 0, }, /* 746 */
+ { 140, 7, 12, 0, 0, }, /* 747 */
+ { 140, 12, 3, 0, 0, }, /* 748 */
+ { 140, 10, 5, 0, 0, }, /* 749 */
+ { 140, 7, 4, 0, 0, }, /* 750 */
+ { 140, 21, 12, 0, 0, }, /* 751 */
+ { 139, 7, 12, 0, 0, }, /* 752 */
+ { 139, 12, 3, 0, 0, }, /* 753 */
+ { 139, 10, 5, 0, 0, }, /* 754 */
+ { 139, 7, 4, 0, 0, }, /* 755 */
+ { 139, 21, 12, 0, 0, }, /* 756 */
+ { 121, 7, 12, 0, 0, }, /* 757 */
+ { 132, 7, 12, 0, 0, }, /* 758 */
+ { 132, 10, 5, 0, 0, }, /* 759 */
+ { 132, 12, 3, 0, 0, }, /* 760 */
+ { 132, 21, 12, 0, 0, }, /* 761 */
+ { 132, 13, 12, 0, 0, }, /* 762 */
+ { 132, 15, 12, 0, 0, }, /* 763 */
+ { 133, 21, 12, 0, 0, }, /* 764 */
+ { 133, 7, 12, 0, 0, }, /* 765 */
+ { 133, 12, 3, 0, 0, }, /* 766 */
+ { 133, 10, 5, 0, 0, }, /* 767 */
+ { 137, 7, 12, 0, 0, }, /* 768 */
+ { 137, 12, 3, 0, 0, }, /* 769 */
+ { 137, 7, 4, 0, 0, }, /* 770 */
+ { 137, 13, 12, 0, 0, }, /* 771 */
+ { 62, 7, 12, 0, 0, }, /* 772 */
+ { 62, 14, 12, 0, 0, }, /* 773 */
+ { 62, 21, 12, 0, 0, }, /* 774 */
+ { 79, 7, 12, 0, 0, }, /* 775 */
+ { 126, 7, 12, 0, 0, }, /* 776 */
+ { 114, 7, 12, 0, 0, }, /* 777 */
+ { 114, 13, 12, 0, 0, }, /* 778 */
+ { 114, 21, 12, 0, 0, }, /* 779 */
+ { 102, 7, 12, 0, 0, }, /* 780 */
+ { 102, 12, 3, 0, 0, }, /* 781 */
+ { 102, 21, 12, 0, 0, }, /* 782 */
+ { 118, 7, 12, 0, 0, }, /* 783 */
+ { 118, 12, 3, 0, 0, }, /* 784 */
+ { 118, 21, 12, 0, 0, }, /* 785 */
+ { 118, 26, 12, 0, 0, }, /* 786 */
+ { 118, 6, 12, 0, 0, }, /* 787 */
+ { 118, 13, 12, 0, 0, }, /* 788 */
+ { 118, 15, 12, 0, 0, }, /* 789 */
+ { 98, 7, 12, 0, 0, }, /* 790 */
+ { 98, 10, 5, 0, 0, }, /* 791 */
+ { 98, 12, 3, 0, 0, }, /* 792 */
+ { 98, 6, 12, 0, 0, }, /* 793 */
+ { 136, 6, 12, 0, 0, }, /* 794 */
+ { 138, 6, 12, 0, 0, }, /* 795 */
+ { 136, 7, 12, 0, 0, }, /* 796 */
+ { 138, 7, 12, 0, 0, }, /* 797 */
+ { 104, 7, 12, 0, 0, }, /* 798 */
+ { 104, 26, 12, 0, 0, }, /* 799 */
+ { 104, 12, 3, 0, 0, }, /* 800 */
+ { 104, 21, 12, 0, 0, }, /* 801 */
+ { 9, 10, 3, 0, 0, }, /* 802 */
+ { 19, 12, 3, 0, 0, }, /* 803 */
+ { 130, 26, 12, 0, 0, }, /* 804 */
+ { 130, 12, 3, 0, 0, }, /* 805 */
+ { 130, 21, 12, 0, 0, }, /* 806 */
+ { 17, 12, 3, 0, 0, }, /* 807 */
+ { 112, 7, 12, 0, 0, }, /* 808 */
+ { 112, 15, 12, 0, 0, }, /* 809 */
+ { 112, 12, 3, 0, 0, }, /* 810 */
+ { 131, 9, 12, 0, 34, }, /* 811 */
+ { 131, 5, 12, 0, -34, }, /* 812 */
+ { 131, 12, 3, 0, 0, }, /* 813 */
+ { 131, 13, 12, 0, 0, }, /* 814 */
+ { 131, 21, 12, 0, 0, }, /* 815 */
+ { 9, 26, 11, 0, 0, }, /* 816 */
+ { 26, 26, 12, 0, 0, }, /* 817 */
+ { 9, 24, 14, 0, 0, }, /* 818 */
+ { 9, 26, 15, 0, 0, }, /* 819 */
+ { 9, 1, 3, 0, 0, }, /* 820 */
};
const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */
@@ -834,549 +933,549 @@ const uint8_t PRIV(ucd_stage1)[] = { /* 8704 bytes */
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* U+0800 */
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 41, 42, 43, 44, 45, /* U+1000 */
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, /* U+1800 */
- 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 73, 71, 74, 75, /* U+2000 */
- 76, 76, 66, 77, 66, 66, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, /* U+2800 */
- 88, 89, 90, 91, 92, 93, 94, 71, 95, 95, 95, 95, 95, 95, 95, 95, /* U+3000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+3800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+4000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 95, 95, 95, 95, /* U+4800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+5000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+5800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+6000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+6800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+7000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+7800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+8000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+8800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+9000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 97, /* U+9800 */
- 98, 99, 99, 99, 99, 99, 99, 99, 99,100,101,101,102,103,104,105, /* U+A000 */
-106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,114, /* U+A800 */
-115,116,117,118,119,120,114,115,116,117,118,119,120,114,115,116, /* U+B000 */
-117,118,119,120,114,115,116,117,118,119,120,114,115,116,117,118, /* U+B800 */
-119,120,114,115,116,117,118,119,120,114,115,116,117,118,119,120, /* U+C000 */
-114,115,116,117,118,119,120,114,115,116,117,118,119,120,114,115, /* U+C800 */
-116,117,118,119,120,114,115,116,117,118,119,120,114,115,116,121, /* U+D000 */
-122,122,122,122,122,122,122,122,122,122,122,122,122,122,122,122, /* U+D800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+E000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+E800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F000 */
-123,123, 95, 95,124,125,126,127,128,128,129,130,131,132,133,134, /* U+F800 */
-135,136,137,138,139,140,141,142,143,144,145,139,146,146,147,139, /* U+10000 */
-148,149,150,151,152,153,154,155,156,157,139,139,158,139,139,139, /* U+10800 */
-159,160,161,162,163,164,165,139,139,166,139,167,168,169,170,139, /* U+11000 */
-139,171,139,139,139,172,139,139,139,139,139,139,139,139,139,139, /* U+11800 */
-173,173,173,173,173,173,173,174,175,173,176,139,139,139,139,139, /* U+12000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+12800 */
-177,177,177,177,177,177,177,177,178,139,139,139,139,139,139,139, /* U+13000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+13800 */
-139,139,139,139,139,139,139,139,179,179,179,179,180,139,139,139, /* U+14000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+14800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+15000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+15800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+16000 */
-181,181,181,181,182,183,184,185,139,139,139,139,139,139,186,187, /* U+16800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+17000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+17800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+18000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+18800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+19000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+19800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1A800 */
-188,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1B000 */
-139,139,139,139,139,139,139,139,189,190,139,139,139,139,139,139, /* U+1B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1C800 */
- 71,191,192,193,194,139,195,139,196,197,198,199,200,201,202,203, /* U+1D000 */
-204,204,204,204,205,206,139,139,139,139,139,139,139,139,139,139, /* U+1D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1E000 */
-207,208,139,139,139,139,139,139,139,139,139,139,209,210,139,139, /* U+1E800 */
-211,212,213,214,215,139, 71,216, 71, 71,217,218, 71,219,220,221, /* U+1F000 */
-222,223,224,225,139,139,139,139,139,139,139,139,139,139,139,139, /* U+1F800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+20000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+20800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+21000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+21800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+22000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+22800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+23000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+23800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+24000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+24800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+25000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+25800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+26000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+26800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+27000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+27800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+28000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+28800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+29000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+29800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,226, 95, 95, /* U+2A000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2A800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,227, 95, /* U+2B000 */
-228, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2B800 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, /* U+2C000 */
- 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95,229,139,139, /* U+2C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+2F000 */
- 95, 95, 95, 95,230,139,139,139,139,139,139,139,139,139,139,139, /* U+2F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+30000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+30800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+31000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+31800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+32000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+32800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+33000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+33800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+34000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+34800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+35000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+35800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+36000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+36800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+37000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+37800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+38000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+38800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+39000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+39800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+3F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+40000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+40800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+41000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+41800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+42000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+42800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+43000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+43800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+44000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+44800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+45000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+45800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+46000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+46800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+47000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+47800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+48000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+48800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+49000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+49800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+4F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+50000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+50800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+51000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+51800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+52000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+52800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+53000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+53800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+54000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+54800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+55000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+55800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+56000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+56800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+57000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+57800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+58000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+58800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+59000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+59800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+5F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+60000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+60800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+61000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+61800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+62000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+62800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+63000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+63800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+64000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+64800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+65000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+65800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+66000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+66800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+67000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+67800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+68000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+68800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+69000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+69800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+6F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+70000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+70800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+71000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+71800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+72000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+72800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+73000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+73800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+74000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+74800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+75000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+75800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+76000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+76800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+77000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+77800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+78000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+78800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+79000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+79800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+7F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+80000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+80800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+81000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+81800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+82000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+82800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+83000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+83800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+84000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+84800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+85000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+85800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+86000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+86800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+87000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+87800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+88000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+88800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+89000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+89800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+8F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+90000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+90800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+91000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+91800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+92000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+92800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+93000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+93800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+94000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+94800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+95000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+95800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+96000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+96800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+97000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+97800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+98000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+98800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+99000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+99800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9A000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9A800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9B000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9B800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9C000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9C800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9D000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9D800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9E000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9E800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9F000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+9F800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A0000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A0800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A1000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A1800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A2000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A2800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A3000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A3800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A4000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A4800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A5000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A5800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A6000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A6800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A7000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A7800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A8000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A8800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A9000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+A9800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AA000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AA800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AB000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AB800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AC000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AC800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AD000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AD800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AE000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AE800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AF000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+AF800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B0000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B0800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B1000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B1800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B2000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B2800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B3000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B3800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B4000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B4800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B5000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B5800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B6000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B6800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B7000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B7800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B8000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B8800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B9000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+B9800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BA000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BA800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BB000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BB800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BC000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BC800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BD000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BD800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BE000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BE800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BF000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+BF800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C0000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C0800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C1000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C1800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C2000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C2800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C3000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C3800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C4000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C4800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C5000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C5800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C6000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C6800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C7000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C7800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C8000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C8800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C9000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+C9800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CA000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CA800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CB000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CB800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CC000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CC800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CD000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CD800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CE000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CE800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CF000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+CF800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D0000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D0800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D1000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D1800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D2000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D2800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D3000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D3800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D4000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D4800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D5000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D5800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D6000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D6800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D7000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D7800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D8000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D8800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D9000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+D9800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DA000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DA800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DB000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DB800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DC000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DC800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DD000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DD800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DE000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DE800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DF000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+DF800 */
-231,232,233,234,232,232,232,232,232,232,232,232,232,232,232,232, /* U+E0000 */
-232,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232, /* U+E0800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E1000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E1800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E2000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E2800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E3000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E3800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E4000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E4800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E5000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E5800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E6000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E6800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E7000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E7800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E8000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E8800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E9000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+E9800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EA000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EA800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EB000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EB800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EC000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EC800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+ED000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+ED800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EE000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EE800 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EF000 */
-139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, /* U+EF800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F0000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F0800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F1000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F1800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F2000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F2800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F3000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F3800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F4000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F4800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F5000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F5800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F6000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F6800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F7000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F7800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F8000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F8800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F9000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+F9800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FA000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FA800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FB000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FB800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FC000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FC800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FD000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FD800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FE000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FE800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+FF000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,235, /* U+FF800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+100000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+100800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+101000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+101800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+102000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+102800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+103000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+103800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+104000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+104800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+105000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+105800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+106000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+106800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+107000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+107800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+108000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+108800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+109000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+109800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10A000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10A800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10B000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10B800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10C000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10C800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10D000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10D800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10E000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10E800 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+10F000 */
-123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,235, /* U+10F800 */
+ 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, /* U+2000 */
+ 77, 77, 66, 78, 66, 66, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, /* U+2800 */
+ 89, 90, 91, 92, 93, 94, 95, 71, 96, 96, 96, 96, 96, 96, 96, 96, /* U+3000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+3800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+4000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 96, 96, 96, 96, /* U+4800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+5000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+5800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+6000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+6800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+7000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+7800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+8000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+8800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+9000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 98, /* U+9800 */
+ 99,100,100,100,100,100,100,100,100,101,102,102,103,104,105,106, /* U+A000 */
+107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,115, /* U+A800 */
+116,117,118,119,120,121,115,116,117,118,119,120,121,115,116,117, /* U+B000 */
+118,119,120,121,115,116,117,118,119,120,121,115,116,117,118,119, /* U+B800 */
+120,121,115,116,117,118,119,120,121,115,116,117,118,119,120,121, /* U+C000 */
+115,116,117,118,119,120,121,115,116,117,118,119,120,121,115,116, /* U+C800 */
+117,118,119,120,121,115,116,117,118,119,120,121,115,116,117,122, /* U+D000 */
+123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, /* U+D800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+E000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+E800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F000 */
+124,124, 96, 96,125,126,127,128,129,129,130,131,132,133,134,135, /* U+F800 */
+136,137,138,139,140,141,142,143,144,145,146,140,147,147,148,140, /* U+10000 */
+149,150,151,152,153,154,155,156,157,158,140,140,159,140,140,140, /* U+10800 */
+160,161,162,163,164,165,166,140,167,168,140,169,170,171,172,140, /* U+11000 */
+140,173,140,140,174,175,140,140,176,177,178,140,140,140,140,140, /* U+11800 */
+179,179,179,179,179,179,179,180,181,179,182,140,140,140,140,140, /* U+12000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+12800 */
+183,183,183,183,183,183,183,183,184,140,140,140,140,140,140,140, /* U+13000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+13800 */
+140,140,140,140,140,140,140,140,185,185,185,185,186,140,140,140, /* U+14000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+14800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+15000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+15800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+16000 */
+187,187,187,187,188,189,190,191,140,140,140,140,140,140,192,193, /* U+16800 */
+194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,194, /* U+17000 */
+194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,194, /* U+17800 */
+194,194,194,194,194,194,194,194,194,194,194,194,194,194,194,195, /* U+18000 */
+194,194,194,194,194,196,140,140,140,140,140,140,140,140,140,140, /* U+18800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+19000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+19800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1A800 */
+197,198,199,200,200,201,140,140,140,140,140,140,140,140,140,140, /* U+1B000 */
+140,140,140,140,140,140,140,140,202,203,140,140,140,140,140,140, /* U+1B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1C800 */
+ 71,204,205,206,207,140,208,140,209,210,211,212,213,214,215,216, /* U+1D000 */
+217,217,217,217,218,219,140,140,140,140,140,140,140,140,140,140, /* U+1D800 */
+220,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1E000 */
+221,222,223,140,140,140,140,140,140,140,140,140,224,225,140,140, /* U+1E800 */
+226,227,228,229,230,140,231,232,233,234,235,236,237,238,239,240, /* U+1F000 */
+241,242,243,244,140,140,140,140,140,140,140,140,140,140,140,140, /* U+1F800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+20000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+20800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+21000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+21800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+22000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+22800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+23000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+23800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+24000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+24800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+25000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+25800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+26000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+26800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+27000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+27800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+28000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+28800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+29000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+29800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,245, 96, 96, /* U+2A000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2A800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,246, 96, /* U+2B000 */
+247, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2B800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2C000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,248, 96, 96, /* U+2C800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2D000 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2D800 */
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, /* U+2E000 */
+ 96, 96, 96, 96, 96, 96, 96,249,140,140,140,140,140,140,140,140, /* U+2E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+2F000 */
+ 96, 96, 96, 96,250,140,140,140,140,140,140,140,140,140,140,140, /* U+2F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+30000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+30800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+31000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+31800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+32000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+32800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+33000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+33800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+34000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+34800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+35000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+35800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+36000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+36800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+37000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+37800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+38000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+38800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+39000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+39800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+3F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+40000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+40800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+41000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+41800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+42000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+42800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+43000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+43800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+44000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+44800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+45000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+45800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+46000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+46800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+47000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+47800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+48000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+48800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+49000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+49800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+4F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+50000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+50800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+51000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+51800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+52000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+52800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+53000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+53800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+54000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+54800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+55000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+55800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+56000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+56800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+57000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+57800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+58000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+58800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+59000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+59800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+5F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+60000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+60800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+61000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+61800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+62000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+62800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+63000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+63800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+64000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+64800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+65000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+65800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+66000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+66800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+67000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+67800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+68000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+68800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+69000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+69800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+6F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+70000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+70800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+71000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+71800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+72000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+72800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+73000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+73800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+74000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+74800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+75000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+75800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+76000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+76800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+77000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+77800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+78000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+78800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+79000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+79800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+7F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+80000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+80800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+81000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+81800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+82000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+82800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+83000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+83800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+84000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+84800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+85000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+85800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+86000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+86800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+87000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+87800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+88000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+88800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+89000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+89800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+8F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+90000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+90800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+91000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+91800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+92000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+92800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+93000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+93800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+94000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+94800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+95000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+95800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+96000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+96800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+97000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+97800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+98000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+98800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+99000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+99800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9A000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9A800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9B000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9B800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9C000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9C800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9D000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9D800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9E000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9E800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9F000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+9F800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A0000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A0800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A1000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A1800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A2000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A2800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A3000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A3800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A4000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A4800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A5000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A5800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A6000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A6800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A7000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A7800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A8000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A8800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A9000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+A9800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AA000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AA800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AB000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AB800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AC000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AC800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AD000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AD800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AE000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AE800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AF000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+AF800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B0000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B0800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B1000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B1800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B2000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B2800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B3000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B3800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B4000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B4800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B5000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B5800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B6000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B6800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B7000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B7800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B8000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B8800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B9000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+B9800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BA000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BA800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BB000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BB800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BC000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BC800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BD000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BD800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BE000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BE800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BF000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+BF800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C0000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C0800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C1000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C1800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C2000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C2800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C3000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C3800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C4000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C4800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C5000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C5800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C6000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C6800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C7000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C7800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C8000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C8800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C9000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+C9800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CA000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CA800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CB000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CB800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CC000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CC800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CD000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CD800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CE000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CE800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CF000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+CF800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D0000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D0800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D1000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D1800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D2000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D2800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D3000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D3800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D4000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D4800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D5000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D5800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D6000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D6800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D7000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D7800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D8000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D8800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D9000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+D9800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DA000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DA800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DB000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DB800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DC000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DC800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DD000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DD800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DE000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DE800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DF000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+DF800 */
+251,252,253,254,252,252,252,252,252,252,252,252,252,252,252,252, /* U+E0000 */
+252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252, /* U+E0800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E1000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E1800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E2000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E2800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E3000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E3800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E4000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E4800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E5000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E5800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E6000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E6800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E7000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E7800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E8000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E8800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E9000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+E9800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EA000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EA800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EB000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EB800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EC000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EC800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+ED000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+ED800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EE000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EE800 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EF000 */
+140,140,140,140,140,140,140,140,140,140,140,140,140,140,140,140, /* U+EF800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F0000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F0800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F1000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F1800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F2000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F2800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F3000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F3800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F4000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F4800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F5000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F5800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F6000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F6800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F7000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F7800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F8000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F8800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F9000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+F9800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FA000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FA800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FB000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FB800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FC000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FC800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FD000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FD800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FE000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FE800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+FF000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,255, /* U+FF800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+100000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+100800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+101000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+101800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+102000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+102800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+103000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+103800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+104000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+104800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+105000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+105800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+106000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+106800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+107000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+107800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+108000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+108800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+109000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+109800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10A000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10A800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10B000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10B800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10C000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10C800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10D000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10D800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10E000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10E800 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,124, /* U+10F000 */
+124,124,124,124,124,124,124,124,124,124,124,124,124,124,124,255, /* U+10F800 */
};
-const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
+const uint16_t PRIV(ucd_stage2)[] = { /* 65536 bytes, block = 128 */
/* block 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -1424,7 +1523,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
30, 31, 30, 31, 33, 33, 33, 33, 33, 33, 71, 30, 31, 72, 73, 74,
74, 30, 31, 75, 76, 77, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
78, 79, 80, 81, 82, 33, 83, 83, 33, 84, 33, 85, 86, 33, 33, 33,
- 83, 87, 33, 88, 33, 89, 90, 33, 91, 92, 33, 93, 94, 33, 33, 92,
+ 83, 87, 33, 88, 33, 89, 90, 33, 91, 92, 90, 93, 94, 33, 33, 92,
33, 95, 96, 33, 33, 97, 33, 33, 33, 33, 33, 33, 33, 98, 33, 33,
/* block 5 */
@@ -1459,493 +1558,493 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
/* block 8 */
171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,171,
-172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,
-172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,172,
-173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,
-173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,173,
-174,174,174,174,174,174,174,174,174,174,174,174,174,174,174,174,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
+172,172,173,172,174,172,172,172,172,172,172,172,172,172,175,172,
+172,176,177,172,172,172,172,172,172,172,178,172,172,172,172,172,
+179,179,180,179,181,179,179,179,179,179,179,179,179,179,182,179,
+179,183,184,179,179,179,179,179,179,179,185,179,179,179,179,179,
+186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,186,
+187,188,189,190,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
/* block 9 */
-175,176,177,178,178,110,110,178,179,179,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-180,175,176,175,176,175,176,175,176,175,176,175,176,175,176,181,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
+187,188,191,192,192,110,110,192,193,193,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+194,187,188,187,188,187,188,187,188,187,188,187,188,187,188,195,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
/* block 10 */
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-115,182,182,182,182,182,182,182,182,182,182,182,182,182,182,182,
-182,182,182,182,182,182,182,182,182,182,182,182,182,182,182,182,
-182,182,182,182,182,182,182,115,115,183,184,184,184,184,184,184,
-115,185,185,185,185,185,185,185,185,185,185,185,185,185,185,185,
-185,185,185,185,185,185,185,185,185,185,185,185,185,185,185,185,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+115,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,
+196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,196,
+196,196,196,196,196,196,196,115,115,197,198,198,198,198,198,198,
+115,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,
+199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,
/* block 11 */
-185,185,185,185,185,185,185,186,115, 4,187,115,115,188,188,189,
-115,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,
-190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190,
-190,190,190,190,190,190,190,190,190,190,190,190,190,190,191,190,
-192,190,190,192,190,190,192,190,115,115,115,115,115,115,115,115,
-193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,193,
-193,193,193,193,193,193,193,193,193,193,193,115,115,115,115,115,
-193,193,193,192,192,115,115,115,115,115,115,115,115,115,115,115,
+199,199,199,199,199,199,199,200,115, 4,201,115,115,202,202,203,
+115,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,
+204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,
+204,204,204,204,204,204,204,204,204,204,204,204,204,204,205,204,
+206,204,204,206,204,204,206,204,115,115,115,115,115,115,115,115,
+207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,
+207,207,207,207,207,207,207,207,207,207,207,115,115,115,115,115,
+207,207,207,206,206,115,115,115,115,115,115,115,115,115,115,115,
/* block 12 */
-194,194,194,194,194, 22,195,195,195,196,196,197, 4,196,198,198,
-199,199,199,199,199,199,199,199,199,199,199, 4, 22,115,196, 4,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-108,200,200,200,200,200,200,200,200,200,200,110,110,110,110,110,
-110,110,110,110,110,110,199,199,199,199,199,199,199,199,199,199,
-201,201,201,201,201,201,201,201,201,201,196,196,196,196,200,200,
-110,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+208,208,208,208,208,209,210,210,210,211,211,212, 4,211,213,213,
+214,214,214,214,214,214,214,214,214,214,214, 4,215,115,211, 4,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+108,216,216,216,216,216,216,216,216,216,216,110,110,110,110,110,
+110,110,110,110,110,110,214,214,214,214,214,214,214,214,214,214,
+217,217,217,217,217,217,217,217,217,217,211,211,211,211,216,216,
+110,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 13 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,196,200,199,199,199,199,199,199,199, 22,198,199,
-199,199,199,199,199,202,202,199,199,198,199,199,199,199,200,200,
-201,201,201,201,201,201,201,201,201,201,200,200,200,198,198,200,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,211,216,214,214,214,214,214,214,214,209,213,214,
+214,214,214,214,214,218,218,214,214,213,214,214,214,214,216,216,
+217,217,217,217,217,217,217,217,217,217,216,216,216,213,213,216,
/* block 14 */
-203,203,203,203,203,203,203,203,203,203,203,203,203,203,115,204,
-205,206,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
-205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,
-206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,206,
-206,206,206,206,206,206,206,206,206,206,206,115,115,205,205,205,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+219,219,219,219,219,219,219,219,219,219,219,219,219,219,115,220,
+221,222,221,221,221,221,221,221,221,221,221,221,221,221,221,221,
+221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,221,
+222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
+222,222,222,222,222,222,222,222,222,222,222,115,115,221,221,221,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 15 */
-207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,
-207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,207,
-207,207,207,207,207,207,208,208,208,208,208,208,208,208,208,208,
-208,207,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-209,209,209,209,209,209,209,209,209,209,210,210,210,210,210,210,
-210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,210,
-210,210,210,210,210,210,210,210,210,210,210,211,211,211,211,211,
-211,211,211,211,212,212,213,214,214,214,212,115,115,115,115,115,
+223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
+223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,
+223,223,223,223,223,223,224,224,224,224,224,224,224,224,224,224,
+224,223,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+225,225,225,225,225,225,225,225,225,225,226,226,226,226,226,226,
+226,226,226,226,226,226,226,226,226,226,226,226,226,226,226,226,
+226,226,226,226,226,226,226,226,226,226,226,227,227,227,227,227,
+227,227,227,227,228,228,229,230,230,230,228,115,115,115,115,115,
/* block 16 */
-215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,215,
-215,215,215,215,215,215,216,216,216,216,217,216,216,216,216,216,
-216,216,216,216,217,216,216,216,217,216,216,216,216,216,115,115,
-218,218,218,218,218,218,218,218,218,218,218,218,218,218,218,115,
-219,219,219,219,219,219,219,219,219,219,219,219,219,219,219,219,
-219,219,219,219,219,219,219,219,219,220,220,220,115,115,221,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,231,
+231,231,231,231,231,231,232,232,232,232,233,232,232,232,232,232,
+232,232,232,232,233,232,232,232,233,232,232,232,232,232,115,115,
+234,234,234,234,234,234,234,234,234,234,234,234,234,234,234,115,
+235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,235,
+235,235,235,235,235,235,235,235,235,236,236,236,115,115,237,115,
+221,221,221,221,221,221,221,221,221,221,221,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 17 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,115,115,115,115,115,115,115,115,115,115,115,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,115,216,216,216,216,216,216,216,216,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,199,199,199,199,199,199,199,199,199,199,199,199,199,
-199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,199,
+115,115,115,115,214,214,214,214,214,214,214,214,214,214,214,214,
+214,214,209,214,214,214,214,214,214,214,214,214,214,214,214,214,
+214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,214,
/* block 18 */
-222,222,222,223,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
-224,224,224,224,224,224,224,224,224,224,222,223,222,224,223,223,
-223,222,222,222,222,222,222,222,222,223,223,223,223,222,223,223,
-224,110,110,222,222,222,222,222,224,224,224,224,224,224,224,224,
-224,224,222,222, 4, 4,225,225,225,225,225,225,225,225,225,225,
-226,227,224,224,224,224,224,224,224,224,224,224,224,224,224,224,
+238,238,238,239,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
+240,240,240,240,240,240,240,240,240,240,238,239,238,240,239,239,
+239,238,238,238,238,238,238,238,238,239,239,239,239,238,239,239,
+240,110,110,238,238,238,238,238,240,240,240,240,240,240,240,240,
+240,240,238,238, 4, 4,241,241,241,241,241,241,241,241,241,241,
+242,243,240,240,240,240,240,240,240,240,240,240,240,240,240,240,
/* block 19 */
-228,229,230,230,115,228,228,228,228,228,228,228,228,115,115,228,
-228,115,115,228,228,228,228,228,228,228,228,228,228,228,228,228,
-228,228,228,228,228,228,228,228,228,115,228,228,228,228,228,228,
-228,115,228,115,115,115,228,228,228,228,115,115,229,228,231,230,
-230,229,229,229,229,115,115,230,230,115,115,230,230,229,228,115,
-115,115,115,115,115,115,115,231,115,115,115,115,228,228,115,228,
-228,228,229,229,115,115,232,232,232,232,232,232,232,232,232,232,
-228,228,233,233,234,234,234,234,234,234,235,233,115,115,115,115,
+244,245,246,246,115,244,244,244,244,244,244,244,244,115,115,244,
+244,115,115,244,244,244,244,244,244,244,244,244,244,244,244,244,
+244,244,244,244,244,244,244,244,244,115,244,244,244,244,244,244,
+244,115,244,115,115,115,244,244,244,244,115,115,245,244,247,246,
+246,245,245,245,245,115,115,246,246,115,115,246,246,245,244,115,
+115,115,115,115,115,115,115,247,115,115,115,115,244,244,115,244,
+244,244,245,245,115,115,248,248,248,248,248,248,248,248,248,248,
+244,244,249,249,250,250,250,250,250,250,251,249,244,252,115,115,
/* block 20 */
-115,236,236,237,115,238,238,238,238,238,238,115,115,115,115,238,
-238,115,115,238,238,238,238,238,238,238,238,238,238,238,238,238,
-238,238,238,238,238,238,238,238,238,115,238,238,238,238,238,238,
-238,115,238,238,115,238,238,115,238,238,115,115,236,115,237,237,
-237,236,236,115,115,115,115,236,236,115,115,236,236,236,115,115,
-115,236,115,115,115,115,115,115,115,238,238,238,238,115,238,115,
-115,115,115,115,115,115,239,239,239,239,239,239,239,239,239,239,
-236,236,238,238,238,236,115,115,115,115,115,115,115,115,115,115,
+115,253,253,254,115,255,255,255,255,255,255,115,115,115,115,255,
+255,115,115,255,255,255,255,255,255,255,255,255,255,255,255,255,
+255,255,255,255,255,255,255,255,255,115,255,255,255,255,255,255,
+255,115,255,255,115,255,255,115,255,255,115,115,253,115,254,254,
+254,253,253,115,115,115,115,253,253,115,115,253,253,253,115,115,
+115,253,115,115,115,115,115,115,115,255,255,255,255,115,255,115,
+115,115,115,115,115,115,256,256,256,256,256,256,256,256,256,256,
+253,253,255,255,255,253,115,115,115,115,115,115,115,115,115,115,
/* block 21 */
-115,240,240,241,115,242,242,242,242,242,242,242,242,242,115,242,
-242,242,115,242,242,242,242,242,242,242,242,242,242,242,242,242,
-242,242,242,242,242,242,242,242,242,115,242,242,242,242,242,242,
-242,115,242,242,115,242,242,242,242,242,115,115,240,242,241,241,
-241,240,240,240,240,240,115,240,240,241,115,241,241,240,115,115,
-242,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-242,242,240,240,115,115,243,243,243,243,243,243,243,243,243,243,
-244,245,115,115,115,115,115,115,115,242,115,115,115,115,115,115,
+115,257,257,258,115,259,259,259,259,259,259,259,259,259,115,259,
+259,259,115,259,259,259,259,259,259,259,259,259,259,259,259,259,
+259,259,259,259,259,259,259,259,259,115,259,259,259,259,259,259,
+259,115,259,259,115,259,259,259,259,259,115,115,257,259,258,258,
+258,257,257,257,257,257,115,257,257,258,115,258,258,257,115,115,
+259,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+259,259,257,257,115,115,260,260,260,260,260,260,260,260,260,260,
+261,262,115,115,115,115,115,115,115,259,257,257,257,257,257,257,
/* block 22 */
-115,246,247,247,115,248,248,248,248,248,248,248,248,115,115,248,
-248,115,115,248,248,248,248,248,248,248,248,248,248,248,248,248,
-248,248,248,248,248,248,248,248,248,115,248,248,248,248,248,248,
-248,115,248,248,115,248,248,248,248,248,115,115,246,248,249,246,
-247,246,246,246,246,115,115,247,247,115,115,247,247,246,115,115,
-115,115,115,115,115,115,246,249,115,115,115,115,248,248,115,248,
-248,248,246,246,115,115,250,250,250,250,250,250,250,250,250,250,
-251,248,252,252,252,252,252,252,115,115,115,115,115,115,115,115,
+115,263,264,264,115,265,265,265,265,265,265,265,265,115,115,265,
+265,115,115,265,265,265,265,265,265,265,265,265,265,265,265,265,
+265,265,265,265,265,265,265,265,265,115,265,265,265,265,265,265,
+265,115,265,265,115,265,265,265,265,265,115,115,263,265,266,263,
+264,263,263,263,263,115,115,264,264,115,115,264,264,263,115,115,
+115,115,115,115,115,115,263,266,115,115,115,115,265,265,115,265,
+265,265,263,263,115,115,267,267,267,267,267,267,267,267,267,267,
+268,265,269,269,269,269,269,269,115,115,115,115,115,115,115,115,
/* block 23 */
-115,115,253,254,115,254,254,254,254,254,254,115,115,115,254,254,
-254,115,254,254,254,254,115,115,115,254,254,115,254,115,254,254,
-115,115,115,254,254,115,115,115,254,254,254,115,115,115,254,254,
-254,254,254,254,254,254,254,254,254,254,115,115,115,115,255,256,
-253,256,256,115,115,115,256,256,256,115,256,256,256,253,115,115,
-254,115,115,115,115,115,115,255,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,257,257,257,257,257,257,257,257,257,257,
-258,258,258,259,259,259,259,259,259,260,259,115,115,115,115,115,
+115,115,270,271,115,271,271,271,271,271,271,115,115,115,271,271,
+271,115,271,271,271,271,115,115,115,271,271,115,271,115,271,271,
+115,115,115,271,271,115,115,115,271,271,271,115,115,115,271,271,
+271,271,271,271,271,271,271,271,271,271,115,115,115,115,272,273,
+270,273,273,115,115,115,273,273,273,115,273,273,273,270,115,115,
+271,115,115,115,115,115,115,272,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,274,274,274,274,274,274,274,274,274,274,
+275,275,275,276,276,276,276,276,276,277,276,115,115,115,115,115,
/* block 24 */
-261,262,262,262,115,263,263,263,263,263,263,263,263,115,263,263,
-263,115,263,263,263,263,263,263,263,263,263,263,263,263,263,263,
-263,263,263,263,263,263,263,263,263,115,263,263,263,263,263,263,
-263,263,263,263,263,263,263,263,263,263,115,115,115,263,261,261,
-261,262,262,262,262,115,261,261,261,115,261,261,261,261,115,115,
-115,115,115,115,115,261,261,115,263,263,263,115,115,115,115,115,
-263,263,261,261,115,115,264,264,264,264,264,264,264,264,264,264,
-115,115,115,115,115,115,115,115,265,265,265,265,265,265,265,266,
+278,279,279,279,115,280,280,280,280,280,280,280,280,115,280,280,
+280,115,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
+280,280,280,280,280,280,280,280,280,115,280,280,280,280,280,280,
+280,280,280,280,280,280,280,280,280,280,115,115,115,280,278,278,
+278,279,279,279,279,115,278,278,278,115,278,278,278,278,115,115,
+115,115,115,115,115,278,278,115,280,280,280,115,115,115,115,115,
+280,280,278,278,115,115,281,281,281,281,281,281,281,281,281,281,
+115,115,115,115,115,115,115,115,282,282,282,282,282,282,282,283,
/* block 25 */
-115,267,268,268,115,269,269,269,269,269,269,269,269,115,269,269,
-269,115,269,269,269,269,269,269,269,269,269,269,269,269,269,269,
-269,269,269,269,269,269,269,269,269,115,269,269,269,269,269,269,
-269,269,269,269,115,269,269,269,269,269,115,115,267,269,268,267,
-268,268,270,268,268,115,267,268,268,115,268,268,267,267,115,115,
-115,115,115,115,115,270,270,115,115,115,115,115,115,115,269,115,
-269,269,267,267,115,115,271,271,271,271,271,271,271,271,271,271,
-115,269,269,115,115,115,115,115,115,115,115,115,115,115,115,115,
+284,285,286,286,115,284,284,284,284,284,284,284,284,115,284,284,
+284,115,284,284,284,284,284,284,284,284,284,284,284,284,284,284,
+284,284,284,284,284,284,284,284,284,115,284,284,284,284,284,284,
+284,284,284,284,115,284,284,284,284,284,115,115,285,284,286,285,
+286,286,287,286,286,115,285,286,286,115,286,286,285,285,115,115,
+115,115,115,115,115,287,287,115,115,115,115,115,115,115,284,115,
+284,284,285,285,115,115,288,288,288,288,288,288,288,288,288,288,
+115,284,284,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 26 */
-115,272,273,273,115,274,274,274,274,274,274,274,274,115,274,274,
-274,115,274,274,274,274,274,274,274,274,274,274,274,274,274,274,
-274,274,274,274,274,274,274,274,274,274,274,274,274,274,274,274,
-274,274,274,274,274,274,274,274,274,274,274,115,115,274,275,273,
-273,272,272,272,272,115,273,273,273,115,273,273,273,272,274,115,
-115,115,115,115,115,115,115,275,115,115,115,115,115,115,115,274,
-274,274,272,272,115,115,276,276,276,276,276,276,276,276,276,276,
-277,277,277,277,277,277,115,115,115,278,274,274,274,274,274,274,
+289,289,290,290,115,291,291,291,291,291,291,291,291,115,291,291,
+291,115,291,291,291,291,291,291,291,291,291,291,291,291,291,291,
+291,291,291,291,291,291,291,291,291,291,291,291,291,291,291,291,
+291,291,291,291,291,291,291,291,291,291,291,289,289,291,292,290,
+290,289,289,289,289,115,290,290,290,115,290,290,290,289,293,294,
+115,115,115,115,291,291,291,292,295,295,295,295,295,295,295,291,
+291,291,289,289,115,115,296,296,296,296,296,296,296,296,296,296,
+295,295,295,295,295,295,295,295,295,294,291,291,291,291,291,291,
/* block 27 */
-115,115,279,279,115,280,280,280,280,280,280,280,280,280,280,280,
-280,280,280,280,280,280,280,115,115,115,280,280,280,280,280,280,
-280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
-280,280,115,280,280,280,280,280,280,280,280,280,115,280,115,115,
-280,280,280,280,280,280,280,115,115,115,281,115,115,115,115,282,
-279,279,281,281,281,115,281,115,279,279,279,279,279,279,279,282,
-115,115,115,115,115,115,283,283,283,283,283,283,283,283,283,283,
-115,115,279,279,284,115,115,115,115,115,115,115,115,115,115,115,
+115,115,297,297,115,298,298,298,298,298,298,298,298,298,298,298,
+298,298,298,298,298,298,298,115,115,115,298,298,298,298,298,298,
+298,298,298,298,298,298,298,298,298,298,298,298,298,298,298,298,
+298,298,115,298,298,298,298,298,298,298,298,298,115,298,115,115,
+298,298,298,298,298,298,298,115,115,115,299,115,115,115,115,300,
+297,297,299,299,299,115,299,115,297,297,297,297,297,297,297,300,
+115,115,115,115,115,115,301,301,301,301,301,301,301,301,301,301,
+115,115,297,297,302,115,115,115,115,115,115,115,115,115,115,115,
/* block 28 */
-115,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,
-285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,
-285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,285,
-285,286,285,287,286,286,286,286,286,286,286,115,115,115,115, 5,
-285,285,285,285,285,285,288,286,286,286,286,286,286,286,286,289,
-290,290,290,290,290,290,290,290,290,290,289,289,115,115,115,115,
+115,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
+303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
+303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,303,
+303,304,303,305,304,304,304,304,304,304,304,115,115,115,115, 5,
+303,303,303,303,303,303,306,304,304,304,304,304,304,304,304,307,
+308,308,308,308,308,308,308,308,308,308,307,307,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 29 */
-115,291,291,115,291,115,115,291,291,115,291,115,115,291,115,115,
-115,115,115,115,291,291,291,291,115,291,291,291,291,291,291,291,
-115,291,291,291,115,291,115,291,115,115,291,291,115,291,291,291,
-291,292,291,293,292,292,292,292,292,292,115,292,292,291,115,115,
-291,291,291,291,291,115,294,115,292,292,292,292,292,292,115,115,
-295,295,295,295,295,295,295,295,295,295,115,115,291,291,291,291,
+115,309,309,115,309,115,115,309,309,115,309,115,115,309,115,115,
+115,115,115,115,309,309,309,309,115,309,309,309,309,309,309,309,
+115,309,309,309,115,309,115,309,115,115,309,309,115,309,309,309,
+309,310,309,311,310,310,310,310,310,310,115,310,310,309,115,115,
+309,309,309,309,309,115,312,115,310,310,310,310,310,310,115,115,
+313,313,313,313,313,313,313,313,313,313,115,115,309,309,309,309,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 30 */
-296,297,297,297,298,298,298,298,298,298,298,298,298,298,298,298,
-298,298,298,297,298,297,297,297,299,299,297,297,297,297,297,297,
-300,300,300,300,300,300,300,300,300,300,301,301,301,301,301,301,
-301,301,301,301,297,299,297,299,297,299,302,303,302,303,304,304,
-296,296,296,296,296,296,296,296,115,296,296,296,296,296,296,296,
-296,296,296,296,296,296,296,296,296,296,296,296,296,296,296,296,
-296,296,296,296,296,296,296,296,296,296,296,296,296,115,115,115,
-115,299,299,299,299,299,299,299,299,299,299,299,299,299,299,304,
+314,315,315,315,316,316,316,316,316,316,316,316,316,316,316,316,
+316,316,316,315,316,315,315,315,317,317,315,315,315,315,315,315,
+318,318,318,318,318,318,318,318,318,318,319,319,319,319,319,319,
+319,319,319,319,315,317,315,317,315,317,320,321,320,321,322,322,
+314,314,314,314,314,314,314,314,115,314,314,314,314,314,314,314,
+314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,314,
+314,314,314,314,314,314,314,314,314,314,314,314,314,115,115,115,
+115,317,317,317,317,317,317,317,317,317,317,317,317,317,317,322,
/* block 31 */
-299,299,299,299,299,298,299,299,296,296,296,296,296,299,299,299,
-299,299,299,299,299,299,299,299,115,299,299,299,299,299,299,299,
-299,299,299,299,299,299,299,299,299,299,299,299,299,299,299,299,
-299,299,299,299,299,299,299,299,299,299,299,299,299,115,297,297,
-297,297,297,297,297,297,299,297,297,297,297,297,297,115,297,297,
-298,298,298,298,298, 19, 19, 19, 19,298,298,115,115,115,115,115,
+317,317,317,317,317,316,317,317,314,314,314,314,314,317,317,317,
+317,317,317,317,317,317,317,317,115,317,317,317,317,317,317,317,
+317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
+317,317,317,317,317,317,317,317,317,317,317,317,317,115,315,315,
+315,315,315,315,315,315,317,315,315,315,315,315,315,115,315,315,
+316,316,316,316,316, 19, 19, 19, 19,316,316,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 32 */
-305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,
-305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,
-305,305,305,305,305,305,305,305,305,305,305,306,306,307,307,307,
-307,308,307,307,307,307,307,307,306,307,307,308,308,307,307,305,
-309,309,309,309,309,309,309,309,309,309,310,310,310,310,310,310,
-305,305,305,305,305,305,308,308,307,307,305,305,305,305,307,307,
-307,305,306,306,306,305,305,306,306,306,306,306,306,306,305,305,
-305,307,307,307,307,305,305,305,305,305,305,305,305,305,305,305,
+323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
+323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
+323,323,323,323,323,323,323,323,323,323,323,324,324,325,325,325,
+325,326,325,325,325,325,325,325,324,325,325,326,326,325,325,323,
+327,327,327,327,327,327,327,327,327,327,328,328,328,328,328,328,
+323,323,323,323,323,323,326,326,325,325,323,323,323,323,325,325,
+325,323,324,324,324,323,323,324,324,324,324,324,324,324,323,323,
+323,325,325,325,325,323,323,323,323,323,323,323,323,323,323,323,
/* block 33 */
-305,305,307,306,308,307,307,306,306,306,306,306,306,307,305,306,
-309,309,309,309,309,309,309,309,309,309,306,306,306,307,311,311,
-312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,
-312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,312,
-312,312,312,312,312,312,115,312,115,115,115,115,115,312,115,115,
-313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,
-313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,313,
-313,313,313,313,313,313,313,313,313,313,313, 4,314,313,313,313,
+323,323,325,324,326,325,325,324,324,324,324,324,324,325,323,324,
+327,327,327,327,327,327,327,327,327,327,324,324,324,325,329,329,
+330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
+330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
+330,330,330,330,330,330,115,330,115,115,115,115,115,330,115,115,
+331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,
+331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,331,
+331,331,331,331,331,331,331,331,331,331,331, 4,332,331,331,331,
/* block 34 */
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,
-316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
/* block 35 */
-316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,
-316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,
-316,316,316,316,316,316,316,316,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,334,334,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
/* block 36 */
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,115,318,318,318,318,115,115,
-318,318,318,318,318,318,318,115,318,115,318,318,318,318,115,115,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,115,336,336,336,336,115,115,
+336,336,336,336,336,336,336,115,336,115,336,336,336,336,115,115,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
/* block 37 */
-318,318,318,318,318,318,318,318,318,115,318,318,318,318,115,115,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,115,
-318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
+336,336,336,336,336,336,336,336,336,115,336,336,336,336,115,115,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,115,
+336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
/* block 38 */
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,115,318,318,318,318,115,115,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,318,318,318,318,115,115,319,319,319,
-320,320,320,320,320,320,320,320,320,321,321,321,321,321,321,321,
-321,321,321,321,321,321,321,321,321,321,321,321,321,115,115,115,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,115,336,336,336,336,115,115,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,336,336,336,336,115,115,337,337,337,
+338,338,338,338,338,338,338,338,338,339,339,339,339,339,339,339,
+339,339,339,339,339,339,339,339,339,339,339,339,339,115,115,115,
/* block 39 */
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-322,322,322,322,322,322,322,322,322,322,115,115,115,115,115,115,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
-324,324,324,324,324,324,115,115,325,325,325,325,325,325,115,115,
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+340,340,340,340,340,340,340,340,340,340,115,115,115,115,115,115,
+341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
+341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
+341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
+341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
+341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,341,
+342,342,342,342,342,342,115,115,343,343,343,343,343,343,115,115,
/* block 40 */
-326,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+344,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
/* block 41 */
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
/* block 42 */
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,328,328,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,346,346,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
/* block 43 */
-329,330,330,330,330,330,330,330,330,330,330,330,330,330,330,330,
-330,330,330,330,330,330,330,330,330,330,330,331,332,115,115,115,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
-333,333,333,333,333,333,333,333,333,333,333, 4, 4, 4,334,334,
-334,333,333,333,333,333,333,333,333,115,115,115,115,115,115,115,
+347,348,348,348,348,348,348,348,348,348,348,348,348,348,348,348,
+348,348,348,348,348,348,348,348,348,348,348,349,350,115,115,115,
+351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
+351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
+351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
+351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,351,
+351,351,351,351,351,351,351,351,351,351,351, 4, 4, 4,352,352,
+352,351,351,351,351,351,351,351,351,115,115,115,115,115,115,115,
/* block 44 */
-335,335,335,335,335,335,335,335,335,335,335,335,335,115,335,335,
-335,335,336,336,336,115,115,115,115,115,115,115,115,115,115,115,
-337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,337,
-337,337,338,338,338, 4, 4,115,115,115,115,115,115,115,115,115,
-339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,339,
-339,339,340,340,115,115,115,115,115,115,115,115,115,115,115,115,
-341,341,341,341,341,341,341,341,341,341,341,341,341,115,341,341,
-341,115,342,342,115,115,115,115,115,115,115,115,115,115,115,115,
+353,353,353,353,353,353,353,353,353,353,353,353,353,115,353,353,
+353,353,354,354,354,115,115,115,115,115,115,115,115,115,115,115,
+355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,355,
+355,355,356,356,356, 4, 4,115,115,115,115,115,115,115,115,115,
+357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,357,
+357,357,358,358,115,115,115,115,115,115,115,115,115,115,115,115,
+359,359,359,359,359,359,359,359,359,359,359,359,359,115,359,359,
+359,115,360,360,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 45 */
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,343,
-343,343,343,343,344,344,345,344,344,344,344,344,344,344,345,345,
-345,345,345,345,345,345,344,345,345,344,344,344,344,344,344,344,
-344,344,344,344,346,346,346,347,346,346,346,348,343,344,115,115,
-349,349,349,349,349,349,349,349,349,349,115,115,115,115,115,115,
-350,350,350,350,350,350,350,350,350,350,115,115,115,115,115,115,
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
+361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,361,
+361,361,361,361,362,362,363,362,362,362,362,362,362,362,363,363,
+363,363,363,363,363,363,362,363,363,362,362,362,362,362,362,362,
+362,362,362,362,364,364,364,365,364,364,364,366,361,362,115,115,
+367,367,367,367,367,367,367,367,367,367,115,115,115,115,115,115,
+368,368,368,368,368,368,368,368,368,368,115,115,115,115,115,115,
/* block 46 */
-351,351, 4, 4,351, 4,352,351,351,351,351,353,353,353,354,115,
-355,355,355,355,355,355,355,355,355,355,115,115,115,115,115,115,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,357,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,115,115,115,115,115,115,115,115,
+369,369, 4, 4,369, 4,370,369,369,369,369,371,371,371,372,115,
+373,373,373,373,373,373,373,373,373,373,115,115,115,115,115,115,
+374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,375,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,115,115,115,115,115,115,115,115,
/* block 47 */
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,356,
-356,356,356,356,356,356,356,356,356,353,356,115,115,115,115,115,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,327,
-327,327,327,327,327,327,115,115,115,115,115,115,115,115,115,115,
+374,374,374,374,374,371,371,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
+374,374,374,374,374,374,374,374,374,371,374,115,115,115,115,115,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,345,
+345,345,345,345,345,345,115,115,115,115,115,115,115,115,115,115,
/* block 48 */
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,
-358,358,358,358,358,358,358,358,358,358,358,358,358,358,358,115,
-359,359,359,360,360,360,360,359,359,360,360,360,115,115,115,115,
-360,360,359,360,360,360,360,360,360,359,359,359,115,115,115,115,
-361,115,115,115,362,362,363,363,363,363,363,363,363,363,363,363,
-364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,364,
-364,364,364,364,364,364,364,364,364,364,364,364,364,364,115,115,
-364,364,364,364,364,115,115,115,115,115,115,115,115,115,115,115,
+376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,
+376,376,376,376,376,376,376,376,376,376,376,376,376,376,376,115,
+377,377,377,378,378,378,378,377,377,378,378,378,115,115,115,115,
+378,378,377,378,378,378,378,378,378,377,377,377,115,115,115,115,
+379,115,115,115,380,380,381,381,381,381,381,381,381,381,381,381,
+382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,382,
+382,382,382,382,382,382,382,382,382,382,382,382,382,382,115,115,
+382,382,382,382,382,115,115,115,115,115,115,115,115,115,115,115,
/* block 49 */
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,365,365,115,115,115,115,
-365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,365,
-365,365,365,365,365,365,365,365,365,365,115,115,115,115,115,115,
-366,366,366,366,366,366,366,366,366,366,367,115,115,115,368,368,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
-369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,369,
+383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
+383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
+383,383,383,383,383,383,383,383,383,383,383,383,115,115,115,115,
+383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,383,
+383,383,383,383,383,383,383,383,383,383,115,115,115,115,115,115,
+384,384,384,384,384,384,384,384,384,384,385,115,115,115,386,386,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
+387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,387,
/* block 50 */
-370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,370,
-370,370,370,370,370,370,370,371,371,372,372,371,115,115,373,373,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,374,
-374,374,374,374,374,375,376,375,376,376,376,376,376,376,376,115,
-376,377,376,377,377,376,376,376,376,376,376,376,376,375,375,375,
-375,375,375,376,376,376,376,376,376,376,376,376,376,115,115,376,
+388,388,388,388,388,388,388,388,388,388,388,388,388,388,388,388,
+388,388,388,388,388,388,388,389,389,390,390,389,115,115,391,391,
+392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
+392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
+392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
+392,392,392,392,392,393,394,393,394,394,394,394,394,394,394,115,
+394,395,394,395,395,394,394,394,394,394,394,394,394,393,393,393,
+393,393,393,394,394,394,394,394,394,394,394,394,394,115,115,394,
/* block 51 */
-378,378,378,378,378,378,378,378,378,378,115,115,115,115,115,115,
-378,378,378,378,378,378,378,378,378,378,115,115,115,115,115,115,
-379,379,379,379,379,379,379,380,379,379,379,379,379,379,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,381,115,
+396,396,396,396,396,396,396,396,396,396,115,115,115,115,115,115,
+396,396,396,396,396,396,396,396,396,396,115,115,115,115,115,115,
+397,397,397,397,397,397,397,398,397,397,397,397,397,397,115,115,
+110,110,110,110,110,110,110,110,110,110,110,110,110,110,399,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 52 */
-382,382,382,382,383,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,384,
-384,384,384,384,382,383,382,382,382,382,382,383,382,383,383,383,
-383,383,382,383,383,384,384,384,384,384,384,384,115,115,115,115,
-385,385,385,385,385,385,385,385,385,385,386,386,386,386,386,386,
-386,387,387,387,387,387,387,387,387,387,387,382,382,382,382,382,
-382,382,382,382,387,387,387,387,387,387,387,387,387,115,115,115,
+400,400,400,400,401,402,402,402,402,402,402,402,402,402,402,402,
+402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
+402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
+402,402,402,402,400,401,400,400,400,400,400,401,400,401,401,401,
+401,401,400,401,401,402,402,402,402,402,402,402,115,115,115,115,
+403,403,403,403,403,403,403,403,403,403,404,404,404,404,404,404,
+404,405,405,405,405,405,405,405,405,405,405,400,400,400,400,400,
+400,400,400,400,405,405,405,405,405,405,405,405,405,115,115,115,
/* block 53 */
-388,388,389,390,390,390,390,390,390,390,390,390,390,390,390,390,
-390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,390,
-390,389,388,388,388,388,389,389,388,388,389,388,388,388,390,390,
-391,391,391,391,391,391,391,391,391,391,390,390,390,390,390,390,
-392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
-392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,392,
-392,392,392,392,392,392,393,394,393,393,394,394,394,393,394,393,
-393,393,394,394,115,115,115,115,115,115,115,115,395,395,395,395,
+406,406,407,408,408,408,408,408,408,408,408,408,408,408,408,408,
+408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,408,
+408,407,406,406,406,406,407,407,406,406,407,406,406,406,408,408,
+409,409,409,409,409,409,409,409,409,409,408,408,408,408,408,408,
+410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
+410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,410,
+410,410,410,410,410,410,411,412,411,411,412,412,412,411,412,411,
+411,411,412,412,115,115,115,115,115,115,115,115,413,413,413,413,
/* block 54 */
-396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
-396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,396,
-396,396,396,396,397,397,397,397,397,397,397,397,398,398,398,398,
-398,398,398,398,397,397,398,398,115,115,115,399,399,399,399,399,
-400,400,400,400,400,400,400,400,400,400,115,115,115,396,396,396,
-401,401,401,401,401,401,401,401,401,401,402,402,402,402,402,402,
-402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,402,
-402,402,402,402,402,402,402,402,403,403,403,403,403,403,404,404,
+414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,414,
+414,414,414,414,415,415,415,415,415,415,415,415,416,416,416,416,
+416,416,416,416,415,415,416,416,115,115,115,417,417,417,417,417,
+418,418,418,418,418,418,418,418,418,418,115,115,115,414,414,414,
+419,419,419,419,419,419,419,419,419,419,420,420,420,420,420,420,
+420,420,420,420,420,420,420,420,420,420,420,420,420,420,420,420,
+420,420,420,420,420,420,420,420,421,421,421,421,421,421,422,422,
/* block 55 */
+423,424,425,426,427,428,429,430,431,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-405,405,405,405,405,405,405,405,115,115,115,115,115,115,115,115,
+432,432,432,432,432,432,432,432,115,115,115,115,115,115,115,115,
110,110,110, 4,110,110,110,110,110,110,110,110,110,110,110,110,
-110,406,110,110,110,110,110,110,110,407,407,407,407,110,407,407,
-407,407,406,406,110,407,407,115,110,110,115,115,115,115,115,115,
+110,433,110,110,110,110,110,110,110,434,434,434,434,110,434,434,
+434,434,433,433,110,434,434,433,110,110,115,115,115,115,115,115,
/* block 56 */
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33,123,123,123,123,123,408,107,107,107,107,
+ 33, 33, 33, 33, 33, 33,123,123,123,123,123,435,107,107,107,107,
107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,
107,107,107,107,107,107,107,107,107,107,107,107,107,116,116,116,
116,116,107,107,107,107,116,116,116,116,116, 33, 33, 33, 33, 33,
- 33, 33, 33, 33, 33, 33, 33, 33,409,410, 33, 33, 33,411, 33, 33,
+ 33, 33, 33, 33, 33, 33, 33, 33,436,437, 33, 33, 33,438, 33, 33,
/* block 57 */
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
@@ -1955,7 +2054,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-110,110,110,110,110,110,115,115,115,115,115,115,110,110,110,110,
+110,110,110,110,110,110,110,110,110,110,115,110,110,110,110,110,
/* block 58 */
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
@@ -1964,12 +2063,12 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
-412,413, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
+439,440, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
/* block 59 */
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 33, 33, 33, 33, 33,414, 33, 33,415, 33,
+ 30, 31, 30, 31, 30, 31, 33, 33, 33, 33, 33,441, 33, 33,442, 33,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
@@ -1978,57 +2077,57 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
/* block 60 */
-416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417,
-416,416,416,416,416,416,115,115,417,417,417,417,417,417,115,115,
-416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417,
-416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417,
-416,416,416,416,416,416,115,115,417,417,417,417,417,417,115,115,
-123,416,123,416,123,416,123,416,115,417,115,417,115,417,115,417,
-416,416,416,416,416,416,416,416,417,417,417,417,417,417,417,417,
-418,418,419,419,419,419,420,420,421,421,422,422,423,423,115,115,
+443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
+443,443,443,443,443,443,115,115,444,444,444,444,444,444,115,115,
+443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
+443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
+443,443,443,443,443,443,115,115,444,444,444,444,444,444,115,115,
+123,443,123,443,123,443,123,443,115,444,115,444,115,444,115,444,
+443,443,443,443,443,443,443,443,444,444,444,444,444,444,444,444,
+445,445,446,446,446,446,447,447,448,448,449,449,450,450,115,115,
/* block 61 */
-416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424,
-416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424,
-416,416,416,416,416,416,416,416,424,424,424,424,424,424,424,424,
-416,416,123,425,123,115,123,123,417,417,426,426,427,114,428,114,
-114,114,123,425,123,115,123,123,429,429,429,429,427,114,114,114,
-416,416,123,123,115,115,123,123,417,417,430,430,115,114,114,114,
-416,416,123,123,123,164,123,123,417,417,431,431,169,114,114,114,
-115,115,123,425,123,115,123,123,432,432,433,433,427,114,114,115,
+443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
+443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
+443,443,443,443,443,443,443,443,451,451,451,451,451,451,451,451,
+443,443,123,452,123,115,123,123,444,444,453,453,454,114,455,114,
+114,114,123,452,123,115,123,123,456,456,456,456,454,114,114,114,
+443,443,123,123,115,115,123,123,444,444,457,457,115,114,114,114,
+443,443,123,123,123,164,123,123,444,444,458,458,169,114,114,114,
+115,115,123,452,123,115,123,123,459,459,460,460,454,114,114,115,
/* block 62 */
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 22,434,434, 22, 22,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 22,461,462, 22, 22,
9, 9, 9, 9, 9, 9, 4, 4, 21, 25, 6, 21, 21, 25, 6, 21,
- 4, 4, 4, 4, 4, 4, 4, 4,435,436, 22, 22, 22, 22, 22, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4,463,464, 22, 22, 22, 22, 22, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 21, 25, 4, 4, 4, 4, 15,
15, 4, 4, 4, 8, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 8, 4, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3,
- 22, 22, 22, 22, 22,437, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22,465, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
23,107,115,115, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,107,
/* block 63 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 8, 8, 8, 6, 7,115,
107,107,107,107,107,107,107,107,107,107,107,107,107,115,115,115,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,115,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,381,381,381,
-381,110,381,381,381,110,110,110,110,110,110,110,110,110,110,110,
+110,110,110,110,110,110,110,110,110,110,110,110,110,399,399,399,
+399,110,399,399,399,110,110,110,110,110,110,110,110,110,110,110,
110,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 64 */
- 19, 19,438, 19, 19, 19, 19,438, 19, 19,439,438,438,438,439,439,
-438,438,438,439, 19,438, 19, 19, 8,438,438,438,438,438, 19, 19,
- 19, 19, 19, 19,438, 19,440, 19,438, 19,441,442,438,438, 19,439,
-438,438,443,438,439,407,407,407,407,439, 19, 19,439,439,438,438,
- 8, 8, 8, 8, 8,438,439,439,439,439, 19, 8, 19, 19,444, 19,
+ 19, 19,466, 19, 19, 19, 19,466, 19, 19,467,466,466,466,467,467,
+466,466,466,467, 19,466, 19, 19, 8,466,466,466,466,466, 19, 19,
+ 19, 19, 19, 19,466, 19,468, 19,466, 19,469,470,466,466, 19,467,
+466,466,471,466,467,434,434,434,434,467, 19, 19,467,467,466,466,
+ 8, 8, 8, 8, 8,466,467,467,467,467, 19, 8, 19, 19,472, 19,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
-445,445,445,445,445,445,445,445,445,445,445,445,445,445,445,445,
-446,446,446,446,446,446,446,446,446,446,446,446,446,446,446,446,
+473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+474,474,474,474,474,474,474,474,474,474,474,474,474,474,474,474,
/* block 65 */
-447,447,447, 30, 31,447,447,447,447, 23, 19, 19,115,115,115,115,
+475,475,475, 30, 31,475,475,475,475, 23, 19, 19,115,115,115,115,
8, 8, 8, 8, 8, 19, 19, 19, 19, 19, 8, 8, 19, 19, 19, 19,
8, 19, 19, 8, 19, 19, 8, 19, 19, 19, 19, 19, 19, 19, 8, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2065,7 +2164,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8, 8, 8, 8,
8, 8, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 69 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2081,10 +2180,10 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19,448,448,448,448,448,448,448,448,448,448,
-448,448,448,448,448,448,448,448,448,448,448,448,448,448,448,448,
-449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,449,
-449,449,449,449,449,449,449,449,449,449, 23, 23, 23, 23, 23, 23,
+ 19, 19, 19, 19, 19, 19,476,476,476,476,476,476,476,476,476,476,
+476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,476,
+477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
+477,477,477,477,477,477,477,477,477,477, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
/* block 71 */
@@ -2109,25 +2208,35 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
/* block 73 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+479, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 8,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
/* block 74 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19,479,479, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7,
- 6, 7, 6, 7, 6, 7, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19,
/* block 75 */
+ 19, 19, 19, 19, 19, 19, 19, 19,479, 19,478,478,478,478, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,479, 19, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 6, 7, 6, 7, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+
+/* block 76 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2137,17 +2246,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
8, 8, 8, 8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-/* block 76 */
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,450,
-
/* block 77 */
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,480,
+
+/* block 78 */
8, 8, 8, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
7, 6, 7, 6, 7, 6, 7, 6, 7, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -2157,7 +2266,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 7, 8, 8,
-/* block 78 */
+/* block 79 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -2167,257 +2276,257 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 79 */
+/* block 80 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19,
- 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115, 19, 19, 19, 19,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 80 */
-451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,
-451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,
-451,451,451,451,451,451,451,451,451,451,451,451,451,451,451,115,
-452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,
-452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,
-452,452,452,452,452,452,452,452,452,452,452,452,452,452,452,115,
- 30, 31,453,454,455,456,457, 30, 31, 30, 31, 30, 31,458,459,460,
-461, 33, 30, 31, 33, 30, 31, 33, 33, 33, 33, 33,107,107,462,462,
-
/* block 81 */
+481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
+481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
+481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,115,
+482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
+482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
+482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115,
+ 30, 31,483,484,485,486,487, 30, 31, 30, 31, 30, 31,488,489,490,
+491, 33, 30, 31, 33, 30, 31, 33, 33, 33, 33, 33,107,107,492,492,
+
+/* block 82 */
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
160,161,160,161,160,161,160,161,160,161,160,161,160,161,160,161,
-160,161,160,161,463,464,464,464,464,464,464,160,161,160,161,465,
-465,465,160,161,115,115,115,115,115,466,466,466,466,467,466,466,
-
-/* block 82 */
-468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,
-468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,468,
-468,468,468,468,468,468,115,468,115,115,115,115,115,468,115,115,
-469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,
-469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,
-469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,469,
-469,469,469,469,469,469,469,469,115,115,115,115,115,115,115,470,
-471,115,115,115,115,115,115,115,115,115,115,115,115,115,115,472,
+160,161,160,161,493,494,494,494,494,494,494,160,161,160,161,495,
+495,495,160,161,115,115,115,115,115,496,496,496,496,497,496,496,
/* block 83 */
-318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,318,
-318,318,318,318,318,318,318,115,115,115,115,115,115,115,115,115,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115,
-178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,
-178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,
+498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
+498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
+498,498,498,498,498,498,115,498,115,115,115,115,115,498,115,115,
+499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
+499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
+499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,499,
+499,499,499,499,499,499,499,499,115,115,115,115,115,115,115,500,
+501,115,115,115,115,115,115,115,115,115,115,115,115,115,115,502,
/* block 84 */
+336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,336,
+336,336,336,336,336,336,336,115,115,115,115,115,115,115,115,115,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
+192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,
+192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,
+
+/* block 85 */
4, 4, 21, 25, 21, 25, 4, 4, 4, 21, 25, 4, 21, 25, 4, 4,
4, 4, 4, 4, 4, 4, 4, 9, 4, 4, 9, 4, 21, 25, 4, 4,
21, 25, 6, 7, 6, 7, 6, 7, 6, 7, 4, 4, 4, 4, 4,108,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, 9, 4, 4, 4, 4,
- 9, 4, 6,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 9, 4, 6, 4, 4, 4, 4, 4, 4, 4,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 85 */
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,115,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,115,115,115,115,115,115,115,115,115,115,115,115,
-
/* block 86 */
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,115,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 87 */
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,473,
-473,473,473,473,473,473,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
/* block 88 */
- 3, 4, 4, 4, 19,474,407,475, 6, 7, 6, 7, 6, 7, 6, 7,
- 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7,
- 19,475,475,475,475,475,475,475,475,475,110,110,110,110,476,476,
- 9,108,108,108,108,108, 19, 19,475,475,475,474,407, 4, 19, 19,
-115,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
-477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
-477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
-477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,503,
+503,503,503,503,503,503,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
/* block 89 */
-477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,477,
-477,477,477,477,477,477,477,115,115,110,110, 14, 14,478,478,477,
- 9,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479, 4,108,480,480,479,
+ 3, 4, 4, 4, 19,504,434,505, 6, 7, 6, 7, 6, 7, 6, 7,
+ 6, 7, 19, 19, 6, 7, 6, 7, 6, 7, 6, 7, 9, 6, 7, 7,
+ 19,505,505,505,505,505,505,505,505,505,110,110,110,110,506,506,
+ 9,108,108,108,108,108, 19, 19,505,505,505,504,434, 4, 19, 19,
+115,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
/* block 90 */
-115,115,115,115,115,481,481,481,481,481,481,481,481,481,481,481,
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,115,115,
-115,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,115,115,110,110, 14, 14,508,508,507,
+ 9,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509, 4,108,510,510,509,
/* block 91 */
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115,
+115,115,115,115,115,511,511,511,511,511,511,511,511,511,511,511,
+511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
+511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,115,
+115,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+
+/* block 92 */
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,115,
19, 19, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,481,
-481,481,481,481,481,481,481,481,481,481,481,115,115,115,115,115,
+511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,511,
+511,511,511,511,511,511,511,511,511,511,511,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-/* block 92 */
-483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,
-483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,115,
+/* block 93 */
+513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
+513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,115,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 23, 23, 23, 23, 23, 23, 23, 23,
19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
-483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,483,
-483,483,483,483,483,483,483,483,483,483,483,483,483,483,483, 19,
+513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,513,
+513,513,513,513,513,513,513,513,513,513,513,513,513,513,513, 19,
-/* block 93 */
+/* block 94 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,115,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,115,
-/* block 94 */
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,484,
-484,484,484,484,484,484,484,484, 19, 19, 19, 19, 19, 19, 19, 19,
+/* block 95 */
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,
+514,514,514,514,514,514,514,514, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 95 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-
/* block 96 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+
+/* block 97 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,115,115,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 97 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
/* block 98 */
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,487,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 99 */
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
-486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,486,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,517,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
/* block 100 */
-486,486,486,486,486,486,486,486,486,486,486,486,486,115,115,115,
-488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
-488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
-488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,488,
-488,488,488,488,488,488,488,115,115,115,115,115,115,115,115,115,
-489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
-489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,489,
-489,489,489,489,489,489,489,489,490,490,490,490,490,490,491,491,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
+516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
/* block 101 */
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
+516,516,516,516,516,516,516,516,516,516,516,516,516,115,115,115,
+518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
+518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
+518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,518,
+518,518,518,518,518,518,518,115,115,115,115,115,115,115,115,115,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
+519,519,519,519,519,519,519,519,520,520,520,520,520,520,521,521,
/* block 102 */
-492,492,492,492,492,492,492,492,492,492,492,492,493,494,494,494,
-492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,492,
-495,495,495,495,495,495,495,495,495,495,492,492,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,496,178,
-179,179,179,497,178,178,178,178,178,178,178,178,178,178,497,409,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
/* block 103 */
-175,176,175,176,175,176,175,176,175,176,175,176,175,176,175,176,
-175,176,175,176,175,176,175,176,175,176,175,176,409,409,178,178,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,499,499,499,499,499,499,499,499,499,499,
-500,500,501,501,501,501,501,501,115,115,115,115,115,115,115,115,
+522,522,522,522,522,522,522,522,522,522,522,522,523,524,524,524,
+522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,522,
+525,525,525,525,525,525,525,525,525,525,522,522,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+187,188,187,188,187,188,187,188,187,188,526,527,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,528,192,
+193,193,193,529,192,192,192,192,192,192,192,192,192,192,529,436,
/* block 104 */
+187,188,187,188,187,188,187,188,187,188,187,188,187,188,187,188,
+187,188,187,188,187,188,187,188,187,188,187,188,436,436,192,192,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,531,531,531,531,531,531,531,531,531,531,
+532,532,533,533,533,533,533,533,115,115,115,115,115,115,115,115,
+
+/* block 105 */
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14,108,108,108,108,108,108,108,108,108,
14, 14, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
@@ -2425,349 +2534,349 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
-107, 33, 33, 33, 33, 33, 33, 33, 33, 30, 31, 30, 31,502, 30, 31,
+107, 33, 33, 33, 33, 33, 33, 33, 33, 30, 31, 30, 31,534, 30, 31,
-/* block 105 */
- 30, 31, 30, 31, 30, 31, 30, 31,108, 14, 14, 30, 31,503, 33, 20,
+/* block 106 */
+ 30, 31, 30, 31, 30, 31, 30, 31,108, 14, 14, 30, 31,535, 33, 20,
30, 31, 30, 31, 33, 33, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,
- 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,504,505,506,507,115,115,
-508,509,510,511, 30, 31, 30, 31,115,115,115,115,115,115,115,115,
+ 30, 31, 30, 31, 30, 31, 30, 31, 30, 31,536,537,538,539,536,115,
+540,541,542,543, 30, 31, 30, 31,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115, 20,107,107, 33, 20, 20, 20, 20, 20,
-/* block 106 */
-512,512,513,512,512,512,513,512,512,512,512,513,512,512,512,512,
-512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
-512,512,512,514,514,513,513,514,515,515,515,515,115,115,115,115,
- 23, 23, 23, 23, 23, 23, 19, 19, 5, 19,115,115,115,115,115,115,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,516,
-516,516,516,516,517,517,517,517,115,115,115,115,115,115,115,115,
-
/* block 107 */
-518,518,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,519,
-519,519,519,519,518,518,518,518,518,518,518,518,518,518,518,518,
-518,518,518,518,520,115,115,115,115,115,115,115,115,115,521,521,
-522,522,522,522,522,522,522,522,522,522,115,115,115,115,115,115,
-222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,222,
-222,222,224,224,224,224,224,224,226,226,226,224,226,224,115,115,
+544,544,545,544,544,544,545,544,544,544,544,545,544,544,544,544,
+544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,544,
+544,544,544,546,546,545,545,546,547,547,547,547,115,115,115,115,
+ 23, 23, 23, 23, 23, 23, 19, 19, 5, 19,115,115,115,115,115,115,
+548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
+548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
+548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,548,
+548,548,548,548,549,549,549,549,115,115,115,115,115,115,115,115,
/* block 108 */
-523,523,523,523,523,523,523,523,523,523,524,524,524,524,524,524,
-524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,524,
-524,524,524,524,524,524,525,525,525,525,525,525,525,525, 4,526,
-527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,527,
-527,527,527,527,527,527,527,528,528,528,528,528,528,528,528,528,
-528,528,529,529,115,115,115,115,115,115,115,115,115,115,115,530,
-315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,315,
-315,315,315,315,315,315,315,315,315,315,315,315,315,115,115,115,
+550,550,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
+551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
+551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,551,
+551,551,551,551,550,550,550,550,550,550,550,550,550,550,550,550,
+550,550,550,550,552,552,115,115,115,115,115,115,115,115,553,553,
+554,554,554,554,554,554,554,554,554,554,115,115,115,115,115,115,
+238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,238,
+238,238,240,240,240,240,240,240,242,242,242,240,242,240,115,115,
/* block 109 */
-531,531,531,532,533,533,533,533,533,533,533,533,533,533,533,533,
-533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,
-533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,533,
-533,533,533,531,532,532,531,531,531,531,532,532,531,532,532,532,
-532,534,534,534,534,534,534,534,534,534,534,534,534,534,115,108,
-535,535,535,535,535,535,535,535,535,535,115,115,115,115,534,534,
-305,305,305,305,305,307,536,305,305,305,305,305,305,305,305,305,
-309,309,309,309,309,309,309,309,309,309,305,305,305,305,305,115,
+555,555,555,555,555,555,555,555,555,555,556,556,556,556,556,556,
+556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+556,556,556,556,556,556,557,557,557,557,557,557,557,557, 4,558,
+559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
+559,559,559,559,559,559,559,560,560,560,560,560,560,560,560,560,
+560,560,561,561,115,115,115,115,115,115,115,115,115,115,115,562,
+333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,333,
+333,333,333,333,333,333,333,333,333,333,333,333,333,115,115,115,
/* block 110 */
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,537,
-537,537,537,537,537,537,537,537,537,538,538,538,538,538,538,539,
-539,538,538,539,539,538,538,115,115,115,115,115,115,115,115,115,
-537,537,537,538,537,537,537,537,537,537,537,537,538,539,115,115,
-540,540,540,540,540,540,540,540,540,540,115,115,541,541,541,541,
-305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,305,
-536,305,305,305,305,305,305,311,311,311,305,306,307,306,305,305,
+563,563,563,564,565,565,565,565,565,565,565,565,565,565,565,565,
+565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,
+565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,
+565,565,565,563,564,564,563,563,563,563,564,564,563,564,564,564,
+564,566,566,566,566,566,566,566,566,566,566,566,566,566,115,108,
+567,567,567,567,567,567,567,567,567,567,115,115,115,115,566,566,
+323,323,323,323,323,325,568,323,323,323,323,323,323,323,323,323,
+327,327,327,327,327,327,327,327,327,327,323,323,323,323,323,115,
/* block 111 */
-542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
-542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
-542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,542,
-543,542,543,543,543,542,542,543,543,542,542,542,542,542,543,543,
-542,543,542,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,542,542,544,545,545,
-546,546,546,546,546,546,546,546,546,546,546,547,548,548,547,547,
-549,549,546,550,550,547,548,115,115,115,115,115,115,115,115,115,
+569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
+569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
+569,569,569,569,569,569,569,569,569,570,570,570,570,570,570,571,
+571,570,570,571,571,570,570,115,115,115,115,115,115,115,115,115,
+569,569,569,570,569,569,569,569,569,569,569,569,570,571,115,115,
+572,572,572,572,572,572,572,572,572,572,115,115,573,573,573,573,
+323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,323,
+568,323,323,323,323,323,323,329,329,329,323,324,325,324,323,323,
/* block 112 */
-115,318,318,318,318,318,318,115,115,318,318,318,318,318,318,115,
-115,318,318,318,318,318,318,115,115,115,115,115,115,115,115,115,
-318,318,318,318,318,318,318,115,318,318,318,318,318,318,318,115,
+574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
+574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
+574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,574,
+575,574,575,575,575,574,574,575,575,574,574,574,574,574,575,575,
+574,575,574,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,574,574,576,577,577,
+578,578,578,578,578,578,578,578,578,578,578,579,580,580,579,579,
+581,581,578,582,582,579,580,115,115,115,115,115,115,115,115,115,
+
+/* block 113 */
+115,336,336,336,336,336,336,115,115,336,336,336,336,336,336,115,
+115,336,336,336,336,336,336,115,115,115,115,115,115,115,115,115,
+336,336,336,336,336,336,336,115,336,336,336,336,336,336,336,115,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33,
- 33, 33, 33,551, 33, 33, 33, 33, 33, 33, 33, 14,107,107,107,107,
+ 33, 33, 33,583, 33, 33, 33, 33, 33, 33, 33, 14,107,107,107,107,
33, 33, 33, 33, 33,123,115,115,115,115,115,115,115,115,115,115,
-552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
-
-/* block 113 */
-552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
-552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
-552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
-552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,552,
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,546,
-546,546,546,547,547,548,547,547,548,547,547,549,547,548,115,115,
-553,553,553,553,553,553,553,553,553,553,115,115,115,115,115,115,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
/* block 114 */
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
+578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
+578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
+578,578,578,579,579,580,579,579,580,579,579,581,579,580,115,115,
+585,585,585,585,585,585,585,585,585,585,115,115,115,115,115,115,
/* block 115 */
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
/* block 116 */
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
/* block 117 */
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
/* block 118 */
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
/* block 119 */
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
/* block 120 */
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-554,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,554,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,554,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
/* block 121 */
-555,555,555,555,555,555,555,555,554,555,555,555,555,555,555,555,
-555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,555,
-555,555,555,555,115,115,115,115,115,115,115,115,115,115,115,115,
-316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,316,
-316,316,316,316,316,316,316,115,115,115,115,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,317,
-317,317,317,317,317,317,317,317,317,317,317,317,115,115,115,115,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+586,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,586,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,586,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
/* block 122 */
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
-556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,556,
+587,587,587,587,587,587,587,587,586,587,587,587,587,587,587,587,
+587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
+587,587,587,587,115,115,115,115,115,115,115,115,115,115,115,115,
+334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,334,
+334,334,334,334,334,334,334,115,115,115,115,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,335,
+335,335,335,335,335,335,335,335,335,335,335,335,115,115,115,115,
/* block 123 */
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
+588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
/* block 124 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
/* block 125 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
/* block 126 */
- 33, 33, 33, 33, 33, 33, 33,115,115,115,115,115,115,115,115,115,
-115,115,115,186,186,186,186,186,115,115,115,115,115,193,190,193,
-193,193,193,193,193,193,193,193,193,558,193,193,193,193,193,193,
-193,193,193,193,193,193,193,115,193,193,193,193,193,115,193,115,
-193,193,115,193,193,115,193,193,193,193,193,193,193,193,193,193,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 127 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,559,559,559,559,559,559,559,559,559,559,559,559,559,559,
-559,559,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+ 33, 33, 33, 33, 33, 33, 33,115,115,115,115,115,115,115,115,115,
+115,115,115,200,200,200,200,200,115,115,115,115,115,207,204,207,
+207,207,207,207,207,207,207,207,207,590,207,207,207,207,207,207,
+207,207,207,207,207,207,207,115,207,207,207,207,207,115,207,115,
+207,207,115,207,207,115,207,207,207,207,207,207,207,207,207,207,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 128 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,591,591,591,591,591,591,591,591,591,591,591,591,591,591,
+591,591,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 129 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200, 7, 6,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 130 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-115,115,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216, 7, 6,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-200,200,200,200,200,200,200,200,200,200,200,200,197,198,115,115,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
/* block 131 */
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+115,115,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+216,216,216,216,216,216,216,216,216,216,216,216,212,213,115,115,
+
+/* block 132 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
4, 4, 4, 4, 4, 4, 4, 6, 7, 4,115,115,115,115,115,115,
-110,110,110,110,110,110,110,110,110,110,110,110,110,110,178,178,
+110,110,110,110,110,110,110,110,110,110,110,110,110,110,192,192,
4, 9, 9, 15, 15, 6, 7, 6, 7, 6, 7, 6, 7, 6, 7, 6,
7, 6, 7, 6, 7, 4, 4, 6, 7, 4, 4, 4, 4, 15, 15, 15,
4, 4, 4,115, 4, 4, 4, 4, 9, 6, 7, 6, 7, 6, 7, 4,
4, 4, 8, 9, 8, 8, 8,115, 4, 5, 4, 4,115,115,115,115,
-200,200,200,200,200,115,200,200,200,200,200,200,200,200,200,200,
-
-/* block 132 */
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,115,115, 22,
+216,216,216,216,216,115,216,216,216,216,216,216,216,216,216,216,
/* block 133 */
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,115,115, 22,
+
+/* block 134 */
115, 4, 4, 4, 5, 4, 4, 4, 6, 7, 4, 8, 4, 9, 4, 4,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 8, 8, 8, 4,
4, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 6, 4, 7, 14, 15,
14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 6, 8, 7, 8, 6,
- 7, 4, 6, 7, 4, 4,479,479,479,479,479,479,479,479,479,479,
-108,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
+ 7, 4, 6, 7, 4, 4,509,509,509,509,509,509,509,509,509,509,
+108,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
-/* block 134 */
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,479,
-479,479,479,479,479,479,479,479,479,479,479,479,479,479,560,560,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,
-482,482,482,482,482,482,482,482,482,482,482,482,482,482,482,115,
-115,115,482,482,482,482,482,482,115,115,482,482,482,482,482,482,
-115,115,482,482,482,482,482,482,115,115,482,482,482,115,115,115,
+/* block 135 */
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,509,
+509,509,509,509,509,509,509,509,509,509,509,509,509,509,592,592,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,
+512,512,512,512,512,512,512,512,512,512,512,512,512,512,512,115,
+115,115,512,512,512,512,512,512,115,115,512,512,512,512,512,512,
+115,115,512,512,512,512,512,512,115,115,512,512,512,115,115,115,
5, 5, 8, 14, 19, 5, 5,115, 19, 8, 8, 8, 8, 19, 19,115,
-437,437,437,437,437,437,437,437,437, 22, 22, 22, 19, 19,115,115,
+465,465,465,465,465,465,465,465,465, 22, 22, 22, 19, 19,115,115,
-/* block 135 */
-561,561,561,561,561,561,561,561,561,561,561,561,115,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,115,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,115,561,561,115,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,115,115,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,115,115,
+/* block 136 */
+593,593,593,593,593,593,593,593,593,593,593,593,115,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,115,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,115,593,593,115,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,115,115,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 136 */
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,561,
-561,561,561,561,561,561,561,561,561,561,561,115,115,115,115,115,
-
/* block 137 */
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,593,
+593,593,593,593,593,593,593,593,593,593,593,115,115,115,115,115,
+
+/* block 138 */
4, 4, 4,115,115,115,115, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23,115,115,115, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,
-562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,
-562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,562,
-562,562,562,562,562,563,563,563,563,564,564,564,564,564,564,564,
+594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
+594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
+594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
+594,594,594,594,594,595,595,595,595,596,596,596,596,596,596,596,
-/* block 138 */
-564,564,564,564,564,564,564,564,564,564,563,563,564,115,115,115,
+/* block 139 */
+596,596,596,596,596,596,596,596,596,596,595,595,596,596,596,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
-564,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+596,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,115,115,
-/* block 139 */
+/* block 140 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
@@ -2777,479 +2886,509 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 140 */
-565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,565,
-565,565,565,565,565,565,565,565,565,565,565,565,565,115,115,115,
-566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,
-566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,
-566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,566,
-566,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 141 */
+597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,597,
+597,597,597,597,597,597,597,597,597,597,597,597,597,115,115,115,
+598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
+598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
+598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
+598,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
110, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115,115,
-/* block 141 */
-567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,
-567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,567,
-568,568,568,568,115,115,115,115,115,115,115,115,115,115,115,115,
-569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,569,
-569,570,569,569,569,569,569,569,569,569,570,115,115,115,115,115,
-571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
-571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,571,
-571,571,571,571,571,571,572,572,572,572,572,115,115,115,115,115,
-
/* block 142 */
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,573,
-573,573,573,573,573,573,573,573,573,573,573,573,573,573,115,574,
-575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,
-575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,575,
-575,575,575,575,115,115,115,115,575,575,575,575,575,575,575,575,
-576,577,577,577,577,577,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
+599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,599,
+600,600,600,600,115,115,115,115,115,115,115,115,115,599,599,599,
+601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
+601,602,601,601,601,601,601,601,601,601,602,115,115,115,115,115,
+603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
+603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
+603,603,603,603,603,603,604,604,604,604,604,115,115,115,115,115,
/* block 143 */
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,578,
-578,578,578,578,578,578,578,578,579,579,579,579,579,579,579,579,
-579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,
-579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,579,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,580,
-
-/* block 144 */
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,581,
-581,581,581,581,581,581,581,581,581,581,581,581,581,581,115,115,
-582,582,582,582,582,582,582,582,582,582,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
+605,605,605,605,605,605,605,605,605,605,605,605,605,605,115,606,
+607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
+607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,607,
+607,607,607,607,115,115,115,115,607,607,607,607,607,607,607,607,
+608,609,609,609,609,609,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 144 */
+610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,
+610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,
+610,610,610,610,610,610,610,610,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,611,
+612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
+612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
+612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,612,
+
/* block 145 */
-583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,
-583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,583,
-583,583,583,583,583,583,583,583,115,115,115,115,115,115,115,115,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,584,
-584,584,584,584,115,115,115,115,115,115,115,115,115,115,115,585,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,
+613,613,613,613,613,613,613,613,613,613,613,613,613,613,115,115,
+614,614,614,614,614,614,614,614,614,614,115,115,115,115,115,115,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
+615,615,615,615,115,115,115,115,616,616,616,616,616,616,616,616,
+616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,616,
+616,616,616,616,616,616,616,616,616,616,616,616,115,115,115,115,
/* block 146 */
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
+617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
+617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,617,
+617,617,617,617,617,617,617,617,115,115,115,115,115,115,115,115,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,618,
+618,618,618,618,115,115,115,115,115,115,115,115,115,115,115,619,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 147 */
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,586,115,115,115,115,115,115,115,115,115,
-586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,586,
-586,586,586,586,586,586,115,115,115,115,115,115,115,115,115,115,
-586,586,586,586,586,586,586,586,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
/* block 148 */
-587,587,587,587,587,587,115,115,587,115,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,587,
-587,587,587,587,587,587,115,587,587,115,115,115,587,115,115,587,
-588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,588,
-588,588,588,588,588,588,115,589,590,590,590,590,590,590,590,590,
-591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,591,
-591,591,591,591,591,591,591,592,592,593,593,593,593,593,593,593,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,620,115,115,115,115,115,115,115,115,115,
+620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
+620,620,620,620,620,620,115,115,115,115,115,115,115,115,115,115,
+620,620,620,620,620,620,620,620,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 149 */
-594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,
-594,594,594,594,594,594,594,594,594,594,594,594,594,594,594,115,
-115,115,115,115,115,115,115,595,595,595,595,595,595,595,595,595,
+621,621,621,621,621,621,115,115,621,115,621,621,621,621,621,621,
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
+621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,621,
+621,621,621,621,621,621,115,621,621,115,115,115,621,115,115,621,
+622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,
+622,622,622,622,622,622,115,623,624,624,624,624,624,624,624,624,
+625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,625,
+625,625,625,625,625,625,625,626,626,627,627,627,627,627,627,627,
+
+/* block 150 */
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,
+628,628,628,628,628,628,628,628,628,628,628,628,628,628,628,115,
+115,115,115,115,115,115,115,629,629,629,629,629,629,629,629,629,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
-596,596,596,115,596,596,115,115,115,115,115,597,597,597,597,597,
+630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
+630,630,630,115,630,630,115,115,115,115,115,631,631,631,631,631,
-/* block 150 */
-598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,598,
-598,598,598,598,598,598,599,599,599,599,599,599,115,115,115,600,
-601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,601,
-601,601,601,601,601,601,601,601,601,601,115,115,115,115,115,602,
+/* block 151 */
+632,632,632,632,632,632,632,632,632,632,632,632,632,632,632,632,
+632,632,632,632,632,632,633,633,633,633,633,633,115,115,115,634,
+635,635,635,635,635,635,635,635,635,635,635,635,635,635,635,635,
+635,635,635,635,635,635,635,635,635,635,115,115,115,115,115,636,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 151 */
-603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
-603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,603,
-604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,604,
-604,604,604,604,604,604,604,604,115,115,115,115,605,605,604,604,
-605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
-115,115,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
-605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
-605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,605,
-
/* block 152 */
-606,607,607,607,115,607,607,115,115,115,115,115,607,607,607,607,
-606,606,606,606,115,606,606,606,115,606,606,606,606,606,606,606,
-606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,606,
-606,606,606,606,115,115,115,115,607,607,607,115,115,115,115,607,
-608,608,608,608,608,608,608,608,115,115,115,115,115,115,115,115,
-609,609,609,609,609,609,609,609,609,115,115,115,115,115,115,115,
-610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,610,
-610,610,610,610,610,610,610,610,610,610,610,610,610,611,611,612,
+637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,
+637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,637,
+638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
+638,638,638,638,638,638,638,638,115,115,115,115,639,639,638,638,
+639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
+115,115,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
+639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
+639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,639,
/* block 153 */
-613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,613,
-613,613,613,613,613,613,613,613,613,613,613,613,613,614,614,614,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-615,615,615,615,615,615,615,615,616,615,615,615,615,615,615,615,
-615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,615,
-615,615,615,615,615,617,617,115,115,115,115,618,618,618,618,618,
-619,619,619,619,619,619,619,115,115,115,115,115,115,115,115,115,
+640,641,641,641,115,641,641,115,115,115,115,115,641,641,641,641,
+640,640,640,640,115,640,640,640,115,640,640,640,640,640,640,640,
+640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,640,
+640,640,640,640,115,115,115,115,641,641,641,115,115,115,115,641,
+642,642,642,642,642,642,642,642,115,115,115,115,115,115,115,115,
+643,643,643,643,643,643,643,643,643,115,115,115,115,115,115,115,
+644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,644,
+644,644,644,644,644,644,644,644,644,644,644,644,644,645,645,646,
/* block 154 */
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,620,
-620,620,620,620,620,620,115,115,115,621,621,621,621,621,621,621,
-622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,622,
-622,622,622,622,622,622,115,115,623,623,623,623,623,623,623,623,
-624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,624,
-624,624,624,115,115,115,115,115,625,625,625,625,625,625,625,625,
+647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,647,
+647,647,647,647,647,647,647,647,647,647,647,647,647,648,648,648,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+649,649,649,649,649,649,649,649,650,649,649,649,649,649,649,649,
+649,649,649,649,649,649,649,649,649,649,649,649,649,649,649,649,
+649,649,649,649,649,651,651,115,115,115,115,652,652,652,652,652,
+653,653,653,653,653,653,653,115,115,115,115,115,115,115,115,115,
/* block 155 */
-626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,626,
-626,626,115,115,115,115,115,115,115,627,627,627,627,115,115,115,
-115,115,115,115,115,115,115,115,115,628,628,628,628,628,628,628,
+654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
+654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
+654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,654,
+654,654,654,654,654,654,115,115,115,655,655,655,655,655,655,655,
+656,656,656,656,656,656,656,656,656,656,656,656,656,656,656,656,
+656,656,656,656,656,656,115,115,657,657,657,657,657,657,657,657,
+658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,658,
+658,658,658,115,115,115,115,115,659,659,659,659,659,659,659,659,
+
+/* block 156 */
+660,660,660,660,660,660,660,660,660,660,660,660,660,660,660,660,
+660,660,115,115,115,115,115,115,115,661,661,661,661,115,115,115,
+115,115,115,115,115,115,115,115,115,662,662,662,662,662,662,662,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 156 */
-629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
-629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
-629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
-629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,629,
-629,629,629,629,629,629,629,629,629,115,115,115,115,115,115,115,
+/* block 157 */
+663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
+663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
+663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
+663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,663,
+663,663,663,663,663,663,663,663,663,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 157 */
-630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,630,
-630,630,630,115,115,115,115,115,115,115,115,115,115,115,115,115,
-631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
-631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
-631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,631,
-631,631,631,115,115,115,115,115,115,115,632,632,632,632,632,632,
-
/* block 158 */
+664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
+664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
+664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,664,
+664,664,664,115,115,115,115,115,115,115,115,115,115,115,115,115,
+665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
+665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
+665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,665,
+665,665,665,115,115,115,115,115,115,115,666,666,666,666,666,666,
+
+/* block 159 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,
-633,633,633,633,633,633,633,633,633,633,633,633,633,633,633,115,
-
-/* block 159 */
-634,635,634,636,636,636,636,636,636,636,636,636,636,636,636,636,
-636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,
-636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,636,
-636,636,636,636,636,636,636,636,635,635,635,635,635,635,635,635,
-635,635,635,635,635,635,635,637,637,637,637,637,637,637,115,115,
-115,115,638,638,638,638,638,638,638,638,638,638,638,638,638,638,
-638,638,638,638,638,638,639,639,639,639,639,639,639,639,639,639,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,635,
+667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
+667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,115,
/* block 160 */
-640,640,641,642,642,642,642,642,642,642,642,642,642,642,642,642,
-642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,
-642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,642,
-641,641,641,640,640,640,640,641,641,640,640,643,643,644,643,643,
-643,643,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,645,
-645,645,645,645,645,645,645,645,645,115,115,115,115,115,115,115,
-646,646,646,646,646,646,646,646,646,646,115,115,115,115,115,115,
+668,669,668,670,670,670,670,670,670,670,670,670,670,670,670,670,
+670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
+670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,670,
+670,670,670,670,670,670,670,670,669,669,669,669,669,669,669,669,
+669,669,669,669,669,669,669,671,671,671,671,671,671,671,115,115,
+115,115,672,672,672,672,672,672,672,672,672,672,672,672,672,672,
+672,672,672,672,672,672,673,673,673,673,673,673,673,673,673,673,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,669,
/* block 161 */
-647,647,647,648,648,648,648,648,648,648,648,648,648,648,648,648,
-648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,648,
-648,648,648,648,648,648,648,647,647,647,647,647,649,647,647,647,
-647,647,647,647,647,115,650,650,650,650,650,650,650,650,650,650,
-651,651,651,651,115,115,115,115,115,115,115,115,115,115,115,115,
-652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
-652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,652,
-652,652,652,653,654,654,652,115,115,115,115,115,115,115,115,115,
+674,674,675,676,676,676,676,676,676,676,676,676,676,676,676,676,
+676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
+676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,676,
+675,675,675,674,674,674,674,675,675,674,674,677,677,678,677,677,
+677,677,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,679,
+679,679,679,679,679,679,679,679,679,115,115,115,115,115,115,115,
+680,680,680,680,680,680,680,680,680,680,115,115,115,115,115,115,
/* block 162 */
-655,655,656,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,657,
-657,657,657,656,656,656,655,655,655,655,655,655,655,655,655,656,
-656,657,657,657,657,658,658,658,658,658,655,655,655,658,115,115,
-659,659,659,659,659,659,659,659,659,659,657,658,657,658,658,658,
-115,660,660,660,660,660,660,660,660,660,660,660,660,660,660,660,
-660,660,660,660,660,115,115,115,115,115,115,115,115,115,115,115,
+681,681,681,682,682,682,682,682,682,682,682,682,682,682,682,682,
+682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,682,
+682,682,682,682,682,682,682,681,681,681,681,681,683,681,681,681,
+681,681,681,681,681,115,684,684,684,684,684,684,684,684,684,684,
+685,685,685,685,115,115,115,115,115,115,115,115,115,115,115,115,
+686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
+686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
+686,686,686,687,688,688,686,115,115,115,115,115,115,115,115,115,
/* block 163 */
-661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,661,
-661,661,115,661,661,661,661,661,661,661,661,661,661,661,661,661,
-661,661,661,661,661,661,661,661,661,661,661,661,662,662,662,663,
-663,663,662,662,663,662,663,663,664,664,664,664,664,664,115,115,
+689,689,690,691,691,691,691,691,691,691,691,691,691,691,691,691,
+691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
+691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
+691,691,691,690,690,690,689,689,689,689,689,689,689,689,689,690,
+690,691,692,692,691,693,693,693,693,693,689,689,689,693,115,115,
+694,694,694,694,694,694,694,694,694,694,691,693,691,693,693,693,
+115,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
+695,695,695,695,695,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 164 */
+696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,696,
+696,696,115,696,696,696,696,696,696,696,696,696,696,696,696,696,
+696,696,696,696,696,696,696,696,696,696,696,696,697,697,697,698,
+698,698,697,697,698,697,698,698,699,699,699,699,699,699,698,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 164 */
-665,665,665,665,665,665,665,115,665,115,665,665,665,665,115,665,
-665,665,665,665,665,665,665,665,665,665,665,665,665,665,115,665,
-665,665,665,665,665,665,665,665,665,666,115,115,115,115,115,115,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,
-667,667,667,667,667,667,667,667,667,667,667,667,667,667,667,668,
-669,669,669,668,668,668,668,668,668,668,668,115,115,115,115,115,
-670,670,670,670,670,670,670,670,670,670,115,115,115,115,115,115,
-
/* block 165 */
-671,671,672,672,115,673,673,673,673,673,673,673,673,115,115,673,
-673,115,115,673,673,673,673,673,673,673,673,673,673,673,673,673,
-673,673,673,673,673,673,673,673,673,115,673,673,673,673,673,673,
-673,115,673,673,115,673,673,673,673,673,115,115,671,673,674,672,
-671,672,672,672,672,115,115,672,672,115,115,672,672,672,115,115,
-673,115,115,115,115,115,115,674,115,115,115,115,115,673,673,673,
-673,673,672,672,115,115,671,671,671,671,671,671,671,115,115,115,
-671,671,671,671,671,115,115,115,115,115,115,115,115,115,115,115,
+700,700,700,700,700,700,700,115,700,115,700,700,700,700,115,700,
+700,700,700,700,700,700,700,700,700,700,700,700,700,700,115,700,
+700,700,700,700,700,700,700,700,700,701,115,115,115,115,115,115,
+702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
+702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
+702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,703,
+704,704,704,703,703,703,703,703,703,703,703,115,115,115,115,115,
+705,705,705,705,705,705,705,705,705,705,115,115,115,115,115,115,
/* block 166 */
-675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
-675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
-675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,675,
-676,677,677,678,678,678,678,678,678,677,678,677,677,676,677,678,
-678,677,678,678,675,675,679,675,115,115,115,115,115,115,115,115,
-680,680,680,680,680,680,680,680,680,680,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+706,706,707,707,115,708,708,708,708,708,708,708,708,115,115,708,
+708,115,115,708,708,708,708,708,708,708,708,708,708,708,708,708,
+708,708,708,708,708,708,708,708,708,115,708,708,708,708,708,708,
+708,115,708,708,115,708,708,708,708,708,115,115,706,708,709,707,
+706,707,707,707,707,115,115,707,707,115,115,707,707,707,115,115,
+708,115,115,115,115,115,115,709,115,115,115,115,115,708,708,708,
+708,708,707,707,115,115,706,706,706,706,706,706,706,115,115,115,
+706,706,706,706,706,115,115,115,115,115,115,115,115,115,115,115,
/* block 167 */
-681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
-681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,
-681,681,681,681,681,681,681,681,681,681,681,681,681,681,681,682,
-683,683,684,684,684,684,115,115,683,683,683,683,684,684,683,684,
-684,685,685,685,685,685,685,685,685,685,685,685,685,685,685,685,
-685,685,685,685,685,685,685,685,681,681,681,681,684,684,115,115,
+710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
+710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
+710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,710,
+710,710,710,710,710,711,711,711,712,712,712,712,712,712,712,712,
+711,711,712,712,712,711,712,710,710,710,710,713,713,713,713,713,
+714,714,714,714,714,714,714,714,714,714,115,713,115,713,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 168 */
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,686,
-687,687,687,688,688,688,688,688,688,688,688,687,687,688,687,688,
-688,689,689,689,686,115,115,115,115,115,115,115,115,115,115,115,
-690,690,690,690,690,690,690,690,690,690,115,115,115,115,115,115,
+715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
+715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
+715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,715,
+716,717,717,718,718,718,718,718,718,717,718,717,717,716,717,718,
+718,717,718,718,715,715,719,715,115,115,115,115,115,115,115,115,
+720,720,720,720,720,720,720,720,720,720,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 169 */
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,691,
-691,691,691,691,691,691,691,691,691,691,691,692,693,692,693,693,
-692,692,692,692,692,692,693,692,115,115,115,115,115,115,115,115,
-694,694,694,694,694,694,694,694,694,694,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
+721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,
+721,721,721,721,721,721,721,721,721,721,721,721,721,721,721,722,
+723,723,724,724,724,724,115,115,723,723,723,723,724,724,723,724,
+724,725,725,725,725,725,725,725,725,725,725,725,725,725,725,725,
+725,725,725,725,725,725,725,725,721,721,721,721,724,724,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 170 */
-695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,695,
-695,695,695,695,695,695,695,695,695,695,115,115,115,696,696,696,
-697,697,696,696,696,696,697,696,696,696,696,696,115,115,115,115,
-698,698,698,698,698,698,698,698,698,698,699,699,700,700,700,701,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
+726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
+726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
+727,727,727,728,728,728,728,728,728,728,728,727,727,728,727,728,
+728,729,729,729,726,115,115,115,115,115,115,115,115,115,115,115,
+730,730,730,730,730,730,730,730,730,730,115,115,115,115,115,115,
+369,369,369,369,369,369,369,369,369,369,369,369,369,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 171 */
+731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
+731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,731,
+731,731,731,731,731,731,731,731,731,731,731,732,733,732,733,733,
+732,732,732,732,732,732,733,732,115,115,115,115,115,115,115,115,
+734,734,734,734,734,734,734,734,734,734,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,702,
-703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
-703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,703,
-704,704,704,704,704,704,704,704,704,704,705,705,705,705,705,705,
-705,705,705,115,115,115,115,115,115,115,115,115,115,115,115,706,
/* block 172 */
+735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,735,
+735,735,735,735,735,735,735,735,735,735,115,115,115,736,736,736,
+737,737,736,736,736,736,737,736,736,736,736,736,115,115,115,115,
+738,738,738,738,738,738,738,738,738,738,739,739,740,740,740,741,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,707,
-707,707,707,707,707,707,707,707,707,115,115,115,115,115,115,115,
/* block 173 */
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-
-/* block 174 */
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
+742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,743,
+744,744,744,744,744,744,744,744,744,744,745,745,745,745,745,745,
+745,745,745,115,115,115,115,115,115,115,115,115,115,115,115,746,
+
+/* block 174 */
+747,748,748,748,748,748,748,749,749,748,748,747,747,747,747,747,
+747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
+747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,747,
+747,747,747,748,748,748,748,748,748,749,750,748,748,748,748,751,
+751,751,751,751,751,751,751,748,115,115,115,115,115,115,115,115,
+752,753,753,753,753,753,753,754,754,753,753,753,752,752,752,752,
+752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
+752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,752,
/* block 175 */
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,
-709,709,709,709,709,709,709,709,709,709,709,709,709,709,709,115,
-710,710,710,710,710,115,115,115,115,115,115,115,115,115,115,115,
+752,752,752,752,115,115,755,755,755,755,753,753,753,753,753,753,
+753,753,753,753,753,753,753,754,753,753,756,756,756,115,756,756,
+756,756,756,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
+757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
+757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,757,
+757,757,757,757,757,757,757,757,757,115,115,115,115,115,115,115,
/* block 176 */
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,708,
-708,708,708,708,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+758,758,758,758,758,758,758,758,758,115,758,758,758,758,758,758,
+758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,
+758,758,758,758,758,758,758,758,758,758,758,758,758,758,758,759,
+760,760,760,760,760,760,760,115,760,760,760,760,760,760,759,760,
+758,761,761,761,761,761,115,115,115,115,115,115,115,115,115,115,
+762,762,762,762,762,762,762,762,762,762,763,763,763,763,763,763,
+763,763,763,763,763,763,763,763,763,763,763,763,763,115,115,115,
+764,764,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
/* block 177 */
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-
-/* block 178 */
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,
-711,711,711,711,711,711,711,711,711,711,711,711,711,711,711,115,
+765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,765,
+115,115,766,766,766,766,766,766,766,766,766,766,766,766,766,766,
+766,766,766,766,766,766,766,766,115,767,766,766,766,766,766,766,
+766,767,766,766,767,766,766,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 178 */
+768,768,768,768,768,768,768,115,768,768,115,768,768,768,768,768,
+768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
+768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,
+768,769,769,769,769,769,769,115,115,115,769,115,769,769,115,769,
+769,769,769,769,769,769,770,769,115,115,115,115,115,115,115,115,
+771,771,771,771,771,771,771,771,771,771,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 179 */
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
/* block 180 */
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,712,
-712,712,712,712,712,712,712,115,115,115,115,115,115,115,115,115,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 181 */
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,
+773,773,773,773,773,773,773,773,773,773,773,773,773,773,773,115,
+774,774,774,774,774,115,115,115,115,115,115,115,115,115,115,115,
/* block 182 */
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,498,
-498,498,498,498,498,498,498,498,498,115,115,115,115,115,115,115,
-713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,
-713,713,713,713,713,713,713,713,713,713,713,713,713,713,713,115,
-714,714,714,714,714,714,714,714,714,714,115,115,115,115,715,715,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,772,
+772,772,772,772,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
/* block 183 */
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+
+/* block 184 */
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,
+775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,716,
-716,716,716,716,716,716,716,716,716,716,716,716,716,716,115,115,
-717,717,717,717,717,718,115,115,115,115,115,115,115,115,115,115,
-
-/* block 184 */
-719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
-719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
-719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
-720,720,720,720,720,720,720,721,721,721,721,721,722,722,722,722,
-723,723,723,723,721,722,115,115,115,115,115,115,115,115,115,115,
-724,724,724,724,724,724,724,724,724,724,115,725,725,725,725,725,
-725,725,115,719,719,719,719,719,719,719,719,719,719,719,719,719,
-719,719,719,719,719,719,719,719,115,115,115,115,115,719,719,719,
/* block 185 */
-719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,719,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+
+/* block 186 */
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,776,
+776,776,776,776,776,776,776,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 187 */
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+
+/* block 188 */
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,530,
+530,530,530,530,530,530,530,530,530,115,115,115,115,115,115,115,
+777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,
+777,777,777,777,777,777,777,777,777,777,777,777,777,777,777,115,
+778,778,778,778,778,778,778,778,778,778,115,115,115,115,779,779,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 189 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+780,780,780,780,780,780,780,780,780,780,780,780,780,780,780,780,
+780,780,780,780,780,780,780,780,780,780,780,780,780,780,115,115,
+781,781,781,781,781,782,115,115,115,115,115,115,115,115,115,115,
-/* block 186 */
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,726,
-726,726,726,726,726,115,115,115,115,115,115,115,115,115,115,115,
-726,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
-727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,
-727,727,727,727,727,727,727,727,727,727,727,727,727,727,727,115,
+/* block 190 */
+783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
+783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
+783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
+784,784,784,784,784,784,784,785,785,785,785,785,786,786,786,786,
+787,787,787,787,785,786,115,115,115,115,115,115,115,115,115,115,
+788,788,788,788,788,788,788,788,788,788,115,789,789,789,789,789,
+789,789,115,783,783,783,783,783,783,783,783,783,783,783,783,783,
+783,783,783,783,783,783,783,783,115,115,115,115,115,783,783,783,
-/* block 187 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,728,
-728,728,728,729,729,729,729,729,729,729,729,729,729,729,729,729,
+/* block 191 */
+783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,783,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
@@ -3257,29 +3396,119 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 188 */
-479,477,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 192 */
+790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
+790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
+790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
+790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,790,
+790,790,790,790,790,115,115,115,115,115,115,115,115,115,115,115,
+790,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
+791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,
+791,791,791,791,791,791,791,791,791,791,791,791,791,791,791,115,
+
+/* block 193 */
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,792,
+792,792,792,793,793,793,793,793,793,793,793,793,793,793,793,793,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+794,795,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 194 */
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+
+/* block 195 */
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 196 */
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,796,
+796,796,796,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 197 */
+509,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+
+/* block 198 */
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+
+/* block 199 */
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,
+507,507,507,507,507,507,507,507,507,507,507,507,507,507,507,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-/* block 189 */
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,730,
-730,730,730,730,730,730,730,730,730,730,730,115,115,115,115,115,
-730,730,730,730,730,730,730,730,730,730,730,730,730,115,115,115,
+/* block 200 */
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
-/* block 190 */
-730,730,730,730,730,730,730,730,730,115,115,115,115,115,115,115,
-730,730,730,730,730,730,730,730,730,730,115,115,731,732,732,733,
+/* block 201 */
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,797,
+797,797,797,797,797,797,797,797,797,797,797,797,115,115,115,115,
+
+/* block 202 */
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,798,
+798,798,798,798,798,798,798,798,798,798,798,115,115,115,115,115,
+798,798,798,798,798,798,798,798,798,798,798,798,798,115,115,115,
+
+/* block 203 */
+798,798,798,798,798,798,798,798,798,115,115,115,115,115,115,115,
+798,798,798,798,798,798,798,798,798,798,115,115,799,800,800,801,
22, 22, 22, 22,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
@@ -3287,7 +3516,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 191 */
+/* block 204 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -3297,17 +3526,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
-/* block 192 */
+/* block 205 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19,115,115, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19,734,406,110,110,110, 19, 19, 19,406,734,734,
-734,734,734, 22, 22, 22, 22, 22, 22, 22, 22,110,110,110,110,110,
+ 19, 19, 19, 19, 19,802,433,110,110,110, 19, 19, 19,433,802,802,
+802,802,802, 22, 22, 22, 22, 22, 22, 22, 22,110,110,110,110,110,
-/* block 193 */
+/* block 206 */
110,110,110, 19, 19,110,110,110,110,110,110,110, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19,110,110,110,110, 19, 19,
@@ -3317,17 +3546,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 194 */
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,564,
-564,564,735,735,735,564,115,115,115,115,115,115,115,115,115,115,
+/* block 207 */
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,596,
+596,596,803,803,803,596,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 195 */
+/* block 208 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -3337,157 +3566,177 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 196 */
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,439,439,
-439,439,439,439,439,115,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
+/* block 209 */
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,467,467,
+467,467,467,467,467,115,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-/* block 197 */
-438,438,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,438,115,438,438,
-115,115,438,115,115,438,438,115,115,438,438,438,438,115,438,438,
-438,438,438,438,438,438,439,439,439,439,115,439,115,439,439,439,
-439,439,439,439,115,439,439,439,439,439,439,439,439,439,439,439,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
+/* block 210 */
+466,466,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,466,115,466,466,
+115,115,466,115,115,466,466,115,115,466,466,466,466,115,466,466,
+466,466,466,466,466,466,467,467,467,467,115,467,115,467,467,467,
+467,467,467,467,115,467,467,467,467,467,467,467,467,467,467,467,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-/* block 198 */
-439,439,439,439,438,438,115,438,438,438,438,115,115,438,438,438,
-438,438,438,438,438,115,438,438,438,438,438,438,438,115,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,438,438,115,438,438,438,438,115,
-438,438,438,438,438,115,438,115,115,115,438,438,438,438,438,438,
-438,115,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
+/* block 211 */
+467,467,467,467,466,466,115,466,466,466,466,115,115,466,466,466,
+466,466,466,466,466,115,466,466,466,466,466,466,466,115,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,466,466,115,466,466,466,466,115,
+466,466,466,466,466,115,466,115,115,115,466,466,466,466,466,466,
+466,115,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-/* block 199 */
-438,438,438,438,438,438,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
+/* block 212 */
+466,466,466,466,466,466,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-/* block 200 */
-439,439,439,439,439,439,439,439,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
+/* block 213 */
+467,467,467,467,467,467,467,467,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
-/* block 201 */
-438,438,438,438,438,438,438,438,438,438,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,115,115,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438, 8,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439, 8,439,439,439,439,
-439,439,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438, 8,439,439,439,439,
+/* block 214 */
+466,466,466,466,466,466,466,466,466,466,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,115,115,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466, 8,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467, 8,467,467,467,467,
+467,467,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466, 8,467,467,467,467,
-/* block 202 */
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439, 8,439,439,439,439,439,439,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438, 8,439,439,439,439,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439, 8,
-439,439,439,439,439,439,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438, 8,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
+/* block 215 */
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467, 8,467,467,467,467,467,467,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466, 8,467,467,467,467,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467, 8,
+467,467,467,467,467,467,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466, 8,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
-/* block 203 */
-439,439,439,439,439,439,439,439,439, 8,439,439,439,439,439,439,
-438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,438,
-438,438,438,438,438,438,438,438,438, 8,439,439,439,439,439,439,
-439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,439,
-439,439,439, 8,439,439,439,439,439,439,438,439,115,115, 10, 10,
+/* block 216 */
+467,467,467,467,467,467,467,467,467, 8,467,467,467,467,467,467,
+466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,466,
+466,466,466,466,466,466,466,466,466, 8,467,467,467,467,467,467,
+467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,467,
+467,467,467, 8,467,467,467,467,467,467,466,467,115,115, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-/* block 204 */
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
-736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,736,
+/* block 217 */
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
+804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,804,
-/* block 205 */
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,736,736,736,736,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
-737,737,737,737,737,737,737,737,737,737,737,737,737,736,736,736,
-736,736,736,736,736,737,736,736,736,736,736,736,736,736,736,736,
+/* block 218 */
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,804,804,804,804,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
+805,805,805,805,805,805,805,805,805,805,805,805,805,804,804,804,
+804,804,804,804,804,805,804,804,804,804,804,804,804,804,804,804,
-/* block 206 */
-736,736,736,736,737,736,736,738,738,738,738,738,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,737,737,737,737,737,
-115,737,737,737,737,737,737,737,737,737,737,737,737,737,737,737,
+/* block 219 */
+804,804,804,804,805,804,804,806,806,806,806,806,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,805,805,805,805,805,
+115,805,805,805,805,805,805,805,805,805,805,805,805,805,805,805,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 207 */
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
+/* block 220 */
+807,807,807,807,807,807,807,115,807,807,807,807,807,807,807,807,
+807,807,807,807,807,807,807,807,807,115,115,807,807,807,807,807,
+807,807,115,807,807,115,807,807,807,807,807,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 208 */
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,739,
-739,739,739,739,739,115,115,740,740,740,740,740,740,740,740,740,
-741,741,741,741,741,741,741,115,115,115,115,115,115,115,115,115,
+/* block 221 */
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+
+/* block 222 */
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,808,
+808,808,808,808,808,115,115,809,809,809,809,809,809,809,809,809,
+810,810,810,810,810,810,810,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 209 */
-200,200,200,200,115,200,200,200,200,200,200,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,200,
-115,200,200,115,200,115,115,200,115,200,200,200,200,200,200,200,
-200,200,200,115,200,200,200,200,115,200,115,200,115,115,115,115,
-115,115,200,115,115,115,115,200,115,200,115,200,115,200,200,200,
-115,200,200,115,200,115,115,200,115,200,115,200,115,200,115,200,
-115,200,200,115,200,115,115,200,200,200,200,115,200,200,200,200,
-200,200,200,115,200,200,200,200,115,200,200,200,200,115,200,115,
+/* block 223 */
+811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,
+811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,811,
+811,811,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,812,
+812,812,812,812,813,813,813,813,813,813,813,115,115,115,115,115,
+814,814,814,814,814,814,814,814,814,814,115,115,115,115,815,815,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 210 */
-200,200,200,200,200,200,200,200,200,200,115,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,115,115,115,115,
-115,200,200,200,115,200,200,200,200,200,115,200,200,200,200,200,
-200,200,200,200,200,200,200,200,200,200,200,200,115,115,115,115,
+/* block 224 */
+216,216,216,216,115,216,216,216,216,216,216,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,216,
+115,216,216,115,216,115,115,216,115,216,216,216,216,216,216,216,
+216,216,216,115,216,216,216,216,115,216,115,216,115,115,115,115,
+115,115,216,115,115,115,115,216,115,216,115,216,115,216,216,216,
+115,216,216,115,216,115,115,216,115,216,115,216,115,216,115,216,
+115,216,216,115,216,115,115,216,216,216,216,115,216,216,216,216,
+216,216,216,115,216,216,216,216,115,216,216,216,216,115,216,115,
+
+/* block 225 */
+216,216,216,216,216,216,216,216,216,216,115,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,115,115,115,115,
+115,216,216,216,115,216,216,216,216,216,115,216,216,216,216,216,
+216,216,216,216,216,216,216,216,216,216,216,216,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-195,195,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+210,210,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 211 */
+/* block 226 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
@@ -3497,7 +3746,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 212 */
+/* block 227 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,
@@ -3507,7 +3756,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
-/* block 213 */
+/* block 228 */
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,
@@ -3517,67 +3766,107 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 214 */
+/* block 229 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,742,742,742,742,742,742,742,742,742,742,
-742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,742,
+115,115,115,115,115,115,816,816,816,816,816,816,816,816,816,816,
+816,816,816,816,816,816,816,816,816,816,816,816,816,816,816,816,
-/* block 215 */
-743, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 230 */
+817, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
19, 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 216 */
+/* block 231 */
+ 19, 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+
+/* block 232 */
+ 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,479, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19,478,478,478, 19, 19,478, 19, 19,478,478,478, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 14, 14, 14, 14, 14,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479, 19,479, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,818,818,818,818,818,
-/* block 217 */
+/* block 233 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19,478,478, 19, 19,478,478,478,478,478,478,478,478,478,478,
+478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19,819,819,819,819, 19, 19, 19, 19,478, 19,
+478,478,478,478,478,478,478,478,478, 19, 19, 19,478, 19, 19, 19,
+
+/* block 234 */
+ 19,478,478,478, 19,478,478,478, 19, 19, 19,479, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,479,479, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115, 19, 19, 19, 19, 19,
-/* block 218 */
+/* block 235 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19,479, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19, 19, 19, 19,115, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,478,478, 19, 19, 19, 19,478, 19, 19, 19, 19, 19,
+
+/* block 236 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+478, 19, 19, 19, 19,478,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,479, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 219 */
+/* block 237 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19,478,478,478, 19, 19, 19,478,478,478,478,478,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
- 19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+
+/* block 238 */
+479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19,479, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19,478,478,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+478, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,478, 19, 19, 19,
+ 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
- 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
-/* block 220 */
+/* block 239 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -3587,7 +3876,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 221 */
+/* block 240 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -3597,7 +3886,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 222 */
+/* block 241 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
@@ -3607,7 +3896,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
-/* block 223 */
+/* block 242 */
19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,
@@ -3617,97 +3906,107 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 224 */
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
- 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 243 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19,478,478,478,478,478, 19,478,478,
+ 19, 19, 19, 19, 19, 19,478, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+478,478,478,478,478,478,478,478,478,478, 19, 19, 19,478,478,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-/* block 225 */
- 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+/* block 244 */
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
19,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+ 19,478,478,478,478,478,478,478,478,478,478,478,478,478, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19,115,115,115,115,115,115,115,115,115,
115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 226 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 227 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,115,115,115,115,115,115,115,115,115,115,115,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-
-/* block 228 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-
-/* block 229 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 230 */
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,485,
-485,485,485,485,485,485,485,485,485,485,485,485,485,485,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
-
-/* block 231 */
-437, 22,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
-
-/* block 232 */
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-/* block 233 */
+/* block 245 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 246 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,115,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+
+/* block 247 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+
+/* block 248 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+
+/* block 249 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 250 */
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,515,
+515,515,515,515,515,515,515,515,515,515,515,515,515,515,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,
+
+/* block 251 */
+465, 22,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,820,
+
+/* block 252 */
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+
+/* block 253 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
@@ -3717,7 +4016,7 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-/* block 234 */
+/* block 254 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
@@ -3725,17 +4024,17 @@ const uint16_t PRIV(ucd_stage2)[] = { /* 60416 bytes, block = 128 */
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,
-437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,437,
-
-/* block 235 */
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,557,
-557,557,557,557,557,557,557,557,557,557,557,557,557,557,115,115,
+465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,465,
+
+/* block 255 */
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,589,
+589,589,589,589,589,589,589,589,589,589,589,589,589,589,115,115,
};
diff --git a/src/3rdparty/pcre2/src/pcre2_ucp.h b/src/3rdparty/pcre2/src/pcre2_ucp.h
index 0b7553e5e0..defba4c10e 100644
--- a/src/3rdparty/pcre2/src/pcre2_ucp.h
+++ b/src/3rdparty/pcre2/src/pcre2_ucp.h
@@ -39,8 +39,8 @@ POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _PCRE2_UCP_H
-#define _PCRE2_UCP_H
+#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
+#define PCRE2_UCP_H_IDEMPOTENT_GUARD
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
@@ -100,9 +100,7 @@ enum {
ucp_Zs /* Space separator */
};
-/* These are grapheme break properties. Note that the code for processing them
-assumes that the values are less than 16. If more values are added that take
-the number to 16 or more, the code will have to be rewritten. */
+/* These are grapheme break properties. */
enum {
ucp_gbCR, /* 0 */
@@ -117,7 +115,12 @@ enum {
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
- ucp_gbOther /* 12 */
+ ucp_gbOther, /* 12 */
+ ucp_gbE_Base, /* 13 */
+ ucp_gbE_Modifier, /* 14 */
+ ucp_gbE_Base_GAZ, /* 15 */
+ ucp_gbZWJ, /* 16 */
+ ucp_gbGlue_After_Zwj /* 17 */
};
/* These are the script identifications. */
@@ -184,13 +187,13 @@ enum {
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
- /* New for Unicode 5.0: */
+ /* New for Unicode 5.0 */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
- /* New for Unicode 5.1: */
+ /* New for Unicode 5.1 */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
@@ -202,7 +205,7 @@ enum {
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
- /* New for Unicode 5.2: */
+ /* New for Unicode 5.2 */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
@@ -218,11 +221,11 @@ enum {
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
- /* New for Unicode 6.0.0: */
+ /* New for Unicode 6.0.0 */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
- /* New for Unicode 6.1.0: */
+ /* New for Unicode 6.1.0 */
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
@@ -230,7 +233,7 @@ enum {
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
- /* New for Unicode 7.0.0: */
+ /* New for Unicode 7.0.0 */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
@@ -254,15 +257,26 @@ enum {
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi,
- /* New for Unicode 8.0.0: */
+ /* New for Unicode 8.0.0 */
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian,
- ucp_SignWriting
+ ucp_SignWriting,
+ /* New for Unicode 10.0.0 (no update since 8.0.0) */
+ ucp_Adlam,
+ ucp_Bhaiksuki,
+ ucp_Marchen,
+ ucp_Newa,
+ ucp_Osage,
+ ucp_Tangut,
+ ucp_Masaram_Gondi,
+ ucp_Nushu,
+ ucp_Soyombo,
+ ucp_Zanabazar_Square
};
-#endif
+#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */
diff --git a/src/3rdparty/pcre2/src/pcre2_valid_utf.c b/src/3rdparty/pcre2/src/pcre2_valid_utf.c
index 2dfd8df34d..96e8bff993 100644
--- a/src/3rdparty/pcre2/src/pcre2_valid_utf.c
+++ b/src/3rdparty/pcre2/src/pcre2_valid_utf.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2017 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -93,8 +93,8 @@ Returns: == 0 if the string is a valid UTF string
int
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
-register PCRE2_SPTR p;
-register uint32_t c;
+PCRE2_SPTR p;
+uint32_t c;
/* ----------------- Check a UTF-8 string ----------------- */
@@ -133,7 +133,7 @@ PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
for (p = string; length > 0; p++)
{
- register uint32_t ab, d;
+ uint32_t ab, d;
c = *p;
length--;
@@ -142,20 +142,20 @@ for (p = string; length > 0; p++)
if (c < 0xc0) /* Isolated 10xx xxxx byte */
{
- *erroroffset = (int)(p - string);
+ *erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR20;
}
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
{
- *erroroffset = (int)(p - string);
+ *erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF8_ERR21;
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < ab) /* Missing bytes */
{
- *erroroffset = (int)(p - string);
+ *erroroffset = (PCRE2_SIZE)(p - string);
switch(ab - length)
{
case 1: return PCRE2_ERROR_UTF8_ERR1;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfig.h b/src/3rdparty/pcre2/src/sljit/sljitConfig.h
index a548c37ab6..b65584a4af 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfig.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfig.h
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -90,10 +90,20 @@
/* Executable code allocation:
If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
- define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */
+ define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */
#ifndef SLJIT_EXECUTABLE_ALLOCATOR
/* Enabled by default. */
#define SLJIT_EXECUTABLE_ALLOCATOR 1
+
+/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses
+ an allocator which does not set writable and executable
+ permission flags at the same time. The trade-of is increased
+ memory consumption and disabled dynamic code modifications. */
+#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR
+/* Disabled by default. */
+#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0
+#endif
+
#endif
/* Force cdecl calling convention even if a better calling
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
index 566c368063..cc0810fbd7 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -187,14 +187,6 @@
/* External function definitions. */
/**********************************/
-#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
-
-/* These libraries are needed for the macros below. */
-#include <stdlib.h>
-#include <string.h>
-
-#endif /* SLJIT_STD_MACROS_DEFINED */
-
/* General macros:
Note: SLJIT is designed to be independent from them as possible.
@@ -304,6 +296,13 @@
#define SLJIT_CACHE_FLUSH(from, to) \
sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+
+/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
+#define SLJIT_CACHE_FLUSH(from, to) \
+ ppc_cache_flush((from), (to))
+#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
+
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
#define SLJIT_CACHE_FLUSH(from, to) \
@@ -316,13 +315,6 @@
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
-#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
-
-/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
-#define SLJIT_CACHE_FLUSH(from, to) \
- ppc_cache_flush((from), (to))
-#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
-
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
@@ -401,7 +393,9 @@ typedef double sljit_f64;
#ifndef SLJIT_W
/* Defining long constants. */
-#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
+#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#define SLJIT_W(w) (w##l)
+#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#define SLJIT_W(w) (w##ll)
#else
#define SLJIT_W(w) (w)
@@ -545,6 +539,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
+
+#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
+#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
+#else
+#define SLJIT_EXEC_OFFSET(ptr) 0
+#endif
+
#endif
/**********************************************/
@@ -553,37 +555,37 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-#define SLJIT_NUMBER_OF_REGISTERS 10
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#else
/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */
-#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#endif /* SLJIT_X86_32_FASTCALL */
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifndef _WIN64
-#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
-#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE 0
#else
-#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
-#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#endif /* _WIN64 */
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
-#define SLJIT_NUMBER_OF_REGISTERS 11
+#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
-#define SLJIT_NUMBER_OF_REGISTERS 11
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
@@ -607,7 +609,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
-#define SLJIT_NUMBER_OF_REGISTERS 17
+#define SLJIT_NUMBER_OF_REGISTERS 21
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
@@ -663,7 +665,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
-#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP)
+#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)
/* SLJIT_HALT_PROCESS must halt the process. */
#ifndef SLJIT_HALT_PROCESS
@@ -675,7 +677,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#include <stdio.h>
-#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */
+#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */
/* Feel free to redefine these two macros. */
#ifndef SLJIT_ASSERT
@@ -690,34 +692,33 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#endif /* !SLJIT_ASSERT */
-#ifndef SLJIT_ASSERT_STOP
+#ifndef SLJIT_UNREACHABLE
-#define SLJIT_ASSERT_STOP() \
+#define SLJIT_UNREACHABLE() \
do { \
printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \
SLJIT_HALT_PROCESS(); \
} while (0)
-#endif /* !SLJIT_ASSERT_STOP */
+#endif /* !SLJIT_UNREACHABLE */
#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
/* Forcing empty, but valid statements. */
#undef SLJIT_ASSERT
-#undef SLJIT_ASSERT_STOP
+#undef SLJIT_UNREACHABLE
#define SLJIT_ASSERT(x) \
do { } while (0)
-#define SLJIT_ASSERT_STOP() \
+#define SLJIT_UNREACHABLE() \
do { } while (0)
#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */
#ifndef SLJIT_COMPILE_ASSERT
-/* Should be improved eventually. */
#define SLJIT_COMPILE_ASSERT(x, description) \
- SLJIT_ASSERT(x)
+ switch(0) { case 0: case ((x) ? 1 : 0): break; }
#endif /* !SLJIT_COMPILE_ASSERT */
diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
index 54f05f5dd7..f5009788f6 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -86,7 +86,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
}
-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
SLJIT_UNUSED_ARG(size);
VirtualFree(chunk, 0, MEM_RELEASE);
@@ -96,7 +96,7 @@ static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
{
- void* retval;
+ void *retval;
#ifdef MAP_ANON
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -111,7 +111,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
return (retval != MAP_FAILED) ? retval : NULL;
}
-static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size)
+static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
{
munmap(chunk, size);
}
@@ -180,8 +180,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
sljit_uw chunk_size;
allocator_grab_lock();
- if (size < sizeof(struct free_block))
- size = sizeof(struct free_block);
+ if (size < (64 - sizeof(struct block_header)))
+ size = (64 - sizeof(struct block_header));
size = ALIGN_SIZE(size);
free_block = free_blocks;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.c b/src/3rdparty/pcre2/src/sljit/sljitLir.c
index ec1781e4c7..c0bbb5201a 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -26,6 +26,14 @@
#include "sljitLir.h"
+#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
+
+/* These libraries are needed for the macros below. */
+#include <stdlib.h>
+#include <string.h>
+
+#endif /* SLJIT_STD_MACROS_DEFINED */
+
#define CHECK_ERROR() \
do { \
if (SLJIT_UNLIKELY(compiler->error)) \
@@ -76,14 +84,18 @@
#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+#define VARIABLE_FLAG_SHIFT (10)
+#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT)
+#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT)
+
#define GET_OPCODE(op) \
- ((op) & ~(SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+ ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))
-#define GET_FLAGS(op) \
- ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))
+#define HAS_FLAGS(op) \
+ ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))
#define GET_ALL_FLAGS(op) \
- ((op) & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))
+ ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))
#define TYPE_CAST_NEEDED(op) \
(((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16))
@@ -112,10 +124,10 @@
/* SLJIT_REWRITABLE_JUMP is 0x1000. */
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-# define PATCH_MB 0x4
-# define PATCH_MW 0x8
+# define PATCH_MB 0x4
+# define PATCH_MW 0x8
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-# define PATCH_MD 0x10
+# define PATCH_MD 0x10
#endif
#endif
@@ -242,9 +254,21 @@
#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
+#include "sljitProtExecAllocator.c"
+#else
#include "sljitExecAllocator.c"
#endif
+#endif
+
+#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset))
+#else
+#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr))
+#endif
+
/* Argument checking features. */
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -318,7 +342,7 @@
/* Public functions */
/* --------------------------------------------------------------------- */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#define SLJIT_NEEDS_COMPILER_INIT 1
static sljit_s32 compiler_initialized = 0;
/* A thread safe initialization. */
@@ -345,6 +369,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo
int_op_and_single_op_must_be_the_same);
SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP,
rewritable_jump_and_single_op_must_not_be_the_same);
+ SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1),
+ conditional_flags_must_be_even_numbers);
/* Only the non-zero members must be set. */
compiler->error = SLJIT_SUCCESS;
@@ -479,6 +505,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
}
}
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(current_flags);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_I32_OP | SLJIT_SET_Z)) == 0) {
+ compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z));
+ }
+#endif
+}
+
/* --------------------------------------------------------------------- */
/* Private functions */
/* --------------------------------------------------------------------- */
@@ -624,65 +662,6 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
(((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-#define FUNCTION_CHECK_OP() \
- CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
- switch (GET_OPCODE(op)) { \
- case SLJIT_NOT: \
- case SLJIT_CLZ: \
- case SLJIT_AND: \
- case SLJIT_OR: \
- case SLJIT_XOR: \
- case SLJIT_SHL: \
- case SLJIT_LSHR: \
- case SLJIT_ASHR: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \
- break; \
- case SLJIT_NEG: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
- break; \
- case SLJIT_MUL: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
- break; \
- case SLJIT_ADD: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
- break; \
- case SLJIT_SUB: \
- break; \
- case SLJIT_ADDC: \
- case SLJIT_SUBC: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \
- break; \
- case SLJIT_BREAKPOINT: \
- case SLJIT_NOP: \
- case SLJIT_LMUL_UW: \
- case SLJIT_LMUL_SW: \
- case SLJIT_MOV: \
- case SLJIT_MOV_U32: \
- case SLJIT_MOV_P: \
- case SLJIT_MOVU: \
- case SLJIT_MOVU_U32: \
- case SLJIT_MOVU_P: \
- /* Nothing allowed */ \
- CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
- break; \
- default: \
- /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
- break; \
- }
-
-#define FUNCTION_CHECK_FOP() \
- CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
- switch (GET_OPCODE(op)) { \
- case SLJIT_CMP_F64: \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
- CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
- break; \
- default: \
- /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \
- CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
- break; \
- }
#define FUNCTION_CHECK_IS_REG(r) \
(((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \
@@ -718,12 +697,12 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(!((i) & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
-#define FUNCTION_CHECK_DST(p, i) \
+#define FUNCTION_CHECK_DST(p, i, unused) \
CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
- if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \
+ if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) \
CHECK_ARGUMENT((i) == 0); \
else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
@@ -737,7 +716,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(!((i) & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
#define FUNCTION_FCHECK(p, i) \
@@ -757,15 +736,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
- }
-
-#define FUNCTION_CHECK_OP1() \
- if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \
- CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \
- CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \
- if ((src & SLJIT_MEM) && (src & REG_MASK)) \
- CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
#endif /* SLJIT_ARGUMENT_CHECKS */
@@ -791,8 +762,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
do { \
if ((r) < (SLJIT_R0 + compiler->scratches)) \
fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \
- else \
+ else if ((r) != SLJIT_SP) \
fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \
+ else \
+ fprintf(compiler->verbose, "sp"); \
} while (0)
#define sljit_verbose_param(compiler, p, i) \
@@ -885,6 +858,7 @@ static char* jump_names[] = {
(char*)"sig_greater", (char*)"sig_less_equal",
(char*)"overflow", (char*)"not_overflow",
(char*)"mul_overflow", (char*)"mul_not_overflow",
+ (char*)"carry", (char*)"",
(char*)"equal", (char*)"not_equal",
(char*)"less", (char*)"greater_equal",
(char*)"greater", (char*)"less_equal",
@@ -929,7 +903,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
SLJIT_UNUSED_ARG(compiler);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT));
+ CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT));
CHECK_ARGUMENT(args >= 0 && args <= 3);
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
@@ -939,6 +913,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -952,13 +927,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
- if (SLJIT_UNLIKELY(compiler->skip_checks)) {
- compiler->skip_checks = 0;
- CHECK_RETURN_OK;
- }
-
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT));
+ CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT));
CHECK_ARGUMENT(args >= 0 && args <= 3);
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS);
@@ -968,6 +938,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -987,6 +958,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi
}
else
CHECK_ARGUMENT(src == 0 && srcw == 0);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1005,7 +977,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1021,6 +994,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
FUNCTION_CHECK_SRC(src, srcw);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1038,6 +1012,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler
CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW)
|| ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW));
CHECK_ARGUMENT(op < SLJIT_LMUL_UW || compiler->scratches >= 2);
+ if (op >= SLJIT_LMUL_UW)
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -1063,10 +1039,49 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ);
- FUNCTION_CHECK_OP();
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_NOT:
+ /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+ break;
+ case SLJIT_NEG:
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+ || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW);
+ break;
+ case SLJIT_MOV:
+ case SLJIT_MOV_U32:
+ case SLJIT_MOV_P:
+ case SLJIT_MOVU:
+ case SLJIT_MOVU_U32:
+ case SLJIT_MOVU_P:
+ /* Nothing allowed */
+ CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+ break;
+ default:
+ /* Only SLJIT_I32_OP is allowed. */
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
+ break;
+ }
+
+ FUNCTION_CHECK_DST(dst, dstw, 1);
FUNCTION_CHECK_SRC(src, srcw);
- FUNCTION_CHECK_DST(dst, dstw);
- FUNCTION_CHECK_OP1();
+
+ if (GET_OPCODE(op) >= SLJIT_NOT)
+ compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
+ else if (GET_OPCODE(op) >= SLJIT_MOVU) {
+ CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP);
+ CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP);
+ if ((src & REG_MASK) != SLJIT_UNUSED) {
+ CHECK_ARGUMENT((src & REG_MASK) != (dst & REG_MASK) && (src & REG_MASK) != OFFS_REG(dst));
+ CHECK_ARGUMENT((src & OFFS_REG_MASK) == SLJIT_UNUSED || srcw == 0);
+ }
+ if ((dst & REG_MASK) != SLJIT_UNUSED) {
+ CHECK_ARGUMENT((dst & REG_MASK) != OFFS_REG(src));
+ CHECK_ARGUMENT((dst & OFFS_REG_MASK) == SLJIT_UNUSED || dstw == 0);
+ }
+ compiler->last_flags = 0;
+ }
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1077,9 +1092,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
}
else
{
- fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
- !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
- !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+ fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
+ !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".",
+ !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]);
}
sljit_verbose_param(compiler, dst, dstw);
@@ -1103,16 +1118,53 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR);
- FUNCTION_CHECK_OP();
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_AND:
+ case SLJIT_OR:
+ case SLJIT_XOR:
+ case SLJIT_SHL:
+ case SLJIT_LSHR:
+ case SLJIT_ASHR:
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+ break;
+ case SLJIT_MUL:
+ CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+ || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW);
+ break;
+ case SLJIT_ADD:
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+ || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)
+ || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW);
+ break;
+ case SLJIT_SUB:
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+ || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW)
+ || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+ break;
+ case SLJIT_ADDC:
+ case SLJIT_SUBC:
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
+ || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+ CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
+ CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP));
+ break;
+ default:
+ SLJIT_UNREACHABLE();
+ break;
+ }
+
+ FUNCTION_CHECK_DST(dst, dstw, 1);
FUNCTION_CHECK_SRC(src1, src1w);
FUNCTION_CHECK_SRC(src2, src2w);
- FUNCTION_CHECK_DST(dst, dstw);
+ compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
- !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s",
- !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
+ fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32",
+ !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".",
+ !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]);
sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(compiler, src1, src1w);
@@ -1153,6 +1205,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(instruction);
+
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
CHECK_ARGUMENT(size > 0 && size < 16);
#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
@@ -1162,6 +1215,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co
CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0);
#endif
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1184,9 +1238,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64);
- FUNCTION_CHECK_FOP();
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_FCHECK(src, srcw);
FUNCTION_FCHECK(dst, dstw);
#endif
@@ -1212,22 +1266,31 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
+#endif
+
if (SLJIT_UNLIKELY(compiler->skip_checks)) {
compiler->skip_checks = 0;
CHECK_RETURN_OK;
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64);
- FUNCTION_CHECK_FOP();
+ CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
+ CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK)
+ || (GET_FLAG_TYPE(op) >= SLJIT_EQUAL_F64 && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_F64));
FUNCTION_FCHECK(src1, src1w);
FUNCTION_FCHECK(src2, src2w);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " %s%s%s%s ", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64",
- (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? ".s" : "");
+ fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64");
+ if (op & VARIABLE_FLAG_MASK) {
+ fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]);
+ }
+ fprintf(compiler->verbose, " ");
sljit_verbose_fparam(compiler, src1, src1w);
fprintf(compiler->verbose, ", ");
sljit_verbose_fparam(compiler, src2, src2w);
@@ -1247,11 +1310,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64);
- FUNCTION_CHECK_FOP();
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_FCHECK(src, srcw);
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1277,9 +1340,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32);
- FUNCTION_CHECK_FOP();
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_CHECK_SRC(src, srcw);
FUNCTION_FCHECK(dst, dstw);
#endif
@@ -1303,9 +1366,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile
sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64);
- FUNCTION_CHECK_FOP();
+ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_FCHECK(src1, src1w);
FUNCTION_FCHECK(src2, src2w);
FUNCTION_FCHECK(dst, dstw);
@@ -1328,6 +1391,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil
{
SLJIT_UNUSED_ARG(compiler);
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ CHECK_RETURN_OK;
+ }
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->last_flags = 0;
+#endif
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
fprintf(compiler->verbose, "label:\n");
@@ -1344,9 +1416,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP)));
+ CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3);
CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP));
CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches);
+
+ if ((type & 0xff) < SLJIT_JUMP) {
+ if ((type & 0xff) <= SLJIT_NOT_ZERO)
+ CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+ else
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
+ CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP));
+ }
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
@@ -1365,6 +1448,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL);
FUNCTION_CHECK_SRC(src1, src1w);
FUNCTION_CHECK_SRC(src2, src2w);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1384,11 +1468,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile
sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_is_fpu_available());
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64);
FUNCTION_FCHECK(src1, src1w);
FUNCTION_FCHECK(src2, src2w);
+ compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1405,6 +1490,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->last_flags = 0;
+#endif
+
if (SLJIT_UNLIKELY(compiler->skip_checks)) {
compiler->skip_checks = 0;
CHECK_RETURN_OK;
@@ -1427,46 +1516,82 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
- CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_U32 || GET_OPCODE(op) == SLJIT_MOV_S32
+ CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
+ CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32
|| (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
- CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0);
- CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS));
- if (GET_OPCODE(op) < SLJIT_ADD) {
- CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0);
- } else {
- CHECK_ARGUMENT(src == dst && srcw == dstw);
- }
- FUNCTION_CHECK_DST(dst, dstw);
+ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
+
+ if ((type & 0xff) <= SLJIT_NOT_ZERO)
+ CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+ else
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
+
+ FUNCTION_CHECK_DST(dst, dstw, 0);
+
+ if (GET_OPCODE(op) >= SLJIT_ADD)
+ compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " flags %s%s%s%s, ",
- !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k",
+ fprintf(compiler->verbose, " flags%s %s%s, ",
+ !(op & SLJIT_SET_Z) ? "" : ".z",
GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE],
GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : ""));
sljit_verbose_param(compiler, dst, dstw);
- if (src != SLJIT_UNUSED) {
- fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src, srcw);
- }
fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type));
}
#endif
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
+ CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
+ if (src != SLJIT_IMM) {
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
+ CHECK_ARGUMENT(srcw == 0);
+ }
+
+ if ((type & 0xff) <= SLJIT_NOT_ZERO)
+ CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
+ else
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " cmov%s %s%s, ",
+ !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
+ jump_names[type & 0xff], JUMP_POSTFIX(type));
+ sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
+ /* Any offset is allowed. */
SLJIT_UNUSED_ARG(offset);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1483,7 +1608,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil
SLJIT_UNUSED_ARG(init_value);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1544,6 +1669,44 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp
return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw);
}
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
+ || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1))
+
+static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ struct sljit_label *label;
+ struct sljit_jump *jump;
+ sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? SLJIT_MOV32 : SLJIT_MOV;
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+ jump = sljit_emit_jump(compiler, type ^ 0x1);
+ FAIL_IF(!jump);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+ FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ compiler->skip_checks = 1;
+#endif
+ label = sljit_emit_label(compiler);
+ FAIL_IF(!label);
+ sljit_set_label(jump, label);
+ return SLJIT_SUCCESS;
+}
+
+#endif
+
/* CPU description section */
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
@@ -1645,6 +1808,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
condition = SLJIT_SIG_GREATER_EQUAL;
break;
}
+
type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP));
tmp_src = src1;
src1 = src2;
@@ -1655,11 +1819,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
}
if (condition <= SLJIT_NOT_ZERO)
- flags = SLJIT_SET_E;
- else if (condition <= SLJIT_LESS_EQUAL)
- flags = SLJIT_SET_U;
+ flags = SLJIT_SET_Z;
else
- flags = SLJIT_SET_S;
+ flags = condition << VARIABLE_FLAG_SHIFT;
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
@@ -1671,38 +1833,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+ return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP)));
}
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- sljit_s32 flags, condition;
-
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
- condition = type & 0xff;
- flags = (condition <= SLJIT_NOT_EQUAL_F64) ? SLJIT_SET_E : SLJIT_SET_S;
- if (type & SLJIT_F32_OP)
- flags |= SLJIT_F32_OP;
-
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- sljit_emit_fop1(compiler, SLJIT_CMP_F64 | flags, src1, src1w, src2, src2w);
+ sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP));
+ return sljit_emit_jump(compiler, type);
}
-#endif
-
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
@@ -1716,7 +1871,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c
compiler->skip_checks = 1;
#endif
if (offset != 0)
- return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+ return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0);
}
@@ -1731,23 +1886,30 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
return "unsupported";
}
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data)
{
- SLJIT_ASSERT_STOP();
+ SLJIT_UNUSED_ARG(allocator_data);
+ SLJIT_UNREACHABLE();
return NULL;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler)
{
SLJIT_UNUSED_ARG(compiler);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(size);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
@@ -1756,21 +1918,28 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(verbose);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
#endif
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
SLJIT_UNUSED_ARG(compiler);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ SLJIT_UNUSED_ARG(feature_type);
+ SLJIT_UNREACHABLE();
+ return 0;
+}
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code)
{
SLJIT_UNUSED_ARG(code);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
@@ -1785,7 +1954,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
SLJIT_UNUSED_ARG(fscratches);
SLJIT_UNUSED_ARG(fsaveds);
SLJIT_UNUSED_ARG(local_size);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1801,7 +1970,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
SLJIT_UNUSED_ARG(fscratches);
SLJIT_UNUSED_ARG(fsaveds);
SLJIT_UNUSED_ARG(local_size);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1811,7 +1980,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1820,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1829,7 +1998,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1837,7 +2006,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1851,7 +2020,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1868,13 +2037,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
SLJIT_UNUSED_ARG(src1w);
SLJIT_UNUSED_ARG(src2);
SLJIT_UNUSED_ARG(src2w);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return reg;
}
@@ -1884,14 +2053,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(instruction);
SLJIT_UNUSED_ARG(size);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
{
- SLJIT_ASSERT_STOP();
- return 0;
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(current_flags);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1904,7 +2073,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -1921,14 +2090,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
SLJIT_UNUSED_ARG(src1w);
SLJIT_UNUSED_ARG(src2);
SLJIT_UNUSED_ARG(src2w);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
SLJIT_UNUSED_ARG(compiler);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
@@ -1936,7 +2105,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(type);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
@@ -1950,7 +2119,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
SLJIT_UNUSED_ARG(src1w);
SLJIT_UNUSED_ARG(src2);
SLJIT_UNUSED_ARG(src2w);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
@@ -1964,7 +2133,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile
SLJIT_UNUSED_ARG(src1w);
SLJIT_UNUSED_ARG(src2);
SLJIT_UNUSED_ARG(src2w);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
@@ -1972,14 +2141,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
{
SLJIT_UNUSED_ARG(jump);
SLJIT_UNUSED_ARG(label);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target)
{
SLJIT_UNUSED_ARG(jump);
SLJIT_UNUSED_ARG(target);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
@@ -1988,23 +2157,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_UNUSED_ARG(type);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNREACHABLE();
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(dst_reg);
SLJIT_UNUSED_ARG(src);
SLJIT_UNUSED_ARG(srcw);
- SLJIT_UNUSED_ARG(type);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -2014,7 +2193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(offset);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
}
@@ -2024,22 +2203,24 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(initval);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return NULL;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
SLJIT_UNUSED_ARG(addr);
- SLJIT_UNUSED_ARG(new_addr);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNUSED_ARG(new_target);
+ SLJIT_UNUSED_ARG(executable_offset);
+ SLJIT_UNREACHABLE();
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
SLJIT_UNUSED_ARG(addr);
SLJIT_UNUSED_ARG(new_constant);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNUSED_ARG(executable_offset);
+ SLJIT_UNREACHABLE();
}
#endif
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.h b/src/3rdparty/pcre2/src/sljit/sljitLir.h
index df69b8656f..470c84f592 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.h
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -99,6 +99,8 @@ of sljitConfigInternal.h */
#define SLJIT_ERR_UNSUPPORTED 4
/* An ivalid argument is passed to any SLJIT function. */
#define SLJIT_ERR_BAD_ARGUMENT 5
+/* Dynamic code modification is not enabled. */
+#define SLJIT_ERR_DYN_CODE_MOD 6
/* --------------------------------------------------------------------- */
/* Registers */
@@ -118,8 +120,8 @@ of sljitConfigInternal.h */
If an architecture provides two scratch and three saved registers,
its scratch and saved register sets are the following:
- R0 | [S4] | R0 and S4 represent the same physical register
- R1 | [S3] | R1 and S3 represent the same physical register
+ R0 | | R0 is always a scratch register
+ R1 | | R1 is always a scratch register
[R2] | S2 | R2 and S2 represent the same physical register
[R3] | S1 | R3 and S1 represent the same physical register
[R4] | S0 | R4 and S0 represent the same physical register
@@ -127,38 +129,35 @@ of sljitConfigInternal.h */
Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and
SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture.
- Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10
- and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers
+ Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12
+ and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers
are virtual on x86-32. See below.
- The purpose of this definition is convenience. Although a register
- is either scratch register or saved register, SLJIT allows accessing
- them from the other set. For example, four registers can be used as
- scratch registers and the fifth one as saved register on the architecture
- above. Of course the last two scratch registers (R2 and R3) from this
- four will be saved on the stack, because they are defined as saved
- registers in the application binary interface. Still R2 and R3 can be
- used for referencing to these registers instead of S2 and S1, which
- makes easier to write platform independent code. Scratch registers
- can be saved registers in a similar way, but these extra saved
- registers will not be preserved across function calls! Hence the
- application must save them on those platforms, where the number of
- saved registers is too low. This can be done by copy them onto
- the stack and restore them after a function call.
+ The purpose of this definition is convenience: saved registers can
+ be used as extra scratch registers. For example four registers can
+ be specified as scratch registers and the fifth one as saved register
+ on the CPU above and any user code which requires four scratch
+ registers can run unmodified. The SLJIT compiler automatically saves
+ the content of the two extra scrath register on the stack. Scratch
+ registers can also be preserved by saving their value on the stack
+ but this needs to be done manually.
Note: To emphasize that registers assigned to R2-R4 are saved
- registers, they are enclosed by square brackets. S3-S4
- are marked in a similar way.
+ registers, they are enclosed by square brackets.
Note: sljit_emit_enter and sljit_set_context defines whether a register
is S or R register. E.g: when 3 scratches and 1 saved is mapped
by sljit_emit_enter, the allowed register set will be: R0-R2 and
S0. Although S2 is mapped to the same position as R2, it does not
- available in the current configuration. Furthermore the R3 (S1)
- register does not available as well.
+ available in the current configuration. Furthermore the S1 register
+ is not available at all.
*/
-/* When SLJIT_UNUSED is specified as destination, the result is discarded. */
+/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 and
+ and sljit_emit_op2 operations the result is discarded. If no status
+ flags are set, no instructions are emitted for these operations. Data
+ prefetch is a special exception, see SLJIT_MOV operation. Other SLJIT
+ operations do not support SLJIT_UNUSED as a destination operand. */
#define SLJIT_UNUSED 0
/* Scratch registers. */
@@ -323,19 +322,22 @@ struct sljit_compiler {
sljit_s32 local_size;
/* Code size. */
sljit_uw size;
- /* For statistical purposes. */
+ /* Relative offset of the executable mapping from the writable mapping. */
+ sljit_uw executable_offset;
+ /* Executable size for statistical purposes. */
sljit_uw executable_size;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 args;
+ sljit_s32 locals_offset;
+ sljit_s32 saveds_offset;
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 mode32;
+#ifdef _WIN64
+ sljit_s32 locals_offset;
#endif
-
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
- sljit_s32 flags_saved;
#endif
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
@@ -352,13 +354,6 @@ struct sljit_compiler {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Temporary fields. */
sljit_uw shift_imm;
- sljit_s32 cache_arg;
- sljit_sw cache_argw;
-#endif
-
-#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
- sljit_s32 cache_arg;
- sljit_sw cache_argw;
#endif
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
@@ -395,6 +390,9 @@ struct sljit_compiler {
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ /* Flags specified by the last arithmetic instruction.
+ It contains the type of the variable flag. */
+ sljit_s32 last_flags;
/* Local size passed to the functions. */
sljit_s32 logical_local_size;
#endif
@@ -402,6 +400,7 @@ struct sljit_compiler {
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
|| (defined SLJIT_DEBUG && SLJIT_DEBUG) \
|| (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ /* Trust arguments when the API function is called. */
sljit_s32 skip_checks;
#endif
};
@@ -455,19 +454,67 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compile
SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose);
#endif
+/*
+ Create executable code from the sljit instruction stream. This is the final step
+ of the code generation so no more instructions can be added after this call.
+*/
+
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler);
+
+/* Free executable code. */
+
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code);
/*
- After the machine code generation is finished we can retrieve the allocated
- executable memory size, although this area may not be fully filled with
- instructions depending on some optimizations. This function is useful only
- for statistical purposes.
+ When the protected executable allocator is used the JIT code is mapped
+ twice. The first mapping has read/write and the second mapping has read/exec
+ permissions. This function returns with the relative offset of the executable
+ mapping using the writable mapping as the base after the machine code is
+ successfully generated. The returned value is always 0 for the normal executable
+ allocator, since it uses only one mapping with read/write/exec permissions.
+ Dynamic code modifications requires this value.
+
+ Before a successful code generation, this function returns with 0.
+*/
+static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; }
+
+/*
+ The executable memory consumption of the generated code can be retrieved by
+ this function. The returned value can be used for statistical purposes.
Before a successful code generation, this function returns with 0.
*/
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
+/* Returns with non-zero if the feature or limitation type passed as its
+ argument is present on the current CPU.
+
+ Some features (e.g. floating point operations) require hardware (CPU)
+ support while others (e.g. move with update) are emulated if not available.
+ However even if a feature is emulated, specialized code paths can be faster
+ than the emulation. Some limitations are emulated as well so their general
+ case is supported but it has extra performance costs. */
+
+/* [Not emulated] Floating-point support is available. */
+#define SLJIT_HAS_FPU 0
+/* [Limitation] Some registers are virtual registers. */
+#define SLJIT_HAS_VIRTUAL_REGISTERS 1
+/* [Emulated] Some forms of move with pre update is supported. */
+#define SLJIT_HAS_PRE_UPDATE 2
+/* [Emulated] Count leading zero is supported. */
+#define SLJIT_HAS_CLZ 3
+/* [Emulated] Conditional move is supported. */
+#define SLJIT_HAS_CMOV 4
+/* [Limitation] [Emulated] Shifting with register is limited to SLJIT_PREF_SHIFT_REG. */
+#define SLJIT_HAS_PREF_SHIFT_REG 5
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+/* [Not emulated] SSE2 support is available on x86. */
+#define SLJIT_HAS_SSE2 100
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
+
/* Instruction generation. Returns with any error code. If there is no
error, they return with SLJIT_SUCCESS. */
@@ -512,8 +559,8 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
*/
/* The absolute address returned by sljit_get_local_base with
-offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */
-#define SLJIT_DOUBLE_ALIGNMENT 0x00000001
+offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */
+#define SLJIT_F64_ALIGNMENT 0x00000001
/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
#define SLJIT_MAX_LOCAL_SIZE 65536
@@ -555,7 +602,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
and setting up a new stack frame would cost too much performance. However, it is still
possible to return to the address of the caller (or anywhere else). */
-/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */
+/* Note: may destroy flags. */
/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested,
since many architectures do clever branch prediction on call / return instruction pairs. */
@@ -624,57 +671,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8))
#define SLJIT_IMM 0x40
-/* Set 32 bit operation mode (I) on 64 bit CPUs. This flag is ignored on 32
- bit CPUs. When this flag is set for an arithmetic operation, only the
- lower 32 bit of the input register(s) are used, and the CPU status flags
- are set according to the 32 bit result. Although the higher 32 bit of
- the input and the result registers are not defined by SLJIT, it might be
- defined by the CPU architecture (e.g. MIPS). To satisfy these requirements
- all source registers must be computed by operations where this flag is
- also set. In other words 32 and 64 bit arithmetic operations cannot be
- mixed. The only exception is SLJIT_IMOV and SLJIT_IMOVU whose source
- register can hold any 32 or 64 bit value. This source register is
- converted to a 32 bit compatible format. SLJIT does not generate any
- instructions on certain CPUs (e.g. on x86 and ARM) if the source and
- destination operands are the same registers. Affects sljit_emit_op0,
- sljit_emit_op1 and sljit_emit_op2. */
+/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on
+ 32 bit CPUs. When this option is set for an arithmetic operation, only
+ the lower 32 bit of the input registers are used, and the CPU status
+ flags are set according to the 32 bit result. Although the higher 32 bit
+ of the input and the result registers are not defined by SLJIT, it might
+ be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU
+ requirements all source registers must be the result of those operations
+ where this option was also set. Memory loads read 32 bit values rather
+ than 64 bit ones. In other words 32 bit and 64 bit operations cannot
+ be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
+ register can hold any 32 or 64 bit value, and it is converted to a 32 bit
+ compatible format first. This conversion is free (no instructions are
+ emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
+ value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
+
+ Note: memory addressing always uses 64 bit values on 64 bit systems so
+ the result of a 32 bit operation must not be used with SLJIT_MEMx
+ macros.
+
+ This option is part of the instruction name, so there is no need to
+ manually set it. E.g:
+
+ SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */
#define SLJIT_I32_OP 0x100
-/* F32 precision mode (SP). This flag is similar to SLJIT_I32_OP, just
- it applies to floating point registers (it is even the same bit). When
- this flag is passed, the CPU performs 32 bit floating point operations.
- Similar to SLJIT_I32_OP, all register arguments must be computed by
- floating point operations where this flag is also set. Affects
- sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. */
-#define SLJIT_F32_OP 0x100
-
-/* Common CPU status flags for all architectures (x86, ARM, PPC)
- - carry flag
- - overflow flag
- - zero flag
- - negative/positive flag (depends on arc)
- On mips, these flags are emulated by software. */
-
-/* By default, the instructions may, or may not set the CPU status flags.
- Forcing to set or keep status flags can be done with the following flags: */
-
-/* Note: sljit tries to emit the minimum number of instructions. Using these
- flags can increase them, so use them wisely to avoid unnecessary code generation. */
-
-/* Set Equal (Zero) status flag (E). */
-#define SLJIT_SET_E 0x0200
-/* Set unsigned status flag (U). */
-#define SLJIT_SET_U 0x0400
-/* Set signed status flag (S). */
-#define SLJIT_SET_S 0x0800
-/* Set signed overflow flag (O). */
-#define SLJIT_SET_O 0x1000
-/* Set carry flag (C).
- Note: Kinda unsigned overflow, but behaves differently on various cpus. */
-#define SLJIT_SET_C 0x2000
-/* Do not modify the flags (K).
- Note: This flag cannot be combined with any other SLJIT_SET_* flag. */
-#define SLJIT_KEEP_FLAGS 0x4000
+/* Set F32 (single) precision mode for floating-point computation. This
+ option is similar to SLJIT_I32_OP, it just applies to floating point
+ registers. When this option is passed, the CPU performs 32 bit floating
+ point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all
+ register arguments must be the result of those operations where this
+ option was also set.
+
+ This option is part of the instruction name, so there is no need to
+ manually set it. E.g:
+
+ SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP)
+ */
+#define SLJIT_F32_OP SLJIT_I32_OP
+
+/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
+ to the result of an operation. Other CPUs (MIPS) does not have status
+ flags, and results must be stored in registers. To cover both architecture
+ types efficiently only two flags are defined by SLJIT:
+
+ * Zero (equal) flag: it is set if the result is zero
+ * Variable flag: its value is defined by the last arithmetic operation
+
+ SLJIT instructions can set any or both of these flags. The value of
+ these flags is undefined if the instruction does not specify their value.
+ The description of each instruction contains the list of allowed flag
+ types.
+
+ Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence
+
+ sljit_op2(..., SLJIT_ADD, ...)
+ Both the zero and variable flags are undefined so they can
+ have any value after the operation is completed.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
+ Sets the zero flag if the result is zero, clears it otherwise.
+ The variable flag is undefined.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...)
+ Sets the variable flag if an integer overflow occurs, clears
+ it otherwise. The zero flag is undefined.
+
+ sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...)
+ Sets the zero flag if the result is zero, clears it otherwise.
+ Sets the variable flag if unsigned overflow (carry) occurs,
+ clears it otherwise.
+
+ If an instruction (e.g. SLJIT_MOV) does not modify flags the flags are
+ unchanged.
+
+ Using these flags can reduce the number of emitted instructions. E.g. a
+ fast loop can be implemented by decreasing a counter register and set the
+ zero flag to jump back if the counter register is not reached zero.
+
+ Motivation: although CPUs can set a large number of flags, usually their
+ values are ignored or only one of them is used. Emulating a large number
+ of flags on systems without flag register is complicated so SLJIT
+ instructions must specify the flag they want to use and only that flag
+ will be emulated. The last arithmetic instruction can be repeated if
+ multiple flags needs to be checked.
+*/
+
+/* Set Zero status flag. */
+#define SLJIT_SET_Z 0x0200
+/* Set the variable status flag if condition is true.
+ See comparison types. */
+#define SLJIT_SET(condition) ((condition) << 10)
/* Notes:
- you cannot postpone conditional jump instructions except if noted that
@@ -684,11 +771,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
/* Starting index of opcodes for sljit_emit_op0. */
#define SLJIT_OP0_BASE 0
-/* Flags: - (never set any flags)
+/* Flags: - (does not modify flags)
Note: breakpoint instruction is not supported by all architectures (e.g. ppc)
It falls back to SLJIT_NOP in those cases. */
#define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0)
-/* Flags: - (never set any flags)
+/* Flags: - (does not modify flags)
Note: may or may not cause an extra cycle wait
it can even decrease the runtime in a few cases. */
#define SLJIT_NOP (SLJIT_OP0_BASE + 1)
@@ -700,13 +787,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Signed multiplication of SLJIT_R0 and SLJIT_R1.
Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
#define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
Note: if SLJIT_R1 is 0, the behaviour is undefined. */
#define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4)
#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
Note: if SLJIT_R1 is 0, the behaviour is undefined.
@@ -714,13 +801,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
the behaviour is undefined. */
#define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5)
#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
Note: if SLJIT_R1 is 0, the behaviour is undefined. */
#define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6)
#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP)
-/* Flags: I - (may destroy flags)
+/* Flags: - (may destroy flags)
Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
Note: if SLJIT_R1 is 0, the behaviour is undefined.
@@ -734,75 +821,113 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op1. */
#define SLJIT_OP1_BASE 32
-/* Notes for MOV instructions:
- U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1)
- or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument
- UB = unsigned byte (8 bit)
- SB = signed byte (8 bit)
- UH = unsigned half (16 bit)
- SH = signed half (16 bit)
- UI = unsigned int (32 bit)
- SI = signed int (32 bit)
- P = pointer (sljit_p) size */
-
-/* Flags: - (never set any flags) */
+/* The MOV instruction transfer data from source to destination.
+
+ MOV instruction suffixes:
+
+ U8 - unsigned 8 bit data transfer
+ S8 - signed 8 bit data transfer
+ U16 - unsigned 16 bit data transfer
+ S16 - signed 16 bit data transfer
+ U32 - unsigned int (32 bit) data transfer
+ S32 - signed int (32 bit) data transfer
+ P - pointer (sljit_p) data transfer
+
+ U = move with update (pre form). If source or destination defined as
+ SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the
+ offset part of the address.
+
+ Register arguments and base registers can only be used once for move
+ with update instructions. The shift value of SLJIT_MEM2 addressing
+ mode must also be 0. Reason: SLJIT_MOVU instructions are expected to
+ be in high-performance loops where complex instruction emulation
+ would be too costly.
+
+ Examples for invalid move with update instructions:
+
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 8);
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_R0, 0);
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM1(SLJIT_R0), 8);
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0);
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_R2, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1);
+
+ The following example is valid, since only the offset register is
+ used multiple times:
+
+ sljit_emit_op1(..., SLJIT_MOVU_U8,
+ SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0);
+
+ If the destination of a MOV without update instruction is SLJIT_UNUSED
+ and the source operand is a memory address the compiler emits a prefetch
+ instruction if this instruction is supported by the current CPU.
+ Higher data sizes bring the data closer to the core: a MOV with word
+ size loads the data into a higher level cache than a byte size. Otherwise
+ the type does not affect the prefetch instruction. Furthermore a prefetch
+ instruction never fails, so it can be used to prefetch a data from an
+ address and check whether that address is NULL afterwards.
+*/
+
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV (SLJIT_OP1_BASE + 0)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1)
#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2)
#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3)
#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4)
#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags)
+/* Flags: - (does not modify flags)
Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
#define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5)
-/* Flags: I - (never set any flags)
+/* Flags: - (does not modify flags)
Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6)
-/* Flags: I - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP)
-/* Flags: - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7)
-/* Flags: - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU (SLJIT_OP1_BASE + 8)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU_U8 (SLJIT_OP1_BASE + 9)
#define SLJIT_MOVU32_U8 (SLJIT_MOVU_U8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU_S8 (SLJIT_OP1_BASE + 10)
#define SLJIT_MOVU32_S8 (SLJIT_MOVU_S8 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU_U16 (SLJIT_OP1_BASE + 11)
#define SLJIT_MOVU32_U16 (SLJIT_MOVU_U16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU_S16 (SLJIT_OP1_BASE + 12)
#define SLJIT_MOVU32_S16 (SLJIT_MOVU_S16 | SLJIT_I32_OP)
-/* Flags: I - (never set any flags)
+/* Flags: - (may destroy flags)
Note: no SLJIT_MOVU32_U32 form, since it is the same as SLJIT_MOVU32 */
#define SLJIT_MOVU_U32 (SLJIT_OP1_BASE + 13)
-/* Flags: I - (never set any flags)
+/* Flags: - (may destroy flags)
Note: no SLJIT_MOVU32_S32 form, since it is the same as SLJIT_MOVU32 */
#define SLJIT_MOVU_S32 (SLJIT_OP1_BASE + 14)
-/* Flags: I - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU32 (SLJIT_MOVU_S32 | SLJIT_I32_OP)
-/* Flags: - (never set any flags) */
+/* Flags: - (may destroy flags) */
#define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15)
-/* Flags: I | E | K */
+/* Flags: Z */
#define SLJIT_NOT (SLJIT_OP1_BASE + 16)
#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP)
-/* Flags: I | E | O | K */
+/* Flags: Z | OVERFLOW */
#define SLJIT_NEG (SLJIT_OP1_BASE + 17)
#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP)
/* Count leading zeroes
- Flags: I | E | K
- Important note! Sparc 32 does not support K flag, since
- the required popc instruction is introduced only in sparc 64. */
+ Flags: - (may destroy flags) */
#define SLJIT_CLZ (SLJIT_OP1_BASE + 18)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP)
@@ -813,46 +938,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op2. */
#define SLJIT_OP2_BASE 96
-/* Flags: I | E | O | C | K */
+/* Flags: Z | OVERFLOW | CARRY */
#define SLJIT_ADD (SLJIT_OP2_BASE + 0)
#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP)
-/* Flags: I | C | K */
+/* Flags: CARRY */
#define SLJIT_ADDC (SLJIT_OP2_BASE + 1)
#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP)
-/* Flags: I | E | U | S | O | C | K */
+/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
+ SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
+ SIG_LESS_EQUAL | CARRY */
#define SLJIT_SUB (SLJIT_OP2_BASE + 2)
#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP)
-/* Flags: I | C | K */
+/* Flags: CARRY */
#define SLJIT_SUBC (SLJIT_OP2_BASE + 3)
#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP)
/* Note: integer mul
- Flags: I | O (see SLJIT_C_MUL_*) | K */
+ Flags: MUL_OVERFLOW */
#define SLJIT_MUL (SLJIT_OP2_BASE + 4)
#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
#define SLJIT_AND (SLJIT_OP2_BASE + 5)
#define SLJIT_AND32 (SLJIT_AND | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
#define SLJIT_OR (SLJIT_OP2_BASE + 6)
#define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP)
-/* Flags: I | E | K */
+/* Flags: Z */
#define SLJIT_XOR (SLJIT_OP2_BASE + 7)
#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
to bit_length - 1, the result is undefined. */
#define SLJIT_SHL (SLJIT_OP2_BASE + 8)
#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
to bit_length - 1, the result is undefined. */
#define SLJIT_LSHR (SLJIT_OP2_BASE + 9)
#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP)
-/* Flags: I | E | K
+/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
@@ -865,44 +992,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
-/* Returns with non-zero if fpu is available. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void);
-
/* Starting index of opcodes for sljit_emit_fop1. */
#define SLJIT_FOP1_BASE 128
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0)
#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP)
/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
Rounding mode when the destination is W or I: round towards zero. */
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1)
#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2)
#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3)
#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4)
#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5)
#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP)
/* Note: dst is the left and src is the right operand for SLJIT_CMPD.
- Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag
- is set, the comparison result is unpredictable.
- Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+ Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6)
#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7)
#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8)
#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP)
@@ -913,16 +1034,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
/* Starting index of opcodes for sljit_emit_fop2. */
#define SLJIT_FOP2_BASE 160
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0)
#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1)
#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2)
#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP)
-/* Flags: SP - (never set any flags) */
+/* Flags: - (does not modify flags) */
#define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3)
#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP)
@@ -949,56 +1070,77 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_LESS 2
#define SLJIT_LESS32 (SLJIT_LESS | SLJIT_I32_OP)
+#define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS)
#define SLJIT_GREATER_EQUAL 3
#define SLJIT_GREATER_EQUAL32 (SLJIT_GREATER_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL)
#define SLJIT_GREATER 4
#define SLJIT_GREATER32 (SLJIT_GREATER | SLJIT_I32_OP)
+#define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER)
#define SLJIT_LESS_EQUAL 5
#define SLJIT_LESS_EQUAL32 (SLJIT_LESS_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL)
#define SLJIT_SIG_LESS 6
#define SLJIT_SIG_LESS32 (SLJIT_SIG_LESS | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS)
#define SLJIT_SIG_GREATER_EQUAL 7
#define SLJIT_SIG_GREATER_EQUAL32 (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL)
#define SLJIT_SIG_GREATER 8
#define SLJIT_SIG_GREATER32 (SLJIT_SIG_GREATER | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER)
#define SLJIT_SIG_LESS_EQUAL 9
#define SLJIT_SIG_LESS_EQUAL32 (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP)
+#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL)
#define SLJIT_OVERFLOW 10
#define SLJIT_OVERFLOW32 (SLJIT_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW)
#define SLJIT_NOT_OVERFLOW 11
#define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP)
#define SLJIT_MUL_OVERFLOW 12
#define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP)
+#define SLJIT_SET_MUL_OVERFLOW SLJIT_SET(SLJIT_MUL_OVERFLOW)
#define SLJIT_MUL_NOT_OVERFLOW 13
#define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP)
+/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */
+#define SLJIT_SET_CARRY SLJIT_SET(14)
+
/* Floating point comparison types. */
-#define SLJIT_EQUAL_F64 14
+#define SLJIT_EQUAL_F64 16
#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_NOT_EQUAL_F64 15
+#define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64)
+#define SLJIT_NOT_EQUAL_F64 17
#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_LESS_F64 16
+#define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64)
+#define SLJIT_LESS_F64 18
#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP)
-#define SLJIT_GREATER_EQUAL_F64 17
+#define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64)
+#define SLJIT_GREATER_EQUAL_F64 19
#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_GREATER_F64 18
+#define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64)
+#define SLJIT_GREATER_F64 20
#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP)
-#define SLJIT_LESS_EQUAL_F64 19
+#define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64)
+#define SLJIT_LESS_EQUAL_F64 21
#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP)
-#define SLJIT_UNORDERED_F64 20
+#define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64)
+#define SLJIT_UNORDERED_F64 22
#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP)
-#define SLJIT_ORDERED_F64 21
+#define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64)
+#define SLJIT_ORDERED_F64 23
#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP)
+#define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64)
/* Unconditional jump types. */
-#define SLJIT_JUMP 22
-#define SLJIT_FAST_CALL 23
-#define SLJIT_CALL0 24
-#define SLJIT_CALL1 25
-#define SLJIT_CALL2 26
-#define SLJIT_CALL3 27
+#define SLJIT_JUMP 24
+#define SLJIT_FAST_CALL 25
+#define SLJIT_CALL0 26
+#define SLJIT_CALL1 27
+#define SLJIT_CALL2 28
+#define SLJIT_CALL3 29
/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */
@@ -1008,8 +1150,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
/* Emit a jump instruction. The destination is not set, only the type of the jump.
type must be between SLJIT_EQUAL and SLJIT_CALL3
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
- Flags: - (never set any flags) for both conditional and unconditional jumps.
- Flags: destroy all flags for calls. */
+
+ Flags: does not modify flags for conditional and unconditional
+ jumps but destroy all flags for calls. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type);
/* Basic arithmetic comparison. In most architectures it is implemented as
@@ -1019,7 +1162,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
It is suggested to use this comparison form when appropriate.
type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
- Flags: destroy flags. */
+ Flags: may destroy flags. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
@@ -1047,36 +1190,55 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
type must be between SLJIT_JUMP and SLJIT_CALL3
Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode
- Flags: - (never set any flags) for unconditional jumps.
- Flags: destroy all flags for calls. */
+
+ Flags: does not modify flags for unconditional jumps but
+ destroy all flags for calls. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw);
/* Perform the operation using the conditional flags as the second argument.
- Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value
+ Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value
represented by the type is 1, if the condition represented by the type
is fulfilled, and 0 otherwise.
- If op == SLJIT_MOV, SLJIT_MOV_S32, SLJIT_MOV_U32:
+ If op == SLJIT_MOV, SLJIT_MOV32:
Set dst to the value represented by the type (0 or 1).
- Src must be SLJIT_UNUSED, and srcw must be 0
- Flags: - (never set any flags)
+ Flags: - (does not modify flags)
If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR
- Performs the binary operation using src as the first, and the value
- represented by type as the second argument.
- Important note: only dst=src and dstw=srcw is supported at the moment!
- Flags: I | E | K
- Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */
+ Performs the binary operation using dst as the first, and the value
+ represented by type as the second argument. Result is written into dst.
+ Flags: Z (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type);
-/* Copies the base address of SLJIT_SP + offset to dst.
- Flags: - (never set any flags) */
+/* Emit a conditional mov instruction which moves source to destination,
+ if the condition is satisfied. Unlike other arithmetic operations this
+ instruction does not support memory accesses.
+
+ type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64
+ dst_reg must be a valid register and it can be combined
+ with SLJIT_I32_OP to perform a 32 bit arithmetic operation
+ src must be register or immediate (SLJIT_IMM)
+
+ Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Copies the base address of SLJIT_SP + offset to dst. The offset can be
+ anything to negate the effect of relative addressing. For example if an
+ array of sljit_sw values is stored on the stack from offset 0x40, and R0
+ contains the offset of an array item plus 0x120, this item can be
+ overwritten by two SLJIT instructions:
+
+ sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120);
+ sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5);
+
+ Flags: - (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
/* The constant can be changed runtime (see: sljit_set_const)
- Flags: - (never set any flags) */
+ Flags: - (does not modify flags) */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);
/* After the code generation the address for label, jump and const instructions
@@ -1086,16 +1248,17 @@ static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { r
static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; }
static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; }
-/* Only the address is required to rewrite the code. */
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr);
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant);
+/* Only the address and executable offset are required to perform dynamic
+ code modifications. See sljit_get_executable_offset function. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset);
/* --------------------------------------------------------------------- */
/* Miscellaneous utility functions */
/* --------------------------------------------------------------------- */
#define SLJIT_MAJOR_VERSION 0
-#define SLJIT_MINOR_VERSION 93
+#define SLJIT_MINOR_VERSION 94
/* Get the human readable name of the platform. Can be useful on platforms
like ARM, where ARM and Thumb2 functions can be mixed, and
@@ -1113,19 +1276,23 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void);
#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK)
-/* The sljit_stack is a utiliy feature of sljit, which allocates a
- writable memory region between base (inclusive) and limit (exclusive).
- Both base and limit is a pointer, and base is always <= than limit.
- This feature uses the "address space reserve" feature
- of modern operating systems. Basically we don't need to allocate a
- huge memory block in one step for the worst case, we can start with
- a smaller chunk and extend it later. Since the address space is
- reserved, the data never copied to other regions, thus it is safe
- to store pointers here. */
-
-/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more).
- Note: stack growing should not happen in small steps: 4k, 16k or even
- bigger growth is better.
+/* The sljit_stack is a utility extension of sljit, which provides
+ a top-down stack. The stack starts at base and goes down to
+ max_limit, so the memory region for this stack is between
+ max_limit (inclusive) and base (exclusive). However the
+ application can only use the region between limit (inclusive)
+ and base (exclusive). The sljit_stack_resize can be used to
+ extend this region up to max_limit.
+
+ This feature uses the "address space reserve" feature of modern
+ operating systems, so instead of allocating a huge memory block
+ applications can allocate a small region and extend it later
+ without moving the memory area. Hence pointers can be stored
+ in this area. */
+
+/* Note: base and max_limit fields are aligned to PAGE_SIZE bytes
+ (usually 4 Kbyte or more).
+ Note: stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more.
Note: this structure may not be supported by all operating systems.
Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK
is not defined. */
@@ -1133,15 +1300,16 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void);
struct sljit_stack {
/* User data, anything can be stored here.
Starting with the same value as base. */
- sljit_uw top;
+ sljit_u8 *top;
/* These members are read only. */
- sljit_uw base;
- sljit_uw limit;
- sljit_uw max_limit;
+ sljit_u8 *base;
+ sljit_u8 *limit;
+ sljit_u8 *max_limit;
};
/* Returns NULL if unsuccessful.
- Note: limit and max_limit contains the size for stack allocation.
+ Note: max_limit contains the maximum stack size in bytes.
+ Note: limit contains the starting stack size in bytes.
Note: the top field is initialized to base.
Note: see sljit_create_compiler for the explanation of allocator_data. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data);
@@ -1153,7 +1321,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *st
since the growth ratio can be added to the current limit, and sljit_stack_resize
will do all the necessary checks. The fields of the stack are not changed if
sljit_stack_resize fails. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_uw new_limit);
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit);
#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */
@@ -1182,6 +1350,15 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+/* Free unused executable memory. The allocator keeps some free memory
+ around to reduce the number of OS executable memory allocations.
+ This improves performance since these calls are costly. However
+ it is sometimes desired to free all unused memory regions, e.g.
+ before the application terminates. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+#endif
+
/* --------------------------------------------------------------------- */
/* CPU specific functions */
/* --------------------------------------------------------------------- */
@@ -1214,32 +1391,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_s32 size);
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-
-/* Returns with non-zero if sse2 is available. */
+/* Define the currently available CPU status flags. It is usually used after an
+ sljit_emit_op_custom call to define which flags are set. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void);
-
-/* Returns with non-zero if cmov instruction is available. */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void);
-
-/* Emit a conditional mov instruction on x86 CPUs. This instruction
- moves src to destination, if the condition is satisfied. Unlike
- other arithmetic instructions, destination must be a register.
- Before such instructions are emitted, cmov support should be
- checked by sljit_x86_is_cmov_available function.
- type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
- dst_reg must be a valid register and it can be combined
- with SLJIT_I32_OP to perform 32 bit arithmetic
- Flags: I - (never set any flags)
- */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
- sljit_s32 type,
- sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw);
-
-#endif
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler,
+ sljit_s32 current_flags);
#endif /* _SLJIT_LIR_H_ */
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
index b92808f526..745da99f61 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -38,8 +38,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_FREG1 (0)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
@@ -55,8 +54,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
(((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
- 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15
};
#define RM(rm) (reg_map[rm])
@@ -83,6 +82,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
+#define CMN_DP 0xb
#define CMP_DP 0xa
#define BKPT 0xe1200070
#define EOR_DP 0x1
@@ -260,7 +260,7 @@ static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
- return push_inst(compiler, BLX | RM(TMP_REG1));
+ return push_inst(compiler, BLX | RM(TMP_REG2));
}
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
@@ -389,7 +389,7 @@ static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s3
#endif
-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
sljit_sw diff;
@@ -401,7 +401,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
code_ptr--;
if (jump->flags & JUMP_ADDR)
- diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2));
+ diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
@@ -426,7 +426,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
}
#else
if (jump->flags & JUMP_ADDR)
- diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr);
+ diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
@@ -446,26 +446,28 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw
return 0;
}
-static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_s32 flush)
+static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- sljit_uw *ptr = (sljit_uw*)addr;
- sljit_uw *inst = (sljit_uw*)ptr[0];
+ sljit_uw *ptr = (sljit_uw *)jump_ptr;
+ sljit_uw *inst = (sljit_uw *)ptr[0];
sljit_uw mov_pc = ptr[1];
sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
- sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2);
+ sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);
if (diff <= 0x7fffff && diff >= -0x800000) {
/* Turn to branch. */
if (!bl) {
inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
} else {
inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
inst[1] = NOP;
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
}
@@ -479,12 +481,14 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr,
if (*inst != mov_pc) {
inst[0] = mov_pc;
if (!bl) {
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
} else {
inst[1] = BLX | RM(TMP_REG1);
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
}
@@ -492,11 +496,12 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr,
*ptr = new_addr;
}
#else
- sljit_uw *inst = (sljit_uw*)addr;
+ sljit_uw *inst = (sljit_uw*)jump_ptr;
SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
#endif
@@ -504,7 +509,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr,
static sljit_uw get_imm(sljit_uw imm);
-static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_s32 flush)
+static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
sljit_uw *ptr = (sljit_uw*)addr;
@@ -515,7 +520,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant,
src2 = get_imm(new_constant);
if (src2) {
*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
return;
@@ -524,7 +530,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant,
src2 = get_imm(~new_constant);
if (src2) {
*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
return;
@@ -537,7 +544,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant,
if (*inst != ldr_literal) {
*inst = ldr_literal;
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 1);
}
}
@@ -547,7 +555,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant,
SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
- if (flush) {
+ if (flush_cache) {
+ inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
#endif
@@ -562,6 +571,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_uw *buf_end;
sljit_uw size;
sljit_uw word_count;
+ sljit_sw executable_offset;
+ sljit_sw jump_addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
sljit_uw cpool_size;
sljit_uw cpool_skip_alignment;
@@ -602,14 +613,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
if (label && label->size == 0) {
- label->addr = (sljit_uw)code;
- label->size = 0;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
label = label->next;
}
@@ -636,7 +647,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
cpool_size = 0;
if (label && label->size == word_count) {
/* Points after the current instruction. */
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -652,19 +663,19 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- if (detect_jump_type(jump, code_ptr, code))
+ if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr--;
jump->addr = (sljit_uw)code_ptr;
#else
jump->addr = (sljit_uw)(code_ptr - 2);
- if (detect_jump_type(jump, code_ptr, code))
+ if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr -= 2;
#endif
jump = jump->next;
}
if (label && label->size == word_count) {
/* code_ptr can be affected above. */
- label->addr = (sljit_uw)(code_ptr + 1);
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
label->size = (code_ptr + 1) - code;
label = label->next;
}
@@ -729,17 +740,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = compiler->jumps;
while (jump) {
- buf_ptr = (sljit_uw*)jump->addr;
+ buf_ptr = (sljit_uw *)jump->addr;
if (jump->flags & PATCH_B) {
+ jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
if (!(jump->flags & JUMP_ADDR)) {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
- SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
- *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+ SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
+ *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
}
else {
- SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000);
- *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff;
+ SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
+ *buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
}
}
else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
@@ -747,10 +759,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump->addr = (sljit_uw)code_ptr;
code_ptr[0] = (sljit_uw)buf_ptr;
code_ptr[1] = *buf_ptr;
- inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+ inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
code_ptr += 2;
#else
- inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+ inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
}
else {
@@ -763,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
buf_ptr += 1;
*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
#else
- inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
+ inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
}
jump = jump->next;
@@ -782,7 +794,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
else
buf_ptr += 1;
/* Set the value again (can be a simple constant). */
- inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0);
+ inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
code_ptr += 2;
const_ = const_->next;
@@ -792,29 +804,90 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
+
+ code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#else
+ /* Available by default. */
+ return 1;
+#endif
+
+ case SLJIT_HAS_PRE_UPDATE:
+ case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CMOV:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */
+/* Creates an index in data_transfer_insts array. */
+#define WORD_DATA 0x00
+#define BYTE_DATA 0x01
+#define HALF_DATA 0x02
+#define PRELOAD_DATA 0x03
+#define SIGNED_DATA 0x04
+#define LOAD_DATA 0x08
+
/* emit_op inp_flags.
WRITE_BACK must be the first, since it is a flag. */
-#define WRITE_BACK 0x01
-#define ALLOW_IMM 0x02
-#define ALLOW_INV_IMM 0x04
+#define WRITE_BACK 0x10
+#define ALLOW_IMM 0x20
+#define ALLOW_INV_IMM 0x40
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
-#define ARG_TEST 0x08
-/* Creates an index in data_transfer_insts array. */
-#define WORD_DATA 0x00
-#define BYTE_DATA 0x10
-#define HALF_DATA 0x20
-#define SIGNED_DATA 0x40
-#define LOAD_DATA 0x80
+/* s/l - store/load (1 bit)
+ u/s - signed/unsigned (1 bit)
+ w/b/h/N - word/byte/half/NOT allowed (2 bit)
+ Storing signed and unsigned values are the same operations. */
+
+static const sljit_uw data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u p */ 0xf5500000 /* preload data */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
+ (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | RD(target_reg) | RN(base_reg) | (arg))
+
+/* Normal ldr/str instruction.
+ Type2: ldrsb, ldrh, ldrsh */
+#define IS_TYPE1_TRANSFER(type) \
+ (data_transfer_insts[(type) & 0xf] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
/* Condition: AL. */
#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
@@ -912,52 +985,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */
/* --------------------------------------------------------------------- */
-/* s/l - store/load (1 bit)
- u/s - signed/unsigned (1 bit)
- w/b/h/N - word/byte/half/NOT allowed (2 bit)
- It contans 16 items, but not all are different. */
-
-static sljit_sw data_transfer_insts[16] = {
-/* s u w */ 0xe5000000 /* str */,
-/* s u b */ 0xe5400000 /* strb */,
-/* s u h */ 0xe10000b0 /* strh */,
-/* s u N */ 0x00000000 /* not allowed */,
-/* s s w */ 0xe5000000 /* str */,
-/* s s b */ 0xe5400000 /* strb */,
-/* s s h */ 0xe10000b0 /* strh */,
-/* s s N */ 0x00000000 /* not allowed */,
-
-/* l u w */ 0xe5100000 /* ldr */,
-/* l u b */ 0xe5500000 /* ldrb */,
-/* l u h */ 0xe11000b0 /* ldrh */,
-/* l u N */ 0x00000000 /* not allowed */,
-/* l s w */ 0xe5100000 /* ldr */,
-/* l s b */ 0xe11000d0 /* ldrsb */,
-/* l s h */ 0xe11000f0 /* ldrsh */,
-/* l s N */ 0x00000000 /* not allowed */,
-};
-
-#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
- (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
-/* Normal ldr/str instruction.
- Type2: ldrsb, ldrh, ldrsh */
-#define IS_TYPE1_TRANSFER(type) \
- (data_transfer_insts[(type) >> 4] & 0x04000000)
-#define TYPE2_TRANSFER_IMM(imm) \
- (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
-
/* flags: */
/* Arguments are swapped. */
#define ARGS_SWAPPED 0x01
/* Inverted immediate. */
#define INV_IMM 0x02
/* Source and destination is register. */
-#define REG_DEST 0x04
-#define REG_SOURCE 0x08
- /* One instruction is enough. */
-#define FAST_DEST 0x10
- /* Multiple instructions are required. */
-#define SLOW_DEST 0x20
+#define MOVE_REG_CONV 0x04
+ /* Unused return value. */
+#define UNUSED_RETURN 0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS (1 << 20)
/* dst: reg
@@ -966,157 +1002,135 @@ static sljit_sw data_transfer_insts[16] = {
SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM (1 << 25)
-#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)))
-
-#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
-
#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
if (compiler->shift_imm != 0x20) { \
SLJIT_ASSERT(src1 == TMP_REG1); \
SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
+ \
if (compiler->shift_imm != 0) \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
+ dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \
} \
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
+ dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)));
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
- sljit_sw mul_inst;
-
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
if (dst != src2) {
if (src2 & SRC2_IMM) {
- if (flags & INV_IMM)
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+ dst, SLJIT_UNUSED, src2));
}
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2)));
}
return SLJIT_SUCCESS;
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
- if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+ if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (op == SLJIT_MOV_U8)
return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | reg_map[dst]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2))));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)));
#else
return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
}
else if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
- if (flags & INV_IMM)
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+ dst, SLJIT_UNUSED, src2));
}
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
- if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
+ if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | reg_map[dst]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2))));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)));
#else
return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
}
else if (dst != src2) {
SLJIT_ASSERT(src2 & SRC2_IMM);
- if (flags & INV_IMM)
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
+ dst, SLJIT_UNUSED, src2));
}
return SLJIT_SUCCESS;
case SLJIT_NOT:
if (src2 & SRC2_IMM) {
- if (flags & INV_IMM)
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2);
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MOV_DP : MVN_DP, flags & SET_FLAGS,
+ dst, SLJIT_UNUSED, src2));
}
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2)));
case SLJIT_CLZ:
SLJIT_ASSERT(!(flags & INV_IMM));
SLJIT_ASSERT(!(src2 & SRC2_IMM));
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
- if (flags & SET_FLAGS)
- EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM);
return SLJIT_SUCCESS;
case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM));
- EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
+ if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMN_DP, SET_FLAGS,
+ SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS,
+ dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_ADDC:
SLJIT_ASSERT(!(flags & INV_IMM));
- EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS,
+ dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SUB:
SLJIT_ASSERT(!(flags & INV_IMM));
- if (!(flags & ARGS_SWAPPED))
- EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
- EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
+ if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED))
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS,
+ SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS,
+ dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SUBC:
SLJIT_ASSERT(!(flags & INV_IMM));
- if (!(flags & ARGS_SWAPPED))
- EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
- EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS,
+ dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & INV_IMM));
SLJIT_ASSERT(!(src2 & SRC2_IMM));
- if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
- mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
- else
- mul_inst = MUL | (reg_map[dst] << 16);
- if (dst != src2)
- FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
- else if (dst != src1)
- FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
- else {
- /* Rm and Rd must not be the same register. */
- SLJIT_ASSERT(dst != TMP_REG1);
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
- FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
- }
+ if (!HAS_FLAGS(op))
+ return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]);
- if (!(op & SLJIT_SET_O))
- return SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));
- /* We need to use TMP_REG3. */
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- /* cmp TMP_REG2, dst asr #31. */
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0));
+ /* cmp TMP_REG1, dst asr #31. */
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0));
case SLJIT_AND:
- if (!(flags & INV_IMM))
- EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP);
- EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS,
+ dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_OR:
SLJIT_ASSERT(!(flags & INV_IMM));
- EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_XOR:
SLJIT_ASSERT(!(flags & INV_IMM));
- EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP);
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));
case SLJIT_SHL:
EMIT_SHIFT_INS_AND_RETURN(0);
@@ -1127,12 +1141,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_ASHR:
EMIT_SHIFT_INS_AND_RETURN(2);
}
- SLJIT_ASSERT_STOP();
+
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
-#undef EMIT_DATA_PROCESS_INS_AND_RETURN
-#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN
#undef EMIT_SHIFT_INS_AND_RETURN
/* Tests whether the immediate can be stored in the 12 bit imm field.
@@ -1312,291 +1325,116 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
/* Load integer. */
return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
#else
- return emit_imm(compiler, reg, imm);
+ FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
+ if (imm <= 0xffff)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif
}
-/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
-static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
- if (value >= 0) {
- value = get_imm(value);
- if (value)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value));
- }
- else {
- value = get_imm(-value);
- if (value)
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value));
- }
- return SLJIT_ERR_UNSUPPORTED;
-}
+ sljit_uw offset_reg, imm;
+ sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);
-/* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
- sljit_uw imm;
+ SLJIT_ASSERT (arg & SLJIT_MEM);
+ SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
- if (arg & SLJIT_IMM) {
- imm = get_imm(argw);
- if (imm) {
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
- return -1;
+ SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer);
+
+ if ((arg & REG_MASK) == SLJIT_UNUSED) {
+ /* Write back is not used. */
+ if (is_type1_transfer) {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff));
+ argw &= 0xfff;
}
- imm = get_imm(~argw);
- if (imm) {
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
- return -1;
+ else {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff));
+ argw &= 0xff;
}
- return 0;
+
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
}
- SLJIT_ASSERT(arg & SLJIT_MEM);
+ if (arg & OFFS_REG_MASK) {
+ offset_reg = OFFS_REG(arg);
+ arg &= REG_MASK;
+ argw &= 0x3;
- /* Fast loads/stores. */
- if (!(arg & REG_MASK))
- return 0;
+ if (argw != 0 && !is_type1_transfer) {
+ SLJIT_ASSERT(!(flags & WRITE_BACK));
- if (arg & OFFS_REG_MASK) {
- if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags))
- return 0;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7))));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
+ }
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
- RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
- return -1;
+ /* Bit 25: RM is offset. */
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+ RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
}
- if (IS_TYPE1_TRANSFER(inp_flags)) {
+ arg &= REG_MASK;
+
+ if (is_type1_transfer) {
+ if (argw > 0xfff) {
+ imm = get_imm(argw & ~0xfff);
+ if (imm) {
+ offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+ argw = argw & 0xfff;
+ arg = offset_reg;
+ }
+ }
+ else if (argw < -0xfff) {
+ imm = get_imm(-argw & ~0xfff);
+ if (imm) {
+ offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+ argw = -(-argw & 0xfff);
+ arg = offset_reg;
+ }
+ }
+
if (argw >= 0 && argw <= 0xfff) {
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
- return -1;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw));
}
if (argw < 0 && argw >= -0xfff) {
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
- return -1;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
}
}
else {
- if (argw >= 0 && argw <= 0xff) {
- if (inp_flags & ARG_TEST)
- return 1;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
- return -1;
- }
- if (argw < 0 && argw >= -0xff) {
- if (inp_flags & ARG_TEST)
- return 1;
- argw = -argw;
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
- return -1;
- }
- }
-
- return 0;
-}
-
-/* See getput_arg below.
- Note: can_cache is called only for binary operators. Those
- operators always uses word arguments without write back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- /* Immediate caching is not supported as it would be an operation on constant arguments. */
- if (arg & SLJIT_IMM)
- return 0;
-
- /* Always a simple operation. */
- if (arg & OFFS_REG_MASK)
- return 0;
-
- if (!(arg & REG_MASK)) {
- /* Immediate access. */
- if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
- return 1;
- return 0;
- }
-
- if (argw <= 0xfffff && argw >= -0xfffff)
- return 0;
-
- if (argw == next_argw && (next_arg & SLJIT_MEM))
- return 1;
-
- if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff))
- return 1;
-
- return 0;
-}
-
-#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \
- if (max_delta & 0xf00) \
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \
- else \
- FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm))));
-
-#define TEST_WRITE_BACK() \
- if (inp_flags & WRITE_BACK) { \
- tmp_r = arg & REG_MASK; \
- if (reg == tmp_r) { \
- /* This can only happen for stores */ \
- /* since ldr reg, [reg, ...]! has no meaning */ \
- SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
- reg = TMP_REG3; \
- } \
- }
-
-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_s32 tmp_r;
- sljit_sw max_delta;
- sljit_sw sign;
- sljit_uw imm;
-
- if (arg & SLJIT_IMM) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- return load_immediate(compiler, reg, argw);
- }
-
- SLJIT_ASSERT(arg & SLJIT_MEM);
-
- tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
- max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff;
-
- if ((arg & REG_MASK) == SLJIT_UNUSED) {
- /* Write back is not used. */
- imm = (sljit_uw)(argw - compiler->cache_argw);
- if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
- if (imm <= (sljit_uw)max_delta) {
- sign = 1;
- argw = argw - compiler->cache_argw;
+ if (argw > 0xff) {
+ imm = get_imm(argw & ~0xff);
+ if (imm) {
+ offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
+ argw = argw & 0xff;
+ arg = offset_reg;
}
- else {
- sign = 0;
- argw = compiler->cache_argw - argw;
+ }
+ else if (argw < -0xff) {
+ imm = get_imm(-argw & ~0xff);
+ if (imm) {
+ offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
+ argw = -(-argw & 0xff);
+ arg = offset_reg;
}
-
- GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw);
- return SLJIT_SUCCESS;
}
- /* With write back, we can create some sophisticated loads, but
- it is hard to decide whether we should convert downward (0s) or upward (1s). */
- imm = (sljit_uw)(argw - next_argw);
- if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
-
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
- tmp_r = TMP_REG3;
+ if (argw >= 0 && argw <= 0xff) {
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
+ }
+ if (argw < 0 && argw >= -0xff) {
+ argw = -argw;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
}
-
- FAIL_IF(load_immediate(compiler, tmp_r, argw));
- GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0);
- return SLJIT_SUCCESS;
- }
-
- if (arg & OFFS_REG_MASK) {
- SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
- if (inp_flags & WRITE_BACK)
- tmp_r = arg & REG_MASK;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
- return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
- }
-
- imm = (sljit_uw)(argw - compiler->cache_argw);
- if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) {
- SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
- GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm);
- return SLJIT_SUCCESS;
- }
- if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) {
- SLJIT_ASSERT(!(inp_flags & WRITE_BACK));
- imm = (sljit_uw)-(sljit_sw)imm;
- GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm);
- return SLJIT_SUCCESS;
- }
-
- imm = get_imm(argw & ~max_delta);
- if (imm) {
- TEST_WRITE_BACK();
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
- GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
- return SLJIT_SUCCESS;
- }
-
- imm = get_imm(-argw & ~max_delta);
- if (imm) {
- argw = -argw;
- TEST_WRITE_BACK();
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
- GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
- return SLJIT_SUCCESS;
- }
-
- if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
- TEST_WRITE_BACK();
- return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
- }
-
- if (argw == next_argw && (next_arg & SLJIT_MEM)) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
-
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
-
- TEST_WRITE_BACK();
- return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
- }
-
- imm = (sljit_uw)(argw - next_argw);
- if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
- SLJIT_ASSERT(inp_flags & LOAD_DATA);
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
-
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
-
- GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0);
- return SLJIT_SUCCESS;
- }
-
- if ((arg & REG_MASK) == tmp_r) {
- compiler->cache_arg = SLJIT_IMM;
- compiler->cache_argw = argw;
- tmp_r = TMP_REG3;
}
- FAIL_IF(load_immediate(compiler, tmp_r, argw));
- return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
- if (getput_arg_fast(compiler, flags, reg, arg, argw))
- return compiler->error;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
- if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
- return compiler->error;
- return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
+ RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
}
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
@@ -1604,68 +1442,66 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- /* arg1 goes to TMP_REG1 or src reg
- arg2 goes to TMP_REG2, imm or src reg
- TMP_REG3 can be used for caching
- result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+ /* src1 is reg or TMP_REG1
+ src2 is reg, TMP_REG2, or imm
+ result goes to TMP_REG2, so put result can use TMP_REG1. */
/* We prefers register and simple consts. */
- sljit_s32 dst_r;
- sljit_s32 src1_r;
- sljit_s32 src2_r = 0;
- sljit_s32 sugg_src2_r = TMP_REG2;
- sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
-
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
+ sljit_s32 dst_reg;
+ sljit_s32 src1_reg;
+ sljit_s32 src2_reg;
+ sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
/* Destination check. */
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
- dst_r = TMP_REG2;
- }
- else if (FAST_IS_REG(dst)) {
- dst_r = dst;
- flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
- sugg_src2_r = dst_r;
- }
- else {
- SLJIT_ASSERT(dst & SLJIT_MEM);
- if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
- flags |= FAST_DEST;
- dst_r = TMP_REG2;
- }
- else {
- flags |= SLOW_DEST;
- dst_r = 0;
+ if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
+ flags |= UNUSED_RETURN;
+
+ SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
+
+ src2_reg = 0;
+
+ do {
+ if (!(inp_flags & ALLOW_IMM))
+ break;
+
+ if (src2 & SLJIT_IMM) {
+ src2_reg = get_imm(src2w);
+ if (src2_reg)
+ break;
+ if (inp_flags & ALLOW_INV_IMM) {
+ src2_reg = get_imm(~src2w);
+ if (src2_reg) {
+ flags |= INV_IMM;
+ break;
+ }
+ }
+ if (GET_OPCODE(op) == SLJIT_ADD) {
+ src2_reg = get_imm(-src2w);
+ if (src2_reg) {
+ op = SLJIT_SUB | GET_ALL_FLAGS(op);
+ break;
+ }
+ }
+ if (GET_OPCODE(op) == SLJIT_SUB) {
+ src2_reg = get_imm(-src2w);
+ if (src2_reg) {
+ op = SLJIT_ADD | GET_ALL_FLAGS(op);
+ break;
+ }
+ }
}
- }
- /* Source 1. */
- if (FAST_IS_REG(src1))
- src1_r = src1;
- else if (FAST_IS_REG(src2)) {
- flags |= ARGS_SWAPPED;
- src1_r = src2;
- src2 = src1;
- src2w = src1w;
- }
- else do { /* do { } while(0) is used because of breaks. */
- src1_r = 0;
- if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) {
- /* The second check will generate a hit. */
- src2_r = get_imm(src1w);
- if (src2_r) {
+ if (src1 & SLJIT_IMM) {
+ src2_reg = get_imm(src1w);
+ if (src2_reg) {
flags |= ARGS_SWAPPED;
src1 = src2;
src1w = src2w;
break;
}
if (inp_flags & ALLOW_INV_IMM) {
- src2_r = get_imm(~src1w);
- if (src2_r) {
+ src2_reg = get_imm(~src1w);
+ if (src2_reg) {
flags |= ARGS_SWAPPED | INV_IMM;
src1 = src2;
src1w = src2w;
@@ -1673,9 +1509,9 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
}
if (GET_OPCODE(op) == SLJIT_ADD) {
- src2_r = get_imm(-src1w);
- if (src2_r) {
- /* Note: ARGS_SWAPPED is intentionally not applied! */
+ src2_reg = get_imm(-src1w);
+ if (src2_reg) {
+ /* Note: add is commutative operation. */
src1 = src2;
src1w = src2w;
op = SLJIT_SUB | GET_ALL_FLAGS(op);
@@ -1683,110 +1519,54 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
}
}
+ } while(0);
- if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
- FAIL_IF(compiler->error);
- src1_r = TMP_REG1;
- }
- } while (0);
+ /* Source 1. */
+ if (FAST_IS_REG(src1))
+ src1_reg = src1;
+ else if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
+ src1_reg = TMP_REG1;
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+ src1_reg = TMP_REG1;
+ }
- /* Source 2. */
- if (src2_r == 0) {
- if (FAST_IS_REG(src2)) {
- src2_r = src2;
- flags |= REG_SOURCE;
- if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
- dst_r = src2_r;
- }
- else do { /* do { } while(0) is used because of breaks. */
- if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) {
- src2_r = get_imm(src2w);
- if (src2_r)
- break;
- if (inp_flags & ALLOW_INV_IMM) {
- src2_r = get_imm(~src2w);
- if (src2_r) {
- flags |= INV_IMM;
- break;
- }
- }
- if (GET_OPCODE(op) == SLJIT_ADD) {
- src2_r = get_imm(-src2w);
- if (src2_r) {
- op = SLJIT_SUB | GET_ALL_FLAGS(op);
- flags &= ~ARGS_SWAPPED;
- break;
- }
- }
- if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) {
- src2_r = get_imm(-src2w);
- if (src2_r) {
- op = SLJIT_ADD | GET_ALL_FLAGS(op);
- flags &= ~ARGS_SWAPPED;
- break;
- }
- }
- }
+ /* Destination. */
+ dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
- /* src2_r is 0. */
- if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
- FAIL_IF(compiler->error);
- src2_r = sugg_src2_r;
- }
- } while (0);
- }
+ if (op <= SLJIT_MOVU_P) {
+ if (dst & SLJIT_MEM) {
+ if (inp_flags & BYTE_DATA)
+ inp_flags &= ~SIGNED_DATA;
- /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero.
- If they are zero, they must not be registers. */
- if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
- if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- SLJIT_ASSERT(!(flags & ARGS_SWAPPED));
- flags |= ARGS_SWAPPED;
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw));
+ if (FAST_IS_REG(src2))
+ return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
}
- else {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
- }
- src1_r = TMP_REG1;
- src2_r = TMP_REG2;
- }
- else if (src1_r == 0 && src2_r == 0) {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
- src1_r = TMP_REG1;
- }
- else if (src1_r == 0 && dst_r == 0) {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
- src1_r = TMP_REG1;
- }
- else if (src2_r == 0 && dst_r == 0) {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
- src2_r = sugg_src2_r;
+
+ if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
+ flags |= MOVE_REG_CONV;
}
- if (dst_r == 0)
- dst_r = TMP_REG2;
+ /* Source 2. */
+ if (src2_reg == 0) {
+ src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2;
- if (src1_r == 0) {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
- src1_r = TMP_REG1;
+ if (FAST_IS_REG(src2))
+ src2_reg = src2;
+ else if (src2 & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
+ else
+ FAIL_IF(load_immediate(compiler, src2_reg, src2w));
}
- if (src2_r == 0) {
- FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
- src2_r = sugg_src2_r;
- }
+ FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg));
- FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
- if (flags & (FAST_DEST | SLOW_DEST)) {
- if (flags & FAST_DEST)
- FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw));
- else
- FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0));
- }
- return SLJIT_SUCCESS;
+ return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
#ifdef __cplusplus
@@ -1806,6 +1586,9 @@ extern int __aeabi_idivmod(int numerator, int denominator);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
+ sljit_sw saved_reg_list[3];
+ sljit_sw saved_reg_count;
+
CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));
@@ -1819,33 +1602,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
break;
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
-#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
| (reg_map[SLJIT_R1] << 16)
| (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 8)
| reg_map[SLJIT_R1]);
-#else
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1))));
- return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
- | (reg_map[SLJIT_R1] << 16)
- | (reg_map[SLJIT_R0] << 12)
- | (reg_map[SLJIT_R0] << 8)
- | reg_map[TMP_REG1]);
-#endif
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
- SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);
-
- if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
- FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
- FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
+ SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
+
+ saved_reg_count = 0;
+ if (compiler->scratches >= 4)
+ saved_reg_list[saved_reg_count++] = 3;
+ if (compiler->scratches >= 3)
+ saved_reg_list[saved_reg_count++] = 2;
+ if (op >= SLJIT_DIV_UW)
+ saved_reg_list[saved_reg_count++] = 1;
+
+ if (saved_reg_count > 0) {
+ FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8)
+ | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
+ if (saved_reg_count >= 2) {
+ SLJIT_ASSERT(saved_reg_list[1] < 8);
+ FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
+ }
+ if (saved_reg_count >= 3) {
+ SLJIT_ASSERT(saved_reg_list[2] < 8);
+ FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
+ }
}
- else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
- FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));
#if defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
@@ -1854,12 +1642,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#error "Software divmod functions are needed"
#endif
- if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) {
- FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
- FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
+ if (saved_reg_count > 0) {
+ if (saved_reg_count >= 3) {
+ SLJIT_ASSERT(saved_reg_list[2] < 8);
+ FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
+ }
+ if (saved_reg_count >= 2) {
+ SLJIT_ASSERT(saved_reg_list[1] < 8);
+ FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
+ }
+ return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8)
+ | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
- else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3))
- return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
return SLJIT_SUCCESS;
}
@@ -1875,6 +1669,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_op_mem(compiler, PRELOAD_DATA | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
+#endif
+ return SLJIT_SUCCESS;
+ }
+
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -1940,6 +1742,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
case SLJIT_ADDC:
@@ -1996,43 +1801,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-
-/* 0 - no fpu
- 1 - vfp */
-static sljit_s32 arm_fpu_type = -1;
-
-static void init_compiler(void)
-{
- if (arm_fpu_type != -1)
- return;
-
- /* TODO: Only the OS can help to determine the correct fpu type. */
- arm_fpu_type = 1;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#else
- if (arm_fpu_type == -1)
- init_compiler();
- return arm_fpu_type;
-#endif
-}
-
-#else
-
-#define arm_fpu_type 1
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
- /* Always available. */
- return 1;
-}
-
-#endif
#define FPU_LOAD (1 << 20)
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
@@ -2042,72 +1810,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
- sljit_sw tmp;
sljit_uw imm;
sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
+
SLJIT_ASSERT(arg & SLJIT_MEM);
+ arg &= ~SLJIT_MEM;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
- arg = SLJIT_MEM | TMP_REG1;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+ arg = TMP_REG2;
argw = 0;
}
/* Fast loads and stores. */
- if ((arg & REG_MASK)) {
+ if (arg) {
if (!(argw & ~0x3fc))
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
if (!(-argw & ~0x3fc))
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
- }
- if (compiler->cache_arg == arg) {
- tmp = argw - compiler->cache_argw;
- if (!(tmp & ~0x3fc))
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2));
- if (!(-tmp & ~0x3fc))
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
- }
- }
-
- if (arg & REG_MASK) {
- if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
- }
imm = get_imm(argw & ~0x3fc);
if (imm) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+ return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
}
imm = get_imm(-argw & ~0x3fc);
if (imm) {
argw = -argw;
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
+ return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
}
}
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
- if (arg & REG_MASK) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
+ if (arg) {
+ FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2))));
}
else
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+ FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
- return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
+ return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
+ op ^= SLJIT_F32_OP;
+
if (src & SLJIT_MEM) {
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
src = TMP_FREG1;
@@ -2115,9 +1865,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
@@ -2131,6 +1878,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ op ^= SLJIT_F32_OP;
+
if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
else if (src & SLJIT_MEM) {
@@ -2153,6 +1902,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ op ^= SLJIT_F32_OP;
+
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
@@ -2174,16 +1925,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 dst_r;
CHECK_ERROR();
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
- op ^= SLJIT_F32_OP;
SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
+ op ^= SLJIT_F32_OP;
+
if (src & SLJIT_MEM) {
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
src = dst_r;
@@ -2228,8 +1978,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
op ^= SLJIT_F32_OP;
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
@@ -2282,21 +2030,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
if (FAST_IS_REG(dst))
- return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3)));
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1)));
/* Memory. */
- if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
- return compiler->error;
- /* TMP_REG3 is used for caching. */
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -2305,21 +2045,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
+
if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
- else if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
- FAIL_IF(compiler->error);
- else {
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
- }
- }
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src))));
+ else if (src & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2));
else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
- return push_inst(compiler, BLX | RM(TMP_REG3));
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+
+ return push_inst(compiler, BX | RM(TMP_REG1));
}
/* --------------------------------------------------------------------- */
@@ -2414,7 +2149,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
if (type >= SLJIT_FAST_CALL)
PTR_FAIL_IF(prepare_blx(compiler));
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
- type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
+ type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));
if (jump->flags & SLJIT_REWRITABLE_JUMP) {
jump->addr = compiler->size;
@@ -2431,8 +2166,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
#else
if (type >= SLJIT_FAST_CALL)
jump->flags |= IS_BL;
- PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
- PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type)));
+ PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
+ PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type)));
jump->addr = compiler->size;
#endif
return jump;
@@ -2452,7 +2187,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
SLJIT_ASSERT(src & SLJIT_MEM);
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw));
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
}
@@ -2464,12 +2199,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (type >= SLJIT_FAST_CALL)
FAIL_IF(prepare_blx(compiler));
- FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
+ FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0));
if (type >= SLJIT_FAST_CALL)
FAIL_IF(emit_blx(compiler));
#else
- FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
- FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
+ FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
+ FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)));
#endif
jump->addr = compiler->size;
return SLJIT_SUCCESS;
@@ -2477,55 +2212,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
sljit_uw cc, ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
op = GET_OPCODE(op);
cc = get_cc(type & 0xff);
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (op < SLJIT_ADD) {
FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
- return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
+ return SLJIT_SUCCESS;
}
ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
- if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
- /* The condition must always be set, even if the ORR/EOR is not executed above. */
- return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
- }
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- } else if (src & SLJIT_IMM) {
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 1) & ~COND_MASK) | cc));
+
+ if (op == SLJIT_AND)
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
+
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+
+ if (flags & SLJIT_SET_Z)
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_r)));
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_uw cc, tmp;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+ dst_reg &= ~SLJIT_I32_OP;
+
+ cc = get_cc(type & 0xff);
+
+ if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+ tmp = get_imm(srcw);
+ if (tmp)
+ return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+
+ tmp = get_imm(~srcw);
+ if (tmp)
+ return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MVN_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc);
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ tmp = (sljit_uw) srcw;
+ FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
+ if (tmp <= 0xffff)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
+#else
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
- srcw = 0;
+#endif
}
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
- if (dst_r == TMP_REG2)
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
-
- return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS;
+ return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, RM(src)) & ~COND_MASK) | cc);
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2551,16 +2311,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
set_const(const_, compiler);
if (dst & SLJIT_MEM)
- PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+ PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
return const_;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
- inline_set_jump_addr(addr, new_addr, 1);
+ inline_set_jump_addr(addr, executable_offset, new_target, 1);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
- inline_set_const(addr, new_constant, 1);
+ inline_set_const(addr, executable_offset, new_constant, 1);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
index d9958512c8..fd67f50253 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -76,6 +76,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
#define BRK 0xd4200000
#define CBZ 0xb4000000
#define CLZ 0xdac01000
+#define CSEL 0x9a800000
#define CSINC 0x9a800400
#define EOR 0xca000000
#define EORI 0xd2000000
@@ -151,7 +152,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
}
-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
@@ -165,9 +166,10 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
- target_addr = (sljit_uw)(code + jump->u.label->size);
+ target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
- diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);
+
+ diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;
if (jump->flags & IS_COND) {
diff += sizeof(sljit_ins);
@@ -211,6 +213,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
+ sljit_sw executable_offset;
sljit_uw addr;
sljit_s32 dst;
@@ -228,6 +231,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
@@ -242,13 +247,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
if (label && label->size == word_count) {
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
jump->addr = (sljit_uw)(code_ptr - 4);
- code_ptr -= detect_jump_type(jump, code_ptr, code);
+ code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
@@ -263,7 +268,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
} while (buf);
if (label && label->size == word_count) {
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -277,9 +282,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
while (jump) {
do {
addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
- buf_ptr = (sljit_ins*)jump->addr;
+ buf_ptr = (sljit_ins *)jump->addr;
+
if (jump->flags & PATCH_B) {
- addr = (sljit_sw)(addr - jump->addr) >> 2;
+ addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
if (jump->flags & IS_COND)
@@ -287,7 +293,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
break;
}
if (jump->flags & PATCH_COND) {
- addr = (sljit_sw)(addr - jump->addr) >> 2;
+ addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
break;
@@ -308,11 +314,37 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+
+ code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#else
+ /* Available by default. */
+ return 1;
+#endif
+
+ case SLJIT_HAS_PRE_UPDATE:
+ case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CMOV:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* --------------------------------------------------------------------- */
/* Core code generator functions. */
/* --------------------------------------------------------------------- */
@@ -365,7 +397,7 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len)
uimm = (sljit_uw)imm;
while (1) {
if (len <= 0) {
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return 0;
}
mask = ((sljit_uw)1 << len) - 1;
@@ -635,7 +667,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
}
goto set_flags;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
@@ -702,7 +734,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_NOT:
SLJIT_ASSERT(arg1 == TMP_REG1);
FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
case SLJIT_NEG:
SLJIT_ASSERT(arg1 == TMP_REG1);
if (flags & SET_FLAGS)
@@ -710,8 +742,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
- FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
- goto set_flags;
+ return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
case SLJIT_ADD:
CHECK_FLAGS(1 << 29);
return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
@@ -740,24 +771,24 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
case SLJIT_OR:
FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
case SLJIT_XOR:
FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
case SLJIT_SHL:
FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
case SLJIT_LSHR:
FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
case SLJIT_ASHR:
FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
- goto set_flags;
+ break; /* Set flags. */
+ default:
+ SLJIT_UNREACHABLE();
+ return SLJIT_SUCCESS;
}
- SLJIT_ASSERT_STOP();
- return SLJIT_SUCCESS;
-
set_flags:
if (flags & SET_FLAGS)
return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
@@ -859,6 +890,10 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag
}
arg &= REG_MASK;
+
+ if (arg == SLJIT_UNUSED)
+ return 0;
+
if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
if (SLJIT_UNLIKELY(flags & ARG_TEST))
return 1;
@@ -919,21 +954,23 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
next_argw = 0;
}
- tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+ tmp_r = ((flags & STORE) || (flags == (WORD_SIZE | SIGNED))) ? TMP_REG3 : reg;
if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
/* Update only applies if a base register exists. */
other_r = OFFS_REG(arg);
if (!other_r) {
other_r = arg & REG_MASK;
- if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
+ SLJIT_ASSERT(other_r != reg);
+
+ if (argw >= 0 && argw <= 0xffffff) {
if ((argw & 0xfff) != 0)
FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
if (argw >> 12)
FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
}
- else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
+ else if (argw < 0 && argw >= -0xffffff) {
argw = -argw;
if ((argw & 0xfff) != 0)
FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
@@ -966,18 +1003,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
/* No caching here. */
arg &= REG_MASK;
- argw &= 0x3;
- if (!argw || argw == shift) {
- FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
- return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
- }
- if (arg != reg) {
- FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
- }
- FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10)));
- FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR)));
- return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR));
+ FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r)));
+ return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r));
}
if (arg & OFFS_REG_MASK) {
@@ -998,16 +1025,16 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
}
}
- if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
- FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
- }
-
diff = argw - next_argw;
next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
arg &= REG_MASK;
+ if (arg != SLJIT_UNUSED && argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg) | ((argw >> 12) << 10)));
+ return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+ | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
+ }
+
if (arg && compiler->cache_arg == SLJIT_MEM) {
if (compiler->cache_argw == argw)
return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
@@ -1290,6 +1317,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
compiler->cache_arg = 0;
compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) {
+ SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
+
+ if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
+ dst = 5;
+ else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
+ dst = 3;
+ else
+ dst = 1;
+
+ /* Signed word sized load is the prefetch instruction. */
+ return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw);
+ }
+ return SLJIT_SUCCESS;
+ }
+
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
@@ -1364,7 +1408,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
srcw = (sljit_s32)srcw;
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
flags = 0;
break;
}
@@ -1391,7 +1435,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
- flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
+ flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
if (op_flags & SLJIT_I32_OP) {
flags |= INT_OP;
@@ -1443,8 +1487,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->cache_arg = 0;
compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
- flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+ flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
if (op & SLJIT_I32_OP) {
flags |= INT_OP;
@@ -1537,16 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#else
- /* Available by default. */
- return 1;
-#endif
-}
-
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
sljit_u32 shift = MEM_SIZE_SHIFT(flags);
@@ -1604,7 +1641,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
@@ -1617,7 +1654,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
- if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ if (dst & SLJIT_MEM)
return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
return SLJIT_SUCCESS;
}
@@ -1775,10 +1812,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
@@ -1856,7 +1889,7 @@ static sljit_uw get_cc(sljit_s32 type)
return 0x6;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return 0xe;
}
}
@@ -1966,19 +1999,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 dst_r, flags, mem_flags;
+ sljit_s32 dst_r, src_r, flags, mem_flags;
sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
cc = get_cc(type & 0xff);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
@@ -1992,26 +2020,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+ flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
if (op & SLJIT_I32_OP) {
flags |= INT_OP;
mem_flags = INT_SIZE;
}
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
+ src_r = dst;
+
+ if (dst & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, dst, dstw, dst, dstw));
+ src_r = TMP_REG1;
+ }
+
+ FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+ emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? (1 << 31) : 0;
+ sljit_ins cc;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+ if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+ if (dst_reg & SLJIT_I32_OP)
+ srcw = (sljit_s32)srcw;
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
srcw = 0;
- } else if (src & SLJIT_IMM)
- flags |= ARG1_IMM;
+ }
- FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
- emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
+ cc = get_cc(type & 0xff);
+ dst_reg &= ~SLJIT_I32_OP;
- if (dst_r != TMP_REG1)
- return SLJIT_SUCCESS;
- return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+ return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2027,7 +2079,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
@@ -2035,16 +2087,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins* inst = (sljit_ins*)addr;
- modify_imm64_const(inst, new_addr);
+ modify_imm64_const(inst, new_target);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 4);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_ins* inst = (sljit_ins*)addr;
modify_imm64_const(inst, new_constant);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 4);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
index 1ed44a8130..29e5566a82 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -35,15 +35,14 @@ typedef sljit_u32 sljit_ins;
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_FREG1 (0)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
- 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 14, 15
};
#define COPY_BITS(src, from, to, bits) \
@@ -108,7 +107,11 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
#define BLX 0x4780
#define BX 0x4700
#define CLZ 0xfab0f080
+#define CMNI_W 0xf1100f00
+#define CMP 0x4280
#define CMPI 0x2800
+#define CMPI_W 0xf1b00f00
+#define CMP_X 0x4500
#define CMP_W 0xebb00f00
#define EORI 0xf0800000
#define EORS 0x4040
@@ -221,7 +224,7 @@ static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
}
-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
sljit_sw diff;
@@ -232,7 +235,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u1
/* Branch to ARM code is not optimized yet. */
if (!(jump->u.target & 0x1))
return 0;
- diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1;
+ diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1;
}
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
@@ -276,7 +279,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u1
return 0;
}
-static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
+static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset)
{
sljit_s32 type = (jump->flags >> 4) & 0xf;
sljit_sw diff;
@@ -290,10 +293,12 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
if (jump->flags & JUMP_ADDR) {
SLJIT_ASSERT(jump->u.target & 0x1);
- diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1;
+ diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
+ }
+ else {
+ SLJIT_ASSERT(jump->u.label->addr & 0x1);
+ diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
}
- else
- diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1;
jump_inst = (sljit_u16*)jump->addr;
switch (type) {
@@ -336,7 +341,7 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
else if (type == 6) /* Encoding T1 of 'BL' instruction */
jump_inst[1] |= 0xd000;
else
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
@@ -347,6 +352,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_u16 *buf_ptr;
sljit_u16 *buf_end;
sljit_uw half_count;
+ sljit_sw executable_offset;
struct sljit_label *label;
struct sljit_jump *jump;
@@ -362,6 +368,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
half_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
@@ -376,13 +384,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!jump || jump->addr >= half_count);
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
if (label && label->size == half_count) {
- label->addr = ((sljit_uw)code_ptr) | 0x1;
+ label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == half_count) {
jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
- code_ptr -= detect_jump_type(jump, code_ptr, code);
+ code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == half_count) {
@@ -397,7 +405,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
} while (buf);
if (label && label->size == half_count) {
- label->addr = ((sljit_uw)code_ptr) | 0x1;
+ label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
label->size = code_ptr - code;
label = label->next;
}
@@ -409,17 +417,43 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = compiler->jumps;
while (jump) {
- set_jump_instruction(jump);
+ set_jump_instruction(jump, executable_offset);
jump = jump->next;
}
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
+
+ code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
SLJIT_CACHE_FLUSH(code, code_ptr);
/* Set thumb mode flag. */
return (void*)((sljit_uw)code | 0x1);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#else
+ /* Available by default. */
+ return 1;
+#endif
+
+ case SLJIT_HAS_PRE_UPDATE:
+ case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CMOV:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* --------------------------------------------------------------------- */
/* Core code generator functions. */
/* --------------------------------------------------------------------- */
@@ -500,24 +534,20 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
#define ARG1_IMM 0x0010000
#define ARG2_IMM 0x0020000
-#define KEEP_FLAGS 0x0040000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS 0x0100000
#define UNUSED_RETURN 0x0200000
-#define SLOW_DEST 0x0400000
-#define SLOW_SRC1 0x0800000
-#define SLOW_SRC2 0x1000000
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
{
/* dst must be register, TMP_REG1
- arg1 must be register, TMP_REG1, imm
- arg2 must be register, TMP_REG2, imm */
+ arg1 must be register, imm
+ arg2 must be register, imm */
sljit_s32 reg;
sljit_uw imm, nimm;
if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
- /* Both are immediates. */
+ /* Both are immediates, no temporaries are used. */
flags &= ~ARG1_IMM;
FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
arg1 = TMP_REG1;
@@ -533,7 +563,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
/* No form with immediate operand. */
break;
case SLJIT_MOV:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
return load_immediate(compiler, dst, imm);
case SLJIT_NOT:
if (!(flags & SET_FLAGS))
@@ -543,7 +573,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
break;
case SLJIT_ADD:
nimm = -imm;
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+ if (IS_2_LO_REGS(reg, dst)) {
if (imm <= 0x7)
return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
if (nimm <= 0x7)
@@ -561,9 +591,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
if (nimm <= 0xfff)
return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
}
- imm = get_imm(imm);
- if (imm != INVALID_IMM)
- return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ nimm = get_imm(imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+ nimm = get_imm(-imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
break;
case SLJIT_ADDC:
imm = get_imm(imm);
@@ -571,16 +604,27 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break;
case SLJIT_SUB:
+ /* SUB operation can be replaced by ADD because of the negative carry flag. */
if (flags & ARG1_IMM) {
- if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
+ if (imm == 0 && IS_2_LO_REGS(reg, dst))
return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
imm = get_imm(imm);
if (imm != INVALID_IMM)
return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break;
}
+ if (flags & UNUSED_RETURN) {
+ if (imm <= 0xff && reg_map[reg] <= 7)
+ return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
+ nimm = get_imm(imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, CMPI_W | RN4(reg) | nimm);
+ nimm = get_imm(-imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, CMNI_W | RN4(reg) | nimm);
+ }
nimm = -imm;
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
+ if (IS_2_LO_REGS(reg, dst)) {
if (imm <= 0x7)
return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
if (nimm <= 0x7)
@@ -591,8 +635,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
if (nimm <= 0xff)
return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
}
- if (imm <= 0xff && (flags & UNUSED_RETURN))
- return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
}
if (!(flags & SET_FLAGS)) {
if (imm <= 0xfff)
@@ -600,9 +642,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
if (nimm <= 0xfff)
return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
}
- imm = get_imm(imm);
- if (imm != INVALID_IMM)
- return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ nimm = get_imm(imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
+ nimm = get_imm(-imm);
+ if (nimm != INVALID_IMM)
+ return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
break;
case SLJIT_SUBC:
if (flags & ARG1_IMM)
@@ -647,31 +692,35 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
}
switch (flags & 0xffff) {
case SLJIT_SHL:
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+ if (IS_2_LO_REGS(dst, reg))
return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
case SLJIT_LSHR:
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+ if (IS_2_LO_REGS(dst, reg))
return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
default: /* SLJIT_ASHR */
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
+ if (IS_2_LO_REGS(dst, reg))
return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
}
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
if (flags & ARG2_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
- arg2 = TMP_REG2;
+ imm = arg2;
+ arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
+ FAIL_IF(load_immediate(compiler, arg2, imm));
}
else {
- FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
- arg1 = TMP_REG1;
+ imm = arg1;
+ arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
+ FAIL_IF(load_immediate(compiler, arg1, imm));
}
+
+ SLJIT_ASSERT(arg1 != arg2);
}
/* Both arguments are registers. */
@@ -684,104 +733,102 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_MOVU_U32:
case SLJIT_MOVU_S32:
case SLJIT_MOVU_P:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (dst == arg2)
return SLJIT_SUCCESS;
return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
case SLJIT_MOV_U8:
case SLJIT_MOVU_U8:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_S8:
case SLJIT_MOVU_S8:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_U16:
case SLJIT_MOVU_U16:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
case SLJIT_MOV_S16:
case SLJIT_MOVU_S16:
- SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
+ SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
case SLJIT_NOT:
- SLJIT_ASSERT(arg1 == TMP_REG1);
- if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ SLJIT_ASSERT(arg1 == TMP_REG2);
+ if (IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
case SLJIT_CLZ:
- SLJIT_ASSERT(arg1 == TMP_REG1);
+ SLJIT_ASSERT(arg1 == TMP_REG2);
FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
- if (flags & SET_FLAGS) {
- if (reg_map[dst] <= 7)
- return push_inst16(compiler, CMPI | RDN3(dst));
- return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
- }
return SLJIT_SUCCESS;
case SLJIT_ADD:
- if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+ if (IS_3_LO_REGS(dst, arg1, arg2))
return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
if (dst == arg1 && !(flags & SET_FLAGS))
return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_ADDC:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_SUB:
- if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
+ if (flags & UNUSED_RETURN) {
+ if (IS_2_LO_REGS(arg1, arg2))
+ return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
+ return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
+ }
+ if (IS_3_LO_REGS(dst, arg1, arg2))
return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_SUBC:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_MUL:
if (!(flags & SET_FLAGS))
return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
- SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
+ SLJIT_ASSERT(dst != TMP_REG2);
FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
/* cmp TMP_REG2, dst asr #31. */
return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
case SLJIT_AND:
- if (!(flags & KEEP_FLAGS)) {
- if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
- return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
- if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
- return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
- }
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
+ return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
+ if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
+ return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_OR:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_XOR:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_SHL:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_LSHR:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_ASHR:
- if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
+ if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -791,9 +838,9 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x04
#define HALF_SIZE 0x08
+#define PRELOAD 0x0c
#define UPDATE 0x10
-#define ARG_TEST 0x20
#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE)))
#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift)))
@@ -849,7 +896,7 @@ static const sljit_ins sljit_mem16_imm5[12] = {
#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
-static const sljit_ins sljit_mem32[12] = {
+static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
@@ -864,6 +911,8 @@ static const sljit_ins sljit_mem32[12] = {
/* h u s */ 0xf8200000 /* strsh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strsh.w */,
+
+/* p u l */ 0xf8100000 /* pld */,
};
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
@@ -887,20 +936,67 @@ static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst,
return SLJIT_ERR_UNSUPPORTED;
}
-/* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
+ sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
- sljit_s32 other_r, shift;
+ sljit_s32 other_r;
+ sljit_s32 update = flags & UPDATE;
+ sljit_uw tmp;
SLJIT_ASSERT(arg & SLJIT_MEM);
+ SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);
+ flags &= ~UPDATE;
+ arg &= ~SLJIT_MEM;
+
+ if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
+ tmp = get_imm(argw & ~0xfff);
+ if (tmp != INVALID_IMM) {
+ FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp));
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
+ }
- if (SLJIT_UNLIKELY(flags & UPDATE)) {
- if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+ if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
+ return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
+ }
+
+ if (SLJIT_UNLIKELY(update)) {
+ SLJIT_ASSERT(reg != arg);
- flags &= ~UPDATE;
+ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+ other_r = OFFS_REG(arg);
arg &= 0xf;
+
+ if (IS_3_LO_REGS(reg, arg, other_r))
+ FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
+ else
+ FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r)));
+ return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
+ }
+
+ if (argw > 0xff) {
+ tmp = get_imm(argw & ~0xff);
+ if (tmp != INVALID_IMM) {
+ push_inst32(compiler, ADD_WI | RD4(arg) | RN4(arg) | tmp);
+ argw = argw & 0xff;
+ }
+ }
+ else if (argw < -0xff) {
+ tmp = get_imm(-argw & ~0xff);
+ if (tmp != INVALID_IMM) {
+ push_inst32(compiler, SUB_WI | RD4(arg) | RN4(arg) | tmp);
+ argw = -(-argw & 0xff);
+ }
+ }
+
+ if (argw == 0) {
+ if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags])
+ return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg));
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg));
+ }
+
+ if (argw <= 0xff && argw >= -0xff) {
if (argw >= 0)
argw |= 0x200;
else {
@@ -908,219 +1004,83 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag
}
SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
- return -1;
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw);
}
- return 0;
+
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+
+ SLJIT_ASSERT(reg != tmp_reg);
+
+ if (IS_3_LO_REGS(reg, arg, tmp_reg))
+ FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)));
+ else
+ FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)));
+ return push_inst16(compiler, ADD | SET_REGS44(arg, tmp_reg));
}
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
-
argw &= 0x3;
other_r = OFFS_REG(arg);
arg &= 0xf;
if (!argw && IS_3_LO_REGS(reg, arg, other_r))
- FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
- else
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
- return -1;
+ return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
+ return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4));
}
- if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
- return 0;
-
- if (SLJIT_UNLIKELY(flags & ARG_TEST))
- return 1;
+ if (argw > 0xfff) {
+ tmp = get_imm(argw & ~0xfff);
+ if (tmp != INVALID_IMM) {
+ push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp);
+ arg = tmp_reg;
+ argw = argw & 0xfff;
+ }
+ }
+ else if (argw < -0xff) {
+ tmp = get_imm(-argw & ~0xff);
+ if (tmp != INVALID_IMM) {
+ push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp);
+ arg = tmp_reg;
+ argw = -(-argw & 0xff);
+ }
+ }
- arg &= 0xf;
if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
- shift = 3;
+ tmp = 3;
if (IS_WORD_SIZE(flags)) {
if (OFFSET_CHECK(0x1f, 2))
- shift = 2;
+ tmp = 2;
}
else if (flags & BYTE_SIZE)
{
if (OFFSET_CHECK(0x1f, 0))
- shift = 0;
+ tmp = 0;
}
else {
SLJIT_ASSERT(flags & HALF_SIZE);
if (OFFSET_CHECK(0x1f, 1))
- shift = 1;
+ tmp = 1;
}
- if (shift != 3) {
- FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
- return -1;
- }
- }
-
- /* SP based immediate. */
- if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
- FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
- return -1;
+ if (tmp < 3)
+ return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp)));
}
-
- if (argw >= 0)
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
- else
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
- return -1;
-}
-
-/* see getput_arg below.
- Note: can_cache is called only for binary operators. Those
- operators always uses word arguments without write back. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_sw diff;
- if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
- return 0;
-
- if (!(arg & REG_MASK)) {
- diff = argw - next_argw;
- if (diff <= 0xfff && diff >= -0xfff)
- return 1;
- return 0;
+ else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) {
+ /* SP based immediate. */
+ return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2));
}
- if (argw == next_argw)
- return 1;
+ if (argw >= 0 && argw <= 0xfff)
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw);
+ else if (argw < 0 && argw >= -0xff)
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw);
- diff = argw - next_argw;
- if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
- return 1;
+ SLJIT_ASSERT(arg != tmp_reg);
- return 0;
-}
-
-/* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
- sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
- sljit_s32 tmp_r, other_r;
- sljit_sw diff;
-
- SLJIT_ASSERT(arg & SLJIT_MEM);
- if (!(next_arg & SLJIT_MEM)) {
- next_arg = 0;
- next_argw = 0;
- }
-
- tmp_r = (flags & STORE) ? TMP_REG3 : reg;
-
- if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
- /* Update only applies if a base register exists. */
- /* There is no caching here. */
- other_r = OFFS_REG(arg);
- arg &= 0xf;
- flags &= ~UPDATE;
-
- if (!other_r) {
- if (!(argw & ~0xfff)) {
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
- return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
- }
-
- if (compiler->cache_arg == SLJIT_MEM) {
- if (argw == compiler->cache_argw) {
- other_r = TMP_REG3;
- argw = 0;
- }
- else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- other_r = TMP_REG3;
- argw = 0;
- }
- }
-
- if (argw) {
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- compiler->cache_arg = SLJIT_MEM;
- compiler->cache_argw = argw;
- other_r = TMP_REG3;
- argw = 0;
- }
- }
-
- argw &= 0x3;
- if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
- FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
- return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
- }
- FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
- return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
- }
- flags &= ~UPDATE;
-
- SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
-
- if (compiler->cache_arg == arg) {
- diff = argw - compiler->cache_argw;
- if (!(diff & ~0xfff))
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
- if (!((compiler->cache_argw - argw) & ~0xff))
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
- }
- }
-
- next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
- arg &= 0xf;
- if (arg && compiler->cache_arg == SLJIT_MEM) {
- if (compiler->cache_argw == argw)
- return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
- }
- }
-
- compiler->cache_argw = argw;
- if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_arg = SLJIT_MEM | arg;
- arg = 0;
- }
- else {
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- compiler->cache_arg = SLJIT_MEM;
-
- diff = argw - next_argw;
- if (next_arg && diff <= 0xfff && diff >= -0xfff) {
- FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
- compiler->cache_arg = SLJIT_MEM | arg;
- arg = 0;
- }
- }
-
- if (arg)
- return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
- return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
- if (getput_arg_fast(compiler, flags, reg, arg, argw))
- return compiler->error;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
-}
-
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
- if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
- return compiler->error;
- return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw));
+ if (IS_3_LO_REGS(reg, arg, tmp_reg))
+ return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
+ return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
}
/* --------------------------------------------------------------------- */
@@ -1132,14 +1092,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 size, i, tmp;
- sljit_ins push;
+ sljit_ins push = 0;
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
- push = (1 << 4);
-
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
for (i = SLJIT_S0; i >= tmp; i--)
push |= 1 << reg_map[i];
@@ -1152,7 +1110,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
: push_inst16(compiler, PUSH | (1 << 8) | push));
/* Stack must be aligned to 8 bytes: (LR, R4) */
- size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size;
if (local_size > 0) {
@@ -1182,7 +1140,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
- size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
compiler->local_size = ((size + local_size + 7) & ~7) - size;
return SLJIT_SUCCESS;
}
@@ -1190,7 +1148,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
sljit_s32 i, tmp;
- sljit_ins pop;
+ sljit_ins pop = 0;
CHECK_ERROR();
CHECK(check_sljit_emit_return(compiler, op, src, srcw));
@@ -1204,8 +1162,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
}
- pop = (1 << 4);
-
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
for (i = SLJIT_S0; i >= tmp; i--)
pop |= 1 << reg_map[i];
@@ -1263,7 +1219,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
- SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping);
+ SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12);
saved_reg_count = 0;
if (compiler->scratches >= 4)
@@ -1323,8 +1279,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ /* Since TMP_PC has index 15, IS_2_LO_REGS and IS_3_LO_REGS checks always fail. */
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
+ return SLJIT_SUCCESS;
+ }
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
@@ -1384,31 +1344,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
srcw = (sljit_s16)srcw;
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
flags = 0;
break;
}
if (src & SLJIT_IMM)
- FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
+ FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw));
else if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
- FAIL_IF(compiler->error);
- else
- FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
+ FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, ((flags & UPDATE) && dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1));
} else {
if (dst_r != TMP_REG1)
- return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
+ return emit_op_imm(compiler, op, dst_r, TMP_REG2, src);
dst_r = src;
}
- if (dst & SLJIT_MEM) {
- if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
- return compiler->error;
- else
- return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
- }
- return SLJIT_SUCCESS;
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+
+ return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, (dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1);
}
if (op == SLJIT_NEG) {
@@ -1419,29 +1373,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
}
- flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
- if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
- FAIL_IF(compiler->error);
- else
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
- src = TMP_REG2;
- }
+ flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
if (src & SLJIT_IMM)
flags |= ARG2_IMM;
+ else if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
+ srcw = TMP_REG1;
+ }
else
srcw = src;
- emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
+ emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, srcw);
- if (dst & SLJIT_MEM) {
- if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
- return compiler->error;
- else
- return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
- }
- return SLJIT_SUCCESS;
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+ return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1449,7 +1396,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- sljit_s32 dst_r, flags;
+ sljit_s32 dst_reg, flags, src2_reg;
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -1457,70 +1404,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
- dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
- flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
-
- if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
- flags |= SLOW_DEST;
-
- if (src1 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
- FAIL_IF(compiler->error);
- else
- flags |= SLOW_SRC1;
- }
- if (src2 & SLJIT_MEM) {
- if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
- FAIL_IF(compiler->error);
- else
- flags |= SLOW_SRC2;
- }
-
- if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
- if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
- }
- else {
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
- }
- }
- else if (flags & SLOW_SRC1)
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
- else if (flags & SLOW_SRC2)
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
- if (src1 & SLJIT_MEM)
- src1 = TMP_REG1;
- if (src2 & SLJIT_MEM)
- src2 = TMP_REG2;
+ dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
if (src1 & SLJIT_IMM)
flags |= ARG1_IMM;
+ else if (src1 & SLJIT_MEM) {
+ emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
+ src1w = TMP_REG1;
+ }
else
src1w = src1;
+
if (src2 & SLJIT_IMM)
flags |= ARG2_IMM;
+ else if (src2 & SLJIT_MEM) {
+ src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
+ emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
+ src2w = src2_reg;
+ }
else
src2w = src2;
if (dst == SLJIT_UNUSED)
flags |= UNUSED_RETURN;
- emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
+ emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w);
- if (dst & SLJIT_MEM) {
- if (!(flags & SLOW_DEST)) {
- getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
- return compiler->error;
- }
- return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
- }
- return SLJIT_SUCCESS;
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+ return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
@@ -1550,21 +1466,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#else
- /* Available by default. */
- return 1;
-#endif
-}
-
#define FPU_LOAD (1 << 20)
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
- sljit_sw tmp;
sljit_uw imm;
sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));
@@ -1572,8 +1477,8 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags,
/* Fast loads and stores. */
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
- arg = SLJIT_MEM | TMP_REG2;
+ FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
+ arg = SLJIT_MEM | TMP_REG1;
argw = 0;
}
@@ -1584,21 +1489,6 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags,
return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
}
- /* Slow cases */
- SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
- if (compiler->cache_arg == arg) {
- tmp = argw - compiler->cache_argw;
- if (!(tmp & ~0x3fc))
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
- if (!(-tmp & ~0x3fc))
- return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
- if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
- FAIL_IF(compiler->error);
- compiler->cache_argw = argw;
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
- }
- }
-
if (arg & REG_MASK) {
if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
FAIL_IF(compiler->error);
@@ -1617,19 +1507,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags,
}
}
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
-
- FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+ FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
if (arg & REG_MASK)
- FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
- return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
+ FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
+ return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
+ op ^= SLJIT_F32_OP;
+
if (src & SLJIT_MEM) {
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
src = TMP_FREG1;
@@ -1637,9 +1526,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
@@ -1653,6 +1539,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ op ^= SLJIT_F32_OP;
+
if (FAST_IS_REG(src))
FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
else if (src & SLJIT_MEM) {
@@ -1675,6 +1563,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ op ^= SLJIT_F32_OP;
+
if (src1 & SLJIT_MEM) {
emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
src1 = TMP_FREG1;
@@ -1696,16 +1586,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
sljit_s32 dst_r;
CHECK_ERROR();
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
- op ^= SLJIT_F32_OP;
SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
+ op ^= SLJIT_F32_OP;
+
if (src & SLJIT_MEM) {
emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw);
src = dst_r;
@@ -1750,8 +1639,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
op ^= SLJIT_F32_OP;
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
@@ -1796,21 +1683,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
if (FAST_IS_REG(dst))
- return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
+ return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
/* Memory. */
- if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
- return compiler->error;
- /* TMP_REG3 is used for caching. */
- FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
+ return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
@@ -1819,21 +1698,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw);
+ SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
+
if (FAST_IS_REG(src))
- FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
else if (src & SLJIT_MEM) {
- if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
- FAIL_IF(compiler->error);
- else {
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
- FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
- }
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
}
else if (src & SLJIT_IMM)
- FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
- return push_inst16(compiler, BLX | RN3(TMP_REG3));
+ FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
+ return push_inst16(compiler, BX | RN3(TMP_REG2));
}
/* --------------------------------------------------------------------- */
@@ -1890,7 +1764,7 @@ static sljit_uw get_cc(sljit_s32 type)
return 0x7;
default: /* SLJIT_JUMP */
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return 0xe;
}
}
@@ -1957,7 +1831,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
if (FAST_IS_REG(src))
return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
if (type >= SLJIT_FAST_CALL)
return push_inst16(compiler, BLX | RN3(TMP_REG1));
}
@@ -1974,23 +1848,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
- sljit_ins cc, ins;
+ sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
op = GET_OPCODE(op);
cc = get_cc(type & 0xff);
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (op < SLJIT_ADD) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
@@ -1998,60 +1867,86 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
} else {
+ /* The movsi (immediate) instruction does not set flags in IT block. */
FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
}
- if (dst_r != TMP_REG2)
+ if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
- return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
+ return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
}
- ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
- if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
- /* Does not change the other bits. */
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
+
+ if (op == SLJIT_AND) {
+ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
+ FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
+ FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
+ }
+ else {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
- if (flags & SLJIT_SET_E) {
- /* The condition must always be set, even if the ORRI/EORI is not executed above. */
- if (reg_map[dst] <= 7)
- return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
- return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
- }
+ FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
+ }
+
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));
+
+ if (!(flags & SLJIT_SET_Z))
return SLJIT_SUCCESS;
+
+ /* The condition must always be set, even if the ORR/EORI is not executed above. */
+ if (reg_map[dst_r] <= 7)
+ return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
+ return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_uw cc, tmp;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+ dst_reg &= ~SLJIT_I32_OP;
+
+ cc = get_cc(type & 0xff);
+
+ if (!(src & SLJIT_IMM)) {
+ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+ return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src));
}
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
- src = TMP_REG2;
- srcw = 0;
- } else if (src & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
- src = TMP_REG2;
- srcw = 0;
+ tmp = (sljit_uw) srcw;
+
+ if (tmp < 0x10000) {
+ /* set low 16 bits, set hi 16 bits to 0. */
+ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+ return push_inst32(compiler, MOVW | RD4(dst_reg) |
+ COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
}
- if (op == SLJIT_AND || src != dst_r) {
- FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
+ tmp = get_imm(srcw);
+ if (tmp != INVALID_IMM) {
+ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
+ return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
}
- else {
+
+ tmp = get_imm(~srcw);
+ if (tmp != INVALID_IMM) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+ return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
}
- if (dst_r == TMP_REG2)
- FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));
+ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));
- if (flags & SLJIT_SET_E) {
- /* The condition must always be set, even if the ORR/EORI is not executed above. */
- if (reg_map[dst_r] <= 7)
- return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
- return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
- }
- return SLJIT_SUCCESS;
+ tmp = (sljit_uw) srcw;
+ FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) |
+ COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
+ return push_inst32(compiler, MOVT | RD4(dst_reg) |
+ COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2067,24 +1962,26 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
- PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
+ PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
return const_;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_u16 *inst = (sljit_u16*)addr;
- modify_imm32_const(inst, new_addr);
+ modify_imm32_const(inst, new_target);
+ inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 4);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_u16 *inst = (sljit_u16*)addr;
modify_imm32_const(inst, new_constant);
+ inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 4);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
index 5096e4f55e..62e16106b1 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -40,35 +40,37 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
#define EMIT_SHIFT(op_imm, op_v) \
if (flags & SRC2_IMM) { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
}
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
+ sljit_s32 is_overflow, is_carry, is_handled;
+
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -93,8 +95,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
- else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ else {
+ SLJIT_ASSERT(dst == src2);
+ }
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
@@ -111,24 +114,25 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
- else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ else {
+ SLJIT_ASSERT(dst == src2);
+ }
return SLJIT_SUCCESS;
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
- if (op & SLJIT_SET_E)
+ if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
- if (op & SLJIT_SET_E)
+ if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
#else
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
@@ -145,130 +149,192 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
- if (op & SLJIT_SET_E)
- return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
#endif
return SLJIT_SUCCESS;
case SLJIT_ADD:
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_O) {
+ if (is_overflow) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
- FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
- if (op & SLJIT_SET_E)
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+
+ if (is_overflow || is_carry) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
- FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_O)
- FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- if (op & SLJIT_SET_E)
+ if (is_overflow)
+ FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
- if (!(op & SLJIT_SET_O))
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (!is_overflow)
return SLJIT_SUCCESS;
- FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_ADDC:
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_C) {
+ if (is_carry) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
- FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
- if (!(op & SLJIT_SET_C))
+ FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ if (!is_carry)
return SLJIT_SUCCESS;
- /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+ /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
- return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+ return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_SUB:
- if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+ if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
+ is_handled = 0;
+
+ if (flags & SRC2_IMM) {
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ is_handled = 1;
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ is_handled = 1;
+ }
+ }
+
+ if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+ is_handled = 1;
+
+ if (flags & SRC2_IMM) {
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+ src2 = TMP_REG2;
+ flags &= ~SRC2_IMM;
+ }
+
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+ {
+ FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+ {
+ FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ }
+
+ if (is_handled) {
+ if (flags & SRC2_IMM) {
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+ if (!(flags & UNUSED_DEST))
+ return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst));
+ }
+ else {
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ if (!(flags & UNUSED_DEST))
+ return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst));
+ }
+ return SLJIT_SUCCESS;
+ }
+
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_O) {
+ if (is_overflow) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
- FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
- if (op & SLJIT_SET_E)
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_O)
- FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- if (op & SLJIT_SET_E)
+ if (is_overflow)
+ FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
- if (op & SLJIT_SET_U)
- FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
- if (op & SLJIT_SET_S) {
- FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
- FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
- }
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (!(op & SLJIT_SET_O))
+ if (!is_overflow)
return SLJIT_SUCCESS;
- FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
@@ -277,28 +343,31 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
flags &= ~SRC2_IMM;
}
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
- return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
- if (!(op & SLJIT_SET_O)) {
+
+ if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
#else
@@ -307,10 +376,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
#endif
}
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
- FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
- return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+ return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
@@ -337,7 +406,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -347,20 +416,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
- inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
- inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+ inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+ inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
index c7ee8c9c2e..dd114bb27a 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -123,15 +123,15 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
@@ -144,16 +144,16 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
} \
else \
ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \
- if (op & SLJIT_SET_E) \
+ if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
- if (CHECK_FLAGS(SLJIT_SET_E)) \
+ if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
}
@@ -161,6 +161,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
sljit_ins ins;
+ sljit_s32 is_overflow, is_carry, is_handled;
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
@@ -180,8 +181,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
- else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ else {
+ SLJIT_ASSERT(dst == src2);
+ }
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
@@ -194,8 +196,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
- else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ else {
+ SLJIT_ASSERT(dst == src2);
+ }
return SLJIT_SUCCESS;
case SLJIT_MOV_U32:
@@ -209,18 +212,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
- if (op & SLJIT_SET_E)
+ if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
- if (op & SLJIT_SET_E)
+ if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
#else
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
@@ -237,130 +240,192 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
- if (op & SLJIT_SET_E)
- return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG);
#endif
return SLJIT_SUCCESS;
case SLJIT_ADD:
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_O) {
+ if (is_overflow) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
- FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
- if (op & SLJIT_SET_E)
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O)) {
+
+ if (is_overflow || is_carry) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
- FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_O)
- FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- if (op & SLJIT_SET_E)
+ if (is_overflow)
+ FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
- if (!(op & SLJIT_SET_O))
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (!is_overflow)
return SLJIT_SUCCESS;
- FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_ADDC:
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_C) {
+ if (is_carry) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
- FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
- if (!(op & SLJIT_SET_C))
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ if (!is_carry)
return SLJIT_SUCCESS;
- /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+ /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
- return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG);
+ return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_SUB:
- if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) {
+ if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
+ is_handled = 0;
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_O) {
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ is_handled = 1;
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+ is_handled = 1;
+ }
+ }
+
+ if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+ is_handled = 1;
+
+ if (flags & SRC2_IMM) {
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+ src2 = TMP_REG2;
+ flags &= ~SRC2_IMM;
+ }
+
+ if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+ {
+ FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+ FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+ {
+ FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+ }
+ }
+
+ if (is_handled) {
+ if (flags & SRC2_IMM) {
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+ if (!(flags & UNUSED_DEST))
+ return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst));
+ }
+ else {
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ if (!(flags & UNUSED_DEST))
+ return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst));
+ }
+ return SLJIT_SUCCESS;
+ }
+
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+ if (flags & SRC2_IMM) {
+ if (is_overflow) {
if (src2 >= 0)
- FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
- FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
- if (op & SLJIT_SET_E)
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
- if (op & (SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_O)
- FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- if (op & SLJIT_SET_E)
+ if (is_overflow)
+ FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
- if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O))
- FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG));
- if (op & SLJIT_SET_U)
- FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG));
- if (op & SLJIT_SET_S) {
- FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG));
- FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG));
- }
+
+ if (is_overflow || is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
- if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
+ if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (!(op & SLJIT_SET_O))
+ if (!is_overflow)
return SLJIT_SUCCESS;
- FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
- return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+ if (op & SLJIT_SET_Z)
+ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+ return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
@@ -369,28 +434,31 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
flags &= ~SRC2_IMM;
}
+ is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
if (flags & SRC2_IMM) {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
- if (op & SLJIT_SET_C)
- FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG));
+ if (is_carry)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
- return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+ return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
- if (!(op & SLJIT_SET_O)) {
+
+ if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
if (op & SLJIT_I32_OP)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
@@ -402,10 +470,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
#endif
}
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG));
- return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+ return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
@@ -432,7 +500,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -446,24 +514,26 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst));
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
- inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
- inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
- inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
- inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff);
+ inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
+ inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
+ inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+ inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 6);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 6);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
index c2c251b1ff..00e8303090 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -57,19 +57,14 @@ typedef sljit_u32 sljit_ins;
#define RETURN_ADDR_REG 31
/* Flags are kept in volatile registers. */
-#define EQUAL_FLAG 12
-/* And carry flag as well. */
-#define ULESS_FLAG 13
-#define UGREATER_FLAG 14
-#define LESS_FLAG 15
-#define GREATER_FLAG 31
-#define OVERFLOW_FLAG 1
+#define EQUAL_FLAG 31
+#define OTHER_FLAG 1
#define TMP_FREG1 (0)
#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+ 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
};
/* --------------------------------------------------------------------- */
@@ -178,7 +173,13 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
#define CLZ (HI(28) | LO(32))
#define DCLZ (HI(28) | LO(36))
+#define MOVF (HI(0) | (0 << 16) | LO(1))
+#define MOVN (HI(0) | LO(11))
+#define MOVT (HI(0) | (1 << 16) | LO(1))
+#define MOVZ (HI(0) | LO(10))
#define MUL (HI(28) | LO(2))
+#define PREF (HI(51))
+#define PREFX (HI(19) | LO(15))
#define SEB (HI(31) | (16 << 6) | LO(32))
#define SEH (HI(31) | (24 << 6) | LO(32))
#endif
@@ -218,7 +219,7 @@ static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags)
return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
}
-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
@@ -237,9 +238,10 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
- target_addr = (sljit_uw)(code + jump->u.label->size);
+ target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
- inst = (sljit_ins*)jump->addr;
+
+ inst = (sljit_ins *)jump->addr;
if (jump->flags & IS_COND)
inst--;
@@ -250,7 +252,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
/* B instructions. */
if (jump->flags & IS_MOVABLE) {
- diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+ diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2;
if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
jump->flags |= PATCH_B;
@@ -268,7 +270,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
}
}
else {
- diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2;
+ diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2;
if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
jump->flags |= PATCH_B;
@@ -364,6 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
+ sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
@@ -380,9 +383,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
+
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
@@ -393,8 +399,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
- /* Just recording the address. */
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -404,7 +409,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#else
jump->addr = (sljit_uw)(code_ptr - 7);
#endif
- code_ptr = detect_jump_type(jump, code_ptr, code);
+ code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
@@ -434,16 +439,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
while (jump) {
do {
addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
- buf_ptr = (sljit_ins*)jump->addr;
+ buf_ptr = (sljit_ins *)jump->addr;
if (jump->flags & PATCH_B) {
- addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2;
+ addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2;
SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN);
buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff);
break;
}
if (jump->flags & PATCH_J) {
- SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff));
+ SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff));
buf_ptr[0] |= (addr >> 2) & 0x03ffffff;
break;
}
@@ -476,7 +481,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+
+ code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
#ifndef __GNUC__
SLJIT_CACHE_FLUSH(code, code_ptr);
#else
@@ -486,6 +496,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
return code;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ sljit_sw fir = 0;
+
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#elif defined(__GNUC__)
+ asm ("cfc1 %0, $0" : "=r"(fir));
+ return (fir >> 22) & 0x1;
+#else
+#error "FIR check is not implemented for this architecture"
+#endif
+
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+ case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CMOV:
+ return 1;
+#endif
+
+ default:
+ return fir;
+ }
+}
+
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */
@@ -520,10 +556,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000
-/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
-#define CHECK_FLAGS(list) \
- (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
-
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define STACK_STORE SW
#define STACK_LOAD LW
@@ -759,34 +791,28 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
base = arg & REG_MASK;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- argw &= 0x3;
- if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
- SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
- FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
- reg_ar = DR(TMP_REG1);
+ if (SLJIT_UNLIKELY(flags & WRITE_BACK)) {
+ SLJIT_ASSERT(argw == 0);
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | D(base), DR(base)));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
}
+ argw &= 0x3;
+
/* Using the cache. */
if (argw == compiler->cache_argw) {
- if (!(flags & WRITE_BACK)) {
- if (arg == compiler->cache_arg)
+ if (arg == compiler->cache_arg)
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
+
+ if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
+ if (arg == next_arg && argw == (next_argw & 0x3)) {
+ compiler->cache_arg = arg;
+ compiler->cache_argw = argw;
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
- if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
- if (arg == next_arg && argw == (next_argw & 0x3)) {
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot);
- }
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
- }
- }
- else {
- if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
}
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
}
@@ -796,35 +822,18 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3)));
}
- if (!(flags & WRITE_BACK)) {
- if (arg == next_arg && argw == (next_argw & 0x3)) {
- compiler->cache_arg = arg;
- compiler->cache_argw = argw;
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
- tmp_ar = DR(TMP_REG3);
- }
- else
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+ if (arg == next_arg && argw == (next_argw & 0x3)) {
+ compiler->cache_arg = arg;
+ compiler->cache_argw = argw;
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+ tmp_ar = DR(TMP_REG3);
}
- FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base)));
- return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot);
+ else
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
- /* Update only applies if a base register exists. */
- if (reg_ar == DR(base)) {
- SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
- if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
- FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS));
- if (argw)
- return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base));
- return SLJIT_SUCCESS;
- }
- FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
- reg_ar = DR(TMP_REG1);
- }
-
if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
if (argw)
FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)));
@@ -914,10 +923,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
- if (GET_FLAGS(op))
- flags |= UNUSED_DEST;
+ SLJIT_ASSERT(HAS_FLAGS(op));
+ flags |= UNUSED_DEST;
}
else if (FAST_IS_REG(dst)) {
dst_r = dst;
@@ -1079,6 +1086,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ if (!(src & OFFS_REG_MASK)) {
+ if (srcw <= SIMM_MAX && srcw >= SIMM_MIN)
+ return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS);
+
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS);
+ }
+
+ srcw &= 0x3;
+
+ if (SLJIT_UNLIKELY(srcw != 0)) {
+ FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1)));
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS);
+ }
+
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS);
+}
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1094,6 +1124,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, src, srcw);
+#endif
+ return SLJIT_SUCCESS;
+ }
+
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) {
flags |= INT_DATA | SIGNED_DATA;
@@ -1197,6 +1235,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (op & SLJIT_I32_OP) {
flags |= INT_DATA | SIGNED_DATA;
@@ -1273,19 +1314,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#elif defined(__GNUC__)
- sljit_sw fir;
- asm ("cfc1 %0, $0" : "=r"(fir));
- return (fir >> 22) & 0x1;
-#else
-#error "FIR check is not implemented for this architecture"
-#endif
-}
-
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7))
#define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8))
@@ -1308,9 +1336,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
@@ -1364,6 +1389,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ sljit_ins inst;
+
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
src1 = TMP_FREG1;
@@ -1378,25 +1405,26 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
else
src2 <<= 1;
- /* src2 and src1 are swapped. */
- if (op & SLJIT_SET_E) {
- FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
- }
- if (op & SLJIT_SET_S) {
- /* Mixing the instructions for the two checks. */
- FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
- FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
- }
- return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC);
+ switch (GET_FLAG_TYPE(op)) {
+ case SLJIT_EQUAL_F64:
+ case SLJIT_NOT_EQUAL_F64:
+ inst = C_UEQ_S;
+ break;
+ case SLJIT_LESS_F64:
+ case SLJIT_GREATER_EQUAL_F64:
+ inst = C_ULT_S;
+ break;
+ case SLJIT_GREATER_F64:
+ case SLJIT_LESS_EQUAL_F64:
+ inst = C_ULE_S;
+ break;
+ default:
+ SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED_F64 || GET_FLAG_TYPE(op) == SLJIT_ORDERED_F64);
+ inst = C_UN_S;
+ break;
+ }
+
+ return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1542,10 +1570,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
@@ -1634,55 +1658,39 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
switch (type) {
case SLJIT_EQUAL:
- case SLJIT_NOT_EQUAL_F64:
BR_NZ(EQUAL_FLAG);
break;
case SLJIT_NOT_EQUAL:
- case SLJIT_EQUAL_F64:
BR_Z(EQUAL_FLAG);
break;
case SLJIT_LESS:
- case SLJIT_LESS_F64:
- BR_Z(ULESS_FLAG);
- break;
- case SLJIT_GREATER_EQUAL:
- case SLJIT_GREATER_EQUAL_F64:
- BR_NZ(ULESS_FLAG);
- break;
case SLJIT_GREATER:
- case SLJIT_GREATER_F64:
- BR_Z(UGREATER_FLAG);
- break;
- case SLJIT_LESS_EQUAL:
- case SLJIT_LESS_EQUAL_F64:
- BR_NZ(UGREATER_FLAG);
- break;
case SLJIT_SIG_LESS:
- BR_Z(LESS_FLAG);
- break;
- case SLJIT_SIG_GREATER_EQUAL:
- BR_NZ(LESS_FLAG);
- break;
case SLJIT_SIG_GREATER:
- BR_Z(GREATER_FLAG);
- break;
- case SLJIT_SIG_LESS_EQUAL:
- BR_NZ(GREATER_FLAG);
- break;
case SLJIT_OVERFLOW:
case SLJIT_MUL_OVERFLOW:
- BR_Z(OVERFLOW_FLAG);
+ BR_Z(OTHER_FLAG);
break;
+ case SLJIT_GREATER_EQUAL:
+ case SLJIT_LESS_EQUAL:
+ case SLJIT_SIG_GREATER_EQUAL:
+ case SLJIT_SIG_LESS_EQUAL:
case SLJIT_NOT_OVERFLOW:
case SLJIT_MUL_NOT_OVERFLOW:
- BR_NZ(OVERFLOW_FLAG);
- break;
- case SLJIT_UNORDERED_F64:
- BR_F();
+ BR_NZ(OTHER_FLAG);
break;
+ case SLJIT_NOT_EQUAL_F64:
+ case SLJIT_GREATER_EQUAL_F64:
+ case SLJIT_GREATER_F64:
case SLJIT_ORDERED_F64:
BR_T();
break;
+ case SLJIT_EQUAL_F64:
+ case SLJIT_LESS_F64:
+ case SLJIT_LESS_EQUAL_F64:
+ case SLJIT_UNORDERED_F64:
+ BR_F();
+ break;
default:
/* Not conditional branch. */
inst = 0;
@@ -1854,86 +1862,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
#undef RESOLVE_IMM1
#undef RESOLVE_IMM2
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type,
- sljit_s32 src1, sljit_sw src1w,
- sljit_s32 src2, sljit_sw src2w)
-{
- struct sljit_jump *jump;
- sljit_ins inst;
- sljit_s32 if_true;
-
- CHECK_ERROR_PTR();
- CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w));
-
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
-
- if (src1 & SLJIT_MEM) {
- PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
- src1 = TMP_FREG1;
- }
- else
- src1 <<= 1;
-
- if (src2 & SLJIT_MEM) {
- PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
- src2 = TMP_FREG2;
- }
- else
- src2 <<= 1;
-
- jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
- PTR_FAIL_IF(!jump);
- set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
- jump->flags |= IS_BIT16_COND;
-
- switch (type & 0xff) {
- case SLJIT_EQUAL_F64:
- inst = C_UEQ_S;
- if_true = 1;
- break;
- case SLJIT_NOT_EQUAL_F64:
- inst = C_UEQ_S;
- if_true = 0;
- break;
- case SLJIT_LESS_F64:
- inst = C_ULT_S;
- if_true = 1;
- break;
- case SLJIT_GREATER_EQUAL_F64:
- inst = C_ULT_S;
- if_true = 0;
- break;
- case SLJIT_GREATER_F64:
- inst = C_ULE_S;
- if_true = 0;
- break;
- case SLJIT_LESS_EQUAL_F64:
- inst = C_ULE_S;
- if_true = 1;
- break;
- case SLJIT_UNORDERED_F64:
- inst = C_UN_S;
- if_true = 1;
- break;
- default: /* Make compilers happy. */
- SLJIT_ASSERT_STOP();
- case SLJIT_ORDERED_F64:
- inst = C_UN_S;
- if_true = 0;
- break;
- }
-
- PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS));
- /* Intentionally the other opcode. */
- PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS));
- PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0));
- PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
- jump->addr = compiler->size;
- PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
- return jump;
-}
-
#undef JUMP_LENGTH
#undef BR_Z
#undef BR_NZ
@@ -2003,115 +1931,160 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 sugg_dst_ar, dst_ar;
- sljit_s32 flags = GET_ALL_FLAGS(op);
+ sljit_s32 src_ar, dst_ar;
+ sljit_s32 saved_op = op;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# define mem_type WORD_DATA
+ sljit_s32 mem_type = WORD_DATA;
#else
sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
#endif
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
+ if (op == SLJIT_MOV_S32)
mem_type = INT_DATA | SIGNED_DATA;
#endif
- sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
+ dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
- FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
+
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw));
switch (type & 0xff) {
case SLJIT_EQUAL:
case SLJIT_NOT_EQUAL:
- FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
- break;
- case SLJIT_LESS:
- case SLJIT_GREATER_EQUAL:
- case SLJIT_LESS_F64:
- case SLJIT_GREATER_EQUAL_F64:
- dst_ar = ULESS_FLAG;
- break;
- case SLJIT_GREATER:
- case SLJIT_LESS_EQUAL:
- case SLJIT_GREATER_F64:
- case SLJIT_LESS_EQUAL_F64:
- dst_ar = UGREATER_FLAG;
- break;
- case SLJIT_SIG_LESS:
- case SLJIT_SIG_GREATER_EQUAL:
- dst_ar = LESS_FLAG;
- break;
- case SLJIT_SIG_GREATER:
- case SLJIT_SIG_LESS_EQUAL:
- dst_ar = GREATER_FLAG;
- break;
- case SLJIT_OVERFLOW:
- case SLJIT_NOT_OVERFLOW:
- dst_ar = OVERFLOW_FLAG;
+ FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
break;
case SLJIT_MUL_OVERFLOW:
case SLJIT_MUL_NOT_OVERFLOW:
- FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
type ^= 0x1; /* Flip type bit for the XORI below. */
break;
+ case SLJIT_GREATER_F64:
+ case SLJIT_LESS_EQUAL_F64:
+ type ^= 0x1; /* Flip type bit for the XORI below. */
case SLJIT_EQUAL_F64:
case SLJIT_NOT_EQUAL_F64:
- dst_ar = EQUAL_FLAG;
- break;
-
+ case SLJIT_LESS_F64:
+ case SLJIT_GREATER_EQUAL_F64:
case SLJIT_UNORDERED_F64:
case SLJIT_ORDERED_F64:
- FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
- FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
- FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar));
+ FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar));
+ FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
break;
default:
- SLJIT_ASSERT_STOP();
- dst_ar = sugg_dst_ar;
+ src_ar = OTHER_FLAG;
break;
}
if (type & 0x1) {
- FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
}
- if (op >= SLJIT_ADD) {
- if (DR(TMP_REG2) != dst_ar)
- FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+ if (op < SLJIT_ADD) {
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, mem_type, src_ar, dst, dstw);
+
+ if (src_ar != dst_ar)
+ return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar);
+ return SLJIT_SUCCESS;
}
+ /* OTHER_FLAG cannot be specified as src2 argument at the moment. */
+ if (DR(TMP_REG2) != src_ar)
+ FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+
+ mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE;
+
if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
+ return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0);
+}
- if (sugg_dst_ar != dst_ar)
- return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar);
- return SLJIT_SUCCESS;
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+ sljit_ins ins;
+#endif
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# undef mem_type
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+
+ if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (dst_reg & SLJIT_I32_OP)
+ srcw = (sljit_s32)srcw;
+#endif
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ dst_reg &= ~SLJIT_I32_OP;
+
+ switch (type & 0xff) {
+ case SLJIT_EQUAL:
+ ins = MOVZ | TA(EQUAL_FLAG);
+ break;
+ case SLJIT_NOT_EQUAL:
+ ins = MOVN | TA(EQUAL_FLAG);
+ break;
+ case SLJIT_LESS:
+ case SLJIT_GREATER:
+ case SLJIT_SIG_LESS:
+ case SLJIT_SIG_GREATER:
+ case SLJIT_OVERFLOW:
+ case SLJIT_MUL_OVERFLOW:
+ ins = MOVN | TA(OTHER_FLAG);
+ break;
+ case SLJIT_GREATER_EQUAL:
+ case SLJIT_LESS_EQUAL:
+ case SLJIT_SIG_GREATER_EQUAL:
+ case SLJIT_SIG_LESS_EQUAL:
+ case SLJIT_NOT_OVERFLOW:
+ case SLJIT_MUL_NOT_OVERFLOW:
+ ins = MOVZ | TA(OTHER_FLAG);
+ break;
+ case SLJIT_EQUAL_F64:
+ case SLJIT_LESS_F64:
+ case SLJIT_LESS_EQUAL_F64:
+ case SLJIT_UNORDERED_F64:
+ ins = MOVT;
+ break;
+ case SLJIT_NOT_EQUAL_F64:
+ case SLJIT_GREATER_EQUAL_F64:
+ case SLJIT_GREATER_F64:
+ case SLJIT_ORDERED_F64:
+ ins = MOVF;
+ break;
+ default:
+ ins = MOVZ | TA(OTHER_FLAG);
+ SLJIT_UNREACHABLE();
+ break;
+ }
+
+ return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg));
+
+#else
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#endif
}
@@ -2128,7 +2101,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
index 0f23cf86dd..fc185f7847 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -88,77 +88,86 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
- return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
- return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
+ return push_inst(compiler, CNTLZW | S(src2) | A(dst));
case SLJIT_ADD:
if (flags & ALT_FORM1) {
- /* Flags does not set: BIN_IMM_EXTS unnecessary. */
- SLJIT_ASSERT(src2 == TMP_REG2);
- return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
}
+
if (flags & ALT_FORM2) {
/* Flags does not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
- return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+ if (flags & ALT_FORM3)
+ return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+ if (flags & ALT_FORM4) {
+ FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
+ src1 = dst;
+ }
+
+ return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
}
- if (flags & ALT_FORM4) {
- /* Flags does not set: BIN_IMM_EXTS unnecessary. */
- FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
- return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
- }
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
- return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+ if (flags & ALT_FORM4)
+ return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+ return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2));
case SLJIT_ADDC:
- if (flags & ALT_FORM1) {
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
- FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
- return push_inst(compiler, MTXER | S(0));
- }
return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
case SLJIT_SUB:
if (flags & ALT_FORM1) {
+ if (flags & ALT_FORM2) {
+ FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm));
+ if (!(flags & ALT_FORM3))
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+ }
+ FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2)));
+ if (!(flags & ALT_FORM3))
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+ }
+
+ if (flags & ALT_FORM2) {
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+ }
+
+ if (flags & ALT_FORM3) {
/* Flags does not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
}
- if (flags & (ALT_FORM2 | ALT_FORM3)) {
- SLJIT_ASSERT(src2 == TMP_REG2);
- if (flags & ALT_FORM2)
- FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm));
- if (flags & ALT_FORM3)
- return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm);
- return SLJIT_SUCCESS;
- }
- if (flags & (ALT_FORM4 | ALT_FORM5)) {
- if (flags & ALT_FORM4)
- FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
- if (flags & ALT_FORM5)
- FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)));
- return SLJIT_SUCCESS;
+
+ if (flags & ALT_FORM4) {
+ if (flags & ALT_FORM5) {
+ SLJIT_ASSERT(src2 == TMP_REG2);
+ return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm);
+ }
+ return push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2));
}
+
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
- if (flags & ALT_FORM6)
- FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)));
- return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+ if (flags & ALT_FORM5)
+ return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+ return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
case SLJIT_SUBC:
- if (flags & ALT_FORM1) {
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
- FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
- return push_inst(compiler, MTXER | S(0));
- }
return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
case SLJIT_MUL:
@@ -166,7 +175,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm);
}
- return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
+ return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));
case SLJIT_AND:
if (flags & ALT_FORM1) {
@@ -228,19 +237,15 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ASHR:
- if (flags & ALT_FORM3)
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
compiler->imm &= 0x1f;
- FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
+ return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
}
- else
- FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)));
- return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+ return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -250,20 +255,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
- inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
- inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff);
+ inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+ inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
index 8e3223f725..5366c30d90 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -204,84 +204,118 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
+
+ if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) {
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+ FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2)));
+ return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+ }
+
UN_EXTS();
- return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2));
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1);
if (flags & ALT_FORM1)
- return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst));
- return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst));
+ return push_inst(compiler, CNTLZW | S(src2) | A(dst));
+ return push_inst(compiler, CNTLZD | S(src2) | A(dst));
case SLJIT_ADD:
if (flags & ALT_FORM1) {
- /* Flags does not set: BIN_IMM_EXTS unnecessary. */
- SLJIT_ASSERT(src2 == TMP_REG2);
- return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm);
+ if (flags & ALT_SIGN_EXT) {
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
+ src1 = TMP_REG1;
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+ src2 = TMP_REG2;
+ }
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)));
+ if (flags & ALT_SIGN_EXT)
+ return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+ return SLJIT_SUCCESS;
}
+
if (flags & ALT_FORM2) {
/* Flags does not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
- return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+ if (flags & ALT_FORM3)
+ return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
+
+ if (flags & ALT_FORM4) {
+ FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
+ src1 = dst;
+ }
+
+ return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
}
if (flags & ALT_FORM3) {
SLJIT_ASSERT(src2 == TMP_REG2);
BIN_IMM_EXTS();
return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm);
}
- if (flags & ALT_FORM4) {
- /* Flags does not set: BIN_IMM_EXTS unnecessary. */
- FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)));
- return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)));
- }
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2));
BIN_EXTS();
- return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+ if (flags & ALT_FORM4)
+ return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2));
+ return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2));
case SLJIT_ADDC:
- if (flags & ALT_FORM1) {
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
- FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)));
- return push_inst(compiler, MTXER | S(0));
- }
BIN_EXTS();
return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2));
case SLJIT_SUB:
if (flags & ALT_FORM1) {
+ if (flags & ALT_FORM2) {
+ FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
+ if (!(flags & ALT_FORM3))
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff));
+ }
+ FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
+ if (!(flags & ALT_FORM3))
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+ }
+
+ if (flags & ALT_FORM2) {
+ if (flags & ALT_SIGN_EXT) {
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
+ src1 = TMP_REG1;
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+ src2 = TMP_REG2;
+ }
+ /* Setting XER SO is not enough, CR SO is also needed. */
+ FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)));
+ if (flags & ALT_SIGN_EXT)
+ return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
+ return SLJIT_SUCCESS;
+ }
+
+ if (flags & ALT_FORM3) {
/* Flags does not set: BIN_IMM_EXTS unnecessary. */
SLJIT_ASSERT(src2 == TMP_REG2);
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
}
- if (flags & (ALT_FORM2 | ALT_FORM3)) {
- SLJIT_ASSERT(src2 == TMP_REG2);
- if (flags & ALT_FORM2)
- FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm));
- if (flags & ALT_FORM3)
- return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
- return SLJIT_SUCCESS;
- }
- if (flags & (ALT_FORM4 | ALT_FORM5)) {
- if (flags & ALT_FORM4)
- FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
- if (flags & ALT_FORM5)
- return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
- return SLJIT_SUCCESS;
+
+ if (flags & ALT_FORM4) {
+ if (flags & ALT_FORM5) {
+ SLJIT_ASSERT(src2 == TMP_REG2);
+ return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm);
+ }
+ return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2));
}
+
if (!(flags & ALT_SET_FLAGS))
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
BIN_EXTS();
- if (flags & ALT_FORM6)
- FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)));
- return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+ if (flags & ALT_FORM5)
+ return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+ return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
case SLJIT_SUBC:
- if (flags & ALT_FORM1) {
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
- FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)));
- return push_inst(compiler, MTXER | S(0));
- }
BIN_EXTS();
return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
@@ -292,8 +326,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
}
BIN_EXTS();
if (flags & ALT_FORM2)
- return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1));
- return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1));
+ return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));
+ return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1));
case SLJIT_AND:
if (flags & ALT_FORM1) {
@@ -345,10 +379,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
}
- else {
- compiler->imm &= 0x3f;
- return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
- }
+ compiler->imm &= 0x3f;
+ return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
@@ -359,33 +391,25 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
compiler->imm &= 0x1f;
return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
}
- else {
- compiler->imm &= 0x3f;
- return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
- }
+ compiler->imm &= 0x3f;
+ return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
}
return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
case SLJIT_ASHR:
- if (flags & ALT_FORM3)
- FAIL_IF(push_inst(compiler, MFXER | D(0)));
if (flags & ALT_FORM1) {
SLJIT_ASSERT(src2 == TMP_REG2);
if (flags & ALT_FORM2) {
compiler->imm &= 0x1f;
- FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)));
- }
- else {
- compiler->imm &= 0x3f;
- FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)));
+ return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
}
+ compiler->imm &= 0x3f;
+ return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4));
}
- else
- FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)));
- return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS;
+ return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2));
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -398,18 +422,19 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins*)addr;
- inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff);
- inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff);
- inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff);
- inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff);
+ inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
+ inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
+ inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
+ inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 5);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
sljit_ins *inst = (sljit_ins*)addr;
@@ -417,5 +442,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff);
inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 5);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
index a3647327bf..2bf855c6bc 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -127,9 +127,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
/* Instruction bit sections.
OE and Rc flag (see ALT_SET_FLAGS). */
-#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
+#define OE(flags) ((flags) & ALT_SET_FLAGS)
/* Rc flag (see ALT_SET_FLAGS). */
-#define RC(flags) ((flags & ALT_SET_FLAGS) >> 10)
+#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10)
#define HI(opcode) ((opcode) << 26)
#define LO(opcode) ((opcode) << 1)
@@ -154,6 +154,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
#define CMPL (HI(31) | LO(32))
#define CMPLI (HI(10))
#define CROR (HI(19) | LO(449))
+#define DCBT (HI(31) | LO(278))
#define DIVD (HI(31) | LO(489))
#define DIVDU (HI(31) | LO(457))
#define DIVW (HI(31) | LO(491))
@@ -249,7 +250,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
@@ -267,7 +268,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
- target_addr = (sljit_uw)(code + jump->u.label->size);
+ target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -275,7 +276,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
goto keep_address;
#endif
- diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
+ diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;
extra_jump_flags = 0;
if (jump->flags & IS_COND) {
@@ -296,6 +297,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
jump->flags |= PATCH_B | extra_jump_flags;
return 1;
}
+
if (target_addr <= 0x03ffffff) {
jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
return 1;
@@ -309,6 +311,7 @@ keep_address:
jump->flags |= PATCH_ABS32;
return 1;
}
+
if (target_addr <= 0x7fffffffffffl) {
jump->flags |= PATCH_ABS48;
return 1;
@@ -326,6 +329,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
+ sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
@@ -349,9 +353,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
+
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
@@ -363,7 +370,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -373,7 +380,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#else
jump->addr = (sljit_uw)(code_ptr - 6);
#endif
- if (detect_jump_type(jump, code_ptr, code)) {
+ if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
code_ptr[-3] = code_ptr[0];
code_ptr -= 3;
@@ -420,7 +427,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
} while (buf);
if (label && label->size == word_count) {
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -438,11 +445,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
while (jump) {
do {
addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
- buf_ptr = (sljit_ins*)jump->addr;
+ buf_ptr = (sljit_ins *)jump->addr;
+
if (jump->flags & PATCH_B) {
if (jump->flags & IS_COND) {
if (!(jump->flags & PATCH_ABS_B)) {
- addr = addr - jump->addr;
+ addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
}
@@ -453,7 +461,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
else {
if (!(jump->flags & PATCH_ABS_B)) {
- addr = addr - jump->addr;
+ addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
}
@@ -464,6 +472,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
break;
}
+
/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
@@ -492,22 +501,48 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
- SLJIT_CACHE_FLUSH(code, code_ptr);
+
+ code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (((sljit_sw)code_ptr) & 0x4)
code_ptr++;
+#endif
sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
+#endif
+
+ code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+ SLJIT_CACHE_FLUSH(code, code_ptr);
+
+#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
return code_ptr;
#else
- sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
- return code_ptr;
+ return code;
#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
#else
- return code;
+ /* Available by default. */
+ return 1;
#endif
+
+ case SLJIT_HAS_PRE_UPDATE:
+ case SLJIT_HAS_CLZ:
+ return 1;
+
+ default:
+ return 0;
+ }
}
/* --------------------------------------------------------------------- */
@@ -544,7 +579,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#define ALT_FORM3 0x040000
#define ALT_FORM4 0x080000
#define ALT_FORM5 0x100000
-#define ALT_FORM6 0x200000
/* Source and destination is register. */
#define REG_DEST 0x000001
@@ -559,7 +593,7 @@ ALT_SIGN_EXT 0x000200
ALT_SET_FLAGS 0x000400
ALT_FORM1 0x010000
...
-ALT_FORM6 0x200000 */
+ALT_FORM5 0x100000 */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#include "sljitNativePPC_32.c"
@@ -726,7 +760,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#endif
-static const sljit_ins data_transfer_insts[64 + 8] = {
+static const sljit_ins data_transfer_insts[64 + 16] = {
/* -------- Unsigned -------- */
@@ -835,11 +869,20 @@ static const sljit_ins data_transfer_insts[64 + 8] = {
/* d n x s */ HI(31) | LO(727) /* stfdx */,
/* d n x l */ HI(31) | LO(599) /* lfdx */,
+/* d w i s */ HI(55) /* stfdu */,
+/* d w i l */ HI(51) /* lfdu */,
+/* d w x s */ HI(31) | LO(759) /* stfdux */,
+/* d w x l */ HI(31) | LO(631) /* lfdux */,
+
/* s n i s */ HI(52) /* stfs */,
/* s n i l */ HI(48) /* lfs */,
/* s n x s */ HI(31) | LO(663) /* stfsx */,
/* s n x l */ HI(31) | LO(535) /* lfsx */,
+/* s w i s */ HI(53) /* stfsu */,
+/* s w i l */ HI(49) /* lfsu */,
+/* s w x s */ HI(31) | LO(695) /* stfsux */,
+/* s w x l */ HI(31) | LO(567) /* lfsux */,
};
#undef ARCH_32_64
@@ -850,7 +893,7 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_
sljit_ins inst;
/* Should work when (arg & REG_MASK) == 0. */
- SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
+ SLJIT_ASSERT(A(0) == 0);
SLJIT_ASSERT(arg & SLJIT_MEM);
if (arg & OFFS_REG_MASK) {
@@ -1005,10 +1048,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags
#endif
if (inp_flags & WRITE_BACK) {
- if (arg == reg) {
- FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
- reg = tmp_r;
- }
tmp_r = arg;
FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
}
@@ -1131,7 +1170,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
sljit_s32 src1_r;
sljit_s32 src2_r;
sljit_s32 sugg_src2_r = TMP_REG2;
- sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
+ sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);
if (!(input_flags & ALT_KEEP_CACHE)) {
compiler->cache_arg = 0;
@@ -1140,8 +1179,6 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* Destination check. */
if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
dst_r = TMP_REG2;
}
else if (FAST_IS_REG(dst)) {
@@ -1294,6 +1331,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ if (!(src & OFFS_REG_MASK)) {
+ if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED)
+ return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ /* Works with SLJIT_MEM0() case as well. */
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+ }
+
+ srcw &= 0x3;
+
+ if (srcw == 0)
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1)));
+#else
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
+#endif
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+}
+
#define EMIT_MOV(type, type_flags, type_cast) \
emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
@@ -1301,7 +1363,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+ sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
sljit_s32 op_flags = GET_ALL_FLAGS(op);
CHECK_ERROR();
@@ -1309,11 +1371,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, src, srcw);
+
+ return SLJIT_SUCCESS;
+ }
+
op = GET_OPCODE(op);
if ((src & SLJIT_IMM) && srcw == 0)
src = TMP_ZERO;
- if (op_flags & SLJIT_SET_O)
+ if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
if (op_flags & SLJIT_I32_OP) {
@@ -1339,6 +1408,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
flags |= INT_DATA | SIGNED_DATA;
if (src & SLJIT_IMM)
srcw = (sljit_s32)srcw;
+ if (HAS_FLAGS(op_flags))
+ flags |= ALT_SIGN_EXT;
}
#endif
}
@@ -1404,7 +1475,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_NEG:
- return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+ return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1457,7 +1528,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
+ sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -1465,6 +1536,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
if ((src1 & SLJIT_IMM) && src1w == 0)
src1 = TMP_ZERO;
if ((src2 & SLJIT_IMM) && src2w == 0)
@@ -1478,45 +1552,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
src1w = (sljit_s32)(src1w);
if (src2 & SLJIT_IMM)
src2w = (sljit_s32)(src2w);
- if (GET_FLAGS(op))
+ if (HAS_FLAGS(op))
flags |= ALT_SIGN_EXT;
}
#endif
- if (op & SLJIT_SET_O)
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
if (src2 == TMP_REG2)
flags |= ALT_KEEP_CACHE;
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
- if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
+
+ if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
if (TEST_SL_IMM(src2, src2w)) {
compiler->imm = src2w & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
}
if (TEST_SL_IMM(src1, src1w)) {
compiler->imm = src1w & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
}
if (TEST_SH_IMM(src2, src2w)) {
compiler->imm = (src2w >> 16) & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
}
if (TEST_SH_IMM(src1, src1w)) {
compiler->imm = (src1w >> 16) & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
}
/* Range between -1 and -32768 is covered above. */
if (TEST_ADD_IMM(src2, src2w)) {
compiler->imm = src2w & 0xffffffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
}
if (TEST_ADD_IMM(src1, src1w)) {
compiler->imm = src1w & 0xffffffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
- if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
+ if (HAS_FLAGS(op)) {
if (TEST_SL_IMM(src2, src2w)) {
compiler->imm = src2w & 0xffff;
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1526,75 +1603,75 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
- return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC:
- return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB:
- if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+ if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
+ if (dst == SLJIT_UNUSED) {
+ if (TEST_UL_IMM(src2, src2w)) {
+ compiler->imm = src2w & 0xffff;
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+ }
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
+ }
+
+ if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
+ compiler->imm = src2w;
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+ }
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
+ }
+
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
+
+ if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
if (TEST_SL_IMM(src2, -src2w)) {
compiler->imm = (-src2w) & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
}
if (TEST_SL_IMM(src1, src1w)) {
compiler->imm = src1w & 0xffff;
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
}
if (TEST_SH_IMM(src2, -src2w)) {
compiler->imm = ((-src2w) >> 16) & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
}
/* Range between -1 and -32768 is covered above. */
if (TEST_ADD_IMM(src2, -src2w)) {
compiler->imm = -src2w & 0xffffffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
}
}
- if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
- if (!(op & SLJIT_SET_U)) {
- /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
- if (TEST_SL_IMM(src2, src2w)) {
- compiler->imm = src2w & 0xffff;
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
- }
- if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
- compiler->imm = src1w & 0xffff;
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
- }
- }
- if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
- /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
- if (TEST_UL_IMM(src2, src2w)) {
- compiler->imm = src2w & 0xffff;
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
- }
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
- }
- if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
- compiler->imm = src2w;
- return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
+
+ if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) {
+ if (TEST_SL_IMM(src2, src2w)) {
+ compiler->imm = src2w & 0xffff;
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
}
- return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
}
- if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
- if (TEST_SL_IMM(src2, -src2w)) {
- compiler->imm = (-src2w) & 0xffff;
- return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
- }
+
+ if (TEST_SL_IMM(src2, -src2w)) {
+ compiler->imm = (-src2w) & 0xffff;
+ return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
}
/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
- return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC:
- return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
+ return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op & SLJIT_I32_OP)
flags |= ALT_FORM2;
#endif
- if (!GET_FLAGS(op)) {
+ if (!HAS_FLAGS(op)) {
if (TEST_SL_IMM(src2, src2w)) {
compiler->imm = src2w & 0xffff;
return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1604,13 +1681,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
+ else
+ FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_AND:
case SLJIT_OR:
case SLJIT_XOR:
/* Commutative unsigned operations. */
- if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
+ if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
if (TEST_UL_IMM(src2, src2w)) {
compiler->imm = src2w;
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1628,7 +1707,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
}
}
- if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
+ if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) {
+ /* Unlike or and xor, and resets unwanted bits as well. */
if (TEST_UI_IMM(src2, src2w)) {
compiler->imm = src2w;
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1640,12 +1720,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
}
return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
- case SLJIT_ASHR:
- if (op & SLJIT_KEEP_FLAGS)
- flags |= ALT_FORM3;
- /* Fall through. */
case SLJIT_SHL:
case SLJIT_LSHR:
+ case SLJIT_ASHR:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op & SLJIT_I32_OP)
flags |= ALT_FORM2;
@@ -1685,17 +1762,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#else
- /* Available by default. */
- return 1;
-#endif
-}
-
-#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
+#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 5))
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@@ -1727,9 +1794,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
op = GET_OPCODE(op);
FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (op == SLJIT_CONV_SW_FROM_F64) {
if (FAST_IS_REG(dst)) {
FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
@@ -1737,12 +1801,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
}
return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
}
-
#else
FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
#endif
if (FAST_IS_REG(dst)) {
@@ -2019,10 +2079,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, MFLR | D(dst));
@@ -2079,33 +2135,33 @@ static sljit_ins get_bo_bi_flags(sljit_s32 type)
return (4 << 21) | (2 << 16);
case SLJIT_LESS:
- case SLJIT_LESS_F64:
- return (12 << 21) | ((4 + 0) << 16);
-
- case SLJIT_GREATER_EQUAL:
- case SLJIT_GREATER_EQUAL_F64:
- return (4 << 21) | ((4 + 0) << 16);
-
- case SLJIT_GREATER:
- case SLJIT_GREATER_F64:
- return (12 << 21) | ((4 + 1) << 16);
-
- case SLJIT_LESS_EQUAL:
- case SLJIT_LESS_EQUAL_F64:
- return (4 << 21) | ((4 + 1) << 16);
-
case SLJIT_SIG_LESS:
return (12 << 21) | (0 << 16);
+ case SLJIT_GREATER_EQUAL:
case SLJIT_SIG_GREATER_EQUAL:
return (4 << 21) | (0 << 16);
+ case SLJIT_GREATER:
case SLJIT_SIG_GREATER:
return (12 << 21) | (1 << 16);
+ case SLJIT_LESS_EQUAL:
case SLJIT_SIG_LESS_EQUAL:
return (4 << 21) | (1 << 16);
+ case SLJIT_LESS_F64:
+ return (12 << 21) | ((4 + 0) << 16);
+
+ case SLJIT_GREATER_EQUAL_F64:
+ return (4 << 21) | ((4 + 0) << 16);
+
+ case SLJIT_GREATER_F64:
+ return (12 << 21) | ((4 + 1) << 16);
+
+ case SLJIT_LESS_EQUAL_F64:
+ return (4 << 21) | ((4 + 1) << 16);
+
case SLJIT_OVERFLOW:
case SLJIT_MUL_OVERFLOW:
return (12 << 21) | (3 << 16);
@@ -2207,153 +2263,147 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
-/* Get a bit from CR, all other bits are zeroed. */
-#define GET_CR_BIT(bit, dst) \
- FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
- FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
-
-#define INVERT_BIT(dst) \
- FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 reg, input_flags;
- sljit_s32 flags = GET_ALL_FLAGS(op);
- sljit_sw original_dstw = dstw;
+ sljit_s32 reg, input_flags, cr_bit, invert;
+ sljit_s32 saved_op = op;
+ sljit_sw saved_dstw = dstw;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
+#else
+ input_flags = WORD_DATA;
+#endif
op = GET_OPCODE(op);
reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
-#else
- input_flags = WORD_DATA;
-#endif
- FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
- switch (type & 0xff) {
- case SLJIT_EQUAL:
- GET_CR_BIT(2, reg);
- break;
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
- case SLJIT_NOT_EQUAL:
- GET_CR_BIT(2, reg);
- INVERT_BIT(reg);
- break;
+ invert = 0;
+ cr_bit = 0;
+ switch (type & 0xff) {
case SLJIT_LESS:
- case SLJIT_LESS_F64:
- GET_CR_BIT(4 + 0, reg);
+ case SLJIT_SIG_LESS:
break;
case SLJIT_GREATER_EQUAL:
- case SLJIT_GREATER_EQUAL_F64:
- GET_CR_BIT(4 + 0, reg);
- INVERT_BIT(reg);
+ case SLJIT_SIG_GREATER_EQUAL:
+ invert = 1;
break;
case SLJIT_GREATER:
- case SLJIT_GREATER_F64:
- GET_CR_BIT(4 + 1, reg);
+ case SLJIT_SIG_GREATER:
+ cr_bit = 1;
break;
case SLJIT_LESS_EQUAL:
- case SLJIT_LESS_EQUAL_F64:
- GET_CR_BIT(4 + 1, reg);
- INVERT_BIT(reg);
- break;
-
- case SLJIT_SIG_LESS:
- GET_CR_BIT(0, reg);
- break;
-
- case SLJIT_SIG_GREATER_EQUAL:
- GET_CR_BIT(0, reg);
- INVERT_BIT(reg);
+ case SLJIT_SIG_LESS_EQUAL:
+ cr_bit = 1;
+ invert = 1;
break;
- case SLJIT_SIG_GREATER:
- GET_CR_BIT(1, reg);
+ case SLJIT_EQUAL:
+ cr_bit = 2;
break;
- case SLJIT_SIG_LESS_EQUAL:
- GET_CR_BIT(1, reg);
- INVERT_BIT(reg);
+ case SLJIT_NOT_EQUAL:
+ cr_bit = 2;
+ invert = 1;
break;
case SLJIT_OVERFLOW:
case SLJIT_MUL_OVERFLOW:
- GET_CR_BIT(3, reg);
+ cr_bit = 3;
break;
case SLJIT_NOT_OVERFLOW:
case SLJIT_MUL_NOT_OVERFLOW:
- GET_CR_BIT(3, reg);
- INVERT_BIT(reg);
+ cr_bit = 3;
+ invert = 1;
+ break;
+
+ case SLJIT_LESS_F64:
+ cr_bit = 4 + 0;
+ break;
+
+ case SLJIT_GREATER_EQUAL_F64:
+ cr_bit = 4 + 0;
+ invert = 1;
+ break;
+
+ case SLJIT_GREATER_F64:
+ cr_bit = 4 + 1;
+ break;
+
+ case SLJIT_LESS_EQUAL_F64:
+ cr_bit = 4 + 1;
+ invert = 1;
break;
case SLJIT_EQUAL_F64:
- GET_CR_BIT(4 + 2, reg);
+ cr_bit = 4 + 2;
break;
case SLJIT_NOT_EQUAL_F64:
- GET_CR_BIT(4 + 2, reg);
- INVERT_BIT(reg);
+ cr_bit = 4 + 2;
+ invert = 1;
break;
case SLJIT_UNORDERED_F64:
- GET_CR_BIT(4 + 3, reg);
+ cr_bit = 4 + 3;
break;
case SLJIT_ORDERED_F64:
- GET_CR_BIT(4 + 3, reg);
- INVERT_BIT(reg);
+ cr_bit = 4 + 3;
+ invert = 1;
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
break;
}
+ FAIL_IF(push_inst(compiler, MFCR | D(reg)));
+ FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1)));
+
+ if (invert)
+ FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
+
if (op < SLJIT_ADD) {
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- if (op == SLJIT_MOV)
- input_flags = WORD_DATA;
- else {
- op = SLJIT_MOV_U32;
- input_flags = INT_DATA;
- }
-#else
- op = SLJIT_MOV;
- input_flags = WORD_DATA;
-#endif
- if (reg != TMP_REG2)
+ if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
- return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op_mem2(compiler, input_flags, reg, dst, dstw, reg, 0);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
+ if (dst & SLJIT_MEM)
+ return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2369,7 +2419,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
index 7e589a17c2..ee42130e87 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -60,7 +60,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
}
else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
@@ -71,7 +71,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
}
else if (dst != src2)
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
case SLJIT_NOT:
@@ -80,18 +80,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
- /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */
FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS)));
+ FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
/* Loop. */
FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
- return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS));
+ return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS);
case SLJIT_ADD:
return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
@@ -135,7 +134,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -145,20 +144,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst));
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
- inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff);
- inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff);
+ inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
+ inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
- sljit_ins *inst = (sljit_ins*)addr;
+ sljit_ins *inst = (sljit_ins *)addr;
inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
+ inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 2);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
index f3a33a1097..9831bd83d7 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -199,7 +199,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
@@ -213,7 +213,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
target_addr = jump->u.target;
else {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
- target_addr = (sljit_uw)(code + jump->u.label->size);
+ target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
inst = (sljit_ins*)jump->addr;
@@ -239,8 +239,9 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
if (jump->flags & IS_COND)
inst--;
+ diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2;
+
if (jump->flags & IS_MOVABLE) {
- diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2;
if (diff <= MAX_DISP && diff >= MIN_DISP) {
jump->flags |= PATCH_B;
inst--;
@@ -257,7 +258,8 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
}
}
- diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2;
+ diff += sizeof(sljit_ins);
+
if (diff <= MAX_DISP && diff >= MIN_DISP) {
jump->flags |= PATCH_B;
if (jump->flags & IS_COND)
@@ -280,6 +282,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
+ sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
@@ -296,9 +299,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
+
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
@@ -310,7 +316,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -320,7 +326,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#else
jump->addr = (sljit_uw)(code_ptr - 6);
#endif
- code_ptr = detect_jump_type(jump, code_ptr, code);
+ code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
@@ -336,7 +342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
} while (buf);
if (label && label->size == word_count) {
- label->addr = (sljit_uw)code_ptr;
+ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
@@ -350,16 +356,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
while (jump) {
do {
addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
- buf_ptr = (sljit_ins*)jump->addr;
+ buf_ptr = (sljit_ins *)jump->addr;
if (jump->flags & PATCH_CALL) {
- addr = (sljit_sw)(addr - jump->addr) >> 2;
+ addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
buf_ptr[0] = CALL | (addr & 0x3fffffff);
break;
}
if (jump->flags & PATCH_B) {
- addr = (sljit_sw)(addr - jump->addr) >> 2;
+ addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
break;
@@ -378,11 +384,37 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+
+ code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#else
+ /* Available by default. */
+ return 1;
+#endif
+
+#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+ case SLJIT_HAS_CMOV:
+ return 1;
+#endif
+
+ default:
+ return 0;
+ }
+}
+
/* --------------------------------------------------------------------- */
/* Entry, exit */
/* --------------------------------------------------------------------- */
@@ -567,7 +599,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
base = arg & REG_MASK;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
argw &= 0x3;
- SLJIT_ASSERT(argw != 0);
/* Using the cache. */
if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
@@ -652,18 +683,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
compiler->cache_argw = 0;
}
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
- }
- else if (FAST_IS_REG(dst)) {
- dst_r = dst;
- flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
- sugg_src2_r = dst_r;
+ if (dst != SLJIT_UNUSED) {
+ if (FAST_IS_REG(dst)) {
+ dst_r = dst;
+ flags |= REG_DEST;
+ if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ sugg_src2_r = dst_r;
+ }
+ else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
+ flags |= SLOW_DEST;
}
- else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
- flags |= SLOW_DEST;
if (flags & IMM_OP) {
if ((src2 & SLJIT_IMM) && src2w) {
@@ -812,13 +841,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+ sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
CHECK_ERROR();
CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
op = GET_OPCODE(op);
switch (op) {
case SLJIT_MOV:
@@ -881,7 +913,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0;
+ sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
@@ -889,6 +921,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
op = GET_OPCODE(op);
switch (op) {
case SLJIT_ADD:
@@ -910,7 +945,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
if (src2 & SLJIT_IMM)
src2w &= 0x1f;
#else
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
#endif
return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
}
@@ -943,16 +978,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#else
- /* Available by default. */
- return 1;
-#endif
-}
-
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7))
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
@@ -970,9 +995,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst)) {
FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
@@ -1186,10 +1208,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
@@ -1285,7 +1303,7 @@ static sljit_ins get_cc(sljit_s32 type)
return DA(0xf);
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return DA(0x8);
}
}
@@ -1373,30 +1391,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0);
+ sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
op = GET_OPCODE(op);
reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
+
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
type &= 0xff;
if (type < SLJIT_EQUAL_F64)
@@ -1407,10 +1418,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
- if (op >= SLJIT_ADD)
- return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+ if (op >= SLJIT_ADD) {
+ flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
+ if (dst & SLJIT_MEM)
+ return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0);
+ }
+
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
+
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
+#else
+#error "Implementation required"
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
- return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
#else
#error "Implementation required"
#endif
@@ -1429,7 +1461,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c
index 719632908c..dd82ebae6a 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c
@@ -2,7 +2,7 @@
* Stack-less Just-In-Time compiler
*
* Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c
index 462a8b9cd9..003f43a790 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c
@@ -2,7 +2,7 @@
* Stack-less Just-In-Time compiler
*
* Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -687,7 +687,7 @@ static sljit_s32 update_buffer(struct sljit_compiler *compiler)
inst_buf[0] = inst1;
inst_buf_index = 1;
} else
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
#ifdef TILEGX_JIT_DEBUG
return push_inst_nodebug(compiler, bits);
@@ -727,10 +727,10 @@ static sljit_s32 update_buffer(struct sljit_compiler *compiler)
return push_inst(compiler, bits);
#endif
} else
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
static sljit_s32 flush_buffer(struct sljit_compiler *compiler)
@@ -814,7 +814,7 @@ static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic
break;
default:
printf("unrecoginzed opc: %s\n", opcode->name);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
inst_buf_index++;
@@ -859,7 +859,7 @@ static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic
break;
default:
printf("unrecoginzed opc: %s\n", opcode->name);
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
inst_buf_index++;
@@ -1952,7 +1952,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
@@ -2092,9 +2092,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
op = GET_OPCODE(op);
if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
mem_type = INT_DATA | SIGNED_DATA;
@@ -2143,7 +2140,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
break;
default:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
dst_ar = sugg_dst_ar;
break;
}
@@ -2186,7 +2183,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
return SLJIT_SUCCESS;
@@ -2487,19 +2484,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil
return jump;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
- return 0;
-}
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
{
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
{
- SLJIT_ASSERT_STOP();
+ SLJIT_UNREACHABLE();
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
@@ -2526,13 +2518,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_comp
return const_;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target)
{
sljit_ins *inst = (sljit_ins *)addr;
- inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
- inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
- inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
+ inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43);
+ inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43);
+ inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43);
SLJIT_CACHE_FLUSH(inst, inst + 3);
}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
index 78f3dcb06f..f5cf8834b0 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -38,7 +38,7 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s
return SLJIT_SUCCESS;
}
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
if (type == SLJIT_JUMP) {
*code_ptr++ = JMP_i32;
@@ -57,7 +57,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
if (jump->flags & JUMP_LABEL)
jump->flags |= PATCH_MW;
else
- sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4));
+ sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
code_ptr += 4;
return code_ptr;
@@ -75,9 +75,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
compiler->args = args;
- compiler->flags_saved = 0;
- size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3);
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ /* [esp+0] for saving temporaries and third argument for calls. */
+ compiler->saveds_offset = 1 * sizeof(sljit_sw);
+#else
+ /* [esp+0] for saving temporaries and space for maximum three arguments. */
+ if (scratches <= 1)
+ compiler->saveds_offset = 1 * sizeof(sljit_sw);
+ else
+ compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
+#endif
+
+ if (scratches > 3)
+ compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);
+
+ compiler->locals_offset = compiler->saveds_offset;
+
+ if (saveds > 3)
+ compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);
+
+ if (options & SLJIT_F64_ALIGNMENT)
+ compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);
+
+ size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
@@ -94,11 +115,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
}
#endif
- if (saveds > 2 || scratches > 7)
+ if (saveds > 2 || scratches > 9)
PUSH_REG(reg_map[SLJIT_S2]);
- if (saveds > 1 || scratches > 8)
+ if (saveds > 1 || scratches > 10)
PUSH_REG(reg_map[SLJIT_S1]);
- if (saveds > 0 || scratches > 9)
+ if (saveds > 0 || scratches > 11)
PUSH_REG(reg_map[SLJIT_S0]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@@ -134,51 +155,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
}
#endif
- SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words);
+ SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
+
#if defined(__APPLE__)
/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
- saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+ saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
- if (options & SLJIT_DOUBLE_ALIGNMENT) {
- local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
-
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 17);
- FAIL_IF(!inst);
-
- INC_SIZE(17);
- inst[0] = MOV_r_rm;
- inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
- inst[2] = GROUP_F7;
- inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
- sljit_unaligned_store_sw(inst + 4, 0x4);
- inst[8] = JNE_i8;
- inst[9] = 6;
- inst[10] = GROUP_BINARY_81;
- inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
- sljit_unaligned_store_sw(inst + 12, 0x4);
- inst[16] = PUSH_r + reg_map[TMP_REG1];
- }
+ if (options & SLJIT_F64_ALIGNMENT)
+ local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
else
- local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
+ local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
compiler->local_size = local_size;
+
#ifdef _WIN32
if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
- local_size -= SLJIT_LOCALS_OFFSET;
+ /* Space for a single argument. This amount is excluded when the stack is allocated below. */
+ local_size -= sizeof(sljit_sw);
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
- SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET));
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
}
#endif
SLJIT_ASSERT(local_size > 0);
+
+#if !defined(__APPLE__)
+ if (options & SLJIT_F64_ALIGNMENT) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);
+
+ /* Some space might allocated during sljit_grow_stack() above on WIN32. */
+ FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));
+
+#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ if (compiler->local_size > 1024)
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
+#endif
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
+ FAIL_IF(!inst);
+
+ INC_SIZE(6);
+ inst[0] = GROUP_BINARY_81;
+ inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
+ sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));
+
+ /* The real local size must be used. */
+ return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
+ }
+#endif
return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
@@ -193,14 +227,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
compiler->args = args;
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+ /* [esp+0] for saving temporaries and third argument for calls. */
+ compiler->saveds_offset = 1 * sizeof(sljit_sw);
+#else
+ /* [esp+0] for saving temporaries and space for maximum three arguments. */
+ if (scratches <= 1)
+ compiler->saveds_offset = 1 * sizeof(sljit_sw);
+ else
+ compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
+#endif
+
+ if (scratches > 3)
+ compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);
+
+ compiler->locals_offset = compiler->saveds_offset;
+
+ if (saveds > 3)
+ compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);
+
+ if (options & SLJIT_F64_ALIGNMENT)
+ compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);
+
#if defined(__APPLE__)
- saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
+ saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
- if (options & SLJIT_DOUBLE_ALIGNMENT)
- compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7);
+ if (options & SLJIT_F64_ALIGNMENT)
+ compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
else
- compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3);
+ compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
return SLJIT_SUCCESS;
}
@@ -214,23 +270,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
CHECK(check_sljit_emit_return(compiler, op, src, srcw));
SLJIT_ASSERT(compiler->args >= 0);
- compiler->flags_saved = 0;
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
SLJIT_ASSERT(compiler->local_size > 0);
- FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
- SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#if !defined(__APPLE__)
- if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
- FAIL_IF(!inst);
-
- INC_SIZE(3);
- inst[0] = MOV_r_rm;
- inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */;
- inst[2] = (4 << 3) | reg_map[SLJIT_SP];
- }
+ if (compiler->options & SLJIT_F64_ALIGNMENT)
+ EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
+ else
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
+#else
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif
size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
@@ -247,11 +299,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
INC_SIZE(size);
- if (compiler->saveds > 0 || compiler->scratches > 9)
+ if (compiler->saveds > 0 || compiler->scratches > 11)
POP_REG(reg_map[SLJIT_S0]);
- if (compiler->saveds > 1 || compiler->scratches > 8)
+ if (compiler->saveds > 1 || compiler->scratches > 10)
POP_REG(reg_map[SLJIT_S1]);
- if (compiler->saveds > 2 || compiler->scratches > 7)
+ if (compiler->saveds > 2 || compiler->scratches > 9)
POP_REG(reg_map[SLJIT_S2]);
POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
index e88ddedcd1..039b68c45a 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -47,9 +47,8 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
*code_ptr++ = 10 + 3;
}
- SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
- *code_ptr++ = REX_W | REX_B;
- *code_ptr++ = MOV_r_i32 + 1;
+ *code_ptr++ = REX_W | ((reg_map[TMP_REG2] <= 7) ? 0 : REX_B);
+ *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
jump->addr = (sljit_uw)code_ptr;
if (jump->flags & JUMP_LABEL)
@@ -58,31 +57,10 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
sljit_unaligned_store_sw(code_ptr, jump->u.target);
code_ptr += sizeof(sljit_sw);
- *code_ptr++ = REX_B;
- *code_ptr++ = GROUP_FF;
- *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
-
- return code_ptr;
-}
-
-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type)
-{
- sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_s32));
-
- if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
- *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
- sljit_unaligned_store_sw(code_ptr, delta);
- }
- else {
- SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
- *code_ptr++ = REX_W | REX_B;
- *code_ptr++ = MOV_r_i32 + 1;
- sljit_unaligned_store_sw(code_ptr, addr);
- code_ptr += sizeof(sljit_sw);
+ if (reg_map[TMP_REG2] >= 8)
*code_ptr++ = REX_B;
- *code_ptr++ = GROUP_FF;
- *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
- }
+ *code_ptr++ = GROUP_FF;
+ *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];
return code_ptr;
}
@@ -98,7 +76,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
- compiler->flags_saved = 0;
+#ifdef _WIN64
+ /* Two/four register slots for parameters plus space for xmm6 register if needed. */
+ if (fscratches >= 6 || fsaveds >= 1)
+ compiler->locals_offset = 6 * sizeof(sljit_sw);
+ else
+ compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
+#endif
/* Including the return address saved by the call instruction. */
saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
@@ -177,7 +161,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
INC_SIZE(4 + (3 + sizeof(sljit_s32)));
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | 4;
+ *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
/* Allocated size for registers must be divisible by 8. */
SLJIT_ASSERT(!(saved_register_size & 0x7));
/* Aligned to 16 byte. */
@@ -189,7 +173,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
local_size -= 4 * sizeof(sljit_sw);
}
/* Second instruction */
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg);
+ SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
*inst++ = REX_W;
*inst++ = MOV_rm_i32;
*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
@@ -202,25 +186,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
}
#endif
- SLJIT_ASSERT(local_size > 0);
- if (local_size <= 127) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | 4;
- *inst++ = local_size;
- }
- else {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
- FAIL_IF(!inst);
- INC_SIZE(7);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_81;
- *inst++ = MOD_REG | SUB | 4;
- sljit_unaligned_store_s32(inst, local_size);
- inst += sizeof(sljit_s32);
+ if (local_size > 0) {
+ if (local_size <= 127) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!inst);
+ INC_SIZE(4);
+ *inst++ = REX_W;
+ *inst++ = GROUP_BINARY_83;
+ *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
+ *inst++ = local_size;
+ }
+ else {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+ FAIL_IF(!inst);
+ INC_SIZE(7);
+ *inst++ = REX_W;
+ *inst++ = GROUP_BINARY_81;
+ *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
+ sljit_unaligned_store_s32(inst, local_size);
+ inst += sizeof(sljit_s32);
+ }
}
#ifdef _WIN64
@@ -247,6 +232,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+#ifdef _WIN64
+ /* Two/four register slots for parameters plus space for xmm6 register if needed. */
+ if (fscratches >= 6 || fsaveds >= 1)
+ compiler->locals_offset = 6 * sizeof(sljit_sw);
+ else
+ compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
+#endif
+
/* Including the return address saved by the call instruction. */
saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
@@ -261,7 +254,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
CHECK_ERROR();
CHECK(check_sljit_emit_return(compiler, op, src, srcw));
- compiler->flags_saved = 0;
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
#ifdef _WIN64
@@ -275,24 +267,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
}
#endif
- SLJIT_ASSERT(compiler->local_size > 0);
- if (compiler->local_size <= 127) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | ADD | 4;
- *inst = compiler->local_size;
- }
- else {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
- FAIL_IF(!inst);
- INC_SIZE(7);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_81;
- *inst++ = MOD_REG | ADD | 4;
- sljit_unaligned_store_s32(inst, compiler->local_size);
+ if (compiler->local_size > 0) {
+ if (compiler->local_size <= 127) {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!inst);
+ INC_SIZE(4);
+ *inst++ = REX_W;
+ *inst++ = GROUP_BINARY_83;
+ *inst++ = MOD_REG | ADD | 4;
+ *inst = compiler->local_size;
+ }
+ else {
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+ FAIL_IF(!inst);
+ INC_SIZE(7);
+ *inst++ = REX_W;
+ *inst++ = GROUP_BINARY_81;
+ *inst++ = MOD_REG | ADD | 4;
+ sljit_unaligned_store_s32(inst, compiler->local_size);
+ }
}
tmp = compiler->scratches;
@@ -387,13 +380,12 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK)) {
if (NOT_HALFWORD(immb)) {
- if (emit_load_imm64(compiler, TMP_REG3, immb))
- return NULL;
+ PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
immb = 0;
if (b & REG_MASK)
- b |= TO_OFFS_REG(TMP_REG3);
+ b |= TO_OFFS_REG(TMP_REG2);
else
- b |= TMP_REG3;
+ b |= TMP_REG2;
}
else if (reg_lmap[b & REG_MASK] == 4)
b |= TO_OFFS_REG(SLJIT_SP);
@@ -553,17 +545,19 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
/* Call / return instructions */
/* --------------------------------------------------------------------- */
-static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
sljit_u8 *inst;
+ /* After any change update IS_REG_CHANGED_BY_CALL as well. */
#ifndef _WIN64
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+ SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2);
inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
FAIL_IF(!inst);
INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
if (type >= SLJIT_CALL3) {
+ /* Move third argument to TMP_REG1. */
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
@@ -572,12 +566,13 @@ static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sl
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers);
+ SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8);
inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
FAIL_IF(!inst);
INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
if (type >= SLJIT_CALL3) {
+ /* Move third argument to TMP_REG1. */
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
index aa5ba089d2..eb0886d671 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -67,12 +67,15 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
- 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
+ 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};
#define CHECK_EXTRA_REGS(p, w, do) \
- if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
- w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
+ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
+ if (p <= compiler->scratches) \
+ w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
+ else \
+ w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
p = SLJIT_MEM1(SLJIT_SP); \
do; \
}
@@ -82,28 +85,27 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
Note: avoid to use r12 and r13 for memory addessing
- therefore r12 is better for SAVED_EREG than SAVED_REG. */
+ therefore r12 is better to be a higher saved register. */
#ifndef _WIN64
-/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
+/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
-/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
+/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
};
/* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1
};
#endif
@@ -166,7 +168,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
-#define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
+#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
@@ -214,6 +216,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
+#define PREFETCH 0x18
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
@@ -409,13 +412,13 @@ static sljit_u8 get_jump_code(sljit_s32 type)
return 0;
}
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
+#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif
-static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
+static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
{
sljit_s32 short_jump;
sljit_uw label_addr;
@@ -423,7 +426,8 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
if (jump->flags & JUMP_LABEL)
label_addr = (sljit_uw)(code + jump->u.label->size);
else
- label_addr = jump->u.target;
+ label_addr = jump->u.target - executable_offset;
+
short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -476,6 +480,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_u8 *buf_ptr;
sljit_u8 *buf_end;
sljit_u8 len;
+ sljit_sw executable_offset;
+ sljit_sw jump_addr;
struct sljit_label *label;
struct sljit_jump *jump;
@@ -494,6 +500,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
+ executable_offset = SLJIT_EXEC_OFFSET(code);
+
do {
buf_ptr = buf->memory;
buf_end = buf_ptr + buf->used_size;
@@ -506,35 +514,28 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
buf_ptr += len;
}
else {
- if (*buf_ptr >= 4) {
+ if (*buf_ptr >= 2) {
jump->addr = (sljit_uw)code_ptr;
if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
- code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
- else
- code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
+ code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
+ else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
+#else
+ code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
+#endif
+ }
jump = jump->next;
}
else if (*buf_ptr == 0) {
- label->addr = (sljit_uw)code_ptr;
+ label->addr = ((sljit_uw)code_ptr) + executable_offset;
label->size = code_ptr - code;
label = label->next;
}
- else if (*buf_ptr == 1) {
+ else { /* *buf_ptr is 1 */
const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
const_ = const_->next;
}
- else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
- buf_ptr++;
- sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
- code_ptr += sizeof(sljit_sw);
- buf_ptr += sizeof(sljit_sw) - 1;
-#else
- code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
- buf_ptr += sizeof(sljit_sw);
-#endif
- }
buf_ptr++;
}
} while (buf_ptr < buf_end);
@@ -548,24 +549,26 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = compiler->jumps;
while (jump) {
+ jump_addr = jump->addr + executable_offset;
+
if (jump->flags & PATCH_MB) {
- SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
- *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
+ SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
+ *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
} else if (jump->flags & PATCH_MW) {
if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
+ sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
#else
- SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
- sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
+ SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+ sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
}
else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
+ sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
- SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
- sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
+ SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+ sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#endif
}
}
@@ -577,11 +580,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
- /* Maybe we waste some space because of short jumps. */
+ /* Some space may be wasted because of short jumps. */
SLJIT_ASSERT(code_ptr <= code + compiler->size);
compiler->error = SLJIT_ERR_COMPILED;
+ compiler->executable_offset = executable_offset;
compiler->executable_size = code_ptr - code;
- return (void*)code;
+ return (void*)(code + executable_offset);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{
+ switch (feature_type) {
+ case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return SLJIT_IS_FPU_AVAILABLE;
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+ if (cpu_has_sse2 == -1)
+ get_cpu_features();
+ return cpu_has_sse2;
+#else /* SLJIT_DETECT_SSE2 */
+ return 1;
+#endif /* SLJIT_DETECT_SSE2 */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ case SLJIT_HAS_VIRTUAL_REGISTERS:
+ return 1;
+#endif
+
+ case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CMOV:
+ if (cpu_has_cmov == -1)
+ get_cpu_features();
+ return cpu_has_cmov;
+
+ case SLJIT_HAS_PREF_SHIFT_REG:
+ return 1;
+
+ case SLJIT_HAS_SSE2:
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+ if (cpu_has_sse2 == -1)
+ get_cpu_features();
+ return cpu_has_sse2;
+#else
+ return 1;
+#endif
+
+ default:
+ return 0;
+ }
}
/* --------------------------------------------------------------------- */
@@ -604,52 +650,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw);
-static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
-{
- sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
- FAIL_IF(!inst);
- INC_SIZE(5);
-#else
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
- FAIL_IF(!inst);
- INC_SIZE(6);
- *inst++ = REX_W;
-#endif
- *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
- *inst++ = 0x64;
- *inst++ = 0x24;
- *inst++ = (sljit_u8)sizeof(sljit_sw);
- *inst++ = PUSHF;
- compiler->flags_saved = 1;
- return SLJIT_SUCCESS;
-}
-
-static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
-{
- sljit_u8 *inst;
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
- FAIL_IF(!inst);
- INC_SIZE(5);
- *inst++ = POPF;
-#else
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
- FAIL_IF(!inst);
- INC_SIZE(6);
- *inst++ = POPF;
- *inst++ = REX_W;
-#endif
- *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
- *inst++ = 0x64;
- *inst++ = 0x24;
- *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
- compiler->flags_saved = keep_flags;
- return SLJIT_SUCCESS;
-}
+#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
+ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
#ifdef _WIN32
#include <malloc.h>
@@ -680,15 +682,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- /* No destination, doesn't need to setup flags. */
- if (src & SLJIT_MEM) {
- inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
- FAIL_IF(!inst);
- *inst = MOV_r_rm;
- }
- return SLJIT_SUCCESS;
- }
+ SLJIT_ASSERT(dst != SLJIT_UNUSED);
+
if (FAST_IS_REG(src)) {
inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
FAIL_IF(!inst);
@@ -710,8 +705,10 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
+ /* Immediate to memory move. Only SLJIT_MOV operation copies
+ an immediate directly into memory so TMP_REG1 can be used. */
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm_r;
return SLJIT_SUCCESS;
@@ -729,7 +726,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
- /* Memory to memory move. Requires two instruction. */
+ /* Memory to memory move. Only SLJIT_MOV operation copies
+ data from memory to memory so TMP_REG1 can be used. */
inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
FAIL_IF(!inst);
*inst = MOV_r_rm;
@@ -739,9 +737,6 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
-#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
- FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
sljit_u8 *inst;
@@ -771,20 +766,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
- compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
- SLJIT_COMPILE_ASSERT(
+ SLJIT_ASSERT(
reg_map[SLJIT_R0] == 0
&& reg_map[SLJIT_R1] == 2
- && reg_map[TMP_REG1] > 7,
- invalid_register_assignment_for_div_mul);
+ && reg_map[TMP_REG1] > 7);
#else
- SLJIT_COMPILE_ASSERT(
+ SLJIT_ASSERT(
reg_map[SLJIT_R0] == 0
&& reg_map[SLJIT_R1] < 7
- && reg_map[TMP_REG1] == 2,
- invalid_register_assignment_for_div_mul);
+ && reg_map[TMP_REG1] == 2);
#endif
compiler->mode32 = op & SLJIT_I32_OP;
#endif
@@ -908,9 +900,6 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
#endif
- if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
- return SLJIT_SUCCESS; /* Empty instruction. */
-
if (src & SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1039,6 +1028,30 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_u8* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif
+
+ inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst++ = PREFETCH;
+
+ if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
+ *inst |= (3 << 3);
+ else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
+ *inst |= (2 << 3);
+ else
+ *inst |= (1 << 3);
+
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1050,9 +1063,6 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
#endif
- if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
- return SLJIT_SUCCESS; /* Empty instruction. */
-
if (src & SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1096,14 +1106,6 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= opcode;
- return SLJIT_SUCCESS;
- }
if (dst == src && dstw == srcw) {
/* Same input and output */
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
@@ -1112,14 +1114,19 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
*inst |= opcode;
return SLJIT_SUCCESS;
}
+
+ if (dst == SLJIT_UNUSED)
+ dst = TMP_REG1;
+
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
FAIL_IF(!inst);
*inst++ = GROUP_F7;
*inst |= opcode;
return SLJIT_SUCCESS;
}
+
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
FAIL_IF(!inst);
@@ -1135,20 +1142,12 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= NOT_rm;
- inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst = OR_r_rm;
- return SLJIT_SUCCESS;
- }
+ if (dst == SLJIT_UNUSED)
+ dst = TMP_REG1;
+
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
FAIL_IF(!inst);
*inst++ = GROUP_F7;
*inst |= NOT_rm;
@@ -1157,6 +1156,7 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
*inst = OR_r_rm;
return SLJIT_SUCCESS;
}
+
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
FAIL_IF(!inst);
@@ -1169,6 +1169,10 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static const sljit_sw emit_clz_arg = 32 + 31;
+#endif
+
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1177,22 +1181,6 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
sljit_s32 dst_r;
SLJIT_UNUSED_ARG(op_flags);
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- /* Just set the zero flag. */
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= NOT_rm;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
-#else
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
-#endif
- FAIL_IF(!inst);
- *inst |= SHR;
- return SLJIT_SUCCESS;
- }
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
@@ -1200,81 +1188,53 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
srcw = 0;
}
- inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
+ if (cpu_has_cmov == -1)
+ get_cpu_features();
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = BSR_r_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (FAST_IS_REG(dst))
- dst_r = dst;
- else {
- /* Find an unused temporary register. */
- if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
- dst_r = SLJIT_R0;
- else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
- dst_r = SLJIT_R1;
+ if (cpu_has_cmov) {
+ if (dst_r != TMP_REG1) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
+ }
else
- dst_r = SLJIT_R2;
- EMIT_MOV(compiler, dst, dstw, dst_r, 0);
- }
- EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
-#else
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
- compiler->mode32 = 0;
- EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
- compiler->mode32 = op_flags & SLJIT_I32_OP;
-#endif
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
- if (cpu_has_cmov == -1)
- get_cpu_features();
-
- if (cpu_has_cmov) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
- *inst = CMOVNE_r_rm;
- } else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
+ *inst = CMOVE_r_rm;
+ }
+ else
+ FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
- *inst++ = JE_i8;
- *inst++ = 2;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
+ inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
- FAIL_IF(!inst);
- INC_SIZE(5);
+ if (cpu_has_cmov) {
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
- *inst++ = JE_i8;
- *inst++ = 3;
- *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
-#endif
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CMOVE_r_rm;
}
+ else
+ FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
-#else
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
+
FAIL_IF(!inst);
*(inst + 1) |= XOR;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (dst & SLJIT_MEM) {
- inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
- FAIL_IF(!inst);
- *inst = XCHG_r_rm;
- }
-#else
if (dst & SLJIT_MEM)
- EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
-#endif
+ EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -1282,7 +1242,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_u8* inst;
sljit_s32 update = 0;
sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1303,7 +1262,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, op, src, srcw);
+ return SLJIT_SUCCESS;
+ }
+
op = GET_OPCODE(op);
+
if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
@@ -1361,14 +1327,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
#endif
}
- if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
- inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
- FAIL_IF(!inst);
- *inst = LEA_r_m;
- src &= SLJIT_MEM | 0xf;
- srcw = 0;
- }
-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
@@ -1412,31 +1370,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif
- if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
- inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
- FAIL_IF(!inst);
- *inst = LEA_r_m;
+ if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
+ if ((src & OFFS_REG_MASK) != 0) {
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
+ }
+ else if (srcw != 0) {
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
+ }
+ }
+
+ if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
+ if ((dst & OFFS_REG_MASK) != 0) {
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
+ }
+ else if (dstw != 0) {
+ FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
+ (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
+ }
}
return SLJIT_SUCCESS;
}
- if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
- compiler->flags_saved = 0;
-
switch (op) {
case SLJIT_NOT:
- if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
+ if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
return emit_not_with_flags(compiler, dst, dstw, src, srcw);
return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
case SLJIT_NEG:
- if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
- FAIL_IF(emit_save_flags(compiler));
return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
case SLJIT_CLZ:
- if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
- FAIL_IF(emit_save_flags(compiler));
return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
}
@@ -1456,8 +1423,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
*(inst + 1) |= (op_imm); \
} \
else { \
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
+ FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
+ inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
FAIL_IF(!inst); \
*inst = (op_mr); \
}
@@ -1683,7 +1650,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_u8* inst;
sljit_s32 dst_r;
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
/* Register destination. */
if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
@@ -1735,9 +1702,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
}
else {
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
if (dst_r != src2)
EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
@@ -1778,9 +1745,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
}
else {
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
if (dst_r != src1)
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
@@ -1799,13 +1766,13 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
*inst = IMUL_r_rm;
}
- if (dst_r == TMP_REG1)
+ if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
-static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
+static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -1814,12 +1781,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep
sljit_s32 dst_r, done = 0;
/* These cases better be left to handled by normal way. */
- if (!keep_flags) {
- if (dst == src1 && dstw == src1w)
- return SLJIT_ERR_UNSUPPORTED;
- if (dst == src2 && dstw == src2w)
- return SLJIT_ERR_UNSUPPORTED;
- }
+ if (dst == src1 && dstw == src1w)
+ return SLJIT_ERR_UNSUPPORTED;
+ if (dst == src2 && dstw == src2w)
+ return SLJIT_ERR_UNSUPPORTED;
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
@@ -1931,7 +1896,7 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
@@ -1948,8 +1913,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
*inst = GROUP_F7;
}
else {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
FAIL_IF(!inst);
*inst = TEST_rm_r;
}
@@ -1977,8 +1942,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
*inst = GROUP_F7;
}
else {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
FAIL_IF(!inst);
*inst = TEST_rm_r;
}
@@ -2090,25 +2055,29 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
else {
- /* This case is really difficult, since ecx itself may used for
- addressing, and we must ensure to work even in that case. */
+ /* This case is complex since ecx itself may be used for
+ addressing, and this case must be supported as well. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
-#else
- /* [esp+0] contains the flags. */
- EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
-#endif
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
+ EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
#else
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
-#endif
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
+ inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+ FAIL_IF(!inst);
+ *inst = XCHG_r_rm;
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ *inst |= mode;
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+#endif
}
return SLJIT_SUCCESS;
@@ -2167,54 +2136,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->mode32 = op & SLJIT_I32_OP;
#endif
- if (GET_OPCODE(op) >= SLJIT_MUL) {
- if (SLJIT_UNLIKELY(GET_FLAGS(op)))
- compiler->flags_saved = 0;
- else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
- FAIL_IF(emit_save_flags(compiler));
- }
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
- if (!GET_FLAGS(op)) {
- if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+ if (!HAS_FLAGS(op)) {
+ if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
- else
- compiler->flags_saved = 0;
- if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
- FAIL_IF(emit_save_flags(compiler));
return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC:
- if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
- FAIL_IF(emit_restore_flags(compiler, 1));
- else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
- FAIL_IF(emit_save_flags(compiler));
- if (SLJIT_UNLIKELY(GET_FLAGS(op)))
- compiler->flags_saved = 0;
return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB:
- if (!GET_FLAGS(op)) {
- if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+ if (!HAS_FLAGS(op)) {
+ if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
- else
- compiler->flags_saved = 0;
- if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
- FAIL_IF(emit_save_flags(compiler));
+
if (dst == SLJIT_UNUSED)
return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC:
- if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
- FAIL_IF(emit_restore_flags(compiler, 1));
- else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
- FAIL_IF(emit_save_flags(compiler));
- if (SLJIT_UNLIKELY(GET_FLAGS(op)))
- compiler->flags_saved = 0;
return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL:
@@ -2231,13 +2177,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SHL:
- return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
+ return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_LSHR:
- return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
+ return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ASHR:
- return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
+ return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
dst, dstw, src1, src1w, src2, src2w);
}
@@ -2248,7 +2194,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
+ if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
return -1;
#endif
return reg_map[reg];
@@ -2279,36 +2225,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-/* Alignment + 2 * 16 bytes. */
-static sljit_s32 sse2_data[3 + (4 + 4) * 2];
+/* Alignment(3) + 4 * 16 bytes. */
+static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;
static void init_compiler(void)
{
+ /* Align to 16 bytes. */
sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
- /* Single precision constants. */
+
+ /* Single precision constants (each constant is 16 byte long). */
sse2_buffer[0] = 0x80000000;
sse2_buffer[4] = 0x7fffffff;
- /* Double precision constants. */
+ /* Double precision constants (each constant is 16 byte long). */
sse2_buffer[8] = 0;
sse2_buffer[9] = 0x80000000;
sse2_buffer[12] = 0xffffffff;
sse2_buffer[13] = 0x7fffffff;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
-{
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
- if (cpu_has_sse2 == -1)
- get_cpu_features();
- return cpu_has_sse2;
-#else /* SLJIT_DETECT_SSE2 */
- return 1;
-#endif /* SLJIT_DETECT_SSE2 */
-}
-
static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
@@ -2349,7 +2284,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2362,7 +2297,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
*inst++ = GROUP_0F;
*inst = CVTTSD2SI_r_xm;
- if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ if (dst & SLJIT_MEM)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2406,7 +2341,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
- compiler->flags_saved = 0;
if (!FAST_IS_REG(src1)) {
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
src1 = TMP_FREG;
@@ -2455,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
- if (SLOW_IS_REG(dst)) {
+ if (FAST_IS_REG(dst)) {
dst_r = dst;
if (dst != src)
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
@@ -2553,11 +2487,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_label(compiler));
- /* We should restore the flags before the label,
- since other taken jumps has their own flags as well. */
- if (SLJIT_UNLIKELY(compiler->flags_saved))
- PTR_FAIL_IF(emit_restore_flags(compiler, 0));
-
if (compiler->last_label && compiler->last_label->size == compiler->size)
return compiler->last_label;
@@ -2582,12 +2511,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_jump(compiler, type));
- if (SLJIT_UNLIKELY(compiler->flags_saved)) {
- if ((type & 0xff) <= SLJIT_JUMP)
- PTR_FAIL_IF(emit_restore_flags(compiler, 0));
- compiler->flags_saved = 0;
- }
-
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF_NULL(jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
@@ -2607,10 +2530,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
PTR_FAIL_IF_NULL(inst);
*inst++ = 0;
- *inst++ = type + 4;
+ *inst++ = type + 2;
return jump;
}
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifndef _WIN64
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
+#else
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
+#endif
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
@@ -2622,12 +2553,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_EXTRA_REGS(src, srcw, (void)0);
- if (SLJIT_UNLIKELY(compiler->flags_saved)) {
- if (type <= SLJIT_JUMP)
- FAIL_IF(emit_restore_flags(compiler, 0));
- compiler->flags_saved = 0;
- }
-
if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@@ -2638,11 +2563,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
srcw += sizeof(sljit_sw);
#endif
-#endif
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
- if (src == SLJIT_R2) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
- src = TMP_REG1;
+#else
+ if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
+ EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+ src = TMP_REG2;
}
#endif
FAIL_IF(call_with_args(compiler, type));
@@ -2665,7 +2589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
FAIL_IF_NULL(inst);
*inst++ = 0;
- *inst++ = type + 4;
+ *inst++ = type + 2;
}
else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2682,37 +2606,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_u8 *inst;
sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 reg;
-#else
- /* CHECK_EXTRA_REGS migh overwrite these values. */
+#endif
+ /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
sljit_s32 dst_save = dst;
sljit_sw dstw_save = dstw;
-#endif
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
- SLJIT_UNUSED_ARG(srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
CHECK_EXTRA_REGS(dst, dstw, (void)0);
- if (SLJIT_UNLIKELY(compiler->flags_saved))
- FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
type &= 0xff;
/* setcc = jcc + 0x10. */
cond_set = get_jump_code(type) + 0x10;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
FAIL_IF(!inst);
INC_SIZE(4 + 3);
@@ -2727,7 +2643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
- reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
+ reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
FAIL_IF(!inst);
@@ -2738,6 +2654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
*inst++ = cond_set;
*inst++ = MOD_REG | reg_lmap[reg];
*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
+ /* The movzx instruction does not affect flags. */
*inst++ = GROUP_0F;
*inst++ = MOVZX_r_rm8;
*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
@@ -2749,12 +2666,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
}
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
-#else /* SLJIT_CONFIG_X86_64 */
+ return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
+
+#else
+ /* The SLJIT_CONFIG_X86_32 code path starts here. */
if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
if (reg_map[dst] <= 4) {
/* Low byte is accessible. */
@@ -2808,8 +2728,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
- if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
+ SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
+
if (dst != SLJIT_R0) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
FAIL_IF(!inst);
@@ -2868,6 +2789,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
#endif /* SLJIT_CONFIG_X86_64 */
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_reg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_u8* inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ dst_reg &= ~SLJIT_I32_OP;
+
+ if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+#else
+ if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+ return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
+#endif
+
+ /* ADJUST_LOCAL_OFFSET is not needed. */
+ CHECK_EXTRA_REGS(src, srcw, (void)0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = dst_reg & SLJIT_I32_OP;
+ dst_reg &= ~SLJIT_I32_OP;
+#endif
+
+ if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = get_jump_code(type & 0xff) - 0x40;
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
CHECK_ERROR();
@@ -2886,16 +2847,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c
if (NOT_HALFWORD(offset)) {
FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
- SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
+ SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
return compiler->error;
#else
- return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
+ return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
}
#endif
if (offset != 0)
- return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
+ return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
@@ -2919,14 +2880,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (emit_load_imm64(compiler, reg, init_value))
return NULL;
#else
- if (dst == SLJIT_UNUSED)
- dst = TMP_REG1;
-
if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
return NULL;
#endif
@@ -2946,82 +2904,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
+ SLJIT_UNUSED_ARG(executable_offset);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
+ sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
- sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr);
+ sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
+ SLJIT_UNUSED_ARG(executable_offset);
sljit_unaligned_store_sw((void*)addr, new_constant);
}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
-{
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
- if (cpu_has_sse2 == -1)
- get_cpu_features();
- return cpu_has_sse2;
-#else
- return 1;
-#endif
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
-{
- if (cpu_has_cmov == -1)
- get_cpu_features();
- return cpu_has_cmov;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
- sljit_s32 type,
- sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
-{
- sljit_u8* inst;
-
- CHECK_ERROR();
-#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(sljit_x86_is_cmov_available());
- CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
- CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
- FUNCTION_CHECK_SRC(src, srcw);
-#endif
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
- if (SLJIT_UNLIKELY(!!compiler->verbose)) {
- fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
- !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
- jump_names[type & 0xff], JUMP_POSTFIX(type));
- sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
- fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src, srcw);
- fprintf(compiler->verbose, "\n");
- }
-#endif
-
- ADJUST_LOCAL_OFFSET(src, srcw);
- CHECK_EXTRA_REGS(src, srcw, (void)0);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- compiler->mode32 = dst_reg & SLJIT_I32_OP;
-#endif
- dst_reg &= ~SLJIT_I32_OP;
-
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
- src = TMP_REG1;
- srcw = 0;
- }
-
- inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
- FAIL_IF(!inst);
- *inst++ = GROUP_0F;
- *inst = get_jump_code(type & 0xff) - 0x40;
- return SLJIT_SUCCESS;
-}
diff --git a/src/3rdparty/pcre2/src/sljit/sljitUtils.c b/src/3rdparty/pcre2/src/sljit/sljitUtils.c
index ec5c321194..9029db292c 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitUtils.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitUtils.c
@@ -1,7 +1,7 @@
/*
* Stack-less Just-In-Time compiler
*
- * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
@@ -206,10 +206,7 @@ static sljit_sw sljit_page_align = 0;
SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data)
{
struct sljit_stack *stack;
- union {
- void *ptr;
- sljit_uw uw;
- } base;
+ void *ptr;
#ifdef _WIN32
SYSTEM_INFO si;
#endif
@@ -233,29 +230,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj
}
#endif
- /* Align limit and max_limit. */
- max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
-
stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data);
if (!stack)
return NULL;
+ /* Align max_limit. */
+ max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
+
#ifdef _WIN32
- base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
- if (!base.ptr) {
+ ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE);
+ if (!ptr) {
SLJIT_FREE(stack, allocator_data);
return NULL;
}
- stack->base = base.uw;
+ stack->max_limit = (sljit_u8 *)ptr;
+ stack->base = stack->max_limit + max_limit;
stack->limit = stack->base;
- stack->max_limit = stack->base + max_limit;
- if (sljit_stack_resize(stack, stack->base + limit)) {
+ if (sljit_stack_resize(stack, stack->base - limit)) {
sljit_free_stack(stack, allocator_data);
return NULL;
}
#else
#ifdef MAP_ANON
- base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
#else
if (dev_zero < 0) {
if (open_dev_zero()) {
@@ -263,15 +260,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj
return NULL;
}
}
- base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
+ ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0);
#endif
- if (base.ptr == MAP_FAILED) {
+ if (ptr == MAP_FAILED) {
SLJIT_FREE(stack, allocator_data);
return NULL;
}
- stack->base = base.uw;
- stack->limit = stack->base + limit;
- stack->max_limit = stack->base + max_limit;
+ stack->max_limit = (sljit_u8 *)ptr;
+ stack->base = stack->max_limit + max_limit;
+ stack->limit = stack->base - limit;
#endif
stack->top = stack->base;
return stack;
@@ -279,53 +276,53 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj
#undef PAGE_ALIGN
-SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data)
+SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
{
SLJIT_UNUSED_ARG(allocator_data);
#ifdef _WIN32
- VirtualFree((void*)stack->base, 0, MEM_RELEASE);
+ VirtualFree((void*)stack->max_limit, 0, MEM_RELEASE);
#else
- munmap((void*)stack->base, stack->max_limit - stack->base);
+ munmap((void*)stack->max_limit, stack->base - stack->max_limit);
#endif
SLJIT_FREE(stack, allocator_data);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit)
{
sljit_uw aligned_old_limit;
sljit_uw aligned_new_limit;
- if ((new_limit > stack->max_limit) || (new_limit < stack->base))
+ if ((new_limit < stack->max_limit) || (new_limit >= stack->base))
return -1;
#ifdef _WIN32
- aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
- aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+ aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align;
+ aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align;
if (aligned_new_limit != aligned_old_limit) {
- if (aligned_new_limit > aligned_old_limit) {
- if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
+ if (aligned_new_limit < aligned_old_limit) {
+ if (!VirtualAlloc((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_COMMIT, PAGE_READWRITE))
return -1;
}
else {
- if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
+ if (!VirtualFree((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_DECOMMIT))
return -1;
}
}
stack->limit = new_limit;
return 0;
#else
- if (new_limit >= stack->limit) {
+ if (new_limit <= stack->limit) {
stack->limit = new_limit;
return 0;
}
- aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
- aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+ aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align;
+ aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align;
/* If madvise is available, we release the unnecessary space. */
#if defined(MADV_DONTNEED)
- if (aligned_new_limit < aligned_old_limit)
- madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED);
+ if (aligned_new_limit > aligned_old_limit)
+ madvise((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MADV_DONTNEED);
#elif defined(POSIX_MADV_DONTNEED)
- if (aligned_new_limit < aligned_old_limit)
- posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED);
+ if (aligned_new_limit > aligned_old_limit)
+ posix_madvise((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, POSIX_MADV_DONTNEED);
#endif
stack->limit = new_limit;
return 0;