1 files changed, 171 insertions, 91 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_internal.h b/src/3rdparty/pcre2/src/pcre2_internal.h
index d8fad1e93b..e5808182e6 100644
--- a/src/3rdparty/pcre2/src/pcre2_internal.h
+++ b/src/3rdparty/pcre2/src/pcre2_internal.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2023 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,24 @@ pcre2test.c with CODE_UNIT_WIDTH == 0. */
 #error The use of both EBCDIC and SUPPORT_UNICODE is not supported.
 #endif
 
+/* When compiling one of the libraries, the value of PCRE2_CODE_UNIT_WIDTH must
+be 8, 16, or 32. AutoTools and CMake ensure that this is always the case, but
+other other building methods may not, so here is a check. It is cut out when
+building pcre2test, bcause that sets the value to zero. No other source should
+be including this file. There is no explicit way of forcing a compile to be
+abandoned, but trying to include a non-existent file seems cleanest. Otherwise
+there will be many irrelevant consequential errors. */
+
+#if (!defined PCRE2_BUILDING_PCRE2TEST && !defined PCRE2_DFTABLES) && \
+  (!defined PCRE2_CODE_UNIT_WIDTH ||     \
+    (PCRE2_CODE_UNIT_WIDTH != 8 &&       \
+     PCRE2_CODE_UNIT_WIDTH != 16 &&      \
+     PCRE2_CODE_UNIT_WIDTH != 32))
+#error PCRE2_CODE_UNIT_WIDTH must be defined as 8, 16, or 32.
+#include <AbandonCompile>
+#endif
+
+
 /* Standard C headers */
 
 #include <ctype.h>
@@ -119,20 +137,20 @@ only if it is not already set. */
 #ifndef PCRE2_EXP_DECL
 #  ifdef _WIN32
 #    ifndef PCRE2_STATIC
-#      define PCRE2_EXP_DECL       extern __declspec(dllexport)
-#      define PCRE2_EXP_DEFN       __declspec(dllexport)
+#      define PCRE2_EXP_DECL		extern __declspec(dllexport)
+#      define PCRE2_EXP_DEFN		__declspec(dllexport)
 #    else
-#      define PCRE2_EXP_DECL       extern
+#      define PCRE2_EXP_DECL		extern PCRE2_EXPORT
 #      define PCRE2_EXP_DEFN
 #    endif
 #  else
 #    ifdef __cplusplus
-#      define PCRE2_EXP_DECL       extern "C"
+#      define PCRE2_EXP_DECL		extern "C" PCRE2_EXPORT
 #    else
-#      define PCRE2_EXP_DECL       extern
+#      define PCRE2_EXP_DECL		extern PCRE2_EXPORT
 #    endif
 #    ifndef PCRE2_EXP_DEFN
-#      define PCRE2_EXP_DEFN       PCRE2_EXP_DECL
+#      define PCRE2_EXP_DEFN		PCRE2_EXP_DECL
 #    endif
 #  endif
 #endif
@@ -156,8 +174,8 @@ pcre2_match() because of the way it backtracks. */
 #define PCRE2_SPTR CUSTOM_SUBJECT_PTR
 #endif
 
-/* When checking for integer overflow in pcre2_compile(), we need to handle
-large integers. If a 64-bit integer type is available, we can use that.
+/* When checking for integer overflow, we need to handle large integers.
+If a 64-bit integer type is available, we can use that.
 Otherwise we have to cast to double, which of course requires floating point
 arithmetic. Handle this by defining a macro for the appropriate type. */
 
@@ -220,18 +238,17 @@ not rely on this. */
 
 #define COMPILE_ERROR_BASE 100
 
-/* The initial frames vector for remembering backtracking points in
-pcre2_match() is allocated on the system stack, of this size (bytes). The size
-must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
-multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
-on the number of capturing parentheses) so 20KiB handles quite a few frames. A
-larger vector on the heap is obtained for patterns that need more frames. The
-maximum size of this can be limited. */
+/* The initial frames vector for remembering pcre2_match() backtracking points
+is allocated on the heap, of this size (bytes) or ten times the frame size if
+larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
+bytes (it depends on the number of capturing parentheses) so 20KiB handles
+quite a few frames. A larger vector on the heap is obtained for matches that
+need more frames, subject to the heap limit. */
 
 #define START_FRAMES_SIZE 20480
 
-/* Similarly, for DFA matching, an initial internal workspace vector is
-allocated on the stack. */
+/* For DFA matching, an initial internal workspace vector is allocated on the
+stack. The heap is used only if this turns out to be too small. */
 
 #define DFA_START_RWS_SIZE 30720
 
@@ -954,6 +971,13 @@ a positive value. */
 #define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="
 #define STRING_MARK                       "MARK"
 
+#define STRING_bc                         "bc"
+#define STRING_bidiclass                  "bidiclass"
+#define STRING_sc                         "sc"
+#define STRING_script                     "script"
+#define STRING_scriptextensions           "scriptextensions"
+#define STRING_scx                        "scx"
+
 #else  /* SUPPORT_UNICODE */
 
 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
@@ -1248,26 +1272,39 @@ only. */
 #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
 #define STRING_MARK                       STR_M STR_A STR_R STR_K
 
+#define STRING_bc                         STR_b STR_c
+#define STRING_bidiclass                  STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s
+#define STRING_sc                         STR_s STR_c
+#define STRING_script                     STR_s STR_c STR_r STR_i STR_p STR_t
+#define STRING_scriptextensions           STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s
+#define STRING_scx                        STR_s STR_c STR_x
+
+
 #endif  /* SUPPORT_UNICODE */
 
 /* -------------------- End of character and string names -------------------*/
 
 /* -------------------- Definitions for compiled patterns -------------------*/
 
-/* Codes for different types of Unicode property */
+/* Codes for different types of Unicode property. If these definitions are
+changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
+match. */
 
 #define PT_ANY        0    /* Any property - matches all chars */
 #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
 #define PT_GC         2    /* Specified general characteristic (e.g. L) */
 #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
-#define PT_SC         4    /* Script (e.g. Han) */
-#define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
-#define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
-#define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
-#define PT_WORD       8    /* Word - L plus N plus underscore */
-#define PT_CLIST      9    /* Pseudo-property: match character list */
-#define PT_UCNC      10    /* Universal Character nameable character */
-#define PT_TABSIZE   11    /* Size of square table for autopossessify tests */
+#define PT_SC         4    /* Script only (e.g. Han) */
+#define PT_SCX        5    /* Script extensions (includes SC) */
+#define PT_ALNUM      6    /* Alphanumeric - the union of L and N */
+#define PT_SPACE      7    /* Perl space - general category Z plus 9,10,12,13 */
+#define PT_PXSPACE    8    /* POSIX space - Z plus 9,10,11,12,13 */
+#define PT_WORD       9    /* Word - L, N, Mn, or Pc */
+#define PT_CLIST     10    /* Pseudo-property: match character list */
+#define PT_UCNC      11    /* Universal Character nameable character */
+#define PT_BIDICL    12    /* Specified bidi class */
+#define PT_BOOL      13    /* Boolean property */
+#define PT_TABSIZE   14    /* Size of square table for autopossessify tests */
 
 /* The following special properties are used only in XCLASS items, when POSIX
 classes are specified and PCRE2_UCP is set - in other words, for Unicode
@@ -1275,22 +1312,28 @@ handling of these classes. They are not available via the \p or \P escapes like
 those in the above list, and so they do not take part in the autopossessifying
 table. */
 
-#define PT_PXGRAPH   11    /* [:graph:] - characters that mark the paper */
-#define PT_PXPRINT   12    /* [:print:] - [:graph:] plus non-control spaces */
-#define PT_PXPUNCT   13    /* [:punct:] - punctuation characters */
+#define PT_PXGRAPH   14    /* [:graph:] - characters that mark the paper */
+#define PT_PXPRINT   15    /* [:print:] - [:graph:] plus non-control spaces */
+#define PT_PXPUNCT   16    /* [:punct:] - punctuation characters */
+#define PT_PXXDIGIT  17    /* [:xdigit:] - hex digits */
+
+/* This value is used when parsing \p and \P escapes to indicate that neither
+\p{script:...} nor \p{scx:...} has been encountered. */
+
+#define PT_NOTSCRIPT 255
 
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 contain characters with values greater than 255. */
 
-#define XCL_NOT       0x01    /* Flag: this is a negative class */
-#define XCL_MAP       0x02    /* Flag: a 32-byte map is present */
-#define XCL_HASPROP   0x04    /* Flag: property checks are present. */
+#define XCL_NOT      0x01  /* Flag: this is a negative class */
+#define XCL_MAP      0x02  /* Flag: a 32-byte map is present */
+#define XCL_HASPROP  0x04  /* Flag: property checks are present. */
 
-#define XCL_END       0    /* Marks end of individual items */
-#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
-#define XCL_RANGE     2    /* A range (two multibyte chars) follows */
-#define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
-#define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
+#define XCL_END      0     /* Marks end of individual items */
+#define XCL_SINGLE   1     /* Single item (one multibyte char) follows */
+#define XCL_RANGE    2     /* A range (two multibyte chars) follows */
+#define XCL_PROP     3     /* Unicode property (2-byte property code follows) */
+#define XCL_NOTPROP  4     /* Unicode inverted property (ditto) */
 
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
@@ -1303,6 +1346,12 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript
 compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves
 like \N.
 
+ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not
+followed by hex digits and }, in which case it should mean a literal "u"
+followed by a literal "{". This hack is necessary for cases like \u{ 12}
+because without it, this is interpreted as u{12} now that spaces are allowed in
+quantifiers.
+
 Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in
 check_escape(). There are tests in the code for an escape greater than ESC_b
 and less than ESC_Z to detect the types that may be repeated. These are the
@@ -1312,7 +1361,7 @@ consume a character, that code will have to change. */
 enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
        ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H,
        ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z,
-       ESC_E, ESC_Q, ESC_g, ESC_k };
+       ESC_E, ESC_Q, ESC_g, ESC_k, ESC_ub };
 
 
 /********************** Opcode definitions ******************/
@@ -1348,8 +1397,8 @@ enum {
   OP_SOD,            /* 1 Start of data: \A */
   OP_SOM,            /* 2 Start of match (subject + offset): \G */
   OP_SET_SOM,        /* 3 Set start of match (\K) */
-  OP_NOT_WORD_BOUNDARY,  /*  4 \B */
-  OP_WORD_BOUNDARY,      /*  5 \b */
+  OP_NOT_WORD_BOUNDARY,  /*  4 \B -- see also OP_NOT_UCP_WORD_BOUNDARY */
+  OP_WORD_BOUNDARY,      /*  5 \b -- see also OP_UCP_WORD_BOUNDARY */
   OP_NOT_DIGIT,          /*  6 \D */
   OP_DIGIT,              /*  7 \d */
   OP_NOT_WHITESPACE,     /*  8 \S */
@@ -1523,78 +1572,85 @@ enum {
   /* The assertions must come before BRA, CBRA, ONCE, and COND. */
 
   OP_REVERSE,        /* 125 Move pointer back - used in lookbehind assertions */
-  OP_ASSERT,         /* 126 Positive lookahead */
-  OP_ASSERT_NOT,     /* 127 Negative lookahead */
-  OP_ASSERTBACK,     /* 128 Positive lookbehind */
-  OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
-  OP_ASSERT_NA,      /* 130 Positive non-atomic lookahead */
-  OP_ASSERTBACK_NA,  /* 131 Positive non-atomic lookbehind */
+  OP_VREVERSE,       /* 126 Move pointer back - variable */
+  OP_ASSERT,         /* 127 Positive lookahead */
+  OP_ASSERT_NOT,     /* 128 Negative lookahead */
+  OP_ASSERTBACK,     /* 129 Positive lookbehind */
+  OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */
+  OP_ASSERT_NA,      /* 131 Positive non-atomic lookahead */
+  OP_ASSERTBACK_NA,  /* 132 Positive non-atomic lookbehind */
 
   /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
   immediately after the assertions, with ONCE first, as there's a test for >=
   ONCE for a subpattern that isn't an assertion. The POS versions must
   immediately follow the non-POS versions in each case. */
 
-  OP_ONCE,           /* 132 Atomic group, contains captures */
-  OP_SCRIPT_RUN,     /* 133 Non-capture, but check characters' scripts */
-  OP_BRA,            /* 134 Start of non-capturing bracket */
-  OP_BRAPOS,         /* 135 Ditto, with unlimited, possessive repeat */
-  OP_CBRA,           /* 136 Start of capturing bracket */
-  OP_CBRAPOS,        /* 137 Ditto, with unlimited, possessive repeat */
-  OP_COND,           /* 138 Conditional group */
+  OP_ONCE,           /* 133 Atomic group, contains captures */
+  OP_SCRIPT_RUN,     /* 134 Non-capture, but check characters' scripts */
+  OP_BRA,            /* 135 Start of non-capturing bracket */
+  OP_BRAPOS,         /* 136 Ditto, with unlimited, possessive repeat */
+  OP_CBRA,           /* 137 Start of capturing bracket */
+  OP_CBRAPOS,        /* 138 Ditto, with unlimited, possessive repeat */
+  OP_COND,           /* 139 Conditional group */
 
   /* These five must follow the previous five, in the same order. There's a
   check for >= SBRA to distinguish the two sets. */
 
-  OP_SBRA,           /* 139 Start of non-capturing bracket, check empty  */
-  OP_SBRAPOS,        /* 149 Ditto, with unlimited, possessive repeat */
-  OP_SCBRA,          /* 141 Start of capturing bracket, check empty */
-  OP_SCBRAPOS,       /* 142 Ditto, with unlimited, possessive repeat */
-  OP_SCOND,          /* 143 Conditional group, check empty */
+  OP_SBRA,           /* 140 Start of non-capturing bracket, check empty  */
+  OP_SBRAPOS,        /* 141 Ditto, with unlimited, possessive repeat */
+  OP_SCBRA,          /* 142 Start of capturing bracket, check empty */
+  OP_SCBRAPOS,       /* 143 Ditto, with unlimited, possessive repeat */
+  OP_SCOND,          /* 144 Conditional group, check empty */
 
   /* The next two pairs must (respectively) be kept together. */
 
-  OP_CREF,           /* 144 Used to hold a capture number as condition */
-  OP_DNCREF,         /* 145 Used to point to duplicate names as a condition */
-  OP_RREF,           /* 146 Used to hold a recursion number as condition */
-  OP_DNRREF,         /* 147 Used to point to duplicate names as a condition */
-  OP_FALSE,          /* 148 Always false (used by DEFINE and VERSION) */
-  OP_TRUE,           /* 149 Always true (used by VERSION) */
+  OP_CREF,           /* 145 Used to hold a capture number as condition */
+  OP_DNCREF,         /* 146 Used to point to duplicate names as a condition */
+  OP_RREF,           /* 147 Used to hold a recursion number as condition */
+  OP_DNRREF,         /* 148 Used to point to duplicate names as a condition */
+  OP_FALSE,          /* 149 Always false (used by DEFINE and VERSION) */
+  OP_TRUE,           /* 150 Always true (used by VERSION) */
 
-  OP_BRAZERO,        /* 150 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 151 order. */
-  OP_BRAPOSZERO,     /* 152 */
+  OP_BRAZERO,        /* 151 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 152 order. */
+  OP_BRAPOSZERO,     /* 153 */
 
   /* These are backtracking control verbs */
 
-  OP_MARK,           /* 153 always has an argument */
-  OP_PRUNE,          /* 154 */
-  OP_PRUNE_ARG,      /* 155 same, but with argument */
-  OP_SKIP,           /* 156 */
-  OP_SKIP_ARG,       /* 157 same, but with argument */
-  OP_THEN,           /* 158 */
-  OP_THEN_ARG,       /* 159 same, but with argument */
-  OP_COMMIT,         /* 160 */
-  OP_COMMIT_ARG,     /* 161 same, but with argument */
+  OP_MARK,           /* 154 always has an argument */
+  OP_PRUNE,          /* 155 */
+  OP_PRUNE_ARG,      /* 156 same, but with argument */
+  OP_SKIP,           /* 157 */
+  OP_SKIP_ARG,       /* 158 same, but with argument */
+  OP_THEN,           /* 159 */
+  OP_THEN_ARG,       /* 160 same, but with argument */
+  OP_COMMIT,         /* 161 */
+  OP_COMMIT_ARG,     /* 162 same, but with argument */
 
   /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
   argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
   without the need for a special opcode. */
 
-  OP_FAIL,           /* 162 */
-  OP_ACCEPT,         /* 163 */
-  OP_ASSERT_ACCEPT,  /* 164 Used inside assertions */
-  OP_CLOSE,          /* 165 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 163 */
+  OP_ACCEPT,         /* 164 */
+  OP_ASSERT_ACCEPT,  /* 165 Used inside assertions */
+  OP_CLOSE,          /* 166 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO,       /* 166 */
+  OP_SKIPZERO,       /* 167 */
 
   /* This is used to identify a DEFINE group during compilation so that it can
   be checked for having only one branch. It is changed to OP_FALSE before
   compilation finishes. */
 
-  OP_DEFINE,         /* 167 */
+  OP_DEFINE,         /* 168 */
+
+  /* These opcodes replace their normal counterparts in UCP mode when
+  PCRE2_EXTRA_ASCII_BSW is not set. */
+
+  OP_NOT_UCP_WORD_BOUNDARY, /* 169 */
+  OP_UCP_WORD_BOUNDARY,     /* 170 */
 
   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1640,7 +1696,7 @@ some cases doesn't actually use these names at all). */
   "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi",  \
   "Recurse", "Callout", "CalloutStr",                             \
   "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
-  "Reverse", "Assert", "Assert not",                              \
+  "Reverse", "VReverse", "Assert", "Assert not",                  \
   "Assert back", "Assert back not",                               \
   "Non-atomic assert", "Non-atomic assert back",                  \
   "Once",                                                         \
@@ -1655,7 +1711,7 @@ some cases doesn't actually use these names at all). */
   "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
   "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL",                \
   "*ACCEPT", "*ASSERT_ACCEPT",                                    \
-  "Close", "Skip zero", "Define"
+  "Close", "Skip zero", "Define", "\\B (ucp)", "\\b (ucp)"
 
 
 /* This macro defines the length of fixed length operations in the compiled
@@ -1722,7 +1778,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* KetRmax                                */ \
   1+LINK_SIZE,                   /* KetRmin                                */ \
   1+LINK_SIZE,                   /* KetRpos                                */ \
-  1+LINK_SIZE,                   /* Reverse                                */ \
+  1+IMM2_SIZE,                   /* Reverse                                */ \
+  1+2*IMM2_SIZE,                 /* VReverse                               */ \
   1+LINK_SIZE,                   /* Assert                                 */ \
   1+LINK_SIZE,                   /* Assert not                             */ \
   1+LINK_SIZE,                   /* Assert behind                          */ \
@@ -1751,7 +1808,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1, 3,                          /* COMMIT, COMMIT_ARG                     */ \
   1, 1, 1,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */ \
   1+IMM2_SIZE, 1,                /* CLOSE, SKIPZERO                        */ \
-  1                              /* DEFINE                                 */
+  1,                             /* DEFINE                                 */ \
+  1, 1                           /* \B and \b in UCP mode                  */
 
 /* A magic value for OP_RREF to indicate the "any recursion" condition. */
 
@@ -1797,8 +1855,8 @@ typedef struct {
   uint8_t gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
   uint8_t caseset;    /* offset to multichar other cases or zero */
   int32_t other_case; /* offset to other case, or zero if none */
-  int16_t scriptx;    /* script extension value */
-  int16_t dummy;      /* spare - to round to multiple of 4 bytes */
+  uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
+  uint16_t bprops;    /* binary properties offset */
 } ucd_record;
 
 /* UCD access macros */
@@ -1815,13 +1873,30 @@ typedef struct {
 #define GET_UCD(ch) REAL_GET_UCD(ch)
 #endif
 
+#define UCD_SCRIPTX_MASK 0x3ff
+#define UCD_BIDICLASS_SHIFT 11
+#define UCD_BPROPS_MASK 0xfff
+
+#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
+#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
+#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
+
 #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
 #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
 #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
 #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
 #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
 #define UCD_OTHERCASE(ch)   ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
-#define UCD_SCRIPTX(ch)     GET_UCD(ch)->scriptx
+#define UCD_SCRIPTX(ch)     UCD_SCRIPTX_PROP(GET_UCD(ch))
+#define UCD_BPROPS(ch)      UCD_BPROPS_PROP(GET_UCD(ch))
+#define UCD_BIDICLASS(ch)   UCD_BIDICLASS_PROP(GET_UCD(ch))
+
+/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
+that form a bitmap representing a list of scripts or boolean properties. These
+macros test or set a bit in the map by number. */
+
+#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32)))
+#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32)))
 
 /* Header for serialized pcre2 codes. */
 
@@ -1878,6 +1953,7 @@ extern const uint8_t          PRIV(utf8_table4)[];
 #endif
 #define _pcre2_hspace_list             PCRE2_SUFFIX(_pcre2_hspace_list_)
 #define _pcre2_vspace_list             PCRE2_SUFFIX(_pcre2_vspace_list_)
+#define _pcre2_ucd_boolprop_sets       PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
 #define _pcre2_ucd_caseless_sets       PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
 #define _pcre2_ucd_digit_sets          PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
 #define _pcre2_ucd_script_sets         PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
@@ -1901,9 +1977,10 @@ extern const pcre2_match_context       PRIV(default_match_context);
 extern const uint8_t                   PRIV(default_tables)[];
 extern const uint32_t                  PRIV(hspace_list)[];
 extern const uint32_t                  PRIV(vspace_list)[];
+extern const uint32_t                  PRIV(ucd_boolprop_sets)[];
 extern const uint32_t                  PRIV(ucd_caseless_sets)[];
 extern const uint32_t                  PRIV(ucd_digit_sets)[];
-extern const uint8_t                   PRIV(ucd_script_sets)[];
+extern const uint32_t                  PRIV(ucd_script_sets)[];
 extern const ucd_record                PRIV(ucd_records)[];
 #if PCRE2_CODE_UNIT_WIDTH == 32
 extern const ucd_record                PRIV(dummy_ucd_record)[];
@@ -1999,6 +2076,9 @@ extern void *       _pcre2_memmove(void *, const void *, size_t);
 #endif
 
 #endif  /* PCRE2_CODE_UNIT_WIDTH */
+
+extern BOOL         PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
+
 #endif  /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
 
 /* End of pcre2_internal.h */