summary | refs | log | tree | commit | diff | stats
path: root/src/3rdparty/pcre2/src/pcre2_extuni.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_extuni.c')
-rw-r--r-- src/3rdparty/pcre2/src/pcre2_extuni.c 32
1 files changed, 23 insertions, 9 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_extuni.c b/src/3rdparty/pcre2/src/pcre2_extuni.c
index 5a719e9cb4..4ed9f00c55 100644
--- a/src/3rdparty/pcre2/src/pcre2_extuni.c
+++ b/src/3rdparty/pcre2/src/pcre2_extuni.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -75,7 +75,11 @@ return NULL;
* Match an extended grapheme sequence *
*************************************************/
-/*
+/* NOTE: The logic contained in this function is replicated in three special-
+purpose functions in the pcre2_jit_compile.c module. If the logic below is
+changed, they must be kept in step so that the interpreter and the JIT have the
+same behaviour.
+
Arguments:
c the first character
eptr pointer to next character
@@ -92,6 +96,7 @@ PCRE2_SPTR
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
{
+BOOL was_ep_ZWJ = FALSE;
int lgb = UCD_GRAPHBREAK(c);
while (eptr < end_subject)
@@ -102,10 +107,16 @@ while (eptr < end_subject)
rgb = UCD_GRAPHBREAK(c);
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
+ /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+ preceded by Extended Pictographic. */
+
+ if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
+ break;
+
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
{
int ricount = 0;
PCRE2_SPTR bptr = eptr - 1;
@@ -123,18 +134,21 @@ while (eptr < end_subject)
}
else
c = *bptr;
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
ricount++;
}
if ((ricount & 1) != 0) break; /* Grapheme break required */
}
- /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
- allows any number of them before a following Extended_Pictographic. */
+ /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+ between; see next statement). */
+
+ was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
+
+ /* If Extend follows Extended_Pictographic, do not update lgb; this allows
+ any number of them before a following ZWJ. */
- if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
- lgb != ucp_gbExtended_Pictographic)
- lgb = rgb;
+ if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
eptr += len;
if (xcount != NULL) *xcount += 1;