aboutsummaryrefslogtreecommitdiffstats
path: root/src/3rdparty
diff options
context:
space:
mode:
author: Ulf Hermann <ulf.hermann@qt.io> 2019-02-22 11:35:34 +0100
committer: Ulf Hermann <ulf.hermann@qt.io> 2019-02-26 09:32:51 +0000
commit: a126b566dc9c0cebb6ef6ddc337e7665a1ce54e8 (patch)
tree: 805b913b4040b8747b2c61228e27d327128e121b /src/3rdparty
parent: f52b331e06136bf9d47ec2077626515c9008f97d (diff)
Upgrade Yarr to latest version from WebKit
This is an upgrade to commit cbb0aa18662bc26da31de91e2104c030eaa6ead2 in WebKit. It causes some more ECMAScript tests to pass.

Fixes: QTBUG-73915
Change-Id: I8bb5ff9b37907d17b1020576ba64f0b3aed2f1b3
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/3rdparty')
-rw-r--r-- src/3rdparty/masm/assembler/MacroAssemblerX86_64.h 17
-rw-r--r-- src/3rdparty/masm/stubs/wtf/Vector.h 9
-rw-r--r-- src/3rdparty/masm/yarr/YarrCanonicalize.h 1
-rw-r--r-- src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp 122
-rw-r--r-- src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js 21
-rw-r--r-- src/3rdparty/masm/yarr/YarrErrorCode.h 7
-rw-r--r-- src/3rdparty/masm/yarr/YarrInterpreter.cpp 43
-rw-r--r-- src/3rdparty/masm/yarr/YarrJIT.cpp 538
-rw-r--r-- src/3rdparty/masm/yarr/YarrJIT.h 5
-rw-r--r-- src/3rdparty/masm/yarr/YarrParser.h 23
-rw-r--r-- src/3rdparty/masm/yarr/YarrPattern.cpp 137
-rw-r--r-- src/3rdparty/masm/yarr/YarrPattern.h 22
-rw-r--r-- src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp 4
-rw-r--r-- src/3rdparty/masm/yarr/create_regex_tables 2
-rw-r--r-- src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode 16
15 files changed, 746 insertions, 221 deletions
diff --git a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h
index f4349e1f93..64df58d121 100644
--- a/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h
+++ b/src/3rdparty/masm/assembler/MacroAssemblerX86_64.h
@@ -116,6 +116,23 @@ public:
sub32(imm, Address(scratchRegister));
}
+ void load16(ExtendedAddress address, RegisterID dest)
+ {
+ TrustedImmPtr addr(reinterpret_cast<void*>(address.offset));
+ MacroAssemblerX86Common::move(addr, scratchRegister);
+ MacroAssemblerX86Common::load16(BaseIndex(scratchRegister, address.base, TimesTwo), dest);
+ }
+
+ void load16(BaseIndex address, RegisterID dest)
+ {
+ MacroAssemblerX86Common::load16(address, dest);
+ }
+
+ void load16(Address address, RegisterID dest)
+ {
+ MacroAssemblerX86Common::load16(address, dest);
+ }
+
void load32(const void* address, RegisterID dest)
{
if (dest == X86Registers::eax)
diff --git a/src/3rdparty/masm/stubs/wtf/Vector.h b/src/3rdparty/masm/stubs/wtf/Vector.h
index f4f4dc5cf4..2fead9f6ba 100644
--- a/src/3rdparty/masm/stubs/wtf/Vector.h
+++ b/src/3rdparty/masm/stubs/wtf/Vector.h
@@ -109,6 +109,15 @@ public:
inline bool isEmpty() const { return this->empty(); }
inline T &last() { return *(this->begin() + this->size() - 1); }
+
+ bool contains(const T &value) const
+ {
+ for (const T &inVector : *this) {
+ if (inVector == value)
+ return true;
+ }
+ return false;
+ }
};
template <typename T, int capacity>
diff --git a/src/3rdparty/masm/yarr/YarrCanonicalize.h b/src/3rdparty/masm/yarr/YarrCanonicalize.h
index fb5e0231ac..cbd279edca 100644
--- a/src/3rdparty/masm/yarr/YarrCanonicalize.h
+++ b/src/3rdparty/masm/yarr/YarrCanonicalize.h
@@ -53,6 +53,7 @@ struct CanonicalizationRange {
extern const size_t UCS2_CANONICALIZATION_RANGES;
extern const UChar32* const ucs2CharacterSetInfo[];
extern const CanonicalizationRange ucs2RangeInfo[];
+extern const uint16_t canonicalTableLChar[256];
extern const size_t UNICODE_CANONICALIZATION_RANGES;
extern const UChar32* const unicodeCharacterSetInfo[];
diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp
index d91c771590..0eb59f38d2 100644
--- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp
+++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2012-2018 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -44,9 +44,17 @@ const UChar32 ucs2CharacterSet10[] = { 0x03a0, 0x03c0, 0x03d6, 0 };
const UChar32 ucs2CharacterSet11[] = { 0x03a1, 0x03c1, 0x03f1, 0 };
const UChar32 ucs2CharacterSet12[] = { 0x03a3, 0x03c2, 0x03c3, 0 };
const UChar32 ucs2CharacterSet13[] = { 0x03a6, 0x03c6, 0x03d5, 0 };
-const UChar32 ucs2CharacterSet14[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
+const UChar32 ucs2CharacterSet14[] = { 0x0412, 0x0432, 0x1c80, 0 };
+const UChar32 ucs2CharacterSet15[] = { 0x0414, 0x0434, 0x1c81, 0 };
+const UChar32 ucs2CharacterSet16[] = { 0x041e, 0x043e, 0x1c82, 0 };
+const UChar32 ucs2CharacterSet17[] = { 0x0421, 0x0441, 0x1c83, 0 };
+const UChar32 ucs2CharacterSet18[] = { 0x0422, 0x0442, 0x1c84, 0x1c85, 0 };
+const UChar32 ucs2CharacterSet19[] = { 0x042a, 0x044a, 0x1c86, 0 };
+const UChar32 ucs2CharacterSet20[] = { 0x0462, 0x0463, 0x1c87, 0 };
+const UChar32 ucs2CharacterSet21[] = { 0x1e60, 0x1e61, 0x1e9b, 0 };
+const UChar32 ucs2CharacterSet22[] = { 0x1c88, 0xa64a, 0xa64b, 0 };
-static const size_t UCS2_CANONICALIZATION_SETS = 15;
+static const size_t UCS2_CANONICALIZATION_SETS = 23;
const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = {
ucs2CharacterSet0,
ucs2CharacterSet1,
@@ -63,9 +71,17 @@ const UChar32* const ucs2CharacterSetInfo[UCS2_CANONICALIZATION_SETS] = {
ucs2CharacterSet12,
ucs2CharacterSet13,
ucs2CharacterSet14,
+ ucs2CharacterSet15,
+ ucs2CharacterSet16,
+ ucs2CharacterSet17,
+ ucs2CharacterSet18,
+ ucs2CharacterSet19,
+ ucs2CharacterSet20,
+ ucs2CharacterSet21,
+ ucs2CharacterSet22,
};
-const size_t UCS2_CANONICALIZATION_RANGES = 391;
+const size_t UCS2_CANONICALIZATION_RANGES = 448;
const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x0000, 0x0040, 0x0000, CanonicalizeUnique },
{ 0x0041, 0x005a, 0x0020, CanonicalizeRangeLo },
@@ -182,7 +198,7 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x0267, 0x0267, 0x0000, CanonicalizeUnique },
{ 0x0268, 0x0268, 0x00d1, CanonicalizeRangeHi },
{ 0x0269, 0x0269, 0x00d3, CanonicalizeRangeHi },
- { 0x026a, 0x026a, 0x0000, CanonicalizeUnique },
+ { 0x026a, 0x026a, 0xa544, CanonicalizeRangeLo },
{ 0x026b, 0x026b, 0x29f7, CanonicalizeRangeLo },
{ 0x026c, 0x026c, 0xa541, CanonicalizeRangeLo },
{ 0x026d, 0x026e, 0x0000, CanonicalizeUnique },
@@ -206,7 +222,8 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x028c, 0x028c, 0x0047, CanonicalizeRangeHi },
{ 0x028d, 0x0291, 0x0000, CanonicalizeUnique },
{ 0x0292, 0x0292, 0x00db, CanonicalizeRangeHi },
- { 0x0293, 0x029d, 0x0000, CanonicalizeUnique },
+ { 0x0293, 0x029c, 0x0000, CanonicalizeUnique },
+ { 0x029d, 0x029d, 0xa515, CanonicalizeRangeLo },
{ 0x029e, 0x029e, 0xa512, CanonicalizeRangeLo },
{ 0x029f, 0x0344, 0x0000, CanonicalizeUnique },
{ 0x0345, 0x0345, 0x0007, CanonicalizeSet },
@@ -288,10 +305,34 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x03fc, 0x03fc, 0x0000, CanonicalizeUnique },
{ 0x03fd, 0x03ff, 0x0082, CanonicalizeRangeHi },
{ 0x0400, 0x040f, 0x0050, CanonicalizeRangeLo },
- { 0x0410, 0x042f, 0x0020, CanonicalizeRangeLo },
- { 0x0430, 0x044f, 0x0020, CanonicalizeRangeHi },
+ { 0x0410, 0x0411, 0x0020, CanonicalizeRangeLo },
+ { 0x0412, 0x0412, 0x000e, CanonicalizeSet },
+ { 0x0413, 0x0413, 0x0020, CanonicalizeRangeLo },
+ { 0x0414, 0x0414, 0x000f, CanonicalizeSet },
+ { 0x0415, 0x041d, 0x0020, CanonicalizeRangeLo },
+ { 0x041e, 0x041e, 0x0010, CanonicalizeSet },
+ { 0x041f, 0x0420, 0x0020, CanonicalizeRangeLo },
+ { 0x0421, 0x0421, 0x0011, CanonicalizeSet },
+ { 0x0422, 0x0422, 0x0012, CanonicalizeSet },
+ { 0x0423, 0x0429, 0x0020, CanonicalizeRangeLo },
+ { 0x042a, 0x042a, 0x0013, CanonicalizeSet },
+ { 0x042b, 0x042f, 0x0020, CanonicalizeRangeLo },
+ { 0x0430, 0x0431, 0x0020, CanonicalizeRangeHi },
+ { 0x0432, 0x0432, 0x000e, CanonicalizeSet },
+ { 0x0433, 0x0433, 0x0020, CanonicalizeRangeHi },
+ { 0x0434, 0x0434, 0x000f, CanonicalizeSet },
+ { 0x0435, 0x043d, 0x0020, CanonicalizeRangeHi },
+ { 0x043e, 0x043e, 0x0010, CanonicalizeSet },
+ { 0x043f, 0x0440, 0x0020, CanonicalizeRangeHi },
+ { 0x0441, 0x0441, 0x0011, CanonicalizeSet },
+ { 0x0442, 0x0442, 0x0012, CanonicalizeSet },
+ { 0x0443, 0x0449, 0x0020, CanonicalizeRangeHi },
+ { 0x044a, 0x044a, 0x0013, CanonicalizeSet },
+ { 0x044b, 0x044f, 0x0020, CanonicalizeRangeHi },
{ 0x0450, 0x045f, 0x0050, CanonicalizeRangeHi },
- { 0x0460, 0x0481, 0x0000, CanonicalizeAlternatingAligned },
+ { 0x0460, 0x0461, 0x0000, CanonicalizeAlternatingAligned },
+ { 0x0462, 0x0463, 0x0014, CanonicalizeSet },
+ { 0x0464, 0x0481, 0x0000, CanonicalizeAlternatingAligned },
{ 0x0482, 0x0489, 0x0000, CanonicalizeUnique },
{ 0x048a, 0x04bf, 0x0000, CanonicalizeAlternatingAligned },
{ 0x04c0, 0x04c0, 0x000f, CanonicalizeRangeLo },
@@ -308,16 +349,38 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x10c7, 0x10c7, 0x1c60, CanonicalizeRangeLo },
{ 0x10c8, 0x10cc, 0x0000, CanonicalizeUnique },
{ 0x10cd, 0x10cd, 0x1c60, CanonicalizeRangeLo },
- { 0x10ce, 0x1d78, 0x0000, CanonicalizeUnique },
+ { 0x10ce, 0x10cf, 0x0000, CanonicalizeUnique },
+ { 0x10d0, 0x10fa, 0x0bc0, CanonicalizeRangeLo },
+ { 0x10fb, 0x10fc, 0x0000, CanonicalizeUnique },
+ { 0x10fd, 0x10ff, 0x0bc0, CanonicalizeRangeLo },
+ { 0x1100, 0x139f, 0x0000, CanonicalizeUnique },
+ { 0x13a0, 0x13ef, 0x97d0, CanonicalizeRangeLo },
+ { 0x13f0, 0x13f5, 0x0008, CanonicalizeRangeLo },
+ { 0x13f6, 0x13f7, 0x0000, CanonicalizeUnique },
+ { 0x13f8, 0x13fd, 0x0008, CanonicalizeRangeHi },
+ { 0x13fe, 0x1c7f, 0x0000, CanonicalizeUnique },
+ { 0x1c80, 0x1c80, 0x000e, CanonicalizeSet },
+ { 0x1c81, 0x1c81, 0x000f, CanonicalizeSet },
+ { 0x1c82, 0x1c82, 0x0010, CanonicalizeSet },
+ { 0x1c83, 0x1c83, 0x0011, CanonicalizeSet },
+ { 0x1c84, 0x1c85, 0x0012, CanonicalizeSet },
+ { 0x1c86, 0x1c86, 0x0013, CanonicalizeSet },
+ { 0x1c87, 0x1c87, 0x0014, CanonicalizeSet },
+ { 0x1c88, 0x1c88, 0x0016, CanonicalizeSet },
+ { 0x1c89, 0x1c8f, 0x0000, CanonicalizeUnique },
+ { 0x1c90, 0x1cba, 0x0bc0, CanonicalizeRangeHi },
+ { 0x1cbb, 0x1cbc, 0x0000, CanonicalizeUnique },
+ { 0x1cbd, 0x1cbf, 0x0bc0, CanonicalizeRangeHi },
+ { 0x1cc0, 0x1d78, 0x0000, CanonicalizeUnique },
{ 0x1d79, 0x1d79, 0x8a04, CanonicalizeRangeLo },
{ 0x1d7a, 0x1d7c, 0x0000, CanonicalizeUnique },
{ 0x1d7d, 0x1d7d, 0x0ee6, CanonicalizeRangeLo },
{ 0x1d7e, 0x1dff, 0x0000, CanonicalizeUnique },
{ 0x1e00, 0x1e5f, 0x0000, CanonicalizeAlternatingAligned },
- { 0x1e60, 0x1e61, 0x000e, CanonicalizeSet },
+ { 0x1e60, 0x1e61, 0x0015, CanonicalizeSet },
{ 0x1e62, 0x1e95, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1e96, 0x1e9a, 0x0000, CanonicalizeUnique },
- { 0x1e9b, 0x1e9b, 0x000e, CanonicalizeSet },
+ { 0x1e9b, 0x1e9b, 0x0015, CanonicalizeSet },
{ 0x1e9c, 0x1e9f, 0x0000, CanonicalizeUnique },
{ 0x1ea0, 0x1eff, 0x0000, CanonicalizeAlternatingAligned },
{ 0x1f00, 0x1f07, 0x0008, CanonicalizeRangeLo },
@@ -428,7 +491,9 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0x2d28, 0x2d2c, 0x0000, CanonicalizeUnique },
{ 0x2d2d, 0x2d2d, 0x1c60, CanonicalizeRangeHi },
{ 0x2d2e, 0xa63f, 0x0000, CanonicalizeUnique },
- { 0xa640, 0xa66d, 0x0000, CanonicalizeAlternatingAligned },
+ { 0xa640, 0xa649, 0x0000, CanonicalizeAlternatingAligned },
+ { 0xa64a, 0xa64b, 0x0016, CanonicalizeSet },
+ { 0xa64c, 0xa66d, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa66e, 0xa67f, 0x0000, CanonicalizeUnique },
{ 0xa680, 0xa69b, 0x0000, CanonicalizeAlternatingAligned },
{ 0xa69c, 0xa721, 0x0000, CanonicalizeUnique },
@@ -450,15 +515,42 @@ const CanonicalizationRange ucs2RangeInfo[UCS2_CANONICALIZATION_RANGES] = {
{ 0xa7ab, 0xa7ab, 0xa54f, CanonicalizeRangeHi },
{ 0xa7ac, 0xa7ac, 0xa54b, CanonicalizeRangeHi },
{ 0xa7ad, 0xa7ad, 0xa541, CanonicalizeRangeHi },
- { 0xa7ae, 0xa7af, 0x0000, CanonicalizeUnique },
+ { 0xa7ae, 0xa7ae, 0xa544, CanonicalizeRangeHi },
+ { 0xa7af, 0xa7af, 0x0000, CanonicalizeUnique },
{ 0xa7b0, 0xa7b0, 0xa512, CanonicalizeRangeHi },
{ 0xa7b1, 0xa7b1, 0xa52a, CanonicalizeRangeHi },
- { 0xa7b2, 0xff20, 0x0000, CanonicalizeUnique },
+ { 0xa7b2, 0xa7b2, 0xa515, CanonicalizeRangeHi },
+ { 0xa7b3, 0xa7b3, 0x03a0, CanonicalizeRangeLo },
+ { 0xa7b4, 0xa7b9, 0x0000, CanonicalizeAlternatingAligned },
+ { 0xa7ba, 0xab52, 0x0000, CanonicalizeUnique },
+ { 0xab53, 0xab53, 0x03a0, CanonicalizeRangeHi },
+ { 0xab54, 0xab6f, 0x0000, CanonicalizeUnique },
+ { 0xab70, 0xabbf, 0x97d0, CanonicalizeRangeHi },
+ { 0xabc0, 0xff20, 0x0000, CanonicalizeUnique },
{ 0xff21, 0xff3a, 0x0020, CanonicalizeRangeLo },
{ 0xff3b, 0xff40, 0x0000, CanonicalizeUnique },
{ 0xff41, 0xff5a, 0x0020, CanonicalizeRangeHi },
{ 0xff5b, 0xffff, 0x0000, CanonicalizeUnique },
};
+const uint16_t canonicalTableLChar[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0x39c, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0x178
+};
+
} } // JSC::Yarr
diff --git a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
index dc578cfece..b92d8bdd4f 100644
--- a/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
+++ b/src/3rdparty/masm/yarr/YarrCanonicalizeUCS2.js
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2012-2018 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,7 +27,7 @@ function printHeader()
{
var copyright = (
"/*" + "\n" +
- " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." + "\n" +
+ " * Copyright (C) 2012-2018 Apple Inc. All rights reserved." + "\n" +
" *" + "\n" +
" * Redistribution and use in source and binary forms, with or without" + "\n" +
" * modification, are permitted provided that the following conditions" + "\n" +
@@ -183,6 +183,23 @@ function createTables(prefix, maxValue, canonicalGroups)
}
print("};");
print();
+ // Create canonical table for LChar domain
+ let line = "const uint16_t canonicalTableLChar[256] = {";
+ for (let i = 0; i < 256; i++) {
+ if (!(i % 16)) {
+ print(line);
+ line = " ";
+ }
+ let canonicalChar = canonicalize(i);
+ line = line + (canonicalChar < 16 ? "0x0" : "0x") + canonicalChar.toString(16);
+ if ((i % 16) != 15)
+ line += ", ";
+ else if (i != 255)
+ line += ",";
+ }
+ print(line);
+ print("};");
+ print();
}
printHeader();
diff --git a/src/3rdparty/masm/yarr/YarrErrorCode.h b/src/3rdparty/masm/yarr/YarrErrorCode.h
index 48f2bb7900..3f06a6bff1 100644
--- a/src/3rdparty/masm/yarr/YarrErrorCode.h
+++ b/src/3rdparty/masm/yarr/YarrErrorCode.h
@@ -60,6 +60,13 @@ inline bool hasError(ErrorCode errorCode)
{
return errorCode != ErrorCode::NoError;
}
+
+inline bool hasHardError(ErrorCode errorCode)
+{
+ // TooManyDisjunctions means that we ran out of stack while compiling.
+ // All other errors are due to problems in the expression.
+ return hasError(errorCode) && errorCode != ErrorCode::TooManyDisjunctions;
+}
JS_EXPORT_PRIVATE JSObject* errorToThrow(ExecState*, ErrorCode);
} } // namespace JSC::Yarr
diff --git a/src/3rdparty/masm/yarr/YarrInterpreter.cpp b/src/3rdparty/masm/yarr/YarrInterpreter.cpp
index 4d3652fcbc..cdcd16af64 100644
--- a/src/3rdparty/masm/yarr/YarrInterpreter.cpp
+++ b/src/3rdparty/masm/yarr/YarrInterpreter.cpp
@@ -32,12 +32,12 @@
#include "Yarr.h"
#include "YarrCanonicalize.h"
#include <wtf/BumpPointerAllocator.h>
+#include <wtf/CheckedArithmetic.h>
#include <wtf/DataLog.h>
+#include <wtf/StdLibExtras.h>
#include <wtf/text/CString.h>
#include <wtf/text/WTFString.h>
-using namespace WTF;
-
namespace JSC { namespace Yarr {
template<typename CharType>
@@ -67,17 +67,23 @@ public:
struct DisjunctionContext
{
- DisjunctionContext()
- : term(0)
- {
- }
+ DisjunctionContext() = default;
void* operator new(size_t, void* where)
{
return where;
}
- int term;
+ static size_t allocationSize(unsigned numberOfFrames)
+ {
+ static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
+ size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
+ size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(rawSize);
+ RELEASE_ASSERT(roundedSize >= rawSize);
+ return roundedSize;
+ }
+
+ int term { 0 };
unsigned matchBegin;
unsigned matchEnd;
uintptr_t frame[1];
@@ -85,7 +91,7 @@ public:
DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
{
- size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
+ size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
allocatorPool = allocatorPool->ensureCapacity(size);
RELEASE_ASSERT(allocatorPool);
return new (allocatorPool->alloc(size)) DisjunctionContext();
@@ -99,7 +105,6 @@ public:
struct ParenthesesDisjunctionContext
{
ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
- : next(0)
{
unsigned firstSubpatternId = term.atom.subpatternId;
unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
@@ -125,16 +130,25 @@ public:
DisjunctionContext* getDisjunctionContext(ByteTerm& term)
{
- return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
+ return bitwise_cast<DisjunctionContext*>(bitwise_cast<uintptr_t>(this) + allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns));
}
- ParenthesesDisjunctionContext* next;
+ static size_t allocationSize(unsigned numberOfSubpatterns)
+ {
+ static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
+ size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * 2U) * sizeof(unsigned)).unsafeGet();
+ size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(rawSize);
+ RELEASE_ASSERT(roundedSize >= rawSize);
+ return roundedSize;
+ }
+
+ ParenthesesDisjunctionContext* next { nullptr };
unsigned subpatternBackup[1];
};
ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
{
- size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + static_cast<size_t>(disjunction->m_frameSize) * sizeof(uintptr_t);
+ size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
allocatorPool = allocatorPool->ensureCapacity(size);
RELEASE_ASSERT(allocatorPool);
return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
@@ -1630,7 +1644,6 @@ public:
, unicode(pattern->unicode())
, output(output)
, input(input, start, length, pattern->unicode())
- , allocatorPool(0)
, startOffset(start)
, remainingMatchCount(matchLimit)
{
@@ -1641,7 +1654,7 @@ private:
bool unicode;
unsigned* output;
InputStream input;
- BumpPointerPool* allocatorPool;
+ WTF::BumpPointerPool* allocatorPool { nullptr };
unsigned startOffset;
unsigned remainingMatchCount;
};
@@ -1740,7 +1753,7 @@ public:
void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
{
- unsigned beginTerm = m_bodyDisjunction->terms.size();
+ int beginTerm = m_bodyDisjunction->terms.size();
m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
diff --git a/src/3rdparty/masm/yarr/YarrJIT.cpp b/src/3rdparty/masm/yarr/YarrJIT.cpp
index da65b772f7..1c8138c66e 100644
--- a/src/3rdparty/masm/yarr/YarrJIT.cpp
+++ b/src/3rdparty/masm/yarr/YarrJIT.cpp
@@ -37,15 +37,12 @@
#if ENABLE(YARR_JIT)
-using namespace WTF;
-
namespace JSC { namespace Yarr {
template<YarrJITCompileMode compileMode>
class YarrGenerator : private DefaultMacroAssembler {
- friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
-#if CPU(ARM)
+#if CPU(ARM_THUMB2)
static const RegisterID input = ARMRegisters::r0;
static const RegisterID index = ARMRegisters::r1;
static const RegisterID length = ARMRegisters::r2;
@@ -477,6 +474,12 @@ class YarrGenerator : private DefaultMacroAssembler {
return branch32(BelowOrEqual, index, length);
}
+ Jump checkNotEnoughInput(RegisterID additionalAmount)
+ {
+ add32(index, additionalAmount);
+ return branch32(Above, additionalAmount, length);
+ }
+
Jump checkInput()
{
return branch32(BelowOrEqual, index, length);
@@ -559,6 +562,16 @@ class YarrGenerator : private DefaultMacroAssembler {
}
#endif
+ void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
+ {
+ BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
+
+ if (m_charSize == Char8)
+ load8(address, resultReg);
+ else
+ load16Unaligned(address, resultReg);
+ }
+
void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
{
BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
@@ -809,16 +822,16 @@ class YarrGenerator : private DefaultMacroAssembler {
// The operation, as a YarrOpCode, and also a reference to the PatternTerm.
YarrOpCode m_op;
- PatternTerm* m_term;
+ PatternTerm* m_term = nullptr;
// For alternatives, this holds the PatternAlternative and doubly linked
// references to this alternative's siblings. In the case of the
// OpBodyAlternativeEnd node at the end of a section of repeating nodes,
// m_nextOp will reference the OpBodyAlternativeBegin node of the first
// repeating alternative.
- PatternAlternative* m_alternative;
- size_t m_previousOp;
- size_t m_nextOp;
+ PatternAlternative* m_alternative = nullptr;
+ size_t m_previousOp = 0;
+ size_t m_nextOp = 0;
// Used to record a set of Jumps out of the generated code, typically
// used for jumps out to backtracking code, and a single reentry back
@@ -1119,6 +1132,228 @@ class YarrGenerator : private DefaultMacroAssembler {
backtrackTermDefault(opIndex);
}
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ Label loop(this);
+
+ readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
+ readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
+
+ if (!m_pattern.ignoreCase())
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ else {
+ Jump charactersMatch = branch32(Equal, character, patternCharacter);
+ ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(characterTableEntry, character);
+ ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(patternTableEntry, patternCharacter);
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ charactersMatch.link(this);
+ }
+
+
+ add32(TrustedImm32(1), index);
+ add32(TrustedImm32(1), patternIndex);
+
+ branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
+ }
+
+ void generateBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ if (m_pattern.ignoreCase() && m_charSize != Char8) {
+ m_failureReason = JITFailureReason::BackReference;
+ return;
+ }
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ const RegisterID characterOrTemp = regT0;
+ const RegisterID patternIndex = regT1;
+ const RegisterID patternTemp = regT2;
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
+ storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+
+ JumpList matches;
+
+ if (term->quantityType != QuantifierNonGreedy) {
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
+ matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
+ matches.append(branch32(Equal, patternIndex, patternTemp));
+ } else {
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+ }
+ }
+
+ switch (term->quantityType) {
+ case QuantifierFixedCount: {
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ op.m_jumps.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
+
+ if (term->quantityMaxCount != 1) {
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+ jump(outerLoop);
+ }
+ matches.link(this);
+ break;
+ }
+
+ case QuantifierGreedy: {
+ JumpList incompleteMatches;
+
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ if (term->quantityMaxCount != quantifyInfinite)
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // Store current index in frame for restoring after a partial match
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ jump(outerLoop);
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ op.m_reentry = label();
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ JumpList incompleteMatches;
+
+ matches.append(jump());
+
+ op.m_reentry = label();
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+
+ // Check if we have input remaining to match
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ matches.append(jump());
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ break;
+ }
+ }
+ }
+ void backtrackBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ m_backtrackingState.link(this);
+ op.m_jumps.link(this);
+
+ JumpList failures;
+
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+ break;
+
+ case QuantifierGreedy: {
+ const RegisterID matchAmount = regT0;
+ const RegisterID patternStartIndex = regT1;
+ const RegisterID patternEndIndexOrLen = regT2;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ failures.append(branchTest32(Zero, matchAmount));
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
+ sub32(patternStartIndex, patternEndIndexOrLen);
+ sub32(patternEndIndexOrLen, index);
+
+ sub32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ const RegisterID matchAmount = regT0;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ if (term->quantityMaxCount != quantifyInfinite)
+ failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
+ add32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+ }
+ failures.link(this);
+ m_backtrackingState.fallthrough();
+ }
+#endif
+
void generatePatternCharacterOnce(size_t opIndex)
{
YarrOp& op = m_ops[opIndex];
@@ -1141,12 +1376,16 @@ class YarrGenerator : private DefaultMacroAssembler {
}
const RegisterID character = regT0;
+#if CPU(X86_64) || CPU(ARM64)
+ unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
+#else
unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
- unsigned ignoreCaseMask = 0;
+#endif
+ uint64_t ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
- int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
+ uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
- int allCharacters = ch;
+ uint64_t allCharacters = ch;
#endif
unsigned numberCharacters;
unsigned startTermPosition = term->inputPosition;
@@ -1155,16 +1394,19 @@ class YarrGenerator : private DefaultMacroAssembler {
// upper & lower case representations are converted to a character class.
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
- if (m_pattern.ignoreCase() && isASCIIAlpha(ch))
+ if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
#if CPU(BIG_ENDIAN)
ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
ignoreCaseMask |= 32;
#endif
+ }
for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
PatternTerm* nextTerm = nextOp->m_term;
-
+
+ // YarrJIT handles a decoded surrogate pair as one character if the unicode flag is enabled.
+ // Note that numberCharacters becomes 1 while the width of the pattern character becomes 32 bits in this case.
if (nextTerm->type != PatternTerm::TypePatternCharacter
|| nextTerm->quantityType != QuantifierFixedCount
|| nextTerm->quantityMaxCount != 1
@@ -1192,49 +1434,132 @@ class YarrGenerator : private DefaultMacroAssembler {
// upper & lower case representations are converted to a character class.
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
- allCharacters |= (currentCharacter << shiftAmount);
+ allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
- ignoreCaseMask |= 32 << shiftAmount;
+ ignoreCaseMask |= 32ULL << shiftAmount;
}
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
if (m_charSize == Char8) {
+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
+ };
+
+ auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
+ load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ };
+
+ auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
+ if (mask) {
+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ return;
+ }
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
+ };
+
+#if CPU(X86_64) || CPU(ARM64)
+ auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
+ load64(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or64(TrustedImm64(mask), character);
+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
+ };
+#endif
+
switch (numberCharacters) {
case 1:
- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character));
+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
return;
case 2: {
- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
- break;
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+ return;
}
case 3: {
- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
- if (ignoreCaseMask)
- or32(Imm32(ignoreCaseMask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
- op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character));
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
return;
}
case 4: {
- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character);
- break;
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ return;
+ }
+#if CPU(X86_64) || CPU(ARM64)
+ case 5: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
+ return;
+ }
+ case 6: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+ return;
+ }
+ case 7: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+ check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
+ return;
+ }
+ case 8: {
+ check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+ return;
}
+#endif
}
} else {
+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
+ };
+
+ auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
+ if (mask) {
+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ return;
+ }
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
+ };
+
+#if CPU(X86_64) || CPU(ARM64)
+ auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
+ load64(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or64(TrustedImm64(mask), character);
+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
+ };
+#endif
+
switch (numberCharacters) {
case 1:
- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
return;
case 2:
- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character);
- break;
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ return;
+#if CPU(X86_64) || CPU(ARM64)
+ case 3:
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
+ return;
+ case 4:
+ check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+ return;
+#endif
}
}
-
- if (ignoreCaseMask)
- or32(Imm32(ignoreCaseMask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
- return;
}
void backtrackPatternCharacterOnce(size_t opIndex)
{
@@ -1250,6 +1575,9 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
move(index, countRegister);
Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
@@ -1403,8 +1731,10 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
- if (m_decodeSurrogatePairs)
+ if (m_decodeSurrogatePairs) {
+ op.m_jumps.append(jumpIfNoAvailableInput());
storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
+ }
JumpList matchDest;
readCharacter(m_checkedOffset - term->inputPosition, character);
@@ -1451,6 +1781,9 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
move(index, countRegister);
sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister);
@@ -1780,13 +2113,19 @@ class YarrGenerator : private DefaultMacroAssembler {
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
+
case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ generateBackReference(opIndex);
+#else
m_failureReason = JITFailureReason::BackReference;
+#endif
break;
case PatternTerm::TypeDotStarEnclosure:
generateDotStarEnclosure(opIndex);
@@ -1846,18 +2185,23 @@ class YarrGenerator : private DefaultMacroAssembler {
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
- case PatternTerm::TypeDotStarEnclosure:
- backtrackDotStarEnclosure(opIndex);
- break;
-
case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ backtrackBackReference(opIndex);
+#else
m_failureReason = JITFailureReason::BackReference;
+#endif
+ break;
+
+ case PatternTerm::TypeDotStarEnclosure:
+ backtrackDotStarEnclosure(opIndex);
break;
}
}
@@ -2157,7 +2501,7 @@ class YarrGenerator : private DefaultMacroAssembler {
}
// If the parentheses are quantified Greedy then add a label to jump back
- // to if get a failed match from after the parentheses. For NonGreedy
+ // to if we get a failed match from after the parentheses. For NonGreedy
// parentheses, link the jump from before the subpattern to here.
if (term->quantityType == QuantifierGreedy)
op.m_reentry = label();
@@ -2221,11 +2565,11 @@ class YarrGenerator : private DefaultMacroAssembler {
// match within the parentheses, or the second having skipped over them.
// - To check for empty matches, which must be rejected.
//
- // At the head of a NonGreedy set of parentheses we'll immediately set the
- // value on the stack to -1 (indicating a match skipping the subpattern),
+ // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
+ // in the backtrack info to -1 (indicating a match skipping the subpattern),
// and plant a jump to the end. We'll also plant a label to backtrack to
- // to reenter the subpattern later, with a store to set up index on the
- // second iteration.
+ // to reenter the subpattern later, with a store to set 'begin' to current index
+ // on the second iteration.
//
// FIXME: for capturing parens, could use the index in the capture array?
if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
@@ -2312,7 +2656,7 @@ class YarrGenerator : private DefaultMacroAssembler {
}
// If the parentheses are quantified Greedy then add a label to jump back
- // to if get a failed match from after the parentheses. For NonGreedy
+ // to if we get a failed match from after the parentheses. For NonGreedy
// parentheses, link the jump from before the subpattern to here.
if (term->quantityType == QuantifierGreedy) {
if (term->quantityMaxCount != quantifyInfinite)
@@ -2324,6 +2668,7 @@ class YarrGenerator : private DefaultMacroAssembler {
} else if (term->quantityType == QuantifierNonGreedy) {
YarrOp& beginOp = m_ops[op.m_previousOp];
beginOp.m_jumps.link(this);
+ op.m_reentry = label();
}
#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
RELEASE_ASSERT_NOT_REACHED();
@@ -2385,6 +2730,7 @@ class YarrGenerator : private DefaultMacroAssembler {
do {
--opIndex;
+
YarrOp& op = m_ops[opIndex];
switch (op.m_op) {
@@ -2881,32 +3227,32 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityType != QuantifierFixedCount) {
m_backtrackingState.link(this);
- if (term->quantityType == QuantifierGreedy) {
- RegisterID currParenContextReg = regT0;
- RegisterID newParenContextReg = regT1;
+ RegisterID currParenContextReg = regT0;
+ RegisterID newParenContextReg = regT1;
- loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
- restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+ restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
- freeParenContext(currParenContextReg, newParenContextReg);
- storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
- const RegisterID countTemporary = regT0;
- loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
- Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
+ freeParenContext(currParenContextReg, newParenContextReg);
+ storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
- sub32(TrustedImm32(1), countTemporary);
- storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+ const RegisterID countTemporary = regT0;
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
- jump(m_ops[op.m_nextOp].m_reentry);
+ sub32(TrustedImm32(1), countTemporary);
+ storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
- zeroLengthMatch.link(this);
+ jump(m_ops[op.m_nextOp].m_reentry);
- // Clear the flag in the stackframe indicating we didn't run through the subpattern.
- storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+ zeroLengthMatch.link(this);
+ // Clear the flag in the stackframe indicating we didn't run through the subpattern.
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+
+ if (term->quantityType == QuantifierGreedy)
jump(m_ops[op.m_nextOp].m_reentry);
- }
// If Greedy, jump to the end.
if (term->quantityType == QuantifierGreedy) {
@@ -2929,13 +3275,14 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityType != QuantifierFixedCount) {
m_backtrackingState.link(this);
- // Check whether we should backtrack back into the parentheses, or if we
- // are currently in a state where we had skipped over the subpattern
- // (in which case the flag value on the stack will be -1).
unsigned parenthesesFrameLocation = term->frameLocation;
- Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
if (term->quantityType == QuantifierGreedy) {
+ // Check whether we should backtrack back into the parentheses, or if we
+ // are currently in a state where we had skipped over the subpattern
+ // (in which case the flag value on the stack will be -1).
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
+
// For Greedy parentheses, we skip after having already tried going
// through the subpattern, so if we get here we're done.
YarrOp& beginOp = m_ops[op.m_previousOp];
@@ -2946,8 +3293,25 @@ class YarrGenerator : private DefaultMacroAssembler {
// next. Jump back to the start of the parentheses in the forwards
// matching path.
ASSERT(term->quantityType == QuantifierNonGreedy);
+
+ const RegisterID beginTemporary = regT0;
+ const RegisterID countTemporary = regT1;
+
YarrOp& beginOp = m_ops[op.m_previousOp];
- hadSkipped.linkTo(beginOp.m_reentry, this);
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
+ branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
+
+ JumpList exceededMatchLimit;
+
+ if (term->quantityMaxCount != quantifyInfinite) {
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
+ }
+
+ branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
+
+ exceededMatchLimit.link(this);
}
m_backtrackingState.fallthrough();
@@ -3021,7 +3385,7 @@ class YarrGenerator : private DefaultMacroAssembler {
// the parentheses.
// Supported types of parentheses are 'Once' (quantityMaxCount == 1),
// 'Terminal' (non-capturing parentheses quantified as greedy
- // and infinite), and 0 based greedy quantified parentheses.
+ // and infinite), and 0 based greedy / non-greedy quantified parentheses.
// Alternatives will use the 'Simple' set of ops if either the
// subpattern is terminal (in which case we will never need to
// backtrack), or if the subpattern only contains one alternative.
@@ -3043,7 +3407,9 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
return;
- } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
+ }
+
+ if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
// Select the 'Once' nodes.
parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
@@ -3060,10 +3426,10 @@ class YarrGenerator : private DefaultMacroAssembler {
parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
} else {
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
- // We only handle generic parenthesis with greedy counts.
- if (term->quantityType != QuantifierGreedy) {
+ // We only handle generic parenthesis with non-fixed counts.
+ if (term->quantityType == QuantifierFixedCount) {
// This subpattern is not supported by the JIT.
- m_failureReason = JITFailureReason::NonGreedyParenthesizedSubpattern;
+ m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
return;
}
@@ -3369,7 +3735,7 @@ class YarrGenerator : private DefaultMacroAssembler {
// The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
zeroExtend32ToPtr(index, index);
zeroExtend32ToPtr(length, length);
-#elif CPU(ARM)
+#elif CPU(ARM_THUMB2)
push(ARMRegisters::r4);
push(ARMRegisters::r5);
push(ARMRegisters::r6);
@@ -3422,7 +3788,7 @@ class YarrGenerator : private DefaultMacroAssembler {
#elif CPU(ARM64)
if (m_decodeSurrogatePairs)
popPair(framePointerRegister, linkRegister);
-#elif CPU(ARM)
+#elif CPU(ARM_THUMB2)
pop(ARMRegisters::r8);
pop(ARMRegisters::r6);
pop(ARMRegisters::r5);
@@ -3460,10 +3826,14 @@ public:
}
#endif
-#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
- if (m_containsNestedSubpatterns)
- codeBlock.setUsesPaternContextBuffer();
+ if (m_pattern.m_containsBackreferences
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
#endif
+ ) {
+ codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
+ return;
+ }
// We need to compile before generating code since we set flags based on compilation that
// are used during generation.
@@ -3473,7 +3843,12 @@ public:
codeBlock.setFallBackWithFailureReason(*m_failureReason);
return;
}
-
+
+#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
+ if (m_containsNestedSubpatterns)
+ codeBlock.setUsesPatternContextBuffer();
+#endif
+
generateEnter();
Jump hasInput = checkInput();
@@ -3618,7 +3993,10 @@ static void dumpCompileFailure(JITFailureReason failure)
dataLog("Can't JIT a pattern decoding surrogate pairs\n");
break;
case JITFailureReason::BackReference:
- dataLog("Can't JIT a pattern containing back references\n");
+ dataLog("Can't JIT some patterns containing back references\n");
+ break;
+ case JITFailureReason::ForwardReference:
+ dataLog("Can't JIT a pattern containing forward references\n");
break;
case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
@@ -3626,8 +4004,8 @@ static void dumpCompileFailure(JITFailureReason failure)
case JITFailureReason::ParenthesizedSubpattern:
dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
break;
- case JITFailureReason::NonGreedyParenthesizedSubpattern:
- dataLog("Can't JIT a pattern containing non-greedy parenthesized subpatterns\n");
+ case JITFailureReason::FixedCountParenthesizedSubpattern:
+ dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
break;
case JITFailureReason::ExecutableMemoryAllocationFailure:
dataLog("Can't JIT because of failure of allocation of executable memory\n");
diff --git a/src/3rdparty/masm/yarr/YarrJIT.h b/src/3rdparty/masm/yarr/YarrJIT.h
index 35a0690f6e..c6410d3c44 100644
--- a/src/3rdparty/masm/yarr/YarrJIT.h
+++ b/src/3rdparty/masm/yarr/YarrJIT.h
@@ -54,9 +54,10 @@ namespace Yarr {
enum class JITFailureReason : uint8_t {
DecodeSurrogatePair,
BackReference,
+ ForwardReference,
VariableCountedParenthesisWithNonZeroMinimum,
ParenthesizedSubpattern,
- NonGreedyParenthesizedSubpattern,
+ FixedCountParenthesizedSubpattern,
ExecutableMemoryAllocationFailure,
};
@@ -107,7 +108,7 @@ public:
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
bool usesPatternContextBuffer() { return m_usesPatternContextBuffer; }
- void setUsesPaternContextBuffer() { m_usesPatternContextBuffer = true; }
+ void setUsesPatternContextBuffer() { m_usesPatternContextBuffer = true; }
MatchResult execute(const LChar* input, unsigned start, unsigned length, int* output, void* freeParenContext, unsigned parenContextSize)
{
diff --git a/src/3rdparty/masm/yarr/YarrParser.h b/src/3rdparty/masm/yarr/YarrParser.h
index 3e5311f1fb..8032b39811 100644
--- a/src/3rdparty/masm/yarr/YarrParser.h
+++ b/src/3rdparty/masm/yarr/YarrParser.h
@@ -194,7 +194,9 @@ private:
// invoked with inCharacterClass set.
NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { RELEASE_ASSERT_NOT_REACHED(); }
NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { RELEASE_ASSERT_NOT_REACHED(); }
- NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(String) { RELEASE_ASSERT_NOT_REACHED(); }
+ NO_RETURN_DUE_TO_ASSERT void atomNamedBackReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); }
+ NO_RETURN_DUE_TO_ASSERT bool isValidNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); return false; }
+ NO_RETURN_DUE_TO_ASSERT void atomNamedForwardReference(const String&) { RELEASE_ASSERT_NOT_REACHED(); }
private:
Delegate& m_delegate;
@@ -421,9 +423,16 @@ private:
if (!atEndOfPattern() && !inCharacterClass) {
if (consume() == '<') {
auto groupName = tryConsumeGroupName();
- if (groupName && m_captureGroupNames.contains(groupName.value())) {
- delegate.atomNamedBackReference(groupName.value());
- break;
+ if (groupName) {
+ if (m_captureGroupNames.contains(groupName.value())) {
+ delegate.atomNamedBackReference(groupName.value());
+ break;
+ }
+
+ if (delegate.isValidNamedForwardReference(groupName.value())) {
+ delegate.atomNamedForwardReference(groupName.value());
+ break;
+ }
}
if (m_isUnicode) {
m_errorCode = ErrorCode::InvalidBackreference;
@@ -1132,11 +1141,13 @@ private:
* void atomCharacterClassRange(UChar32 begin, UChar32 end)
* void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
* void atomCharacterClassEnd()
- * void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> groupName);
+ * void atomParenthesesSubpatternBegin(bool capture = true, Optional<String> groupName);
* void atomParentheticalAssertionBegin(bool invert = false);
* void atomParenthesesEnd();
* void atomBackReference(unsigned subpatternId);
- * void atomNamedBackReference(String subpatternName);
+ * void atomNamedBackReference(const String& subpatternName);
+ * bool isValidNamedForwardReference(const String& subpatternName);
+ * void atomNamedForwardReference(const String& subpatternName);
*
* void quantifyAtom(unsigned min, unsigned max, bool greedy);
*
diff --git a/src/3rdparty/masm/yarr/YarrPattern.cpp b/src/3rdparty/masm/yarr/YarrPattern.cpp
index ac66ea1b9a..9c1cdadf3f 100644
--- a/src/3rdparty/masm/yarr/YarrPattern.cpp
+++ b/src/3rdparty/masm/yarr/YarrPattern.cpp
@@ -33,12 +33,9 @@
#include "YarrParser.h"
#include <wtf/DataLog.h>
#include <wtf/Optional.h>
-//#include <wtf/Threading.h>
#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
-using namespace WTF;
-
namespace JSC { namespace Yarr {
#include "RegExpJitTables.h"
@@ -334,7 +331,7 @@ private:
ranges.insert(i, CharacterRange(lo, hi));
return;
}
- // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the beginning
// If the new range start at or before the end of the last range, then the overlap (if it starts one after the
// end of the last range they concatenate, which is just as good.
if (lo <= (ranges[i].end + 1)) {
@@ -446,9 +443,9 @@ public:
{
}
- void reset()
+ void resetForReparsing()
{
- m_pattern.reset();
+ m_pattern.resetForReparsing();
m_characterClassConstructor.reset();
auto body = std::make_unique<PatternDisjunction>();
@@ -456,7 +453,17 @@ public:
m_alternative = body->addNewAlternative();
m_pattern.m_disjunctions.append(WTFMove(body));
}
-
+
+ void saveUnmatchedNamedForwardReferences() // Rebuilds m_unmatchedNamedForwardReferences from the pattern's named forward references that have no capture group of the same name.
+ {
+ m_unmatchedNamedForwardReferences.shrink(0); // discard the result of any earlier call
+
+ for (auto& entry : m_pattern.m_namedForwardReferences) {
+ if (!m_pattern.m_captureGroupNames.contains(entry)) // no capture group with this name was recorded
+ m_unmatchedNamedForwardReferences.append(entry);
+ }
+ }
+
void assertionBOL()
{
if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) {
@@ -666,12 +673,24 @@ public:
m_alternative->m_terms.append(PatternTerm(subpatternId));
}
- void atomNamedBackReference(String subpatternName)
+ void atomNamedBackReference(const String& subpatternName) // Asserts the name is present in m_namedGroupToParenIndex, then emits a back reference to the mapped index via atomBackReference().
{
ASSERT(m_pattern.m_namedGroupToParenIndex.find(subpatternName) != m_pattern.m_namedGroupToParenIndex.end());
atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName));
}
+ bool isValidNamedForwardReference(const String& subpatternName) // Returns false only for names recorded in m_unmatchedNamedForwardReferences.
+ {
+ return !m_unmatchedNamedForwardReferences.contains(subpatternName); // that list is filled by saveUnmatchedNamedForwardReferences()
+ }
+
+ void atomNamedForwardReference(const String& subpatternName) // Records the referenced name on the pattern and appends a ForwardReference term to the current alternative.
+ {
+ if (!m_pattern.m_namedForwardReferences.contains(subpatternName)) // keep each name at most once
+ m_pattern.m_namedForwardReferences.append(subpatternName);
+ m_alternative->m_terms.append(PatternTerm::ForwardReference()); // the term itself is appended on every call, even for a repeated name
+ }
+
// deep copy the argument disjunction. If filterStartsWithBOL is true,
// skip alternatives with m_startsWithBOL set true.
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
@@ -1079,6 +1098,7 @@ private:
YarrPattern& m_pattern;
PatternAlternative* m_alternative;
CharacterClassConstructor m_characterClassConstructor;
+ Vector<String> m_unmatchedNamedForwardReferences;
void* m_stackLimit;
bool m_invertCharacterClass;
bool m_invertParentheticalAssertion { false };
@@ -1101,13 +1121,14 @@ ErrorCode YarrPattern::compile(const String& patternString, void* stackLimit)
// Quoting Netscape's "What's new in JavaScript 1.2",
// "Note: if the number of left parentheses is less than the number specified
// in \#, the \# is taken as an octal escape as described in the next row."
- if (containsIllegalBackReference()) {
+ if (containsIllegalBackReference() || containsIllegalNamedForwardReferences()) {
if (unicode())
return ErrorCode::InvalidBackreference;
unsigned numSubpatterns = m_numSubpatterns;
- constructor.reset();
+ constructor.saveUnmatchedNamedForwardReferences();
+ constructor.resetForReparsing();
ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns);
ASSERT_UNUSED(error, !hasError(error));
ASSERT(numSubpatterns == m_numSubpatterns);
@@ -1168,7 +1189,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass*
else if (characterClass == pattern->wordcharCharacterClass())
out.print("<word>");
else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode ignore case>");
+ out.print("<unicode word ignore case>");
else if (characterClass == pattern->nondigitsCharacterClass())
out.print("<non-digits>");
else if (characterClass == pattern->nonspacesCharacterClass())
@@ -1176,7 +1197,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass*
else if (characterClass == pattern->nonwordcharCharacterClass())
out.print("<non-word>");
else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode non-ignore case>");
+ out.print("<unicode non-word ignore case>");
else {
bool needMatchesRangesSeperator = false;
@@ -1298,75 +1319,7 @@ void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nest
break;
case TypeCharacterClass:
out.print("character class ");
- if (characterClass->m_anyCharacter)
- out.print("<any character>");
- else if (characterClass == thisPattern->newlineCharacterClass())
- out.print("<newline>");
- else if (characterClass == thisPattern->digitsCharacterClass())
- out.print("<digits>");
- else if (characterClass == thisPattern->spacesCharacterClass())
- out.print("<whitespace>");
- else if (characterClass == thisPattern->wordcharCharacterClass())
- out.print("<word>");
- else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode ignore case>");
- else if (characterClass == thisPattern->nondigitsCharacterClass())
- out.print("<non-digits>");
- else if (characterClass == thisPattern->nonspacesCharacterClass())
- out.print("<non-whitespace>");
- else if (characterClass == thisPattern->nonwordcharCharacterClass())
- out.print("<non-word>");
- else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode non-ignore case>");
- else {
- bool needMatchesRangesSeperator = false;
-
- auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
- size_t matchesSize = matches.size();
- if (matchesSize) {
- if (needMatchesRangesSeperator)
- out.print(",");
- needMatchesRangesSeperator = true;
-
- out.print(prefix, ":(");
- for (size_t i = 0; i < matchesSize; ++i) {
- if (i)
- out.print(",");
- dumpUChar32(out, matches[i]);
- }
- out.print(")");
- }
- };
-
- auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
- size_t rangeSize = ranges.size();
- if (rangeSize) {
- if (needMatchesRangesSeperator)
- out.print(",");
- needMatchesRangesSeperator = true;
-
- out.print(prefix, " ranges:(");
- for (size_t i = 0; i < rangeSize; ++i) {
- if (i)
- out.print(",");
- CharacterRange range = ranges[i];
- out.print("(");
- dumpUChar32(out, range.begin);
- out.print("..");
- dumpUChar32(out, range.end);
- out.print(")");
- }
- out.print(")");
- }
- };
-
- out.print("[");
- dumpMatches("ASCII", characterClass->m_matches);
- dumpRanges("ASCII", characterClass->m_ranges);
- dumpMatches("Unicode", characterClass->m_matchesUnicode);
- dumpRanges("Unicode", characterClass->m_rangesUnicode);
- out.print("]");
- }
+ dumpCharacterClass(out, thisPattern, characterClass);
dumpQuantifier(out);
if (quantityType != QuantifierFixedCount || thisPattern->unicode())
out.print(",frame location ", frameLocation);
@@ -1439,16 +1392,10 @@ void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsign
}
}
-void YarrPattern::dumpPattern(const String& patternString)
+void YarrPattern::dumpPatternString(PrintStream& out, const String& patternString)
{
- dumpPattern(WTF::dataFile(), patternString);
-}
+ out.print("/", patternString, "/");
-void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
-{
- out.print("RegExp pattern for /");
- out.print(patternString);
- out.print("/");
if (global())
out.print("g");
if (ignoreCase())
@@ -1459,6 +1406,18 @@ void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
out.print("u");
if (sticky())
out.print("y");
+}
+
+void YarrPattern::dumpPattern(const String& patternString)
+{
+ dumpPattern(WTF::dataFile(), patternString);
+}
+
+void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
+{
+ out.print("RegExp pattern for ");
+ dumpPatternString(out, patternString);
+
if (m_flags != NoFlags) {
bool printSeperator = false;
out.print(" (");
diff --git a/src/3rdparty/masm/yarr/YarrPattern.h b/src/3rdparty/masm/yarr/YarrPattern.h
index 59decbac46..1417ff1549 100644
--- a/src/3rdparty/masm/yarr/YarrPattern.h
+++ b/src/3rdparty/masm/yarr/YarrPattern.h
@@ -354,7 +354,7 @@ struct TermChain {
struct YarrPattern {
JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, ErrorCode&, void* stackLimit = nullptr);
- void reset()
+ void resetForReparsing()
{
m_numSubpatterns = 0;
m_maxBackReference = 0;
@@ -381,6 +381,7 @@ struct YarrPattern {
m_disjunctions.clear();
m_userCharacterClasses.clear();
m_captureGroupNames.shrink(0);
+ m_namedForwardReferences.shrink(0);
}
bool containsIllegalBackReference()
@@ -388,6 +389,19 @@ struct YarrPattern {
return m_maxBackReference > m_numSubpatterns;
}
+ bool containsIllegalNamedForwardReferences() // Returns true if any name in m_namedForwardReferences is absent from m_captureGroupNames.
+ {
+ if (m_namedForwardReferences.isEmpty()) // Nothing was forward-referenced, so nothing can be illegal.
+ return false;
+
+ for (auto& entry : m_namedForwardReferences) {
+ if (!m_captureGroupNames.contains(entry)) // Illegal only when no capture group with this name exists.
+ return true;
+ }
+
+ return false;
+ }
+
bool containsUnsignedLengthPattern()
{
return m_containsUnsignedLengthPattern;
@@ -489,6 +503,7 @@ struct YarrPattern {
return unicodePropertiesCached.get(classID);
}
+ void dumpPatternString(PrintStream& out, const String& patternString);
void dumpPattern(const String& pattern);
void dumpPattern(PrintStream& out, const String& pattern);
@@ -512,6 +527,7 @@ struct YarrPattern {
Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
Vector<String> m_captureGroupNames;
+ Vector<String> m_namedForwardReferences;
HashMap<String, unsigned> m_namedGroupToParenIndex;
private:
@@ -554,8 +570,8 @@ private:
uintptr_t begin; // Not really needed for greedy quantifiers.
uintptr_t matchAmount; // Not really needed for fixed quantifiers.
- unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
- unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
+ static unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
+ static unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
};
struct BackTrackInfoAlternative {
diff --git a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp
index 9f05f22852..358cc94d6b 100644
--- a/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp
+++ b/src/3rdparty/masm/yarr/YarrSyntaxChecker.cpp
@@ -48,7 +48,9 @@ public:
void atomParentheticalAssertionBegin(bool = false) {}
void atomParenthesesEnd() {}
void atomBackReference(unsigned) {}
- void atomNamedBackReference(String) {}
+ void atomNamedBackReference(const String&) {}
+ bool isValidNamedForwardReference(const String&) { return true; }
+ void atomNamedForwardReference(const String&) {}
void quantifyAtom(unsigned, unsigned, bool) {}
void disjunction() {}
};
diff --git a/src/3rdparty/masm/yarr/create_regex_tables b/src/3rdparty/masm/yarr/create_regex_tables
index 4c3dbbe3fb..992566db77 100644
--- a/src/3rdparty/masm/yarr/create_regex_tables
+++ b/src/3rdparty/masm/yarr/create_regex_tables
@@ -32,7 +32,7 @@ types = {
"nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]},
"nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordUnicodeIgnoreCaseChar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x017e), (0x0180, 0x2129), (0x212b, 0x10ffff)]},
"newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
- "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
+ "spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
"nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0x10ffff)]},
"digits": { "UseTable" : False, "data": [('0', '9')]},
"nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)] }
diff --git a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
index a103bcdf16..95549c7eb5 100644
--- a/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
+++ b/src/3rdparty/masm/yarr/generateYarrCanonicalizeUnicode
@@ -31,7 +31,6 @@ import optparse
import os
import re
import sys
-from sets import Set
header = """/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
@@ -78,9 +77,12 @@ def openOrExit(path, mode):
dirname = os.path.dirname(path)
if not os.path.isdir(dirname):
os.makedirs(dirname)
- return open(path, mode)
+ if sys.version_info.major >= 3:
+ return open(path, mode, encoding="UTF-8")
+ else:
+ return open(path, mode)
except IOError as e:
- print "I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror)
+ print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror))
exit(1)
class Canonicalize:
@@ -93,7 +95,7 @@ class Canonicalize:
self.canonicalGroups[mapping].append(code)
def readCaseFolding(self, file):
- codesSeen = Set()
+ codesSeen = set()
for line in file:
line = line.split('#', 1)[0]
line = line.rstrip()
@@ -154,8 +156,8 @@ class Canonicalize:
for i in range(len(characterSets)):
characters = ""
- set = characterSets[i]
- for ch in set:
+ cur_set = characterSets[i]
+ for ch in cur_set:
characters = characters + "0x{character:04x}, ".format(character=ch)
file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=i, characters=characters))
@@ -189,7 +191,7 @@ if __name__ == "__main__":
caseFoldingTxtPath = args[0]
canonicalizeHPath = args[1]
caseFoldingTxtFile = openOrExit(caseFoldingTxtPath, "r")
- canonicalizeHFile = openOrExit(canonicalizeHPath, "wb")
+ canonicalizeHFile = openOrExit(canonicalizeHPath, "w")
canonicalize = Canonicalize()
canonicalize.readCaseFolding(caseFoldingTxtFile)