aboutsummaryrefslogtreecommitdiffstats
path: root/src/3rdparty/masm/yarr/YarrJIT.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/masm/yarr/YarrJIT.cpp')
-rw-r--r--src/3rdparty/masm/yarr/YarrJIT.cpp538
1 files changed, 458 insertions, 80 deletions
diff --git a/src/3rdparty/masm/yarr/YarrJIT.cpp b/src/3rdparty/masm/yarr/YarrJIT.cpp
index da65b772f7..1c8138c66e 100644
--- a/src/3rdparty/masm/yarr/YarrJIT.cpp
+++ b/src/3rdparty/masm/yarr/YarrJIT.cpp
@@ -37,15 +37,12 @@
#if ENABLE(YARR_JIT)
-using namespace WTF;
-
namespace JSC { namespace Yarr {
template<YarrJITCompileMode compileMode>
class YarrGenerator : private DefaultMacroAssembler {
- friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
-#if CPU(ARM)
+#if CPU(ARM_THUMB2)
static const RegisterID input = ARMRegisters::r0;
static const RegisterID index = ARMRegisters::r1;
static const RegisterID length = ARMRegisters::r2;
@@ -477,6 +474,12 @@ class YarrGenerator : private DefaultMacroAssembler {
return branch32(BelowOrEqual, index, length);
}
+ Jump checkNotEnoughInput(RegisterID additionalAmount)
+ {
+ add32(index, additionalAmount);
+ return branch32(Above, additionalAmount, length);
+ }
+
Jump checkInput()
{
return branch32(BelowOrEqual, index, length);
@@ -559,6 +562,16 @@ class YarrGenerator : private DefaultMacroAssembler {
}
#endif
+ void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
+ {
+ BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
+
+ if (m_charSize == Char8)
+ load8(address, resultReg);
+ else
+ load16Unaligned(address, resultReg);
+ }
+
void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
{
BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);
@@ -809,16 +822,16 @@ class YarrGenerator : private DefaultMacroAssembler {
// The operation, as a YarrOpCode, and also a reference to the PatternTerm.
YarrOpCode m_op;
- PatternTerm* m_term;
+ PatternTerm* m_term = nullptr;
// For alternatives, this holds the PatternAlternative and doubly linked
// references to this alternative's siblings. In the case of the
// OpBodyAlternativeEnd node at the end of a section of repeating nodes,
// m_nextOp will reference the OpBodyAlternativeBegin node of the first
// repeating alternative.
- PatternAlternative* m_alternative;
- size_t m_previousOp;
- size_t m_nextOp;
+ PatternAlternative* m_alternative = nullptr;
+ size_t m_previousOp = 0;
+ size_t m_nextOp = 0;
// Used to record a set of Jumps out of the generated code, typically
// used for jumps out to backtracking code, and a single reentry back
@@ -1119,6 +1132,228 @@ class YarrGenerator : private DefaultMacroAssembler {
backtrackTermDefault(opIndex);
}
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ Label loop(this);
+
+ readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
+ readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);
+
+ if (!m_pattern.ignoreCase())
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ else {
+ Jump charactersMatch = branch32(Equal, character, patternCharacter);
+ ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(characterTableEntry, character);
+ ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
+ load16(patternTableEntry, patternCharacter);
+ characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
+ charactersMatch.link(this);
+ }
+
+
+ add32(TrustedImm32(1), index);
+ add32(TrustedImm32(1), patternIndex);
+
+ branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
+ }
+
+ void generateBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ if (m_pattern.ignoreCase() && m_charSize != Char8) {
+ m_failureReason = JITFailureReason::BackReference;
+ return;
+ }
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+
+ const RegisterID characterOrTemp = regT0;
+ const RegisterID patternIndex = regT1;
+ const RegisterID patternTemp = regT2;
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
+ storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+
+ JumpList matches;
+
+ if (term->quantityType != QuantifierNonGreedy) {
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
+ matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
+ matches.append(branch32(Equal, patternIndex, patternTemp));
+ } else {
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+ }
+ }
+
+ switch (term->quantityType) {
+ case QuantifierFixedCount: {
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ op.m_jumps.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);
+
+ if (term->quantityMaxCount != 1) {
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+ jump(outerLoop);
+ }
+ matches.link(this);
+ break;
+ }
+
+ case QuantifierGreedy: {
+ JumpList incompleteMatches;
+
+ Label outerLoop(this);
+
+ // PatternTemp should contain pattern end index at this point
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
+ add32(TrustedImm32(1), characterOrTemp);
+ storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ if (term->quantityMaxCount != quantifyInfinite)
+ matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // Store current index in frame for restoring after a partial match
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+ jump(outerLoop);
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ op.m_reentry = label();
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ JumpList incompleteMatches;
+
+ matches.append(jump());
+
+ op.m_reentry = label();
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
+
+ // An empty match is successful without consuming characters
+ Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
+ Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
+ zeroLengthMatch.link(this);
+ storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ matches.append(jump());
+ tryNonZeroMatch.link(this);
+
+ // Check if we have input remaining to match
+ sub32(patternIndex, patternTemp);
+ if (m_checkedOffset - term->inputPosition)
+ sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
+ matches.append(checkNotEnoughInput(patternTemp));
+
+ storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
+
+ matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);
+
+ matches.append(jump());
+
+ incompleteMatches.link(this);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+
+ matches.link(this);
+ break;
+ }
+ }
+ }
+ void backtrackBackReference(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ unsigned subpatternId = term->backReferenceSubpatternId;
+
+ m_backtrackingState.link(this);
+ op.m_jumps.link(this);
+
+ JumpList failures;
+
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
+ break;
+
+ case QuantifierGreedy: {
+ const RegisterID matchAmount = regT0;
+ const RegisterID patternStartIndex = regT1;
+ const RegisterID patternEndIndexOrLen = regT2;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ failures.append(branchTest32(Zero, matchAmount));
+
+ load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
+ load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
+ sub32(patternStartIndex, patternEndIndexOrLen);
+ sub32(patternEndIndexOrLen, index);
+
+ sub32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+
+ case QuantifierNonGreedy: {
+ const RegisterID matchAmount = regT0;
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
+ if (term->quantityMaxCount != quantifyInfinite)
+ failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
+ add32(TrustedImm32(1), matchAmount);
+ storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
+ jump(op.m_reentry);
+ break;
+ }
+ }
+ failures.link(this);
+ m_backtrackingState.fallthrough();
+ }
+#endif
+
void generatePatternCharacterOnce(size_t opIndex)
{
YarrOp& op = m_ops[opIndex];
@@ -1141,12 +1376,16 @@ class YarrGenerator : private DefaultMacroAssembler {
}
const RegisterID character = regT0;
+#if CPU(X86_64) || CPU(ARM64)
+ unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
+#else
unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
- unsigned ignoreCaseMask = 0;
+#endif
+ uint64_t ignoreCaseMask = 0;
#if CPU(BIG_ENDIAN)
- int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
+ uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
#else
- int allCharacters = ch;
+ uint64_t allCharacters = ch;
#endif
unsigned numberCharacters;
unsigned startTermPosition = term->inputPosition;
@@ -1155,16 +1394,19 @@ class YarrGenerator : private DefaultMacroAssembler {
// upper & lower case representations are converted to a character class.
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
- if (m_pattern.ignoreCase() && isASCIIAlpha(ch))
+ if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
#if CPU(BIG_ENDIAN)
ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
#else
ignoreCaseMask |= 32;
#endif
+ }
for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
PatternTerm* nextTerm = nextOp->m_term;
-
+
+ // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
+ // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
if (nextTerm->type != PatternTerm::TypePatternCharacter
|| nextTerm->quantityType != QuantifierFixedCount
|| nextTerm->quantityMaxCount != 1
@@ -1192,49 +1434,132 @@ class YarrGenerator : private DefaultMacroAssembler {
// upper & lower case representations are converted to a character class.
ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
- allCharacters |= (currentCharacter << shiftAmount);
+ allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
if ((m_pattern.ignoreCase()) && (isASCIIAlpha(currentCharacter)))
- ignoreCaseMask |= 32 << shiftAmount;
+ ignoreCaseMask |= 32ULL << shiftAmount;
}
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
if (m_charSize == Char8) {
+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
+ };
+
+ auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
+ load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ };
+
+ auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
+ if (mask) {
+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ return;
+ }
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
+ };
+
+#if CPU(X86_64) || CPU(ARM64)
+ auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
+ load64(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or64(TrustedImm64(mask), character);
+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
+ };
+#endif
+
switch (numberCharacters) {
case 1:
- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - startTermPosition, character));
+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
return;
case 2: {
- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
- break;
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+ return;
}
case 3: {
- load16Unaligned(negativeOffsetIndexedAddress(m_checkedOffset - startTermPosition, character), character);
- if (ignoreCaseMask)
- or32(Imm32(ignoreCaseMask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
- op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, m_checkedOffset - startTermPosition - 2, character));
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
return;
}
case 4: {
- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- startTermPosition, character), character);
- break;
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ return;
+ }
+#if CPU(X86_64) || CPU(ARM64)
+ case 5: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
+ return;
+ }
+ case 6: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+ return;
+ }
+ case 7: {
+ check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+ check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
+ return;
+ }
+ case 8: {
+ check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+ return;
}
+#endif
}
} else {
+ auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
+ op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
+ };
+
+ auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
+ if (mask) {
+ load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
+ return;
+ }
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
+ };
+
+#if CPU(X86_64) || CPU(ARM64)
+ auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
+ load64(negativeOffsetIndexedAddress(offset, character), character);
+ if (mask)
+ or64(TrustedImm64(mask), character);
+ op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
+ };
+#endif
+
switch (numberCharacters) {
case 1:
- op.m_jumps.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
+ // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
+ check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
return;
case 2:
- load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(m_checkedOffset- term->inputPosition, character), character);
- break;
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ return;
+#if CPU(X86_64) || CPU(ARM64)
+ case 3:
+ check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+ check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
+ return;
+ case 4:
+ check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+ return;
+#endif
}
}
-
- if (ignoreCaseMask)
- or32(Imm32(ignoreCaseMask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
- return;
}
void backtrackPatternCharacterOnce(size_t opIndex)
{
@@ -1250,6 +1575,9 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
move(index, countRegister);
Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
@@ -1403,8 +1731,10 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
- if (m_decodeSurrogatePairs)
+ if (m_decodeSurrogatePairs) {
+ op.m_jumps.append(jumpIfNoAvailableInput());
storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
+ }
JumpList matchDest;
readCharacter(m_checkedOffset - term->inputPosition, character);
@@ -1451,6 +1781,9 @@ class YarrGenerator : private DefaultMacroAssembler {
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
+ if (m_decodeSurrogatePairs)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+
move(index, countRegister);
sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister);
@@ -1780,13 +2113,19 @@ class YarrGenerator : private DefaultMacroAssembler {
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
+
case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ generateBackReference(opIndex);
+#else
m_failureReason = JITFailureReason::BackReference;
+#endif
break;
case PatternTerm::TypeDotStarEnclosure:
generateDotStarEnclosure(opIndex);
@@ -1846,18 +2185,23 @@ class YarrGenerator : private DefaultMacroAssembler {
break;
case PatternTerm::TypeForwardReference:
+ m_failureReason = JITFailureReason::ForwardReference;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
RELEASE_ASSERT_NOT_REACHED();
- case PatternTerm::TypeDotStarEnclosure:
- backtrackDotStarEnclosure(opIndex);
- break;
-
case PatternTerm::TypeBackReference:
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ backtrackBackReference(opIndex);
+#else
m_failureReason = JITFailureReason::BackReference;
+#endif
+ break;
+
+ case PatternTerm::TypeDotStarEnclosure:
+ backtrackDotStarEnclosure(opIndex);
break;
}
}
@@ -2157,7 +2501,7 @@ class YarrGenerator : private DefaultMacroAssembler {
}
// If the parentheses are quantified Greedy then add a label to jump back
- // to if get a failed match from after the parentheses. For NonGreedy
+ // to if we get a failed match from after the parentheses. For NonGreedy
// parentheses, link the jump from before the subpattern to here.
if (term->quantityType == QuantifierGreedy)
op.m_reentry = label();
@@ -2221,11 +2565,11 @@ class YarrGenerator : private DefaultMacroAssembler {
// match within the parentheses, or the second having skipped over them.
// - To check for empty matches, which must be rejected.
//
- // At the head of a NonGreedy set of parentheses we'll immediately set the
- // value on the stack to -1 (indicating a match skipping the subpattern),
+ // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
+ // in the backtrack info to -1 (indicating a match skipping the subpattern),
// and plant a jump to the end. We'll also plant a label to backtrack to
- // to reenter the subpattern later, with a store to set up index on the
- // second iteration.
+ // to reenter the subpattern later, with a store to set 'begin' to current index
+ // on the second iteration.
//
// FIXME: for capturing parens, could use the index in the capture array?
if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
@@ -2312,7 +2656,7 @@ class YarrGenerator : private DefaultMacroAssembler {
}
// If the parentheses are quantified Greedy then add a label to jump back
- // to if get a failed match from after the parentheses. For NonGreedy
+ // to if we get a failed match from after the parentheses. For NonGreedy
// parentheses, link the jump from before the subpattern to here.
if (term->quantityType == QuantifierGreedy) {
if (term->quantityMaxCount != quantifyInfinite)
@@ -2324,6 +2668,7 @@ class YarrGenerator : private DefaultMacroAssembler {
} else if (term->quantityType == QuantifierNonGreedy) {
YarrOp& beginOp = m_ops[op.m_previousOp];
beginOp.m_jumps.link(this);
+ op.m_reentry = label();
}
#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
RELEASE_ASSERT_NOT_REACHED();
@@ -2385,6 +2730,7 @@ class YarrGenerator : private DefaultMacroAssembler {
do {
--opIndex;
+
YarrOp& op = m_ops[opIndex];
switch (op.m_op) {
@@ -2881,32 +3227,32 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityType != QuantifierFixedCount) {
m_backtrackingState.link(this);
- if (term->quantityType == QuantifierGreedy) {
- RegisterID currParenContextReg = regT0;
- RegisterID newParenContextReg = regT1;
+ RegisterID currParenContextReg = regT0;
+ RegisterID newParenContextReg = regT1;
- loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
- restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
+ restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
- freeParenContext(currParenContextReg, newParenContextReg);
- storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
- const RegisterID countTemporary = regT0;
- loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
- Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
+ freeParenContext(currParenContextReg, newParenContextReg);
+ storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
- sub32(TrustedImm32(1), countTemporary);
- storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
+ const RegisterID countTemporary = regT0;
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
- jump(m_ops[op.m_nextOp].m_reentry);
+ sub32(TrustedImm32(1), countTemporary);
+ storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
- zeroLengthMatch.link(this);
+ jump(m_ops[op.m_nextOp].m_reentry);
- // Clear the flag in the stackframe indicating we didn't run through the subpattern.
- storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+ zeroLengthMatch.link(this);
+ // Clear the flag in the stackframe indicating we didn't run through the subpattern.
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
+
+ if (term->quantityType == QuantifierGreedy)
jump(m_ops[op.m_nextOp].m_reentry);
- }
// If Greedy, jump to the end.
if (term->quantityType == QuantifierGreedy) {
@@ -2929,13 +3275,14 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityType != QuantifierFixedCount) {
m_backtrackingState.link(this);
- // Check whether we should backtrack back into the parentheses, or if we
- // are currently in a state where we had skipped over the subpattern
- // (in which case the flag value on the stack will be -1).
unsigned parenthesesFrameLocation = term->frameLocation;
- Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
if (term->quantityType == QuantifierGreedy) {
+ // Check whether we should backtrack back into the parentheses, or if we
+ // are currently in a state where we had skipped over the subpattern
+ // (in which case the flag value on the stack will be -1).
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));
+
// For Greedy parentheses, we skip after having already tried going
// through the subpattern, so if we get here we're done.
YarrOp& beginOp = m_ops[op.m_previousOp];
@@ -2946,8 +3293,25 @@ class YarrGenerator : private DefaultMacroAssembler {
// next. Jump back to the start of the parentheses in the forwards
// matching path.
ASSERT(term->quantityType == QuantifierNonGreedy);
+
+ const RegisterID beginTemporary = regT0;
+ const RegisterID countTemporary = regT1;
+
YarrOp& beginOp = m_ops[op.m_previousOp];
- hadSkipped.linkTo(beginOp.m_reentry, this);
+
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
+ branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);
+
+ JumpList exceededMatchLimit;
+
+ if (term->quantityMaxCount != quantifyInfinite) {
+ loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
+ exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
+ }
+
+ branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
+
+ exceededMatchLimit.link(this);
}
m_backtrackingState.fallthrough();
@@ -3021,7 +3385,7 @@ class YarrGenerator : private DefaultMacroAssembler {
// the parentheses.
// Supported types of parentheses are 'Once' (quantityMaxCount == 1),
// 'Terminal' (non-capturing parentheses quantified as greedy
- // and infinite), and 0 based greedy quantified parentheses.
+ // and infinite), and 0 based greedy / non-greedy quantified parentheses.
// Alternatives will use the 'Simple' set of ops if either the
// subpattern is terminal (in which case we will never need to
// backtrack), or if the subpattern only contains one alternative.
@@ -3043,7 +3407,9 @@ class YarrGenerator : private DefaultMacroAssembler {
if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
return;
- } if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
+ }
+
+ if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
// Select the 'Once' nodes.
parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
@@ -3060,10 +3426,10 @@ class YarrGenerator : private DefaultMacroAssembler {
parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
} else {
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
- // We only handle generic parenthesis with greedy counts.
- if (term->quantityType != QuantifierGreedy) {
+ // We only handle generic parenthesis with non-fixed counts.
+ if (term->quantityType == QuantifierFixedCount) {
// This subpattern is not supported by the JIT.
- m_failureReason = JITFailureReason::NonGreedyParenthesizedSubpattern;
+ m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
return;
}
@@ -3369,7 +3735,7 @@ class YarrGenerator : private DefaultMacroAssembler {
// The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
zeroExtend32ToPtr(index, index);
zeroExtend32ToPtr(length, length);
-#elif CPU(ARM)
+#elif CPU(ARM_THUMB2)
push(ARMRegisters::r4);
push(ARMRegisters::r5);
push(ARMRegisters::r6);
@@ -3422,7 +3788,7 @@ class YarrGenerator : private DefaultMacroAssembler {
#elif CPU(ARM64)
if (m_decodeSurrogatePairs)
popPair(framePointerRegister, linkRegister);
-#elif CPU(ARM)
+#elif CPU(ARM_THUMB2)
pop(ARMRegisters::r8);
pop(ARMRegisters::r6);
pop(ARMRegisters::r5);
@@ -3460,10 +3826,14 @@ public:
}
#endif
-#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
- if (m_containsNestedSubpatterns)
- codeBlock.setUsesPaternContextBuffer();
+ if (m_pattern.m_containsBackreferences
+#if ENABLE(YARR_JIT_BACKREFERENCES)
+ && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
#endif
+ ) {
+ codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
+ return;
+ }
// We need to compile before generating code since we set flags based on compilation that
// are used during generation.
@@ -3473,7 +3843,12 @@ public:
codeBlock.setFallBackWithFailureReason(*m_failureReason);
return;
}
-
+
+#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
+ if (m_containsNestedSubpatterns)
+ codeBlock.setUsesPatternContextBuffer();
+#endif
+
generateEnter();
Jump hasInput = checkInput();
@@ -3618,7 +3993,10 @@ static void dumpCompileFailure(JITFailureReason failure)
dataLog("Can't JIT a pattern decoding surrogate pairs\n");
break;
case JITFailureReason::BackReference:
- dataLog("Can't JIT a pattern containing back references\n");
+ dataLog("Can't JIT some patterns containing back references\n");
+ break;
+ case JITFailureReason::ForwardReference:
+ dataLog("Can't JIT a pattern containing forward references\n");
break;
case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
@@ -3626,8 +4004,8 @@ static void dumpCompileFailure(JITFailureReason failure)
case JITFailureReason::ParenthesizedSubpattern:
dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
break;
- case JITFailureReason::NonGreedyParenthesizedSubpattern:
- dataLog("Can't JIT a pattern containing non-greedy parenthesized subpatterns\n");
+ case JITFailureReason::FixedCountParenthesizedSubpattern:
+ dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
break;
case JITFailureReason::ExecutableMemoryAllocationFailure:
dataLog("Can't JIT because of failure of allocation of executable memory\n");