author    Simon Hausmann <simon.hausmann@digia.com>  2013-04-15 11:34:00 +0200
committer Simon Hausmann <simon.hausmann@digia.com>  2013-04-15 12:47:53 +0200
commit    2cd8a90bd4d171ed2404822b6046455a94d4b6ed (patch)
tree      ed8eda56d54cb13d777be1cfa294c85ddd76e583 /src/3rdparty/masm/yarr/YarrJIT.cpp
parent    5bf33901429e64ab91f30037e25ec04aab4b4c11 (diff)
parent    bec019b5fe35e1701c944eb340df458d5e3d1cdb (diff)

Merge branch 'master' of ssh://codereview.qt-project.org:29418/playground/v4vm into v4

This is the initial merge of the v4vm JS engine, designed specifically for QML.
The engine is tested on Linux and Mac OS X, works on x86, x86-64 and ARM.

Change-Id: I826b72cfa3d3575007b70d78604080582db568db
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/3rdparty/masm/yarr/YarrJIT.cpp')
-rw-r--r--  src/3rdparty/masm/yarr/YarrJIT.cpp  2702
1 file changed, 2702 insertions, 0 deletions
diff --git a/src/3rdparty/masm/yarr/YarrJIT.cpp b/src/3rdparty/masm/yarr/YarrJIT.cpp
new file mode 100644
index 0000000000..20b26c1eb9
--- /dev/null
+++ b/src/3rdparty/masm/yarr/YarrJIT.cpp
@@ -0,0 +1,2702 @@
+/*
+ * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "YarrJIT.h"
+
+#include <wtf/ASCIICType.h>
+#include "LinkBuffer.h"
+#include "Options.h"
+#include "Yarr.h"
+#include "YarrCanonicalizeUCS2.h"
+
+#if ENABLE(YARR_JIT)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+template<YarrJITCompileMode compileMode>
+class YarrGenerator : private MacroAssembler {
+ friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+
+#if CPU(ARM)
+ static const RegisterID input = ARMRegisters::r0;
+ static const RegisterID index = ARMRegisters::r1;
+ static const RegisterID length = ARMRegisters::r2;
+ static const RegisterID output = ARMRegisters::r4;
+
+ static const RegisterID regT0 = ARMRegisters::r5;
+ static const RegisterID regT1 = ARMRegisters::r6;
+
+ static const RegisterID returnRegister = ARMRegisters::r0;
+ static const RegisterID returnRegister2 = ARMRegisters::r1;
+#elif CPU(MIPS)
+ static const RegisterID input = MIPSRegisters::a0;
+ static const RegisterID index = MIPSRegisters::a1;
+ static const RegisterID length = MIPSRegisters::a2;
+ static const RegisterID output = MIPSRegisters::a3;
+
+ static const RegisterID regT0 = MIPSRegisters::t4;
+ static const RegisterID regT1 = MIPSRegisters::t5;
+
+ static const RegisterID returnRegister = MIPSRegisters::v0;
+ static const RegisterID returnRegister2 = MIPSRegisters::v1;
+#elif CPU(SH4)
+ static const RegisterID input = SH4Registers::r4;
+ static const RegisterID index = SH4Registers::r5;
+ static const RegisterID length = SH4Registers::r6;
+ static const RegisterID output = SH4Registers::r7;
+
+ static const RegisterID regT0 = SH4Registers::r0;
+ static const RegisterID regT1 = SH4Registers::r1;
+
+ static const RegisterID returnRegister = SH4Registers::r0;
+ static const RegisterID returnRegister2 = SH4Registers::r1;
+#elif CPU(X86)
+ static const RegisterID input = X86Registers::eax;
+ static const RegisterID index = X86Registers::edx;
+ static const RegisterID length = X86Registers::ecx;
+ static const RegisterID output = X86Registers::edi;
+
+ static const RegisterID regT0 = X86Registers::ebx;
+ static const RegisterID regT1 = X86Registers::esi;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+ static const RegisterID returnRegister2 = X86Registers::edx;
+#elif CPU(X86_64)
+#if !OS(WINDOWS)
+ static const RegisterID input = X86Registers::edi;
+ static const RegisterID index = X86Registers::esi;
+ static const RegisterID length = X86Registers::edx;
+ static const RegisterID output = X86Registers::ecx;
+#else
+    // If the return value doesn't fit in 64 bits, its destination is pointed to by rcx and the parameters are shifted.
+ // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
+ COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
+ static const RegisterID input = X86Registers::edx;
+ static const RegisterID index = X86Registers::r8;
+ static const RegisterID length = X86Registers::r9;
+ static const RegisterID output = X86Registers::r10;
+#endif
+
+ static const RegisterID regT0 = X86Registers::eax;
+ static const RegisterID regT1 = X86Registers::ebx;
+
+ static const RegisterID returnRegister = X86Registers::eax;
+ static const RegisterID returnRegister2 = X86Registers::edx;
+#endif
+
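+    // Where a fixed-count character class is immediately followed by a
+    // fixed-count pattern character, swap the two terms, so that the cheaper
+    // single-character comparison is generated (and can fail) first.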
+ void optimizeAlternative(PatternAlternative* alternative)
+ {
+ if (!alternative->m_terms.size())
+ return;
+
+ for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+ PatternTerm& nextTerm = alternative->m_terms[i + 1];
+
+ if ((term.type == PatternTerm::TypeCharacterClass)
+ && (term.quantityType == QuantifierFixedCount)
+ && (nextTerm.type == PatternTerm::TypePatternCharacter)
+ && (nextTerm.quantityType == QuantifierFixedCount)) {
+ PatternTerm termCopy = term;
+ alternative->m_terms[i] = nextTerm;
+ alternative->m_terms[i + 1] = termCopy;
+ }
+ }
+ }
+
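+    // Emit a binary search over the sorted character ranges: pick the middle
+    // range, recurse over the ranges below it, and interleave equality checks
+    // for any sparse single-character matches that sort below the current range.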
+ void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
+ {
+ do {
+ // pick which range we're going to generate
+ int which = count >> 1;
+ char lo = ranges[which].begin;
+ char hi = ranges[which].end;
+
+ // check if there are any ranges or matches below lo. If not, just jl to failure -
+ // if there is anything else to check, check that first, if it falls through jmp to failure.
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ // generate code for all ranges before this one
+ if (which)
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
+ ++*matchIndex;
+ }
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else if (which) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else
+ failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
+ ++*matchIndex;
+
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
+ // fall through to here, the value is above hi.
+
+ // shuffle along & loop around if there are any more matches to handle.
+ unsigned next = which + 1;
+ ranges += next;
+ count -= next;
+ } while (count);
+ }
+
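+    // Generate code to match a character class. Where a lookup table was built
+    // for the class a single table test suffices; otherwise test the non-ASCII
+    // matches and ranges (guarded by an ASCII check on the character), then the
+    // ASCII ranges and matches.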
+ void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
+ {
+ if (charClass->m_table) {
+ ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
+ matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
+ return;
+ }
+ Jump unicodeFail;
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
+ Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
+
+ if (charClass->m_matchesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
+ UChar ch = charClass->m_matchesUnicode[i];
+ matchDest.append(branch32(Equal, character, Imm32(ch)));
+ }
+ }
+
+ if (charClass->m_rangesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
+ UChar lo = charClass->m_rangesUnicode[i].begin;
+ UChar hi = charClass->m_rangesUnicode[i].end;
+
+ Jump below = branch32(LessThan, character, Imm32(lo));
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
+ below.link(this);
+ }
+ }
+
+ unicodeFail = jump();
+ isAscii.link(this);
+ }
+
+ if (charClass->m_ranges.size()) {
+ unsigned matchIndex = 0;
+ JumpList failures;
+ matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
+ while (matchIndex < charClass->m_matches.size())
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
+
+ failures.link(this);
+ } else if (charClass->m_matches.size()) {
+ // optimization: gather 'a','A' etc back together, can mask & test once.
+ Vector<char> matchesAZaz;
+
+ for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
+ char ch = charClass->m_matches[i];
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIILower(ch)) {
+ matchesAZaz.append(ch);
+ continue;
+ }
+ if (isASCIIUpper(ch))
+ continue;
+ }
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
+ }
+
+ if (unsigned countAZaz = matchesAZaz.size()) {
+ or32(TrustedImm32(32), character);
+ for (unsigned i = 0; i < countAZaz; ++i)
+ matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
+ }
+ }
+
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
+ unicodeFail.link(this);
+ }
+
+ // Jumps if input not available; will have (incorrectly) incremented already!
+ Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
+ {
+ if (countToCheck)
+ add32(Imm32(countToCheck), index);
+ return branch32(Above, index, length);
+ }
+
+ Jump jumpIfAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump checkInput()
+ {
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump atEndOfInput()
+ {
+ return branch32(Equal, index, length);
+ }
+
+ Jump notAtEndOfInput()
+ {
+ return branch32(NotEqual, index, length);
+ }
+
+ Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
+ {
+ readCharacter(inputPosition, character);
+
+    // For case-insensitive compares, non-ASCII characters that have different
+    // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ or32(TrustedImm32(0x20), character);
+ ch |= 0x20;
+ }
+
+ return branch32(NotEqual, character, Imm32(ch));
+ }
+
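+    // Load the character at the given offset from the current index, using an
+    // 8 or 16 bit load to match the subject string's character size.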
+ void readCharacter(int inputPosition, RegisterID reg)
+ {
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
+ else
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ }
+
+ void storeToFrame(RegisterID reg, unsigned frameLocation)
+ {
+ poke(reg, frameLocation);
+ }
+
+ void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
+ {
+ poke(imm, frameLocation);
+ }
+
+ DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
+ {
+ return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ void loadFromFrame(unsigned frameLocation, RegisterID reg)
+ {
+ peek(reg, frameLocation);
+ }
+
+ void loadFromFrameAndJump(unsigned frameLocation)
+ {
+ jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
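+    // The pattern's backtracking state is kept in a frame of m_callFrameSize
+    // void*-sized slots, carved out of the machine stack on entry.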
+ void initCallFrame()
+ {
+ unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
+ if (callFrameSize)
+ subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
+ }
+ void removeCallFrame()
+ {
+ unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
+ if (callFrameSize)
+ addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
+ }
+
+    // Used to record subpatterns; should only be called if compileMode is IncludeSubpatterns.
+ void setSubpatternStart(RegisterID reg, unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
+ }
+ void setSubpatternEnd(RegisterID reg, unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
+ }
+ void clearSubpatternStart(unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
+ }
+
+ // We use one of three different strategies to track the start of the current match,
+ // while matching.
+ // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
+ // at the end of matching. This is irrespective of compileMode, and in this case
+ // these methods should never be called.
+ // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
+ // vector, store the match start in the output vector.
+ // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
+ // in this register.
+ void setMatchStart(RegisterID reg)
+ {
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ if (compileMode == IncludeSubpatterns)
+ store32(reg, output);
+ else
+ move(reg, output);
+ }
+ void getMatchStart(RegisterID reg)
+ {
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ if (compileMode == IncludeSubpatterns)
+ load32(output, reg);
+ else
+ move(output, reg);
+ }
+
+ enum YarrOpCode {
+ // These nodes wrap body alternatives - those in the main disjunction,
+ // rather than subpatterns or assertions. These are chained together in
+ // a doubly linked list, with a 'begin' node for the first alternative,
+ // a 'next' node for each subsequent alternative, and an 'end' node at
+ // the end. In the case of repeating alternatives, the 'end' node also
+ // has a reference back to 'begin'.
+ OpBodyAlternativeBegin,
+ OpBodyAlternativeNext,
+ OpBodyAlternativeEnd,
+ // Similar to the body alternatives, but used for subpatterns with two
+ // or more alternatives.
+ OpNestedAlternativeBegin,
+ OpNestedAlternativeNext,
+ OpNestedAlternativeEnd,
+ // Used for alternatives in subpatterns where there is only a single
+        // alternative (backtracking is easier in these cases), or for alternatives
+ // which never need to be backtracked (those in parenthetical assertions,
+ // terminal subpatterns).
+ OpSimpleNestedAlternativeBegin,
+ OpSimpleNestedAlternativeNext,
+ OpSimpleNestedAlternativeEnd,
+ // Used to wrap 'Once' subpattern matches (quantityCount == 1).
+ OpParenthesesSubpatternOnceBegin,
+ OpParenthesesSubpatternOnceEnd,
+ // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
+ OpParenthesesSubpatternTerminalBegin,
+ OpParenthesesSubpatternTerminalEnd,
+ // Used to wrap parenthetical assertions.
+ OpParentheticalAssertionBegin,
+ OpParentheticalAssertionEnd,
+ // Wraps all simple terms (pattern characters, character classes).
+ OpTerm,
+ // Where an expression contains only 'once through' body alternatives
+ // and no repeating ones, this op is used to return match failure.
+ OpMatchFailed
+ };
+
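+    // For example, a simple two-alternative body such as /a|b/ can be expected
+    // to lower to the sequence: OpBodyAlternativeBegin, OpTerm,
+    // OpBodyAlternativeNext, OpTerm, OpBodyAlternativeEnd.
+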
+ // This structure is used to hold the compiled opcode information,
+ // including reference back to the original PatternTerm/PatternAlternatives,
+ // and JIT compilation data structures.
+ struct YarrOp {
+ explicit YarrOp(PatternTerm* term)
+ : m_op(OpTerm)
+ , m_term(term)
+ , m_isDeadCode(false)
+ {
+ }
+
+ explicit YarrOp(YarrOpCode op)
+ : m_op(op)
+ , m_isDeadCode(false)
+ {
+ }
+
+ // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
+ YarrOpCode m_op;
+ PatternTerm* m_term;
+
+ // For alternatives, this holds the PatternAlternative and doubly linked
+ // references to this alternative's siblings. In the case of the
+ // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
+ // m_nextOp will reference the OpBodyAlternativeBegin node of the first
+ // repeating alternative.
+ PatternAlternative* m_alternative;
+ size_t m_previousOp;
+ size_t m_nextOp;
+
+ // Used to record a set of Jumps out of the generated code, typically
+ // used for jumps out to backtracking code, and a single reentry back
+ // into the code for a node (likely where a backtrack will trigger
+ // rematching).
+ Label m_reentry;
+ JumpList m_jumps;
+
+ // Used for backtracking when the prior alternative did not consume any
+ // characters but matched.
+ Jump m_zeroLengthMatch;
+
+ // This flag is used to null out the second pattern character, when
+ // two are fused to match a pair together.
+ bool m_isDeadCode;
+
+ // Currently used in the case of some of the more complex management of
+ // 'm_checked', to cache the offset used in this alternative, to avoid
+ // recalculating it.
+ int m_checkAdjust;
+
+ // Used by OpNestedAlternativeNext/End to hold the pointer to the
+ // value that will be pushed into the pattern's frame to return to,
+ // upon backtracking back into the disjunction.
+ DataLabelPtr m_returnAddress;
+ };
+
+ // BacktrackingState
+ // This class encapsulates information about the state of code generation
+ // whilst generating the code for backtracking, when a term fails to match.
+ // Upon entry to code generation of the backtracking code for a given node,
+ // the Backtracking state will hold references to all control flow sources
+ // that are outputs in need of further backtracking from the prior node
+ // generated (which is the subsequent operation in the regular expression,
+ // and in the m_ops Vector, since we generated backtracking backwards).
+ // These references to control flow take the form of:
+ // - A jump list of jumps, to be linked to code that will backtrack them
+ // further.
+ // - A set of DataLabelPtr values, to be populated with values to be
+ // treated effectively as return addresses backtracking into complex
+ // subpatterns.
+ // - A flag indicating that the current sequence of generated code up to
+ // this point requires backtracking.
+ class BacktrackingState {
+ public:
+ BacktrackingState()
+ : m_pendingFallthrough(false)
+ {
+ }
+
+ // Add a jump or jumps, a return address, or set the flag indicating
+ // that the current 'fallthrough' control flow requires backtracking.
+ void append(const Jump& jump)
+ {
+ m_laterFailures.append(jump);
+ }
+ void append(JumpList& jumpList)
+ {
+ m_laterFailures.append(jumpList);
+ }
+ void append(const DataLabelPtr& returnAddress)
+ {
+ m_pendingReturns.append(returnAddress);
+ }
+ void fallthrough()
+ {
+ ASSERT(!m_pendingFallthrough);
+ m_pendingFallthrough = true;
+ }
+
+ // These methods clear the backtracking state, either linking to the
+ // current location, a provided label, or copying the backtracking out
+ // to a JumpList. All actions may require code generation to take place,
+ // and as such are passed a pointer to the assembler.
+ void link(MacroAssembler* assembler)
+ {
+ if (m_pendingReturns.size()) {
+ Label here(assembler);
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
+ m_pendingReturns.clear();
+ }
+ m_laterFailures.link(assembler);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
+ }
+ void linkTo(Label label, MacroAssembler* assembler)
+ {
+ if (m_pendingReturns.size()) {
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
+ m_pendingReturns.clear();
+ }
+ if (m_pendingFallthrough)
+ assembler->jump(label);
+ m_laterFailures.linkTo(label, assembler);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
+ }
+ void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
+ {
+ if (m_pendingReturns.size()) {
+ Label here(assembler);
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
+ m_pendingReturns.clear();
+ m_pendingFallthrough = true;
+ }
+ if (m_pendingFallthrough)
+ jumpList.append(assembler->jump());
+ jumpList.append(m_laterFailures);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
+ }
+
+ bool isEmpty()
+ {
+ return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
+ }
+
+ // Called at the end of code generation to link all return addresses.
+ void linkDataLabels(LinkBuffer& linkBuffer)
+ {
+ ASSERT(isEmpty());
+ for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
+ linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
+ }
+
+ private:
+ struct ReturnAddressRecord {
+ ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
+ : m_dataLabel(dataLabel)
+ , m_backtrackLocation(backtrackLocation)
+ {
+ }
+
+ DataLabelPtr m_dataLabel;
+ Label m_backtrackLocation;
+ };
+
+ JumpList m_laterFailures;
+ bool m_pendingFallthrough;
+ Vector<DataLabelPtr, 4> m_pendingReturns;
+ Vector<ReturnAddressRecord, 4> m_backtrackRecords;
+ };
+
+ // Generation methods:
+ // ===================
+
+ // This method provides a default implementation of backtracking common
+ // to many terms; terms commonly jump out of the forwards matching path
+ // on any failed conditions, and add these jumps to the m_jumps list. If
+ // no special handling is required we can often just backtrack to m_jumps.
+ void backtrackTermDefault(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ m_backtrackingState.append(op.m_jumps);
+ }
+
+ void generateAssertionBOL(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (!term->inputPosition)
+ matchDest.append(branch32(Equal, index, Imm32(m_checked)));
+
+ readCharacter((term->inputPosition - m_checked) - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ op.m_jumps.append(jump());
+
+ matchDest.link(this);
+ } else {
+ // Erk, really should poison out these alternatives early. :-/
+ if (term->inputPosition)
+ op.m_jumps.append(jump());
+ else
+ op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
+ }
+ }
+ void backtrackAssertionBOL(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
+ void generateAssertionEOL(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (term->inputPosition == m_checked)
+ matchDest.append(atEndOfInput());
+
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ op.m_jumps.append(jump());
+
+ matchDest.link(this);
+ } else {
+ if (term->inputPosition == m_checked)
+ op.m_jumps.append(notAtEndOfInput());
+ // Erk, really should poison out these alternatives early. :-/
+ else
+ op.m_jumps.append(jump());
+ }
+ }
+ void backtrackAssertionEOL(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
+    // Also falls through on nextIsNotWordChar.
+ void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+
+ if (term->inputPosition == m_checked)
+ nextIsNotWordChar.append(atEndOfInput());
+
+ readCharacter((term->inputPosition - m_checked), character);
+ matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
+ }
+
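+    // A word boundary matches where exactly one of the preceding and following
+    // characters is a wordchar (the sense is reversed for \B). Generate both
+    // the "previous character was a wordchar" and "was not" paths, each of
+    // which then tests the following character.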
+ void generateAssertionWordBoundary(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+
+ Jump atBegin;
+ JumpList matchDest;
+ if (!term->inputPosition)
+ atBegin = branch32(Equal, index, Imm32(m_checked));
+ readCharacter((term->inputPosition - m_checked) - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
+ if (!term->inputPosition)
+ atBegin.link(this);
+
+ // We fall through to here if the last character was not a wordchar.
+ JumpList nonWordCharThenWordChar;
+ JumpList nonWordCharThenNonWordChar;
+ if (term->invert()) {
+ matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+ nonWordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+ nonWordCharThenNonWordChar.append(jump());
+ }
+ op.m_jumps.append(nonWordCharThenNonWordChar);
+
+ // We jump here if the last character was a wordchar.
+ matchDest.link(this);
+ JumpList wordCharThenWordChar;
+ JumpList wordCharThenNonWordChar;
+ if (term->invert()) {
+ matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
+ wordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
+            // This can fall through!
+ }
+
+ op.m_jumps.append(wordCharThenWordChar);
+
+ nonWordCharThenWordChar.link(this);
+ wordCharThenNonWordChar.link(this);
+ }
+ void backtrackAssertionWordBoundary(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
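+    // Match a single fixed pattern character. Where subsequent ops are also
+    // single fixed characters at consecutive input positions, fuse up to four
+    // 8-bit (or two 16-bit) characters into one wider load and compare, OR-ing
+    // in a 0x20 mask per character to fold ASCII case when matching
+    // case-insensitively; the fused terms are marked as dead code.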
+ void generatePatternCharacterOnce(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+
+ if (op.m_isDeadCode)
+ return;
+
+ // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
+ // node, so there must always be at least one more node.
+ ASSERT(opIndex + 1 < m_ops.size());
+ YarrOp* nextOp = &m_ops[opIndex + 1];
+
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
+ if ((ch > 0xff) && (m_charSize == Char8)) {
+ // Have a 16 bit pattern character and an 8 bit string - short circuit
+ op.m_jumps.append(jump());
+ return;
+ }
+
+ const RegisterID character = regT0;
+ int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
+ unsigned ignoreCaseMask = 0;
+#if CPU(BIG_ENDIAN)
+ int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
+#else
+ int allCharacters = ch;
+#endif
+ int numberCharacters;
+ int startTermPosition = term->inputPosition;
+
+    // For case-insensitive compares, non-ASCII characters that have different
+    // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
+#if CPU(BIG_ENDIAN)
+ ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
+#else
+ ignoreCaseMask |= 32;
+#endif
+
+ for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
+ PatternTerm* nextTerm = nextOp->m_term;
+
+ if (nextTerm->type != PatternTerm::TypePatternCharacter
+ || nextTerm->quantityType != QuantifierFixedCount
+ || nextTerm->quantityCount != 1
+ || nextTerm->inputPosition != (startTermPosition + numberCharacters))
+ break;
+
+ nextOp->m_isDeadCode = true;
+
+#if CPU(BIG_ENDIAN)
+ int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
+#else
+ int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
+#endif
+
+ UChar currentCharacter = nextTerm->patternCharacter;
+
+ if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
+ // Have a 16 bit pattern character and an 8 bit string - short circuit
+ op.m_jumps.append(jump());
+ return;
+ }
+
+            // For case-insensitive compares, non-ASCII characters that have different
+            // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));
+
+ allCharacters |= (currentCharacter << shiftAmount);
+
+ if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
+ ignoreCaseMask |= 32 << shiftAmount;
+ }
+
+ if (m_charSize == Char8) {
+ switch (numberCharacters) {
+ case 1:
+ op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
+ return;
+ case 2: {
+ BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load16Unaligned(address, character);
+ break;
+ }
+ case 3: {
+ BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load16Unaligned(highAddress, character);
+ if (ignoreCaseMask)
+ or32(Imm32(ignoreCaseMask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
+ op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
+ return;
+ }
+ case 4: {
+ BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load32WithUnalignedHalfWords(address, character);
+ break;
+ }
+ }
+ } else {
+ switch (numberCharacters) {
+ case 1:
+ op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+ return;
+ case 2:
+ BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+ load32WithUnalignedHalfWords(address, character);
+ break;
+ }
+ }
+
+ if (ignoreCaseMask)
+ or32(Imm32(ignoreCaseMask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
+ return;
+ }
+ void backtrackPatternCharacterOnce(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
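+    // Match a fixed-count repetition of a single character: the count register
+    // walks from (index - quantityCount) up to index, reloading and comparing
+    // one character per loop iteration.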
+ void generatePatternCharacterFixed(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ move(index, countRegister);
+ sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
+
+ Label loop(this);
+ BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
+
+ if (m_charSize == Char8)
+ load8(address, character);
+ else
+ load16(address, character);
+
+        // For case-insensitive compares, non-ASCII characters that have different
+        // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ or32(TrustedImm32(0x20), character);
+ ch |= 0x20;
+ }
+
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
+ add32(TrustedImm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+ void backtrackPatternCharacterFixed(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
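+    // Greedy repetition of a single character: consume matches while possible
+    // (up to quantityCount), storing the consumed count to the frame so that
+    // backtracking can give characters back one at a time.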
+ void generatePatternCharacterGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ move(TrustedImm32(0), countRegister);
+
+        // Unless we have a 16 bit pattern character and an 8 bit string - short circuit
+ if (!((ch > 0xff) && (m_charSize == Char8))) {
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+ if (term->quantityCount == quantifyInfinite)
+ jump(loop);
+ else
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
+
+ failures.link(this);
+ }
+ op.m_reentry = label();
+
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackPatternCharacterGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID countRegister = regT1;
+
+ m_backtrackingState.link(this);
+
+ loadFromFrame(term->frameLocation, countRegister);
+ m_backtrackingState.append(branchTest32(Zero, countRegister));
+ sub32(TrustedImm32(1), countRegister);
+ sub32(TrustedImm32(1), index);
+ jump(op.m_reentry);
+ }
+
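+    // Non-greedy repetition of a single character: initially consume nothing;
+    // each backtrack attempts to consume one further character before retrying
+    // the remainder of the pattern.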
+ void generatePatternCharacterNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID countRegister = regT1;
+
+ move(TrustedImm32(0), countRegister);
+ op.m_reentry = label();
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackPatternCharacterNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ m_backtrackingState.link(this);
+
+ loadFromFrame(term->frameLocation, countRegister);
+
+        // Unless we have a 16 bit pattern character and an 8 bit string - short circuit
+ if (!((ch > 0xff) && (m_charSize == Char8))) {
+ JumpList nonGreedyFailures;
+ nonGreedyFailures.append(atEndOfInput());
+ if (term->quantityCount != quantifyInfinite)
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
+ nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+
+ jump(op.m_reentry);
+ nonGreedyFailures.link(this);
+ }
+
+ sub32(countRegister, index);
+ m_backtrackingState.fallthrough();
+ }
+
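+    // Match a single character-class term; for inverted classes the sense of
+    // the class test is reversed.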
+ void generateCharacterClassOnce(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, term->characterClass);
+
+ if (term->invert())
+ op.m_jumps.append(matchDest);
+ else {
+ op.m_jumps.append(jump());
+ matchDest.link(this);
+ }
+ }
+ void backtrackCharacterClassOnce(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
+ void generateCharacterClassFixed(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ move(index, countRegister);
+ sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
+
+ Label loop(this);
+ JumpList matchDest;
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
+ else
+ load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
+ matchCharacterClass(character, matchDest, term->characterClass);
+
+ if (term->invert())
+ op.m_jumps.append(matchDest);
+ else {
+ op.m_jumps.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(TrustedImm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+ void backtrackCharacterClassFixed(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
+ void generateCharacterClassGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ move(TrustedImm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+
+ if (term->invert()) {
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, failures, term->characterClass);
+ } else {
+ JumpList matchDest;
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, term->characterClass);
+ failures.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+ if (term->quantityCount != quantifyInfinite) {
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
+ failures.append(jump());
+ } else
+ jump(loop);
+
+ failures.link(this);
+ op.m_reentry = label();
+
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackCharacterClassGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID countRegister = regT1;
+
+ m_backtrackingState.link(this);
+
+ loadFromFrame(term->frameLocation, countRegister);
+ m_backtrackingState.append(branchTest32(Zero, countRegister));
+ sub32(TrustedImm32(1), countRegister);
+ sub32(TrustedImm32(1), index);
+ jump(op.m_reentry);
+ }
+
+ void generateCharacterClassNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID countRegister = regT1;
+
+ move(TrustedImm32(0), countRegister);
+ op.m_reentry = label();
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackCharacterClassNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+
+ JumpList nonGreedyFailures;
+
+ m_backtrackingState.link(this);
+
+ loadFromFrame(term->frameLocation, countRegister);
+
+ nonGreedyFailures.append(atEndOfInput());
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
+
+ JumpList matchDest;
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, term->characterClass);
+
+ if (term->invert())
+ nonGreedyFailures.append(matchDest);
+ else {
+ nonGreedyFailures.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+
+ jump(op.m_reentry);
+
+ nonGreedyFailures.link(this);
+ sub32(countRegister, index);
+ m_backtrackingState.fallthrough();
+ }
+
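+    // Matches a body of the form /.*x.*/: derive the match start by scanning
+    // backwards from the match position to the preceding newline (or the start
+    // of input), and the match end by scanning forwards to the next newline
+    // (or the end of input), honouring any ^/$ anchors.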
+ void generateDotStarEnclosure(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID character = regT0;
+ const RegisterID matchPos = regT1;
+
+ JumpList foundBeginningNewLine;
+ JumpList saveStartIndex;
+ JumpList foundEndingNewLine;
+
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ getMatchStart(matchPos);
+
+ saveStartIndex.append(branchTest32(Zero, matchPos));
+ Label findBOLLoop(this);
+ sub32(TrustedImm32(1), matchPos);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
+ branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
+ saveStartIndex.append(jump());
+
+ foundBeginningNewLine.link(this);
+ add32(TrustedImm32(1), matchPos); // Advance past newline
+ saveStartIndex.link(this);
+
+ if (!m_pattern.m_multiline && term->anchors.bolAnchor)
+ op.m_jumps.append(branchTest32(NonZero, matchPos));
+
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ setMatchStart(matchPos);
+
+ move(index, matchPos);
+
+ Label findEOLLoop(this);
+ foundEndingNewLine.append(branch32(Equal, matchPos, length));
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
+ add32(TrustedImm32(1), matchPos);
+ jump(findEOLLoop);
+
+ foundEndingNewLine.link(this);
+
+ if (!m_pattern.m_multiline && term->anchors.eolAnchor)
+ op.m_jumps.append(branch32(NotEqual, matchPos, length));
+
+ move(matchPos, index);
+ }
+
+ void backtrackDotStarEnclosure(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
+
+ // Code generation/backtracking for simple terms
+ // (pattern characters, character classes, and assertions).
+ // These methods farm out work to the set of functions above.
+ void generateTerm(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ switch (term->type) {
+ case PatternTerm::TypePatternCharacter:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ generatePatternCharacterOnce(opIndex);
+ else
+ generatePatternCharacterFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ generatePatternCharacterGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ generatePatternCharacterNonGreedy(opIndex);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ generateCharacterClassOnce(opIndex);
+ else
+ generateCharacterClassFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ generateCharacterClassGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ generateCharacterClassNonGreedy(opIndex);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeAssertionBOL:
+ generateAssertionBOL(opIndex);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ generateAssertionEOL(opIndex);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ generateAssertionWordBoundary(opIndex);
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ case PatternTerm::TypeParentheticalAssertion:
+ RELEASE_ASSERT_NOT_REACHED();
+ case PatternTerm::TypeBackReference:
+ m_shouldFallBack = true;
+ break;
+ case PatternTerm::TypeDotStarEnclosure:
+ generateDotStarEnclosure(opIndex);
+ break;
+ }
+ }
+ void backtrackTerm(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ switch (term->type) {
+ case PatternTerm::TypePatternCharacter:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ backtrackPatternCharacterOnce(opIndex);
+ else
+ backtrackPatternCharacterFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ backtrackPatternCharacterGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ backtrackPatternCharacterNonGreedy(opIndex);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ backtrackCharacterClassOnce(opIndex);
+ else
+ backtrackCharacterClassFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ backtrackCharacterClassGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ backtrackCharacterClassNonGreedy(opIndex);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeAssertionBOL:
+ backtrackAssertionBOL(opIndex);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ backtrackAssertionEOL(opIndex);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ backtrackAssertionWordBoundary(opIndex);
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ case PatternTerm::TypeParentheticalAssertion:
+ RELEASE_ASSERT_NOT_REACHED();
+
+ case PatternTerm::TypeDotStarEnclosure:
+ backtrackDotStarEnclosure(opIndex);
+ break;
+
+ case PatternTerm::TypeBackReference:
+ m_shouldFallBack = true;
+ break;
+ }
+ }
+
+ void generate()
+ {
+ // Forwards generate the matching code.
+ ASSERT(m_ops.size());
+ size_t opIndex = 0;
+
+ do {
+ YarrOp& op = m_ops[opIndex];
+ switch (op.m_op) {
+
+ case OpTerm:
+ generateTerm(opIndex);
+ break;
+
+ // OpBodyAlternativeBegin/Next/End
+ //
+ // These nodes wrap the set of alternatives in the body of the regular expression.
+ // There may be either one or two chains of OpBodyAlternative nodes, one representing
+ // the 'once through' sequence of alternatives (if any exist), and one representing
+ // the repeating alternatives (again, if any exist).
+ //
+ // Upon normal entry to the Begin alternative, we will check that input is available.
+ // Reentry to the Begin alternative will take place after the check has taken place,
+ // and will assume that the input position has already been progressed as appropriate.
+ //
+            // Entry to subsequent Next/End alternatives occurs when the prior alternative has
+            // successfully completed a match - we return a success state from the JIT code.
+            //
+            // A Next alternative allows for reentry optimized to suit backtracking from its
+            // preceding alternative. It expects the input position to still be set to a position
+            // appropriate to its predecessor, and it will only perform an input check if the
+            // predecessor had a minimum size less than its own.
+ //
+            // In the case of 'once through' expressions, the End node will also have a reentry
+ // point to jump to when the last alternative fails. Again, this expects the input
+ // position to still reflect that expected by the prior alternative.
+ case OpBodyAlternativeBegin: {
+ PatternAlternative* alternative = op.m_alternative;
+
+ // Upon entry at the head of the set of alternatives, check if input is available
+ // to run the first alternative. (This progresses the input position).
+ op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
+ // We will reenter after the check, and assume the input position to have been
+ // set as appropriate to this alternative.
+ op.m_reentry = label();
+
+ m_checked += alternative->m_minimumSize;
+ break;
+ }
+ case OpBodyAlternativeNext:
+ case OpBodyAlternativeEnd: {
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ PatternAlternative* alternative = op.m_alternative;
+
+ // If we get here, the prior alternative matched - return success.
+
+ // Adjust the stack pointer to remove the pattern's frame.
+ removeCallFrame();
+
+                // Load appropriate values into the return register and the first output
+                // slot, and return. In the case of a pattern with a fixed size, we will
+                // not have yet set the value in the first output slot.
+ ASSERT(index != returnRegister);
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, returnRegister);
+ if (priorAlternative->m_minimumSize)
+ sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
+ if (compileMode == IncludeSubpatterns)
+ store32(returnRegister, output);
+ } else
+ getMatchStart(returnRegister);
+ if (compileMode == IncludeSubpatterns)
+ store32(index, Address(output, 4));
+ move(index, returnRegister2);
+
+ generateReturn();
+
+ // This is the divide between the tail of the prior alternative, above, and
+ // the head of the subsequent alternative, below.
+
+ if (op.m_op == OpBodyAlternativeNext) {
+ // This is the reentry point for the Next alternative. We expect any code
+ // that jumps here to do so with the input position matching that of the
+                    // PRIOR alternative, and we will only check input availability if we
+ // need to progress it forwards.
+ op.m_reentry = label();
+ if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
+ add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
+ op.m_jumps.append(jumpIfNoAvailableInput());
+ } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
+ sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
+ } else if (op.m_nextOp == notFound) {
+ // This is the reentry point for the End of 'once through' alternatives,
+ // jumped to when the last alternative fails to match.
+ op.m_reentry = label();
+ sub32(Imm32(priorAlternative->m_minimumSize), index);
+ }
+
+ if (op.m_op == OpBodyAlternativeNext)
+ m_checked += alternative->m_minimumSize;
+ m_checked -= priorAlternative->m_minimumSize;
+ break;
+ }
+
+ // OpSimpleNestedAlternativeBegin/Next/End
+ // OpNestedAlternativeBegin/Next/End
+ //
+ // These nodes are used to handle sets of alternatives that are nested within
+ // subpatterns and parenthetical assertions. The 'simple' forms are used where
+ // we do not need to be able to backtrack back into any alternative other than
+ // the last, the normal forms allow backtracking into any alternative.
+ //
+ // Each Begin/Next node is responsible for planting an input check to ensure
+ // sufficient input is available on entry. Next nodes additionally need to
+ // jump to the end - Next nodes use the End node's m_jumps list to hold this
+ // set of jumps.
+ //
+ // In the non-simple forms, successful alternative matches must store a
+ // 'return address' using a DataLabelPtr, used to store the address to jump
+ // to when backtracking, to get to the code for the appropriate alternative.
+ case OpSimpleNestedAlternativeBegin:
+ case OpNestedAlternativeBegin: {
+ PatternTerm* term = op.m_term;
+ PatternAlternative* alternative = op.m_alternative;
+ PatternDisjunction* disjunction = term->parentheses.disjunction;
+
+ // Calculate how much input we need to check for, and if non-zero check.
+ op.m_checkAdjust = alternative->m_minimumSize;
+ if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
+ op.m_checkAdjust -= disjunction->m_minimumSize;
+ if (op.m_checkAdjust)
+ op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
+
+ m_checked += op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeNext:
+ case OpNestedAlternativeNext: {
+ PatternTerm* term = op.m_term;
+ PatternAlternative* alternative = op.m_alternative;
+ PatternDisjunction* disjunction = term->parentheses.disjunction;
+
+ // In the non-simple case, store a 'return address' so we can backtrack correctly.
+ if (op.m_op == OpNestedAlternativeNext) {
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ }
+
+ if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
+                // If the previous alternative matched without consuming characters then
+                // backtrack to try to match while consuming some input.
+ op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ }
+
+ // If we reach here then the last alternative has matched - jump to the
+ // End node, to skip over any further alternatives.
+ //
+ // FIXME: this is logically O(N^2) (though N can be expected to be very
+ // small). We could avoid this either by adding an extra jump to the JIT
+                // data structures, or by making the backtracking code that jumps to Next
+                // alternatives responsible for checking that input is available (if
+ // we didn't need to plant the input checks, then m_jumps would be free).
+ YarrOp* endOp = &m_ops[op.m_nextOp];
+ while (endOp->m_nextOp != notFound) {
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
+ endOp = &m_ops[endOp->m_nextOp];
+ }
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
+ endOp->m_jumps.append(jump());
+
+ // This is the entry point for the next alternative.
+ op.m_reentry = label();
+
+ // Calculate how much input we need to check for, and if non-zero check.
+ op.m_checkAdjust = alternative->m_minimumSize;
+ if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
+ op.m_checkAdjust -= disjunction->m_minimumSize;
+ if (op.m_checkAdjust)
+ op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
+ m_checked += op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeEnd:
+ case OpNestedAlternativeEnd: {
+ PatternTerm* term = op.m_term;
+
+ // In the non-simple case, store a 'return address' so we can backtrack correctly.
+ if (op.m_op == OpNestedAlternativeEnd) {
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ }
+
+ if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
+                // If the previous alternative matched without consuming characters then
+                // backtrack to try to match while consuming some input.
+ op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ }
+
+ // If this set of alternatives contains more than one alternative,
+ // then the Next nodes will have planted jumps to the End, and added
+ // them to this node's m_jumps list.
+ op.m_jumps.link(this);
+ op.m_jumps.clear();
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
+ break;
+ }
+
+ // OpParenthesesSubpatternOnceBegin/End
+ //
+ // These nodes support (optionally) capturing subpatterns, that have a
+ // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
+ case OpParenthesesSubpatternOnceBegin: {
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ const RegisterID indexTemporary = regT0;
+ ASSERT(term->quantityCount == 1);
+
+            // Upon entry to a Greedy quantified set of parentheses, store the index.
+            // We'll use this for two purposes:
+            //  - To indicate which iteration we are on of matching the remainder of
+ // the expression after the parentheses - the first, including the
+ // match within the parentheses, or the second having skipped over them.
+ // - To check for empty matches, which must be rejected.
+ //
+ // At the head of a NonGreedy set of parentheses we'll immediately set the
+ // value on the stack to -1 (indicating a match skipping the subpattern),
+ // and plant a jump to the end. We'll also plant a label to backtrack to
+ // to reenter the subpattern later, with a store to set up index on the
+ // second iteration.
+ //
+ // FIXME: for capturing parens, could use the index in the capture array?
+ if (term->quantityType == QuantifierGreedy)
+ storeToFrame(index, parenthesesFrameLocation);
+ else if (term->quantityType == QuantifierNonGreedy) {
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
+ op.m_jumps.append(jump());
+ op.m_reentry = label();
+ storeToFrame(index, parenthesesFrameLocation);
+ }
+
+            // If the parentheses are capturing, store the starting index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture() && compileMode == IncludeSubpatterns) {
+ int inputOffset = term->inputPosition - m_checked;
+ if (term->quantityType == QuantifierFixedCount)
+ inputOffset -= term->parentheses.disjunction->m_minimumSize;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
+ } else
+ setSubpatternStart(index, term->parentheses.subpatternId);
+ }
+ break;
+ }
+ case OpParenthesesSubpatternOnceEnd: {
+ PatternTerm* term = op.m_term;
+ const RegisterID indexTemporary = regT0;
+ ASSERT(term->quantityCount == 1);
+
+#ifndef NDEBUG
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
+ Jump pastBreakpoint;
+ pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ breakpoint();
+ pastBreakpoint.link(this);
+ }
+#endif
+
+ // If the parentheses are capturing, store the ending index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture() && compileMode == IncludeSubpatterns) {
+ int inputOffset = term->inputPosition - m_checked;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
+ } else
+ setSubpatternEnd(index, term->parentheses.subpatternId);
+ }
+
+ // If the parentheses are quantified Greedy then add a label to jump back
+ // to if we get a failed match from after the parentheses. For NonGreedy
+ // parentheses, link the jump from before the subpattern to here.
+ if (term->quantityType == QuantifierGreedy)
+ op.m_reentry = label();
+ else if (term->quantityType == QuantifierNonGreedy) {
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.link(this);
+ }
+ break;
+ }
+
+ // OpParenthesesSubpatternTerminalBegin/End
+ case OpParenthesesSubpatternTerminalBegin: {
+ PatternTerm* term = op.m_term;
+ ASSERT(term->quantityType == QuantifierGreedy);
+ ASSERT(term->quantityCount == quantifyInfinite);
+ ASSERT(!term->capture());
+
+ // Upon entry set a label to loop back to.
+ op.m_reentry = label();
+
+ // Store the start index of the current match; we need to reject
+ // zero-length matches.
+ storeToFrame(index, term->frameLocation);
+ break;
+ }
+ case OpParenthesesSubpatternTerminalEnd: {
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+#ifndef NDEBUG
+ PatternTerm* term = op.m_term;
+
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ Jump pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ breakpoint();
+ pastBreakpoint.link(this);
+#endif
+
+ // We know that the match is non-zero, so we can accept it and
+ // loop back up to the head of the subpattern.
+ jump(beginOp.m_reentry);
+
+ // This is the entry point to jump to when we stop matching - we will
+ // do so once the subpattern cannot match any more.
+ op.m_reentry = label();
+ break;
+ }
+
+ // OpParentheticalAssertionBegin/End
+ case OpParentheticalAssertionBegin: {
+ PatternTerm* term = op.m_term;
+
+ // Store the current index - assertions should not update index, so
+ // we will need to restore it upon a successful match.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ storeToFrame(index, parenthesesFrameLocation);
+
+ // Rewind the input index to the position of the assertion, undoing
+ // any adjustment for input characters checked beyond it.
+ op.m_checkAdjust = m_checked - term->inputPosition;
+ if (op.m_checkAdjust)
+ sub32(Imm32(op.m_checkAdjust), index);
+
+ m_checked -= op.m_checkAdjust;
+ break;
+ }
+ case OpParentheticalAssertionEnd: {
+ PatternTerm* term = op.m_term;
+
+ // Restore the input index value.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ loadFromFrame(parenthesesFrameLocation, index);
+
+ // If inverted, a successful match of the assertion must be treated
+ // as a failure, so jump to backtracking.
+ if (term->invert()) {
+ op.m_jumps.append(jump());
+ op.m_reentry = label();
+ }
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ break;
+ }
+
+ case OpMatchFailed:
+ removeCallFrame();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
+ generateReturn();
+ break;
+ }
+
+ ++opIndex;
+ } while (opIndex < m_ops.size());
+ }
+
+ void backtrack()
+ {
+ // Generate the backtracking code by walking the ops backwards.
+ size_t opIndex = m_ops.size();
+ ASSERT(opIndex);
+
+ do {
+ --opIndex;
+ YarrOp& op = m_ops[opIndex];
+ switch (op.m_op) {
+
+ case OpTerm:
+ backtrackTerm(opIndex);
+ break;
+
+ // OpBodyAlternativeBegin/Next/End
+ //
+ // For each Begin/Next node representing an alternative, we need to decide what to do
+ // in two circumstances:
+ // - If we backtrack back into this node, from within the alternative.
+ // - If the input check at the head of the alternative fails (if this exists).
+ //
+ // We treat these two cases differently since in the former case we have slightly
+ // more information - since we are backtracking out of a prior alternative we know
+ // that at least enough input was available to run it. For example, given the regular
+ // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
+ // character match of 'a'), then we need not perform an additional input availability
+ // check before running the second alternative.
+ //
+ // The backtracking required differs for the last alternative, which in the case of the
+ // repeating set of alternatives must loop. The code generated for the last alternative
+ // will also be used to handle all input check failures from any prior alternatives -
+ // these require similar functionality, in seeking the next available alternative for
+ // which there is sufficient input.
+ //
+ // Since backtracking of all other alternatives simply requires us to link backtracks
+ // to the reentry point for the subsequent alternative, we will only be generating any
+ // code when backtracking the last alternative.
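+ //
+ // For example, in the repeating disjunction /abc|x/ the backtracking
+ // code generated for the final alternative 'x' also receives the input
+ // check failures from 'abc'; in both cases we must move the input
+ // position on and find the next alternative with sufficient input.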
+ case OpBodyAlternativeBegin:
+ case OpBodyAlternativeNext: {
+ PatternAlternative* alternative = op.m_alternative;
+
+ if (op.m_op == OpBodyAlternativeNext) {
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ m_checked += priorAlternative->m_minimumSize;
+ }
+ m_checked -= alternative->m_minimumSize;
+
+ // Is this the last alternative? If not, then if we backtrack to this point we just
+ // need to jump to try to match the next alternative.
+ if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
+ m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
+ break;
+ }
+ YarrOp& endOp = m_ops[op.m_nextOp];
+
+ YarrOp* beginOp = &op;
+ while (beginOp->m_op != OpBodyAlternativeBegin) {
+ ASSERT(beginOp->m_op == OpBodyAlternativeNext);
+ beginOp = &m_ops[beginOp->m_previousOp];
+ }
+
+ bool onceThrough = endOp.m_nextOp == notFound;
+
+ // First, generate code to handle cases where we backtrack out of an attempted match
+ // of the last alternative. If this is a 'once through' set of alternatives then we
+ // have nothing to do - link this straight through to the End.
+ if (onceThrough)
+ m_backtrackingState.linkTo(endOp.m_reentry, this);
+ else {
+ // If we don't need to move the input position, and the pattern has a fixed size
+ // (in which case we omit the store of the start index until the pattern has matched)
+ // then we can just link the backtrack out of the last alternative straight to the
+ // head of the first alternative.
+ if (m_pattern.m_body->m_hasFixedSize
+ && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
+ && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
+ m_backtrackingState.linkTo(beginOp->m_reentry, this);
+ else {
+ // We need to generate a trampoline of code to execute before looping back
+ // around to the first alternative.
+ m_backtrackingState.link(this);
+
+ // If the pattern size is not fixed, then store the start index, for use if we match.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ if (alternative->m_minimumSize == 1)
+ setMatchStart(index);
+ else {
+ move(index, regT0);
+ if (alternative->m_minimumSize)
+ sub32(Imm32(alternative->m_minimumSize - 1), regT0);
+ else
+ add32(TrustedImm32(1), regT0);
+ setMatchStart(regT0);
+ }
+ }
+
+ // Generate code to loop. Check whether the last alternative is longer than the
+ // first (e.g. /a|xy/ or /a|xyz/).
+ if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
+ // We want to loop, and increment the input position. If the delta is 1, it is
+ // already correctly incremented; if more than one, then decrement as appropriate.
+ unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
+ ASSERT(delta);
+ if (delta != 1)
+ sub32(Imm32(delta - 1), index);
+ jump(beginOp->m_reentry);
+ } else {
+ // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
+ // be sufficient input available to handle this, so just fall through.
+ unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
+ if (delta != 0xFFFFFFFFu) {
+ // We need to check the input here because we are incrementing the input position.
+ add32(Imm32(delta + 1), index);
+ checkInput().linkTo(beginOp->m_reentry, this);
+ }
+ }
+ }
+ }
+
+ // We can reach this point in the code in two ways:
+ // - Fallthrough from the code above (a repeating alternative backtracked out of its
+ // last alternative, and did not have sufficient input to run the first).
+ // - We will loop back up to the following label when a repeating alternative loops,
+ // following a failed input check.
+ //
+ // Either way, we have just failed the input check for the first alternative.
+ Label firstInputCheckFailed(this);
+
+ // Generate code to handle input check failures from alternatives except the last.
+ // prevOp is the alternative we're handling a bail-out from (initially Begin), and
+ // nextOp is the alternative we will be attempting to reenter into.
+ //
+ // We will link input check failures from the forwards matching path back to the code
+ // that can handle them.
+ YarrOp* prevOp = beginOp;
+ YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
+ while (nextOp->m_op != OpBodyAlternativeEnd) {
+ prevOp->m_jumps.link(this);
+
+ // We only get here if an input check fails; it is only worth checking again
+ // if the next alternative has a minimum size less than the last.
+ if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
+ // FIXME: if we added an extra label to YarrOp, we could avoid needing to
+ // subtract the delta back out, and reduce this code. We should
+ // performance-test the benefit of this.
+ unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
+ sub32(Imm32(delta), index);
+ Jump fail = jumpIfNoAvailableInput();
+ add32(Imm32(delta), index);
+ jump(nextOp->m_reentry);
+ fail.link(this);
+ } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
+ add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
+ prevOp = nextOp;
+ nextOp = &m_ops[nextOp->m_nextOp];
+ }
+
+ // We fall through to here if there is insufficient input to run the last alternative.
+
+ // If there is insufficient input to run the last alternative, then for 'once through'
+ // alternatives we are done - just jump back up into the forwards matching path at the End.
+ if (onceThrough) {
+ op.m_jumps.linkTo(endOp.m_reentry, this);
+ jump(endOp.m_reentry);
+ break;
+ }
+
+ // For repeating alternatives, link any input check failure from the last alternative to
+ // this point.
+ op.m_jumps.link(this);
+
+ bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
+
+ // Check for cases where input position is already incremented by 1 for the last
+ // alternative (this is particularly useful where the minimum size of the body
+ // disjunction is 0, e.g. /a*|b/).
+ if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
+ // index is already incremented by 1, so just store it now!
+ setMatchStart(index);
+ needsToUpdateMatchStart = false;
+ }
+
+ // Check whether there is sufficient input to loop. Increment the input position by
+ // one, and check. Also add in the minimum disjunction size before checking - there
+ // is no point in looping if we're just going to fail all the input checks around
+ // the next iteration.
+ ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
+ if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
+ // If the last alternative had the same minimum size as the disjunction,
+ // simply increment the input position by 1 - no adjustment based on minimum size.
+ add32(TrustedImm32(1), index);
+ } else {
+ // If the minimum for the last alternative was one greater than that
+ // for the disjunction, we have already progressed by 1 - nothing to do!
+ unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
+ if (delta)
+ sub32(Imm32(delta), index);
+ }
+ Jump matchFailed = jumpIfNoAvailableInput();
+
+ if (needsToUpdateMatchStart) {
+ if (!m_pattern.m_body->m_minimumSize)
+ setMatchStart(index);
+ else {
+ move(index, regT0);
+ sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
+ setMatchStart(regT0);
+ }
+ }
+
+ // Calculate how much more input the first alternative requires than the minimum
+ // for the body as a whole. If no more is needed then we don't need an additional
+ // input check here - jump straight back up to the start of the first alternative.
+ if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
+ jump(beginOp->m_reentry);
+ else {
+ if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
+ add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
+ else
+ sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
+ checkInput().linkTo(beginOp->m_reentry, this);
+ jump(firstInputCheckFailed);
+ }
+
+ // We jump to here if we iterate to the point that there is insufficient input to
+ // run any matches, and need to return a failure state from JIT code.
+ matchFailed.link(this);
+
+ removeCallFrame();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
+ generateReturn();
+ break;
+ }
+ case OpBodyAlternativeEnd: {
+ // We should never backtrack back into a body disjunction.
+ ASSERT(m_backtrackingState.isEmpty());
+
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ m_checked += priorAlternative->m_minimumSize;
+ break;
+ }
+
+ // OpSimpleNestedAlternativeBegin/Next/End
+ // OpNestedAlternativeBegin/Next/End
+ //
+ // Generate code for when we backtrack back out of an alternative into
+ // a Begin or Next node, or when the entry input count check fails. If
+ // there are more alternatives we need to jump to the next alternative,
+ // if not we backtrack back out of the current set of parentheses.
+ //
+ // In the case of non-simple nested alternatives we also need to link the
+ // 'return address' appropriately, to backtrack back out into the correct
+ // alternative.
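+ //
+ // For example, when backtracking out of 'cd' in /(?:ab|cd|ef)/ we jump
+ // forwards to the entry of 'ef', whereas backtracking out of 'ef' (the
+ // last alternative) must be routed back out of the parentheses.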
+ case OpSimpleNestedAlternativeBegin:
+ case OpSimpleNestedAlternativeNext:
+ case OpNestedAlternativeBegin:
+ case OpNestedAlternativeNext: {
+ YarrOp& nextOp = m_ops[op.m_nextOp];
+ bool isBegin = op.m_previousOp == notFound;
+ bool isLastAlternative = nextOp.m_nextOp == notFound;
+ ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
+ ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
+
+ // Treat an input check failure the same as a failed match.
+ m_backtrackingState.append(op.m_jumps);
+
+ // Set the backtracks to jump to the appropriate place. We may need
+ // to link the backtracks in one of three different ways depending on
+ // the type of alternative we are dealing with:
+ // - A single alternative, with no siblings.
+ // - The last alternative of a set of two or more.
+ // - An alternative other than the last of a set of two or more.
+ //
+ // In the case of a single alternative on its own, we don't need to
+ // jump anywhere - if the alternative fails to match we can just
+ // continue to backtrack out of the parentheses without jumping.
+ //
+ // In the case of the last alternative in a set of more than one, we
+ // need to jump back out to the beginning. We'll do so by
+ // adding a jump to the End node's m_jumps list, and linking this
+ // when we come to generate the Begin node. For alternatives other
+ // than the last, we need to jump to the next alternative.
+ //
+ // If the alternative had adjusted the input position we must link the
+ // backtracks to here, correct the input position, and then jump on. If
+ // not we can link the backtracks directly to their destination.
+ if (op.m_checkAdjust) {
+ // Handle the cases where we need to link the backtracks here.
+ m_backtrackingState.link(this);
+ sub32(Imm32(op.m_checkAdjust), index);
+ if (!isLastAlternative) {
+ // An alternative that is not the last should jump to its successor.
+ jump(nextOp.m_reentry);
+ } else if (!isBegin) {
+ // The last of more than one alternatives must jump back to the beginning.
+ nextOp.m_jumps.append(jump());
+ } else {
+ // A single alternative on its own can fall through.
+ m_backtrackingState.fallthrough();
+ }
+ } else {
+ // Handle the cases where we can link the backtracks directly to their destinations.
+ if (!isLastAlternative) {
+ // An alternative that is not the last should jump to its successor.
+ m_backtrackingState.linkTo(nextOp.m_reentry, this);
+ } else if (!isBegin) {
+ // The last of more than one alternatives must jump back to the beginning.
+ m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
+ }
+ // In the case of a single alternative on its own do nothing - it can fall through.
+ }
+
+ // If there is a backtrack jump from a zero-length match, link it here.
+ if (op.m_zeroLengthMatch.isSet())
+ m_backtrackingState.append(op.m_zeroLengthMatch);
+
+ // At this point we've handled the backtracking back into this node.
+ // Now link any backtracks that need to jump to here.
+
+ // For non-simple alternatives, link the alternative's 'return address'
+ // so that we backtrack back out into the previous alternative.
+ if (op.m_op == OpNestedAlternativeNext)
+ m_backtrackingState.append(op.m_returnAddress);
+
+ // If there is more than one alternative, then the last alternative will
+ // have planted a jump to be linked to the end. This jump was added to the
+ // End node's m_jumps list. If we are back at the beginning, link it here.
+ if (isBegin) {
+ YarrOp* endOp = &m_ops[op.m_nextOp];
+ while (endOp->m_nextOp != notFound) {
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
+ endOp = &m_ops[endOp->m_nextOp];
+ }
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
+ m_backtrackingState.append(endOp->m_jumps);
+ }
+
+ if (!isBegin) {
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ }
+ m_checked -= op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeEnd:
+ case OpNestedAlternativeEnd: {
+ PatternTerm* term = op.m_term;
+
+ // If there is a backtrack jump from a zero-length match, link it here.
+ if (op.m_zeroLengthMatch.isSet())
+ m_backtrackingState.append(op.m_zeroLengthMatch);
+
+ // If we backtrack into the end of a simple subpattern do nothing;
+ // just continue through into the last alternative. If we backtrack
+ // into the end of a non-simple set of alternatives we need to jump
+ // to the backtracking return address set up during generation.
+ if (op.m_op == OpNestedAlternativeEnd) {
+ m_backtrackingState.link(this);
+
+ // Plant a jump to the return address.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ loadFromFrameAndJump(alternativeFrameLocation);
+
+ // Link the DataLabelPtr associated with the end of the last
+ // alternative to this point.
+ m_backtrackingState.append(op.m_returnAddress);
+ }
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ break;
+ }
+
+ // OpParenthesesSubpatternOnceBegin/End
+ //
+ // When we are backtracking back out of a capturing subpattern we need
+ // to clear the start index in the matches output array, to record that
+ // this subpattern has not been captured.
+ //
+ // When backtracking back out of a Greedy quantified subpattern we need
+ // to catch this, and try running the remainder of the alternative after
+ // the subpattern again, skipping the parentheses.
+ //
+ // Upon backtracking back into a quantified set of parentheses we need to
+ // check whether we were currently skipping the subpattern. If not, we
+ // can backtrack into them; if we were, we need to either backtrack back
+ // out of the start of the parentheses, or jump back to the forwards
+ // matching start, depending on whether the match is Greedy or NonGreedy.
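+ //
+ // For example, for /(a)?b/, if 'b' fails after '(a)' has matched we
+ // backtrack into the parentheses, clear the capture, record the
+ // subpattern as skipped, and retry 'b' from the original position; a
+ // further backtrack then sees the skip flag and backtracks out entirely.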
+ case OpParenthesesSubpatternOnceBegin: {
+ PatternTerm* term = op.m_term;
+ ASSERT(term->quantityCount == 1);
+
+ // We only need to backtrack to this point if capturing or Greedy.
+ if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
+ m_backtrackingState.link(this);
+
+ // If capturing, clear the capture (we only need to reset the start index).
+ if (term->capture() && compileMode == IncludeSubpatterns)
+ clearSubpatternStart(term->parentheses.subpatternId);
+
+ // If Greedy, jump to the end.
+ if (term->quantityType == QuantifierGreedy) {
+ // Set the flag in the stackframe to -1, indicating that this
+ // iteration skipped the subpattern.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
+ // Jump to after the parentheses, skipping the subpattern.
+ jump(m_ops[op.m_nextOp].m_reentry);
+ // A backtrack from after the parentheses, when skipping the subpattern,
+ // will jump back to here.
+ op.m_jumps.link(this);
+ }
+
+ m_backtrackingState.fallthrough();
+ }
+ break;
+ }
+ case OpParenthesesSubpatternOnceEnd: {
+ PatternTerm* term = op.m_term;
+
+ if (term->quantityType != QuantifierFixedCount) {
+ m_backtrackingState.link(this);
+
+ // Check whether we should backtrack back into the parentheses, or if we
+ // are currently in a state where we had skipped over the subpattern
+ // (in which case the flag value on the stack will be -1).
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
+
+ if (term->quantityType == QuantifierGreedy) {
+ // For Greedy parentheses, we skip after having already tried going
+ // through the subpattern, so if we get here we're done.
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.append(hadSkipped);
+ } else {
+ // For NonGreedy parentheses, we try skipping the subpattern first,
+ // so if we get here we need to try running through the subpattern
+ // next. Jump back to the start of the parentheses in the forwards
+ // matching path.
+ ASSERT(term->quantityType == QuantifierNonGreedy);
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ hadSkipped.linkTo(beginOp.m_reentry, this);
+ }
+
+ m_backtrackingState.fallthrough();
+ }
+
+ m_backtrackingState.append(op.m_jumps);
+ break;
+ }
+
+ // OpParenthesesSubpatternTerminalBegin/End
+ //
+ // Terminal subpatterns will always match - there is nothing after them to
+ // force a backtrack, and since they have a minimum count of 0 they will
+ // always produce an acceptable result.
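+ //
+ // For example, a trailing /(?:ab)*/ loops while 'ab' continues to
+ // match; once it cannot match again we accept the iterations matched
+ // so far, with zero iterations also being a success.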
+ case OpParenthesesSubpatternTerminalBegin: {
+ // We will backtrack to this point once the subpattern cannot match any
+ // more. Since a match of nothing is accepted as a successful match (we
+ // are Greedy quantified with a minimum of zero), jump back to the
+ // forwards matching path at the end.
+ YarrOp& endOp = m_ops[op.m_nextOp];
+ m_backtrackingState.linkTo(endOp.m_reentry, this);
+ break;
+ }
+ case OpParenthesesSubpatternTerminalEnd:
+ // We should never be backtracking to here (hence the 'terminal' in the name).
+ ASSERT(m_backtrackingState.isEmpty());
+ m_backtrackingState.append(op.m_jumps);
+ break;
+
+ // OpParentheticalAssertionBegin/End
+ case OpParentheticalAssertionBegin: {
+ PatternTerm* term = op.m_term;
+ YarrOp& endOp = m_ops[op.m_nextOp];
+
+ // We need to handle the backtracks upon backtracking back out
+ // of a parenthetical assertion if either we need to correct
+ // the input index, or the assertion was inverted.
+ if (op.m_checkAdjust || term->invert()) {
+ m_backtrackingState.link(this);
+
+ if (op.m_checkAdjust)
+ add32(Imm32(op.m_checkAdjust), index);
+
+ // In an inverted assertion, failure to match the subpattern
+ // is treated as a successful match - jump to the end of the
+ // subpattern. We have already adjusted the input position
+ // back to that before the assertion, which is correct.
+ if (term->invert())
+ jump(endOp.m_reentry);
+
+ m_backtrackingState.fallthrough();
+ }
+
+ // The End node's jump list will contain any backtracks into
+ // the end of the assertion. Also, if inverted, we will have
+ // added the failure caused by a successful match to this.
+ m_backtrackingState.append(endOp.m_jumps);
+
+ m_checked += op.m_checkAdjust;
+ break;
+ }
+ case OpParentheticalAssertionEnd: {
+ // FIXME: We should really be clearing any nested subpattern
+ // matches on bailing out from after the pattern. Firefox has
+ // this bug too (presumably because they use YARR!)
+
+ // Never backtrack into an assertion; later failures bail to before the begin.
+ m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
+ break;
+ }
+
+ case OpMatchFailed:
+ break;
+ }
+
+ } while (opIndex);
+ }
+
+ // Compilation methods:
+ // ====================
+
+ // opCompileParenthesesSubpattern
+ // Emits ops for a subpattern (set of parentheses). These consist
+ // of a set of alternatives wrapped in an outer set of nodes for
+ // the parentheses.
+ // Supported types of parentheses are 'Once' (quantityCount == 1)
+ // and 'Terminal' (non-capturing parentheses quantified as greedy
+ // and infinite).
+ // Alternatives will use the 'Simple' set of ops if either the
+ // subpattern is terminal (in which case we will never need to
+ // backtrack), or if the subpattern only contains one alternative.
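+ //
+ // As an illustrative sketch, the subpattern in /x(?:a|b)/ compiles
+ // roughly to the nested op sequence:
+ // OpParenthesesSubpatternOnceBegin,
+ // OpNestedAlternativeBegin, OpTerm('a'),
+ // OpNestedAlternativeNext, OpTerm('b'),
+ // OpNestedAlternativeEnd,
+ // OpParenthesesSubpatternOnceEnd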
+ void opCompileParenthesesSubpattern(PatternTerm* term)
+ {
+ YarrOpCode parenthesesBeginOpCode;
+ YarrOpCode parenthesesEndOpCode;
+ YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
+ YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
+ YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
+
+ // We can currently only compile quantity 1 subpatterns that are
+ // not copies. We generate a copy in the case of a range quantifier,
+ // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
+ // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
+ // arises when the subpattern is capturing, in which case we would
+ // need to restore the capture from the first subpattern upon a
+ // failure in the second.
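+ //
+ // For example, /(?:x)+/ expands to /(?:x)(?:x)*/, where the trailing
+ // term is terminal and can be compiled, whereas the capturing /(x)+/
+ // produces a capturing copy and must fall back to the interpreter.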
+ if (term->quantityCount == 1 && !term->parentheses.isCopy) {
+ // Select the 'Once' nodes.
+ parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
+ parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
+
+ // If there is more than one alternative we cannot use the 'simple' nodes.
+ if (term->parentheses.disjunction->m_alternatives.size() != 1) {
+ alternativeBeginOpCode = OpNestedAlternativeBegin;
+ alternativeNextOpCode = OpNestedAlternativeNext;
+ alternativeEndOpCode = OpNestedAlternativeEnd;
+ }
+ } else if (term->parentheses.isTerminal) {
+ // Select the 'Terminal' nodes.
+ parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
+ parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
+ } else {
+ // This subpattern is not supported by the JIT.
+ m_shouldFallBack = true;
+ return;
+ }
+
+ size_t parenBegin = m_ops.size();
+ m_ops.append(parenthesesBeginOpCode);
+
+ m_ops.append(alternativeBeginOpCode);
+ m_ops.last().m_previousOp = notFound;
+ m_ops.last().m_term = term;
+ Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives;
+ for (unsigned i = 0; i < alternatives.size(); ++i) {
+ size_t lastOpIndex = m_ops.size() - 1;
+
+ PatternAlternative* nestedAlternative = alternatives[i].get();
+ opCompileAlternative(nestedAlternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(alternativeNextOpCode));
+
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
+
+ lastOp.m_alternative = nestedAlternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+ thisOp.m_term = term;
+ }
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == alternativeNextOpCode);
+ lastOp.m_op = alternativeEndOpCode;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
+
+ size_t parenEnd = m_ops.size();
+ m_ops.append(parenthesesEndOpCode);
+
+ m_ops[parenBegin].m_term = term;
+ m_ops[parenBegin].m_previousOp = notFound;
+ m_ops[parenBegin].m_nextOp = parenEnd;
+ m_ops[parenEnd].m_term = term;
+ m_ops[parenEnd].m_previousOp = parenBegin;
+ m_ops[parenEnd].m_nextOp = notFound;
+ }
+
+ // opCompileParentheticalAssertion
+ // Emits ops for a parenthetical assertion. These consist of an
+ // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
+ // the alternatives, with these wrapped by an outer pair of
+ // OpParentheticalAssertionBegin/End nodes.
+ // We can always use the OpSimpleNestedAlternative nodes in the
+ // case of parenthetical assertions since these only ever match
+ // once, and will never backtrack back into the assertion.
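+ //
+ // For example, /(?=a|b)c/ wraps the 'a' and 'b' alternatives in
+ // OpSimpleNestedAlternativeBegin/Next/End nodes, themselves wrapped by
+ // OpParentheticalAssertionBegin/End.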
+ void opCompileParentheticalAssertion(PatternTerm* term)
+ {
+ size_t parenBegin = m_ops.size();
+ m_ops.append(OpParentheticalAssertionBegin);
+
+ m_ops.append(OpSimpleNestedAlternativeBegin);
+ m_ops.last().m_previousOp = notFound;
+ m_ops.last().m_term = term;
+ Vector<OwnPtr<PatternAlternative> >& alternatives = term->parentheses.disjunction->m_alternatives;
+ for (unsigned i = 0; i < alternatives.size(); ++i) {
+ size_t lastOpIndex = m_ops.size() - 1;
+
+ PatternAlternative* nestedAlternative = alternatives[i].get();
+ opCompileAlternative(nestedAlternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
+
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
+
+ lastOp.m_alternative = nestedAlternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+ thisOp.m_term = term;
+ }
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
+ lastOp.m_op = OpSimpleNestedAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
+
+ size_t parenEnd = m_ops.size();
+ m_ops.append(OpParentheticalAssertionEnd);
+
+ m_ops[parenBegin].m_term = term;
+ m_ops[parenBegin].m_previousOp = notFound;
+ m_ops[parenBegin].m_nextOp = parenEnd;
+ m_ops[parenEnd].m_term = term;
+ m_ops[parenEnd].m_previousOp = parenBegin;
+ m_ops[parenEnd].m_nextOp = notFound;
+ }
+
+ // opCompileAlternative
+ // Called to emit nodes for all terms in an alternative.
+ void opCompileAlternative(PatternAlternative* alternative)
+ {
+ optimizeAlternative(alternative);
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm* term = &alternative->m_terms[i];
+
+ switch (term->type) {
+ case PatternTerm::TypeParenthesesSubpattern:
+ opCompileParenthesesSubpattern(term);
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ opCompileParentheticalAssertion(term);
+ break;
+
+ default:
+ m_ops.append(term);
+ }
+ }
+ }
+
+ // opCompileBody
+ // This method compiles the body disjunction of the regular expression.
+ // The body consists of two sets of alternatives - zero or more 'once
+ // through' (BOL anchored) alternatives, followed by zero or more
+ // repeated alternatives.
+ // For each of these two sets of alternatives, if not empty, they will be
+ // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
+ // 'begin' node referencing the first alternative, and 'next' nodes
+ // referencing any further alternatives). The begin/next/end nodes are
+ // linked together in a doubly linked list. In the case of repeating
+ // alternatives, the end node is also linked back to the beginning.
+ // If no repeating alternatives exist, then an OpMatchFailed node exists
+ // to return the failing result.
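+ //
+ // For example, /^a|b/ has one 'once through' (BOL anchored) alternative
+ // and one repeating alternative, compiling roughly to:
+ // OpBodyAlternativeBegin, OpTerm('^'), OpTerm('a'), OpBodyAlternativeEnd,
+ // OpBodyAlternativeBegin, OpTerm('b'), OpBodyAlternativeEnd
+ // with the second 'end' node linked back to the second 'begin' to loop.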
+ void opCompileBody(PatternDisjunction* disjunction)
+ {
+ Vector<OwnPtr<PatternAlternative> >& alternatives = disjunction->m_alternatives;
+ size_t currentAlternativeIndex = 0;
+
+ // Emit the 'once through' alternatives.
+ if (alternatives.size() && alternatives[0]->onceThrough()) {
+ m_ops.append(YarrOp(OpBodyAlternativeBegin));
+ m_ops.last().m_previousOp = notFound;
+
+ do {
+ size_t lastOpIndex = m_ops.size() - 1;
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
+ opCompileAlternative(alternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeNext));
+
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
+
+ lastOp.m_alternative = alternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+
+ ++currentAlternativeIndex;
+ } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
+
+ YarrOp& lastOp = m_ops.last();
+
+ ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+ lastOp.m_op = OpBodyAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
+ }
+
+ if (currentAlternativeIndex == alternatives.size()) {
+ m_ops.append(YarrOp(OpMatchFailed));
+ return;
+ }
+
+ // Emit the repeated alternatives.
+ size_t repeatLoop = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeBegin));
+ m_ops.last().m_previousOp = notFound;
+ do {
+ size_t lastOpIndex = m_ops.size() - 1;
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
+ ASSERT(!alternative->onceThrough());
+ opCompileAlternative(alternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeNext));
+
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
+
+ lastOp.m_alternative = alternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+
+ ++currentAlternativeIndex;
+ } while (currentAlternativeIndex < alternatives.size());
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+ lastOp.m_op = OpBodyAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = repeatLoop;
+ }
+
+ void generateEnter()
+ {
+#if CPU(X86_64)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ push(X86Registers::ebx);
+ // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
+ zeroExtend32ToPtr(index, index);
+ zeroExtend32ToPtr(length, length);
+#if OS(WINDOWS)
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
+#endif
+#elif CPU(X86)
+ push(X86Registers::ebp);
+ move(stackPointerRegister, X86Registers::ebp);
+ // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
+ push(X86Registers::ebx);
+ push(X86Registers::edi);
+ push(X86Registers::esi);
+ // load output into edi (2 = saved ebp + return address).
+ #if COMPILER(MSVC)
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
+ loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
+ loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
+ #else
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
+ #endif
+#elif CPU(ARM)
+ push(ARMRegisters::r4);
+ push(ARMRegisters::r5);
+ push(ARMRegisters::r6);
+#if CPU(ARM_TRADITIONAL)
+ push(ARMRegisters::r8); // scratch register
+#endif
+ if (compileMode == IncludeSubpatterns)
+ move(ARMRegisters::r3, output);
+#elif CPU(SH4)
+ push(SH4Registers::r11);
+ push(SH4Registers::r13);
+#elif CPU(MIPS)
+ // Do nothing.
+#endif
+ }
+
+ void generateReturn()
+ {
+#if CPU(X86_64)
+#if OS(WINDOWS)
+ // Store the return value in the allocated space pointed to by rcx.
+ store64(returnRegister, Address(X86Registers::ecx));
+ store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
+ move(X86Registers::ecx, returnRegister);
+#endif
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(X86)
+ pop(X86Registers::esi);
+ pop(X86Registers::edi);
+ pop(X86Registers::ebx);
+ pop(X86Registers::ebp);
+#elif CPU(ARM)
+#if CPU(ARM_TRADITIONAL)
+ pop(ARMRegisters::r8); // scratch register
+#endif
+ pop(ARMRegisters::r6);
+ pop(ARMRegisters::r5);
+ pop(ARMRegisters::r4);
+#elif CPU(SH4)
+ pop(SH4Registers::r13);
+ pop(SH4Registers::r11);
+#elif CPU(MIPS)
+ // Do nothing
+#endif
+ ret();
+ }
+
+public:
+ YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
+ : m_pattern(pattern)
+ , m_charSize(charSize)
+ , m_charScale(m_charSize == Char8 ? TimesOne : TimesTwo)
+ , m_shouldFallBack(false)
+ , m_checked(0)
+ {
+ }
+
+ void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
+ {
+ generateEnter();
+
+ Jump hasInput = checkInput();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
+ generateReturn();
+ hasInput.link(this);
+
+ if (compileMode == IncludeSubpatterns) {
+ for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
+ store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
+ }
+
+ if (!m_pattern.m_body->m_hasFixedSize)
+ setMatchStart(index);
+
+ initCallFrame();
+
+ // Compile the pattern to the internal 'YarrOp' representation.
+ opCompileBody(m_pattern.m_body);
+
+ // If we encountered anything we can't handle in the JIT code
+ // (e.g. backreferences) then return early.
+ if (m_shouldFallBack) {
+ jitObject.setFallBack(true);
+ return;
+ }
+
+ generate();
+ backtrack();
+
+ // Link & finalize the code.
+ LinkBuffer linkBuffer(*globalData, this, REGEXP_CODE_ID);
+ m_backtrackingState.linkDataLabels(linkBuffer);
+
+ if (compileMode == MatchOnly) {
+ if (m_charSize == Char8)
+ jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
+ else
+ jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
+ } else {
+ if (m_charSize == Char8)
+ jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
+ else
+ jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
+ }
+ jitObject.setFallBack(m_shouldFallBack);
+ }
+
+private:
+ YarrPattern& m_pattern;
+
+ YarrCharSize m_charSize;
+
+ Scale m_charScale;
+
+ // Used to detect regular expression constructs that are not currently
+ // supported in the JIT; fall back to the interpreter when this is detected.
+ bool m_shouldFallBack;
+
+ // The regular expression expressed as a linear sequence of operations.
+ Vector<YarrOp, 128> m_ops;
+
+ // This records the current input offset being applied due to the current
+ // set of alternatives we are nested within. E.g. when matching the
+ // character 'b' within the regular expression /abc/, we will know that
+ // the minimum size for the alternative is 3, checked upon entry to the
+ // alternative, and that 'b' is at offset 1 from the start, and as such
+ // when matching 'b' we need to apply an offset of -2 to the load.
+ //
+ // FIXME: This should go away. Rather than tracking this value throughout
+ // code generation, we should gather this information up front & store it
+ // on the YarrOp structure.
+ int m_checked;
+
+ // This class records state whilst generating the backtracking path of code.
+ BacktrackingState m_backtrackingState;
+};
+
+void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
+{
+ if (mode == MatchOnly)
+ YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject);
+ else
+ YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject);
+}
+
+}}
+
+#endif