diff options
Diffstat (limited to 'src/3rdparty/v8/src/jsregexp.h')
-rw-r--r-- | src/3rdparty/v8/src/jsregexp.h | 317 |
1 files changed, 187 insertions, 130 deletions
diff --git a/src/3rdparty/v8/src/jsregexp.h b/src/3rdparty/v8/src/jsregexp.h index 20313ca..96825ce 100644 --- a/src/3rdparty/v8/src/jsregexp.h +++ b/src/3rdparty/v8/src/jsregexp.h @@ -71,7 +71,8 @@ class RegExpImpl { // Returns false if compilation fails. static Handle<Object> Compile(Handle<JSRegExp> re, Handle<String> pattern, - Handle<String> flags); + Handle<String> flags, + Zone* zone); // See ECMA-262 section 15.10.6.2. // This function calls the garbage collector if necessary. @@ -92,6 +93,14 @@ class RegExpImpl { JSRegExp::Flags flags, Handle<String> match_pattern); + + static int AtomExecRaw(Handle<JSRegExp> regexp, + Handle<String> subject, + int index, + int32_t* output, + int output_size); + + static Handle<Object> AtomExec(Handle<JSRegExp> regexp, Handle<String> subject, int index, @@ -104,31 +113,71 @@ class RegExpImpl { // This ensures that the regexp is compiled for the subject, and that // the subject is flat. // Returns the number of integer spaces required by IrregexpExecOnce - // as its "registers" argument. If the regexp cannot be compiled, + // as its "registers" argument. If the regexp cannot be compiled, // an exception is set as pending, and this function returns negative. static int IrregexpPrepare(Handle<JSRegExp> regexp, Handle<String> subject); - // Execute a regular expression once on the subject, starting from - // character "index". - // If successful, returns RE_SUCCESS and set the capture positions - // in the first registers. + // Execute a regular expression on the subject, starting from index. + // If matching succeeds, return the number of matches. This can be larger + // than one in the case of global regular expressions. + // The captures and subcaptures are stored into the registers vector. // If matching fails, returns RE_FAILURE. // If execution fails, sets a pending exception and returns RE_EXCEPTION. - static IrregexpResult IrregexpExecOnce(Handle<JSRegExp> regexp, - Handle<String> subject, - int index, - Vector<int> registers); + static int IrregexpExecRaw(Handle<JSRegExp> regexp, + Handle<String> subject, + int index, + int32_t* output, + int output_size); // Execute an Irregexp bytecode pattern. // On a successful match, the result is a JSArray containing - // captured positions. On a failure, the result is the null value. + // captured positions. On a failure, the result is the null value. // Returns an empty handle in case of an exception. static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, Handle<String> subject, int index, Handle<JSArray> lastMatchInfo); + // Set last match info. If match is NULL, then setting captures is omitted. + static Handle<JSArray> SetLastMatchInfo(Handle<JSArray> last_match_info, + Handle<String> subject, + int capture_count, + int32_t* match); + + + class GlobalCache { + public: + GlobalCache(Handle<JSRegExp> regexp, + Handle<String> subject, + bool is_global, + Isolate* isolate); + + ~GlobalCache(); + + // Fetch the next entry in the cache for global regexp match results. + // This does not set the last match info. Upon failure, NULL is returned. + // The cause can be checked with Result(). The previous + // result is still in available in memory when a failure happens. + int32_t* FetchNext(); + + int32_t* LastSuccessfulMatch(); + + inline bool HasException() { return num_matches_ < 0; } + + private: + int num_matches_; + int max_matches_; + int current_match_index_; + int registers_per_match_; + // Pointer to the last set of captures. + int32_t* register_array_; + int register_array_size_; + Handle<JSRegExp> regexp_; + Handle<String> subject_; + }; + + // Array index in the lastMatchInfo array. static const int kLastCaptureCount = 0; static const int kLastSubject = 1; @@ -188,30 +237,10 @@ class RegExpImpl { static const int kRegWxpCompiledLimit = 1 * MB; private: - static String* last_ascii_string_; - static String* two_byte_cached_string_; - static bool CompileIrregexp( Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); static inline bool EnsureCompiledIrregexp( Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); - - - // Set the subject cache. The previous string buffer is not deleted, so the - // caller should ensure that it doesn't leak. - static void SetSubjectCache(String* subject, - char* utf8_subject, - int uft8_length, - int character_position, - int utf8_position); - - // A one element cache of the last utf8_subject string and its length. The - // subject JS String object is cached in the heap. We also cache a - // translation between position and utf8 position. - static char* utf8_subject_cache_; - static int utf8_length_cache_; - static int utf8_position_; - static int character_position_; }; @@ -233,7 +262,8 @@ class CharacterRange { // For compatibility with the CHECK_OK macro CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } - static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); + static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, + Zone* zone); static Vector<const int> GetWordBounds(); static inline CharacterRange Singleton(uc16 value) { return CharacterRange(value, value); @@ -253,11 +283,13 @@ class CharacterRange { bool is_valid() { return from_ <= to_; } bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } bool IsSingleton() { return (from_ == to_); } - void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); + void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii, + Zone* zone); static void Split(ZoneList<CharacterRange>* base, Vector<const int> overlay, ZoneList<CharacterRange>** included, - ZoneList<CharacterRange>** excluded); + ZoneList<CharacterRange>** excluded, + Zone* zone); // Whether a range list is in canonical form: Ranges ordered by from value, // and ranges non-overlapping and non-adjacent. static bool IsCanonical(ZoneList<CharacterRange>* ranges); @@ -268,7 +300,8 @@ class CharacterRange { static void Canonicalize(ZoneList<CharacterRange>* ranges); // Negate the contents of a character range in canonical form. static void Negate(ZoneList<CharacterRange>* src, - ZoneList<CharacterRange>* dst); + ZoneList<CharacterRange>* dst, + Zone* zone); static const int kStartMarker = (1 << 24); static const int kPayloadMask = (1 << 24) - 1; @@ -283,7 +316,7 @@ class CharacterRange { class OutSet: public ZoneObject { public: OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } - OutSet* Extend(unsigned value); + OutSet* Extend(unsigned value, Zone* zone); bool Get(unsigned value); static const unsigned kFirstLimit = 32; @@ -291,12 +324,12 @@ class OutSet: public ZoneObject { // Destructively set a value in this set. In most cases you want // to use Extend instead to ensure that only one instance exists // that contains the same values. - void Set(unsigned value); + void Set(unsigned value, Zone* zone); // The successors are a list of sets that contain the same values // as this set and the one more value that is not present in this // set. - ZoneList<OutSet*>* successors() { return successors_; } + ZoneList<OutSet*>* successors(Zone* zone) { return successors_; } OutSet(uint32_t first, ZoneList<unsigned>* remaining) : first_(first), remaining_(remaining), successors_(NULL) { } @@ -311,6 +344,8 @@ class OutSet: public ZoneObject { // Used for mapping character ranges to choices. class DispatchTable : public ZoneObject { public: + explicit DispatchTable(Zone* zone) : tree_(zone) { } + class Entry { public: Entry() : from_(0), to_(0), out_set_(NULL) { } @@ -319,7 +354,9 @@ class DispatchTable : public ZoneObject { uc16 from() { return from_; } uc16 to() { return to_; } void set_to(uc16 value) { to_ = value; } - void AddValue(int value) { out_set_ = out_set_->Extend(value); } + void AddValue(int value, Zone* zone) { + out_set_ = out_set_->Extend(value, zone); + } OutSet* out_set() { return out_set_; } private: uc16 from_; @@ -343,12 +380,14 @@ class DispatchTable : public ZoneObject { } }; - void AddRange(CharacterRange range, int value); + void AddRange(CharacterRange range, int value, Zone* zone); OutSet* Get(uc16 value); void Dump(); template <typename Callback> - void ForEach(Callback* callback) { return tree()->ForEach(callback); } + void ForEach(Callback* callback) { + return tree()->ForEach(callback); + } private: // There can't be a static empty set since it allocates its @@ -528,7 +567,8 @@ extern int kUninitializedRegExpNodePlaceHolder; class RegExpNode: public ZoneObject { public: - RegExpNode() : replacement_(NULL), trace_count_(0) { + explicit RegExpNode(Zone* zone) + : replacement_(NULL), trace_count_(0), zone_(zone) { bm_info_[0] = bm_info_[1] = NULL; } virtual ~RegExpNode(); @@ -574,9 +614,14 @@ class RegExpNode: public ZoneObject { // Collects information on the possible code units (mod 128) that can match if // we look forward. This is used for a Boyer-Moore-like string searching // implementation. TODO(erikcorry): This should share more code with - // EatsAtLeast, GetQuickCheckDetails. - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { + // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit + // the number of nodes we are willing to look at in order to create this data. + static const int kFillInBMBudget = 200; + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start) { UNREACHABLE(); } @@ -617,6 +662,8 @@ class RegExpNode: public ZoneObject { return bm_info_[not_at_start ? 1 : 0]; } + Zone* zone() const { return zone_; } + protected: enum LimitResult { DONE, CONTINUE }; RegExpNode* replacement_; @@ -638,6 +685,8 @@ class RegExpNode: public ZoneObject { // deferred operations in the current trace and generating a goto. int trace_count_; BoyerMooreLookahead* bm_info_[2]; + + Zone* zone_; }; @@ -671,13 +720,17 @@ class Interval { class SeqRegExpNode: public RegExpNode { public: explicit SeqRegExpNode(RegExpNode* on_success) - : on_success_(on_success) { } + : RegExpNode(on_success->zone()), on_success_(on_success) { } RegExpNode* on_success() { return on_success_; } void set_on_success(RegExpNode* node) { on_success_ = node; } virtual RegExpNode* FilterASCII(int depth); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { - on_success_->FillInBMInfo(offset, bm, not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start) { + on_success_->FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } @@ -730,8 +783,11 @@ class ActionNode: public SeqRegExpNode { return on_success()->GetQuickCheckDetails( details, compiler, filled_in, not_at_start); } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); Type type() { return type_; } // TODO(erikcorry): We should allow some action nodes in greedy loops. virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } @@ -782,8 +838,8 @@ class TextNode: public SeqRegExpNode { TextNode(RegExpCharacterClass* that, RegExpNode* on_success) : SeqRegExpNode(on_success), - elms_(new ZoneList<TextElement>(1)) { - elms_->Add(TextElement::CharClass(that)); + elms_(new(zone()) ZoneList<TextElement>(1, zone())) { + elms_->Add(TextElement::CharClass(that), zone()); } virtual void Accept(NodeVisitor* visitor); virtual void Emit(RegExpCompiler* compiler, Trace* trace); @@ -799,8 +855,11 @@ class TextNode: public SeqRegExpNode { virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); void CalculateOffsets(); virtual RegExpNode* FilterASCII(int depth); @@ -836,19 +895,19 @@ class AssertionNode: public SeqRegExpNode { AFTER_NEWLINE }; static AssertionNode* AtEnd(RegExpNode* on_success) { - return new AssertionNode(AT_END, on_success); + return new(on_success->zone()) AssertionNode(AT_END, on_success); } static AssertionNode* AtStart(RegExpNode* on_success) { - return new AssertionNode(AT_START, on_success); + return new(on_success->zone()) AssertionNode(AT_START, on_success); } static AssertionNode* AtBoundary(RegExpNode* on_success) { - return new AssertionNode(AT_BOUNDARY, on_success); + return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); } static AssertionNode* AtNonBoundary(RegExpNode* on_success) { - return new AssertionNode(AT_NON_BOUNDARY, on_success); + return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success); } static AssertionNode* AfterNewline(RegExpNode* on_success) { - return new AssertionNode(AFTER_NEWLINE, on_success); + return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); } virtual void Accept(NodeVisitor* visitor); virtual void Emit(RegExpCompiler* compiler, Trace* trace); @@ -859,8 +918,11 @@ class AssertionNode: public SeqRegExpNode { RegExpCompiler* compiler, int filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); AssertionNodeType type() { return type_; } void set_type(AssertionNodeType type) { type_ = type; } @@ -897,8 +959,11 @@ class BackReferenceNode: public SeqRegExpNode { bool not_at_start) { return; } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); private: int start_reg_; @@ -909,7 +974,8 @@ class BackReferenceNode: public SeqRegExpNode { class EndNode: public RegExpNode { public: enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; - explicit EndNode(Action action) : action_(action) { } + explicit EndNode(Action action, Zone* zone) + : RegExpNode(zone), action_(action) { } virtual void Accept(NodeVisitor* visitor); virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual int EatsAtLeast(int still_to_find, @@ -922,8 +988,11 @@ class EndNode: public RegExpNode { // Returning 0 from EatsAtLeast should ensure we never get here. UNREACHABLE(); } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start) { // Returning 0 from EatsAtLeast should ensure we never get here. UNREACHABLE(); } @@ -938,8 +1007,9 @@ class NegativeSubmatchSuccess: public EndNode { NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, int clear_capture_count, - int clear_capture_start) - : EndNode(NEGATIVE_SUBMATCH_SUCCESS), + int clear_capture_start, + Zone* zone) + : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), stack_pointer_register_(stack_pointer_reg), current_position_register_(position_reg), clear_capture_count_(clear_capture_count), @@ -975,7 +1045,7 @@ class Guard: public ZoneObject { class GuardedAlternative { public: explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { } - void AddGuard(Guard* guard); + void AddGuard(Guard* guard, Zone* zone); RegExpNode* node() { return node_; } void set_node(RegExpNode* node) { node_ = node; } ZoneList<Guard*>* guards() { return guards_; } @@ -991,13 +1061,17 @@ class AlternativeGeneration; class ChoiceNode: public RegExpNode { public: - explicit ChoiceNode(int expected_size) - : alternatives_(new ZoneList<GuardedAlternative>(expected_size)), + explicit ChoiceNode(int expected_size, Zone* zone) + : RegExpNode(zone), + alternatives_(new(zone) + ZoneList<GuardedAlternative>(expected_size, zone)), table_(NULL), not_at_start_(false), being_calculated_(false) { } virtual void Accept(NodeVisitor* visitor); - void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } + void AddAlternative(GuardedAlternative node) { + alternatives()->Add(node, zone()); + } ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } DispatchTable* GetTable(bool ignore_case); virtual void Emit(RegExpCompiler* compiler, Trace* trace); @@ -1012,8 +1086,11 @@ class ChoiceNode: public RegExpNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); bool being_calculated() { return being_calculated_; } bool not_at_start() { return not_at_start_; } @@ -1050,8 +1127,9 @@ class ChoiceNode: public RegExpNode { class NegativeLookaheadChoiceNode: public ChoiceNode { public: explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, - GuardedAlternative then_do_this) - : ChoiceNode(2) { + GuardedAlternative then_do_this, + Zone* zone) + : ChoiceNode(2, zone) { AddAlternative(this_must_fail); AddAlternative(then_do_this); } @@ -1062,9 +1140,13 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { - alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start) { + alternatives_->at(1).node()->FillInBMInfo( + offset, recursion_depth + 1, budget - 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } // For a negative lookahead we don't emit the quick check for the @@ -1079,8 +1161,8 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { class LoopChoiceNode: public ChoiceNode { public: - explicit LoopChoiceNode(bool body_can_be_zero_length) - : ChoiceNode(2), + explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) + : ChoiceNode(2, zone), loop_node_(NULL), continue_node_(NULL), body_can_be_zero_length_(body_can_be_zero_length) { } @@ -1094,8 +1176,11 @@ class LoopChoiceNode: public ChoiceNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + int budget, + BoyerMooreLookahead* bm, + bool not_at_start); RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } @@ -1162,15 +1247,15 @@ ContainedInLattice AddRange(ContainedInLattice a, class BoyerMoorePositionInfo : public ZoneObject { public: - BoyerMoorePositionInfo() - : map_(new ZoneList<bool>(kMapSize)), + explicit BoyerMoorePositionInfo(Zone* zone) + : map_(new(zone) ZoneList<bool>(kMapSize, zone)), map_count_(0), w_(kNotYet), s_(kNotYet), d_(kNotYet), surrogate_(kNotYet) { for (int i = 0; i < kMapSize; i++) { - map_->Add(false); + map_->Add(false, zone); } } @@ -1199,7 +1284,7 @@ class BoyerMoorePositionInfo : public ZoneObject { class BoyerMooreLookahead : public ZoneObject { public: - BoyerMooreLookahead(int length, RegExpCompiler* compiler); + BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone); int length() { return length_; } int max_char() { return max_char_; } @@ -1401,12 +1486,13 @@ class Trace { void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); private: - int FindAffectedRegisters(OutSet* affected_registers); + int FindAffectedRegisters(OutSet* affected_registers, Zone* zone); void PerformDeferredActions(RegExpMacroAssembler* macro, - int max_register, - OutSet& affected_registers, - OutSet* registers_to_pop, - OutSet* registers_to_clear); + int max_register, + OutSet& affected_registers, + OutSet* registers_to_pop, + OutSet* registers_to_clear, + Zone* zone); void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register, OutSet& registers_to_pop, @@ -1439,15 +1525,17 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT) // dispatch table of a choice node. class DispatchTableConstructor: public NodeVisitor { public: - DispatchTableConstructor(DispatchTable* table, bool ignore_case) + DispatchTableConstructor(DispatchTable* table, bool ignore_case, + Zone* zone) : table_(table), choice_index_(-1), - ignore_case_(ignore_case) { } + ignore_case_(ignore_case), + zone_(zone) { } void BuildTable(ChoiceNode* node); void AddRange(CharacterRange range) { - table()->AddRange(range, choice_index_); + table()->AddRange(range, choice_index_, zone_); } void AddInverse(ZoneList<CharacterRange>* ranges); @@ -1464,6 +1552,7 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT) DispatchTable* table_; int choice_index_; bool ignore_case_; + Zone* zone_; }; @@ -1545,48 +1634,16 @@ class RegExpEngine: public AllStatic { static CompilationResult Compile(RegExpCompileData* input, bool ignore_case, + bool global, bool multiline, Handle<String> pattern, Handle<String> sample_subject, - bool is_ascii); + bool is_ascii, Zone* zone); static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); }; -class OffsetsVector { - public: - inline OffsetsVector(int num_registers, Isolate* isolate) - : offsets_vector_length_(num_registers) { - if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { - vector_ = NewArray<int>(offsets_vector_length_); - } else { - vector_ = isolate->jsregexp_static_offsets_vector(); - } - } - inline ~OffsetsVector() { - if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { - DeleteArray(vector_); - vector_ = NULL; - } - } - inline int* vector() { return vector_; } - inline int length() { return offsets_vector_length_; } - - static const int kStaticOffsetsVectorSize = 50; - - private: - static Address static_offsets_vector_address(Isolate* isolate) { - return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()); - } - - int* vector_; - int offsets_vector_length_; - - friend class ExternalReference; -}; - - } } // namespace v8::internal #endif // V8_JSREGEXP_H_ |