about | summary | refs | log | tree | commit | diff | stats
path: root/src/3rdparty/masm/yarr/YarrPattern.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/masm/yarr/YarrPattern.h')
-rw-r--r-- src/3rdparty/masm/yarr/YarrPattern.h 350
1 files changed, 274 insertions, 76 deletions
diff --git a/src/3rdparty/masm/yarr/YarrPattern.h b/src/3rdparty/masm/yarr/YarrPattern.h
index e7d187c2b3..59decbac46 100644
--- a/src/3rdparty/masm/yarr/YarrPattern.h
+++ b/src/3rdparty/masm/yarr/YarrPattern.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
*
* Redistribution and use in source and binary forms, with or without
@@ -24,26 +24,27 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef YarrPattern_h
-#define YarrPattern_h
+#pragma once
+#include "RegExpKey.h"
+#include "YarrErrorCode.h"
+#include "YarrUnicodeProperties.h"
#include <wtf/CheckedArithmetic.h>
-#include <wtf/OwnPtr.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/RefCounted.h>
+#include <wtf/HashMap.h>
+#include <wtf/PrintStream.h>
#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
namespace JSC { namespace Yarr {
+struct YarrPattern;
struct PatternDisjunction;
struct CharacterRange {
- UChar begin;
- UChar end;
+ UChar32 begin { 0 };
+ UChar32 end { 0x10ffff };
- CharacterRange(UChar begin, UChar end)
+ CharacterRange(UChar32 begin, UChar32 end)
: begin(begin)
, end(end)
{
@@ -58,20 +59,38 @@ public:
// specified matches and ranges)
CharacterClass()
: m_table(0)
+ , m_tableInverted(false) // bit-field has no default initializer; clear it so it is never read indeterminate
+ , m_hasNonBMPCharacters(false)
+ , m_anyCharacter(false)
{
}
// Table-backed constructor: stores the caller's table pointer and inverted flag as given.
CharacterClass(const char* table, bool inverted)
: m_table(table)
, m_tableInverted(inverted)
+ , m_hasNonBMPCharacters(false) // the two remaining flags start cleared
+ , m_anyCharacter(false)
{
}
- Vector<UChar> m_matches;
+    // Builds a class directly from four explicit lists (matches, ranges, and their
+    // "Unicode" counterparts). No lookup table is attached and every flag is cleared.
+    CharacterClass(std::initializer_list<UChar32> matches, std::initializer_list<CharacterRange> ranges, std::initializer_list<UChar32> matchesUnicode, std::initializer_list<CharacterRange> rangesUnicode)
+        : m_matches(matches)
+        , m_ranges(ranges)
+        , m_matchesUnicode(matchesUnicode)
+        , m_rangesUnicode(rangesUnicode)
+        , m_table(nullptr) // m_table is a const char*; spell the null pointer as nullptr
+        , m_tableInverted(false)
+        , m_hasNonBMPCharacters(false)
+        , m_anyCharacter(false)
+    {
+    }
+
+ Vector<UChar32> m_matches;
Vector<CharacterRange> m_ranges;
- Vector<UChar> m_matchesUnicode;
+ Vector<UChar32> m_matchesUnicode;
Vector<CharacterRange> m_rangesUnicode;
const char* m_table;
- bool m_tableInverted;
+ bool m_tableInverted : 1;
+ bool m_hasNonBMPCharacters : 1;
+ bool m_anyCharacter : 1;
};
enum QuantifierType {
@@ -96,7 +115,7 @@ struct PatternTerm {
bool m_capture :1;
bool m_invert :1;
union {
- UChar patternCharacter;
+ UChar32 patternCharacter;
CharacterClass* characterClass;
unsigned backReferenceSubpatternId;
struct {
@@ -112,18 +131,19 @@ struct PatternTerm {
} anchors;
};
QuantifierType quantityType;
- Checked<unsigned> quantityCount;
- int inputPosition;
+ Checked<unsigned> quantityMinCount;
+ Checked<unsigned> quantityMaxCount;
+ unsigned inputPosition;
unsigned frameLocation;
- PatternTerm(UChar ch)
+ PatternTerm(UChar32 ch)
: type(PatternTerm::TypePatternCharacter)
, m_capture(false)
, m_invert(false)
{
patternCharacter = ch;
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(CharacterClass* charClass, bool invert)
@@ -133,7 +153,7 @@ struct PatternTerm {
{
characterClass = charClass;
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
@@ -146,7 +166,7 @@ struct PatternTerm {
parentheses.isCopy = false;
parentheses.isTerminal = false;
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(Type type, bool invert = false)
@@ -155,7 +175,7 @@ struct PatternTerm {
, m_invert(invert)
{
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(unsigned spatternId)
@@ -165,7 +185,7 @@ struct PatternTerm {
{
backReferenceSubpatternId = spatternId;
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
PatternTerm(bool bolAnchor, bool eolAnchor)
@@ -176,7 +196,7 @@ struct PatternTerm {
anchors.bolAnchor = bolAnchor;
anchors.eolAnchor = eolAnchor;
quantityType = QuantifierFixedCount;
- quantityCount = 1;
+ quantityMinCount = quantityMaxCount = 1;
}
static PatternTerm ForwardReference()
@@ -208,12 +228,32 @@ struct PatternTerm {
{
return m_capture;
}
-
+
+    // Valid only on parentheses-subpattern terms (asserted). True when the last
+    // subpattern id stored for the group is not below the group's own id.
+    bool containsAnyCaptures()
+    {
+        ASSERT(type == TypeParenthesesSubpattern);
+        const auto ownId = parentheses.subpatternId;
+        const auto lastId = parentheses.lastSubpatternId;
+        return !(lastId < ownId);
+    }
+
void quantify(unsigned count, QuantifierType type)
{
- quantityCount = count;
+ quantityMinCount = 0;
+ quantityMaxCount = count;
quantityType = type;
}
+
+    // Records an explicit minimum/maximum repetition count together with the
+    // quantifier kind.
+    void quantify(unsigned minCount, unsigned maxCount, QuantifierType type)
+    {
+        // A non-zero minimum that differs from the maximum is currently accepted
+        // for parentheses terms only.
+        ASSERT(this->type == TypeParenthesesSubpattern || minCount == 0 || minCount == maxCount);
+        ASSERT(minCount <= maxCount);
+
+        quantityType = type;
+        quantityMinCount = minCount;
+        quantityMaxCount = maxCount;
+    }
+
+ void dumpQuantifier(PrintStream&);
+ void dump(PrintStream&, YarrPattern*, unsigned);
};
struct PatternAlternative {
@@ -250,6 +290,8 @@ public:
return m_onceThrough;
}
+ void dump(PrintStream&, YarrPattern*, unsigned);
+
Vector<PatternTerm> m_terms;
PatternDisjunction* m_parent;
unsigned m_minimumSize;
@@ -270,12 +312,13 @@ public:
PatternAlternative* addNewAlternative()
{
- PatternAlternative* alternative = new PatternAlternative(this);
- m_alternatives.append(adoptPtr(alternative));
- return alternative;
+ // m_alternatives owns the new alternative; callers receive a non-owning pointer.
+ m_alternatives.append(std::make_unique<PatternAlternative>(this));
+ // unique_ptr<PatternAlternative>::get() already yields PatternAlternative*, so no cast is needed.
+ return m_alternatives.last().get();
}
- Vector<OwnPtr<PatternAlternative> > m_alternatives;
+ void dump(PrintStream&, YarrPattern*, unsigned);
+
+ Vector<std::unique_ptr<PatternAlternative>> m_alternatives;
PatternAlternative* m_parent;
unsigned m_minimumSize;
unsigned m_callFrameSize;
@@ -286,13 +329,17 @@ public:
// (please to be calling newlineCharacterClass() et al on your
// friendly neighborhood YarrPattern instance to get nicely
// cached copies).
-CharacterClass* newlineCreate();
-CharacterClass* digitsCreate();
-CharacterClass* spacesCreate();
-CharacterClass* wordcharCreate();
-CharacterClass* nondigitsCreate();
-CharacterClass* nonspacesCreate();
-CharacterClass* nonwordcharCreate();
+
+std::unique_ptr<CharacterClass> anycharCreate();
+std::unique_ptr<CharacterClass> newlineCreate();
+std::unique_ptr<CharacterClass> digitsCreate();
+std::unique_ptr<CharacterClass> spacesCreate();
+std::unique_ptr<CharacterClass> wordcharCreate();
+std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate();
+std::unique_ptr<CharacterClass> nondigitsCreate();
+std::unique_ptr<CharacterClass> nonspacesCreate();
+std::unique_ptr<CharacterClass> nonwordcharCreate();
+std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate();
struct TermChain {
TermChain(PatternTerm term)
@@ -303,27 +350,37 @@ struct TermChain {
Vector<TermChain> hotTerms;
};
+
struct YarrPattern {
- JS_EXPORT_PRIVATE YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error);
+ JS_EXPORT_PRIVATE YarrPattern(const String& pattern, RegExpFlags, ErrorCode&, void* stackLimit = nullptr);
void reset()
{
m_numSubpatterns = 0;
m_maxBackReference = 0;
+ m_initialStartValueFrameLocation = 0;
m_containsBackreferences = false;
m_containsBOL = false;
-
- newlineCached = 0;
- digitsCached = 0;
- spacesCached = 0;
- wordcharCached = 0;
- nondigitsCached = 0;
- nonspacesCached = 0;
- nonwordcharCached = 0;
+ m_containsUnsignedLengthPattern = false;
+ m_hasCopiedParenSubexpressions = false;
+ m_saveInitialStartValue = false;
+
+ anycharCached = nullptr;
+ newlineCached = nullptr;
+ digitsCached = nullptr;
+ spacesCached = nullptr;
+ wordcharCached = nullptr;
+ wordUnicodeIgnoreCaseCharCached = nullptr;
+ nondigitsCached = nullptr;
+ nonspacesCached = nullptr;
+ nonwordcharCached = nullptr;
+ nonwordUnicodeIgnoreCasecharCached = nullptr;
+ unicodePropertiesCached.clear();
m_disjunctions.clear();
m_userCharacterClasses.clear();
+ m_captureGroupNames.shrink(0);
}
bool containsIllegalBackReference()
@@ -331,71 +388,212 @@ struct YarrPattern {
return m_maxBackReference > m_numSubpatterns;
}
+ bool containsUnsignedLengthPattern() // Plain getter for the m_containsUnsignedLengthPattern flag.
+ {
+ return m_containsUnsignedLengthPattern;
+ }
+
+    // Hands back the cached any-character class. On first use the class is
+    // created, stored in m_userCharacterClasses (which owns it), and its raw
+    // pointer is cached.
+    CharacterClass* anyCharacterClass()
+    {
+        if (anycharCached)
+            return anycharCached;
+
+        m_userCharacterClasses.append(anycharCreate());
+        anycharCached = m_userCharacterClasses.last().get();
+        return anycharCached;
+    }
// Returns newlineCached. When it is still null, the added lines append the
// newlineCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* newlineCharacterClass()
{
- if (!newlineCached)
- m_userCharacterClasses.append(adoptPtr(newlineCached = newlineCreate()));
+ if (!newlineCached) {
+ m_userCharacterClasses.append(newlineCreate());
+ newlineCached = m_userCharacterClasses.last().get();
+ }
return newlineCached;
}
// Returns digitsCached. When it is still null, the added lines append the
// digitsCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* digitsCharacterClass()
{
- if (!digitsCached)
- m_userCharacterClasses.append(adoptPtr(digitsCached = digitsCreate()));
+ if (!digitsCached) {
+ m_userCharacterClasses.append(digitsCreate());
+ digitsCached = m_userCharacterClasses.last().get();
+ }
return digitsCached;
}
// Returns spacesCached. When it is still null, the added lines append the
// spacesCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* spacesCharacterClass()
{
- if (!spacesCached)
- m_userCharacterClasses.append(adoptPtr(spacesCached = spacesCreate()));
+ if (!spacesCached) {
+ m_userCharacterClasses.append(spacesCreate());
+ spacesCached = m_userCharacterClasses.last().get();
+ }
return spacesCached;
}
// Returns wordcharCached. When it is still null, the added lines append the
// wordcharCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* wordcharCharacterClass()
{
- if (!wordcharCached)
- m_userCharacterClasses.append(adoptPtr(wordcharCached = wordcharCreate()));
+ if (!wordcharCached) {
+ m_userCharacterClasses.append(wordcharCreate());
+ wordcharCached = m_userCharacterClasses.last().get();
+ }
return wordcharCached;
}
+    // Hands back the cached class produced by wordUnicodeIgnoreCaseCharCreate().
+    // On first use the class is created, stored in m_userCharacterClasses (which
+    // owns it), and its raw pointer is cached.
+    CharacterClass* wordUnicodeIgnoreCaseCharCharacterClass()
+    {
+        if (wordUnicodeIgnoreCaseCharCached)
+            return wordUnicodeIgnoreCaseCharCached;
+
+        m_userCharacterClasses.append(wordUnicodeIgnoreCaseCharCreate());
+        wordUnicodeIgnoreCaseCharCached = m_userCharacterClasses.last().get();
+        return wordUnicodeIgnoreCaseCharCached;
+    }
// Returns nondigitsCached. When it is still null, the added lines append the
// nondigitsCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* nondigitsCharacterClass()
{
- if (!nondigitsCached)
- m_userCharacterClasses.append(adoptPtr(nondigitsCached = nondigitsCreate()));
+ if (!nondigitsCached) {
+ m_userCharacterClasses.append(nondigitsCreate());
+ nondigitsCached = m_userCharacterClasses.last().get();
+ }
return nondigitsCached;
}
// Returns nonspacesCached. When it is still null, the added lines append the
// nonspacesCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* nonspacesCharacterClass()
{
- if (!nonspacesCached)
- m_userCharacterClasses.append(adoptPtr(nonspacesCached = nonspacesCreate()));
+ if (!nonspacesCached) {
+ m_userCharacterClasses.append(nonspacesCreate());
+ nonspacesCached = m_userCharacterClasses.last().get();
+ }
return nonspacesCached;
}
// Returns nonwordcharCached. When it is still null, the added lines append the
// nonwordcharCreate() result to m_userCharacterClasses and cache its raw pointer.
CharacterClass* nonwordcharCharacterClass()
{
- if (!nonwordcharCached)
- m_userCharacterClasses.append(adoptPtr(nonwordcharCached = nonwordcharCreate()));
+ if (!nonwordcharCached) {
+ m_userCharacterClasses.append(nonwordcharCreate());
+ nonwordcharCached = m_userCharacterClasses.last().get();
+ }
return nonwordcharCached;
}
+    // Hands back the cached class produced by nonwordUnicodeIgnoreCaseCharCreate().
+    // On first use the class is created, stored in m_userCharacterClasses (which
+    // owns it), and its raw pointer is cached.
+    CharacterClass* nonwordUnicodeIgnoreCaseCharCharacterClass()
+    {
+        if (nonwordUnicodeIgnoreCasecharCached)
+            return nonwordUnicodeIgnoreCasecharCached;
+
+        m_userCharacterClasses.append(nonwordUnicodeIgnoreCaseCharCreate());
+        nonwordUnicodeIgnoreCasecharCached = m_userCharacterClasses.last().get();
+        return nonwordUnicodeIgnoreCasecharCached;
+    }
+    // Looks up the class for a Unicode property id in unicodePropertiesCached,
+    // keyed by the id cast to unsigned. On a miss the class is created, stored in
+    // m_userCharacterClasses (which owns it), and its raw pointer is added to the
+    // cache. The id must be at or above BaseUnicodePropertyID (asserted).
+    CharacterClass* unicodeCharacterClassFor(BuiltInCharacterClassID unicodeClassID)
+    {
+        ASSERT(unicodeClassID >= BuiltInCharacterClassID::BaseUnicodePropertyID);
+
+        const unsigned cacheKey = static_cast<unsigned>(unicodeClassID);
+
+        if (unicodePropertiesCached.find(cacheKey) != unicodePropertiesCached.end())
+            return unicodePropertiesCached.get(cacheKey);
+
+        m_userCharacterClasses.append(createUnicodeCharacterClassFor(unicodeClassID));
+        CharacterClass* created = m_userCharacterClasses.last().get();
+        unicodePropertiesCached.add(cacheKey, created);
+        return created;
+    }
+
+ void dumpPattern(const String& pattern);
+ void dumpPattern(PrintStream& out, const String& pattern);
+
+ bool global() const { return m_flags & FlagGlobal; } // Each accessor in this group masks m_flags with one Flag* constant and returns the result as bool.
+ bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
+ bool multiline() const { return m_flags & FlagMultiline; }
+ bool sticky() const { return m_flags & FlagSticky; }
+ bool unicode() const { return m_flags & FlagUnicode; }
+ bool dotAll() const { return m_flags & FlagDotAll; }
- bool m_ignoreCase : 1;
- bool m_multiline : 1;
bool m_containsBackreferences : 1;
bool m_containsBOL : 1;
- unsigned m_numSubpatterns;
- unsigned m_maxBackReference;
+ bool m_containsUnsignedLengthPattern : 1;
+ bool m_hasCopiedParenSubexpressions : 1;
+ bool m_saveInitialStartValue : 1;
+ RegExpFlags m_flags;
+ unsigned m_numSubpatterns { 0 };
+ unsigned m_maxBackReference { 0 };
+ unsigned m_initialStartValueFrameLocation { 0 };
PatternDisjunction* m_body;
- Vector<OwnPtr<PatternDisjunction>, 4> m_disjunctions;
- Vector<OwnPtr<CharacterClass> > m_userCharacterClasses;
+ Vector<std::unique_ptr<PatternDisjunction>, 4> m_disjunctions;
+ Vector<std::unique_ptr<CharacterClass>> m_userCharacterClasses;
+ Vector<String> m_captureGroupNames;
+ HashMap<String, unsigned> m_namedGroupToParenIndex;
private:
- const char* compile(const String& patternString);
-
- CharacterClass* newlineCached;
- CharacterClass* digitsCached;
- CharacterClass* spacesCached;
- CharacterClass* wordcharCached;
- CharacterClass* nondigitsCached;
- CharacterClass* nonspacesCached;
- CharacterClass* nonwordcharCached;
+ ErrorCode compile(const String& patternString, void* stackLimit);
+
+ CharacterClass* anycharCached { nullptr };
+ CharacterClass* newlineCached { nullptr };
+ CharacterClass* digitsCached { nullptr };
+ CharacterClass* spacesCached { nullptr };
+ CharacterClass* wordcharCached { nullptr };
+ CharacterClass* wordUnicodeIgnoreCaseCharCached { nullptr };
+ CharacterClass* nondigitsCached { nullptr };
+ CharacterClass* nonspacesCached { nullptr };
+ CharacterClass* nonwordcharCached { nullptr };
+ CharacterClass* nonwordUnicodeIgnoreCasecharCached { nullptr };
+ HashMap<unsigned, CharacterClass*> unicodePropertiesCached;
};
-} } // namespace JSC::Yarr
+ void indentForNestingLevel(PrintStream&, unsigned);
+ void dumpUChar32(PrintStream&, UChar32);
+ void dumpCharacterClass(PrintStream&, YarrPattern*, CharacterClass*);
+
+    // Two uintptr_t fields plus helpers that convert each field's byte offset
+    // into an index counted in uintptr_t-sized units.
+    struct BackTrackInfoPatternCharacter {
+        uintptr_t begin; // Only needed for unicode patterns
+        uintptr_t matchAmount;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoPatternCharacter, begin) / sizeof(uintptr_t);
+        }
+
+        static unsigned matchAmountIndex()
+        {
+            return offsetof(BackTrackInfoPatternCharacter, matchAmount) / sizeof(uintptr_t);
+        }
+    };
-#endif // YarrPattern_h
+    // Two uintptr_t fields plus helpers that convert each field's byte offset
+    // into an index counted in uintptr_t-sized units.
+    struct BackTrackInfoCharacterClass {
+        uintptr_t begin; // Only needed for unicode patterns
+        uintptr_t matchAmount;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoCharacterClass, begin) / sizeof(uintptr_t);
+        }
+
+        static unsigned matchAmountIndex()
+        {
+            return offsetof(BackTrackInfoCharacterClass, matchAmount) / sizeof(uintptr_t);
+        }
+    };
+
+    // Two uintptr_t fields plus helpers that convert each field's byte offset
+    // into an index counted in uintptr_t-sized units.
+    struct BackTrackInfoBackReference {
+        uintptr_t begin; // Not really needed for greedy quantifiers.
+        uintptr_t matchAmount; // Not really needed for fixed quantifiers.
+
+        // Static, like the accessors on the sibling BackTrackInfo* structs: neither
+        // helper reads instance state. Calls made through an object still compile.
+        static unsigned beginIndex() { return offsetof(BackTrackInfoBackReference, begin) / sizeof(uintptr_t); }
+        static unsigned matchAmountIndex() { return offsetof(BackTrackInfoBackReference, matchAmount) / sizeof(uintptr_t); }
+    };
+
+ struct BackTrackInfoAlternative {
+ union {
+ uintptr_t offset; // sole member of this anonymous union
+ };
+ };
+
+    // One uintptr_t field plus a helper that converts its byte offset into an
+    // index counted in uintptr_t-sized units.
+    struct BackTrackInfoParentheticalAssertion {
+        uintptr_t begin;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoParentheticalAssertion, begin) / sizeof(uintptr_t);
+        }
+    };
+
+    // Two uintptr_t fields plus helpers that convert each field's byte offset
+    // into an index counted in uintptr_t-sized units.
+    struct BackTrackInfoParenthesesOnce {
+        uintptr_t begin;
+        uintptr_t returnAddress;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoParenthesesOnce, begin) / sizeof(uintptr_t);
+        }
+
+        static unsigned returnAddressIndex()
+        {
+            return offsetof(BackTrackInfoParenthesesOnce, returnAddress) / sizeof(uintptr_t);
+        }
+    };
+
+    // One uintptr_t field plus a helper that converts its byte offset into an
+    // index counted in uintptr_t-sized units.
+    struct BackTrackInfoParenthesesTerminal {
+        uintptr_t begin;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoParenthesesTerminal, begin) / sizeof(uintptr_t);
+        }
+    };
+
+    // Four uintptr_t fields plus helpers that convert each field's byte offset
+    // into an index counted in uintptr_t-sized units.
+    struct BackTrackInfoParentheses {
+        uintptr_t begin;
+        uintptr_t returnAddress;
+        uintptr_t matchAmount;
+        uintptr_t parenContextHead;
+
+        static unsigned beginIndex()
+        {
+            return offsetof(BackTrackInfoParentheses, begin) / sizeof(uintptr_t);
+        }
+
+        static unsigned returnAddressIndex()
+        {
+            return offsetof(BackTrackInfoParentheses, returnAddress) / sizeof(uintptr_t);
+        }
+
+        static unsigned matchAmountIndex()
+        {
+            return offsetof(BackTrackInfoParentheses, matchAmount) / sizeof(uintptr_t);
+        }
+
+        static unsigned parenContextHeadIndex()
+        {
+            return offsetof(BackTrackInfoParentheses, parenContextHead) / sizeof(uintptr_t);
+        }
+    };
+
+} } // namespace JSC::Yarr