aboutsummaryrefslogtreecommitdiffstats
path: root/src/3rdparty/masm/yarr/YarrPattern.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/masm/yarr/YarrPattern.cpp')
-rw-r--r--src/3rdparty/masm/yarr/YarrPattern.cpp137
1 files changed, 48 insertions, 89 deletions
diff --git a/src/3rdparty/masm/yarr/YarrPattern.cpp b/src/3rdparty/masm/yarr/YarrPattern.cpp
index ac66ea1b9a..9c1cdadf3f 100644
--- a/src/3rdparty/masm/yarr/YarrPattern.cpp
+++ b/src/3rdparty/masm/yarr/YarrPattern.cpp
@@ -33,12 +33,9 @@
#include "YarrParser.h"
#include <wtf/DataLog.h>
#include <wtf/Optional.h>
-//#include <wtf/Threading.h>
#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
-using namespace WTF;
-
namespace JSC { namespace Yarr {
#include "RegExpJitTables.h"
@@ -334,7 +331,7 @@ private:
ranges.insert(i, CharacterRange(lo, hi));
return;
}
- // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the beginning
// If the new range start at or before the end of the last range, then the overlap (if it starts one after the
// end of the last range they concatenate, which is just as good.
if (lo <= (ranges[i].end + 1)) {
@@ -446,9 +443,9 @@ public:
{
}
- void reset()
+ void resetForReparsing()
{
- m_pattern.reset();
+ m_pattern.resetForReparsing();
m_characterClassConstructor.reset();
auto body = std::make_unique<PatternDisjunction>();
@@ -456,7 +453,17 @@ public:
m_alternative = body->addNewAlternative();
m_pattern.m_disjunctions.append(WTFMove(body));
}
-
+
+ void saveUnmatchedNamedForwardReferences()
+ {
+ m_unmatchedNamedForwardReferences.shrink(0);
+
+ for (auto& entry : m_pattern.m_namedForwardReferences) {
+ if (!m_pattern.m_captureGroupNames.contains(entry))
+ m_unmatchedNamedForwardReferences.append(entry);
+ }
+ }
+
void assertionBOL()
{
if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) {
@@ -666,12 +673,24 @@ public:
m_alternative->m_terms.append(PatternTerm(subpatternId));
}
- void atomNamedBackReference(String subpatternName)
+ void atomNamedBackReference(const String& subpatternName)
{
ASSERT(m_pattern.m_namedGroupToParenIndex.find(subpatternName) != m_pattern.m_namedGroupToParenIndex.end());
atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName));
}
+ bool isValidNamedForwardReference(const String& subpatternName)
+ {
+ return !m_unmatchedNamedForwardReferences.contains(subpatternName);
+ }
+
+ void atomNamedForwardReference(const String& subpatternName)
+ {
+ if (!m_pattern.m_namedForwardReferences.contains(subpatternName))
+ m_pattern.m_namedForwardReferences.append(subpatternName);
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ }
+
// deep copy the argument disjunction. If filterStartsWithBOL is true,
// skip alternatives with m_startsWithBOL set true.
PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
@@ -1079,6 +1098,7 @@ private:
YarrPattern& m_pattern;
PatternAlternative* m_alternative;
CharacterClassConstructor m_characterClassConstructor;
+ Vector<String> m_unmatchedNamedForwardReferences;
void* m_stackLimit;
bool m_invertCharacterClass;
bool m_invertParentheticalAssertion { false };
@@ -1101,13 +1121,14 @@ ErrorCode YarrPattern::compile(const String& patternString, void* stackLimit)
// Quoting Netscape's "What's new in JavaScript 1.2",
// "Note: if the number of left parentheses is less than the number specified
// in \#, the \# is taken as an octal escape as described in the next row."
- if (containsIllegalBackReference()) {
+ if (containsIllegalBackReference() || containsIllegalNamedForwardReferences()) {
if (unicode())
return ErrorCode::InvalidBackreference;
unsigned numSubpatterns = m_numSubpatterns;
- constructor.reset();
+ constructor.saveUnmatchedNamedForwardReferences();
+ constructor.resetForReparsing();
ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns);
ASSERT_UNUSED(error, !hasError(error));
ASSERT(numSubpatterns == m_numSubpatterns);
@@ -1168,7 +1189,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass*
else if (characterClass == pattern->wordcharCharacterClass())
out.print("<word>");
else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode ignore case>");
+ out.print("<unicode word ignore case>");
else if (characterClass == pattern->nondigitsCharacterClass())
out.print("<non-digits>");
else if (characterClass == pattern->nonspacesCharacterClass())
@@ -1176,7 +1197,7 @@ void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass*
else if (characterClass == pattern->nonwordcharCharacterClass())
out.print("<non-word>");
else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode non-ignore case>");
+ out.print("<unicode non-word ignore case>");
else {
bool needMatchesRangesSeperator = false;
@@ -1298,75 +1319,7 @@ void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nest
break;
case TypeCharacterClass:
out.print("character class ");
- if (characterClass->m_anyCharacter)
- out.print("<any character>");
- else if (characterClass == thisPattern->newlineCharacterClass())
- out.print("<newline>");
- else if (characterClass == thisPattern->digitsCharacterClass())
- out.print("<digits>");
- else if (characterClass == thisPattern->spacesCharacterClass())
- out.print("<whitespace>");
- else if (characterClass == thisPattern->wordcharCharacterClass())
- out.print("<word>");
- else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode ignore case>");
- else if (characterClass == thisPattern->nondigitsCharacterClass())
- out.print("<non-digits>");
- else if (characterClass == thisPattern->nonspacesCharacterClass())
- out.print("<non-whitespace>");
- else if (characterClass == thisPattern->nonwordcharCharacterClass())
- out.print("<non-word>");
- else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
- out.print("<unicode non-ignore case>");
- else {
- bool needMatchesRangesSeperator = false;
-
- auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
- size_t matchesSize = matches.size();
- if (matchesSize) {
- if (needMatchesRangesSeperator)
- out.print(",");
- needMatchesRangesSeperator = true;
-
- out.print(prefix, ":(");
- for (size_t i = 0; i < matchesSize; ++i) {
- if (i)
- out.print(",");
- dumpUChar32(out, matches[i]);
- }
- out.print(")");
- }
- };
-
- auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
- size_t rangeSize = ranges.size();
- if (rangeSize) {
- if (needMatchesRangesSeperator)
- out.print(",");
- needMatchesRangesSeperator = true;
-
- out.print(prefix, " ranges:(");
- for (size_t i = 0; i < rangeSize; ++i) {
- if (i)
- out.print(",");
- CharacterRange range = ranges[i];
- out.print("(");
- dumpUChar32(out, range.begin);
- out.print("..");
- dumpUChar32(out, range.end);
- out.print(")");
- }
- out.print(")");
- }
- };
-
- out.print("[");
- dumpMatches("ASCII", characterClass->m_matches);
- dumpRanges("ASCII", characterClass->m_ranges);
- dumpMatches("Unicode", characterClass->m_matchesUnicode);
- dumpRanges("Unicode", characterClass->m_rangesUnicode);
- out.print("]");
- }
+ dumpCharacterClass(out, thisPattern, characterClass);
dumpQuantifier(out);
if (quantityType != QuantifierFixedCount || thisPattern->unicode())
out.print(",frame location ", frameLocation);
@@ -1439,16 +1392,10 @@ void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsign
}
}
-void YarrPattern::dumpPattern(const String& patternString)
+void YarrPattern::dumpPatternString(PrintStream& out, const String& patternString)
{
- dumpPattern(WTF::dataFile(), patternString);
-}
+ out.print("/", patternString, "/");
-void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
-{
- out.print("RegExp pattern for /");
- out.print(patternString);
- out.print("/");
if (global())
out.print("g");
if (ignoreCase())
@@ -1459,6 +1406,18 @@ void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
out.print("u");
if (sticky())
out.print("y");
+}
+
+void YarrPattern::dumpPattern(const String& patternString)
+{
+ dumpPattern(WTF::dataFile(), patternString);
+}
+
+void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
+{
+ out.print("RegExp pattern for ");
+ dumpPatternString(out, patternString);
+
if (m_flags != NoFlags) {
bool printSeperator = false;
out.print(" (");