1 files changed, 795 insertions, 167 deletions
diff --git a/src/3rdparty/masm/yarr/YarrPattern.cpp b/src/3rdparty/masm/yarr/YarrPattern.cpp
index c7e5b6b09b..ac66ea1b9a 100644
--- a/src/3rdparty/masm/yarr/YarrPattern.cpp
+++ b/src/3rdparty/masm/yarr/YarrPattern.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013-2016 Apple Inc. All rights reserved.
  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,10 +27,15 @@
 #include "config.h"
 #include "YarrPattern.h"
 
+#include "Options.h"
 #include "Yarr.h"
-#include "YarrCanonicalizeUCS2.h"
+#include "YarrCanonicalize.h"
 #include "YarrParser.h"
+#include <wtf/DataLog.h>
+#include <wtf/Optional.h>
+//#include <wtf/Threading.h>
 #include <wtf/Vector.h>
+#include <wtf/text/WTFString.h>
 
 using namespace WTF;
 
@@ -40,8 +45,11 @@ namespace JSC { namespace Yarr {
 
 class CharacterClassConstructor {
 public:
-    CharacterClassConstructor(bool isCaseInsensitive = false)
+    CharacterClassConstructor(bool isCaseInsensitive, CanonicalMode canonicalMode)
         : m_isCaseInsensitive(isCaseInsensitive)
+        , m_hasNonBMPCharacters(false)
+        , m_anyCharacter(false)
+        , m_canonicalMode(canonicalMode)
     {
     }
     
@@ -51,6 +59,8 @@ public:
         m_ranges.clear();
         m_matchesUnicode.clear();
         m_rangesUnicode.clear();
+        m_hasNonBMPCharacters = false;
+        m_anyCharacter = false;
     }
 
     void append(const CharacterClass* other)
@@ -65,11 +75,71 @@ public:
             addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
     }
 
-    void putChar(UChar ch)
+    void appendInverted(const CharacterClass* other)
     {
-        // Handle ascii cases.
-        if (ch <= 0x7f) {
-            if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
+        auto addSortedInverted = [&](UChar32 min, UChar32 max,
+            const Vector<UChar32>& srcMatches, const Vector<CharacterRange>& srcRanges,
+            Vector<UChar32>& destMatches, Vector<CharacterRange>& destRanges) {
+
+            auto addSortedMatchOrRange = [&](UChar32 lo, UChar32 hiPlusOne) {
+                if (lo < hiPlusOne) {
+                    if (lo + 1 == hiPlusOne)
+                        addSorted(destMatches, lo);
+                    else
+                        addSortedRange(destRanges, lo, hiPlusOne - 1);
+                }
+            };
+
+            UChar32 lo = min;
+            size_t matchesIndex = 0;
+            size_t rangesIndex = 0;
+            bool matchesRemaining = matchesIndex < srcMatches.size();
+            bool rangesRemaining = rangesIndex < srcRanges.size();
+
+            if (!matchesRemaining && !rangesRemaining) {
+                addSortedMatchOrRange(min, max + 1);
+                return;
+            }
+
+            while (matchesRemaining || rangesRemaining) {
+                UChar32 hiPlusOne;
+                UChar32 nextLo;
+
+                if (matchesRemaining
+                    && (!rangesRemaining || srcMatches[matchesIndex] < srcRanges[rangesIndex].begin)) {
+                    hiPlusOne = srcMatches[matchesIndex];
+                    nextLo = hiPlusOne + 1;
+                    ++matchesIndex;
+                    matchesRemaining = matchesIndex < srcMatches.size();
+                } else {
+                    hiPlusOne = srcRanges[rangesIndex].begin;
+                    nextLo = srcRanges[rangesIndex].end + 1;
+                    ++rangesIndex;
+                    rangesRemaining = rangesIndex < srcRanges.size();
+                }
+
+                addSortedMatchOrRange(lo, hiPlusOne);
+
+                lo = nextLo;
+            }
+
+            addSortedMatchOrRange(lo, max + 1);
+        };
+
+        addSortedInverted(0, 0x7f, other->m_matches, other->m_ranges, m_matches, m_ranges);
+        addSortedInverted(0x80, 0x10ffff, other->m_matchesUnicode, other->m_rangesUnicode, m_matchesUnicode, m_rangesUnicode);
+    }
+
+    void putChar(UChar32 ch)
+    {
+        if (!m_isCaseInsensitive) {
+            addSorted(ch);
+            return;
+        }
+
+        if (m_canonicalMode == CanonicalMode::UCS2 && isASCII(ch)) {
+            // Handle ASCII cases.
+            if (isASCIIAlpha(ch)) {
                 addSorted(m_matches, toASCIIUpper(ch));
                 addSorted(m_matches, toASCIILower(ch));
             } else
@@ -77,40 +147,33 @@ public:
             return;
         }
 
-        // Simple case, not a case-insensitive match.
-        if (!m_isCaseInsensitive) {
-            addSorted(m_matchesUnicode, ch);
-            return;
-        }
-
         // Add multiple matches, if necessary.
-        UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+        const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_canonicalMode);
         if (info->type == CanonicalizeUnique)
-            addSorted(m_matchesUnicode, ch);
+            addSorted(ch);
         else
             putUnicodeIgnoreCase(ch, info);
     }
 
-    void putUnicodeIgnoreCase(UChar ch, UCS2CanonicalizationRange* info)
+    void putUnicodeIgnoreCase(UChar32 ch, const CanonicalizationRange* info)
     {
         ASSERT(m_isCaseInsensitive);
-        ASSERT(ch > 0x7f);
         ASSERT(ch >= info->begin && ch <= info->end);
         ASSERT(info->type != CanonicalizeUnique);
         if (info->type == CanonicalizeSet) {
-            for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
-                addSorted(m_matchesUnicode, ch);
+            for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set)
+                addSorted(ch);
         } else {
-            addSorted(m_matchesUnicode, ch);
-            addSorted(m_matchesUnicode, getCanonicalPair(info, ch));
+            addSorted(ch);
+            addSorted(getCanonicalPair(info, ch));
         }
     }
 
-    void putRange(UChar lo, UChar hi)
+    void putRange(UChar32 lo, UChar32 hi)
     {
-        if (lo <= 0x7f) {
+        if (isASCII(lo)) {
             char asciiLo = lo;
-            char asciiHi = std::min(hi, (UChar)0x7f);
+            char asciiHi = std::min(hi, (UChar32)0x7f);
             addSortedRange(m_ranges, lo, asciiHi);
             
             if (m_isCaseInsensitive) {
@@ -120,19 +183,19 @@ public:
                     addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
             }
         }
-        if (hi <= 0x7f)
+        if (isASCII(hi))
             return;
 
-        lo = std::max(lo, (UChar)0x80);
+        lo = std::max(lo, (UChar32)0x80);
         addSortedRange(m_rangesUnicode, lo, hi);
         
         if (!m_isCaseInsensitive)
             return;
 
-        UCS2CanonicalizationRange* info = rangeInfoFor(lo);
+        const CanonicalizationRange* info = canonicalRangeInfoFor(lo, m_canonicalMode);
         while (true) {
             // Handle the range [lo .. end]
-            UChar end = std::min<UChar>(info->end, hi);
+            UChar32 end = std::min<UChar32>(info->end, hi);
 
             switch (info->type) {
             case CanonicalizeUnique:
@@ -140,7 +203,7 @@ public:
                 break;
             case CanonicalizeSet: {
                 UChar ch;
-                for (uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
+                for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set)
                     addSorted(m_matchesUnicode, ch);
                 break;
             }
@@ -175,24 +238,38 @@ public:
 
     }
 
-    PassOwnPtr<CharacterClass> charClass()
+    std::unique_ptr<CharacterClass> charClass()
     {
-        OwnPtr<CharacterClass> characterClass = adoptPtr(new CharacterClass);
+        coalesceTables();
+
+        auto characterClass = std::make_unique<CharacterClass>();
 
         characterClass->m_matches.swap(m_matches);
         characterClass->m_ranges.swap(m_ranges);
         characterClass->m_matchesUnicode.swap(m_matchesUnicode);
         characterClass->m_rangesUnicode.swap(m_rangesUnicode);
+        characterClass->m_hasNonBMPCharacters = hasNonBMPCharacters();
+        characterClass->m_anyCharacter = anyCharacter();
+
+        m_hasNonBMPCharacters = false;
+        m_anyCharacter = false;
 
-        return characterClass.release();
+        return characterClass;
     }
 
 private:
-    void addSorted(Vector<UChar>& matches, UChar ch)
+    void addSorted(UChar32 ch)
+    {
+        addSorted(isASCII(ch) ? m_matches : m_matchesUnicode, ch);
+    }
+
+    void addSorted(Vector<UChar32>& matches, UChar32 ch)
     {
         unsigned pos = 0;
-        ASSERT(matches.size() <= UINT_MAX);
-        unsigned range = static_cast<unsigned>(matches.size());
+        unsigned range = matches.size();
+
+        if (!U_IS_BMP(ch))
+            m_hasNonBMPCharacters = true;
 
         // binary chop, find position to insert char.
         while (range) {
@@ -201,9 +278,31 @@ private:
             int val = matches[pos+index] - ch;
             if (!val)
                 return;
-            else if (val > 0)
+            else if (val > 0) {
+                if (val == 1) {
+                    UChar32 lo = ch;
+                    UChar32 hi = ch + 1;
+                    matches.remove(pos + index);
+                    if (pos + index > 0 && matches[pos + index - 1] == ch - 1) {
+                        lo = ch - 1;
+                        matches.remove(pos + index - 1);
+                    }
+                    addSortedRange(isASCII(ch) ? m_ranges : m_rangesUnicode, lo, hi);
+                    return;
+                }
                 range = index;
-            else {
+            } else {
+                if (val == -1) {
+                    UChar32 lo = ch - 1;
+                    UChar32 hi = ch;
+                    matches.remove(pos + index);
+                    if (pos + index + 1 < matches.size() && matches[pos + index + 1] == ch + 1) {
+                        hi = ch + 1;
+                        matches.remove(pos + index + 1);
+                    }
+                    addSortedRange(isASCII(ch) ? m_ranges : m_rangesUnicode, lo, hi);
+                    return;
+                }
                 pos += (index+1);
                 range -= (index+1);
             }
@@ -215,17 +314,19 @@ private:
             matches.insert(pos, ch);
     }
 
-    void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
+    void addSortedRange(Vector<CharacterRange>& ranges, UChar32 lo, UChar32 hi)
     {
-        ASSERT(ranges.size() <= UINT_MAX);
-        unsigned end = static_cast<unsigned>(ranges.size());
-        
+        size_t end = ranges.size();
+
+        if (!U_IS_BMP(hi))
+            m_hasNonBMPCharacters = true;
+
         // Simple linear scan - I doubt there are that many ranges anyway...
         // feel free to fix this with something faster (eg binary chop).
-        for (unsigned i = 0; i < end; ++i) {
+        for (size_t i = 0; i < end; ++i) {
             // does the new range fall before the current position in the array
             if (hi < ranges[i].begin) {
-                // optional optimization: concatenate appending ranges? - may not be worthwhile.
+                // Concatenate appending ranges.
                 if (hi == (ranges[i].begin - 1)) {
                     ranges[i].begin = lo;
                     return;
@@ -233,7 +334,7 @@ private:
                 ranges.insert(i, CharacterRange(lo, hi));
                 return;
             }
-            // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the beginning
+            // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
             // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
             // end of the last range they concatenate, which is just as good.
             if (lo <= (ranges[i].end + 1)) {
@@ -241,18 +342,7 @@ private:
                 ranges[i].begin = std::min(ranges[i].begin, lo);
                 ranges[i].end = std::max(ranges[i].end, hi);
 
-                // now check if the new range can subsume any subsequent ranges.
-                unsigned next = i+1;
-                // each iteration of the loop we will either remove something from the list, or break the loop.
-                while (next < ranges.size()) {
-                    if (ranges[next].begin <= (ranges[i].end + 1)) {
-                        // the next entry now overlaps / concatenates this one.
-                        ranges[i].end = std::max(ranges[i].end, ranges[next].end);
-                        ranges.remove(next);
-                    } else
-                        break;
-                }
-                
+                mergeRangesFrom(ranges, i);
                 return;
             }
         }
@@ -261,25 +351,95 @@ private:
         ranges.append(CharacterRange(lo, hi));
     }
 
-    bool m_isCaseInsensitive;
+    void mergeRangesFrom(Vector<CharacterRange>& ranges, size_t index)
+    {
+        size_t next = index + 1;
+
+        // each iteration of the loop we will either remove something from the list, or break out of the loop.
+        while (next < ranges.size()) {
+            if (ranges[next].begin <= (ranges[index].end + 1)) {
+                // the next entry now overlaps / concatenates with this one.
+                ranges[index].end = std::max(ranges[index].end, ranges[next].end);
+                ranges.remove(next);
+            } else
+                break;
+        }
+
+    }
+
+    void coalesceTables()
+    {
+        auto coalesceMatchesAndRanges = [&](Vector<UChar32>& matches, Vector<CharacterRange>& ranges) {
+
+            size_t matchesIndex = 0;
+            size_t rangesIndex = 0;
+
+            while (matchesIndex < matches.size() && rangesIndex < ranges.size()) {
+                while (matchesIndex < matches.size() && matches[matchesIndex] < ranges[rangesIndex].begin - 1)
+                    matchesIndex++;
+
+                if (matchesIndex < matches.size() && matches[matchesIndex] == ranges[rangesIndex].begin - 1) {
+                    ranges[rangesIndex].begin = matches[matchesIndex];
+                    matches.remove(matchesIndex);
+                }
+
+                while (matchesIndex < matches.size() && matches[matchesIndex] < ranges[rangesIndex].end + 1)
+                    matchesIndex++;
+
+                if (matchesIndex < matches.size()) {
+                    if (matches[matchesIndex] == ranges[rangesIndex].end + 1) {
+                        ranges[rangesIndex].end = matches[matchesIndex];
+                        matches.remove(matchesIndex);
+
+                        mergeRangesFrom(ranges, rangesIndex);
+                    } else
+                        matchesIndex++;
+                }
+            }
+        };
+
+        coalesceMatchesAndRanges(m_matches, m_ranges);
+        coalesceMatchesAndRanges(m_matchesUnicode, m_rangesUnicode);
+
+        if (!m_matches.size() && !m_matchesUnicode.size()
+            && m_ranges.size() == 1 && m_rangesUnicode.size() == 1
+            && m_ranges[0].begin == 0 && m_ranges[0].end == 0x7f
+            && m_rangesUnicode[0].begin == 0x80 && m_rangesUnicode[0].end == 0x10ffff)
+            m_anyCharacter = true;
+    }
 
-    Vector<UChar> m_matches;
+    bool hasNonBMPCharacters()
+    {
+        return m_hasNonBMPCharacters;
+    }
+
+    bool anyCharacter()
+    {
+        return m_anyCharacter;
+    }
+
+    bool m_isCaseInsensitive : 1;
+    bool m_hasNonBMPCharacters : 1;
+    bool m_anyCharacter : 1;
+    CanonicalMode m_canonicalMode;
+
+    Vector<UChar32> m_matches;
     Vector<CharacterRange> m_ranges;
-    Vector<UChar> m_matchesUnicode;
+    Vector<UChar32> m_matchesUnicode;
     Vector<CharacterRange> m_rangesUnicode;
 };
 
 class YarrPatternConstructor {
 public:
-    YarrPatternConstructor(YarrPattern& pattern)
+    YarrPatternConstructor(YarrPattern& pattern, void* stackLimit)
         : m_pattern(pattern)
-        , m_characterClassConstructor(pattern.m_ignoreCase)
-        , m_invertParentheticalAssertion(false)
+        , m_characterClassConstructor(pattern.ignoreCase(), pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
+        , m_stackLimit(stackLimit)
     {
-        OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+        auto body = std::make_unique<PatternDisjunction>();
         m_pattern.m_body = body.get();
         m_alternative = body->addNewAlternative();
-        m_pattern.m_disjunctions.append(body.release());
+        m_pattern.m_disjunctions.append(WTFMove(body));
     }
 
     ~YarrPatternConstructor()
@@ -291,15 +451,15 @@ public:
         m_pattern.reset();
         m_characterClassConstructor.reset();
 
-        OwnPtr<PatternDisjunction> body = adoptPtr(new PatternDisjunction);
+        auto body = std::make_unique<PatternDisjunction>();
         m_pattern.m_body = body.get();
         m_alternative = body->addNewAlternative();
-        m_pattern.m_disjunctions.append(body.release());
+        m_pattern.m_disjunctions.append(WTFMove(body));
     }
     
     void assertionBOL()
     {
-        if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
+        if (!m_alternative->m_terms.size() && !m_invertParentheticalAssertion) {
             m_alternative->m_startsWithBOL = true;
             m_alternative->m_containsBOL = true;
             m_pattern.m_containsBOL = true;
@@ -315,41 +475,51 @@ public:
         m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
     }
 
-    void atomPatternCharacter(UChar ch)
+    void atomPatternCharacter(UChar32 ch)
     {
         // We handle case-insensitive checking of unicode characters which do have both
         // cases by handling them as if they were defined using a CharacterClass.
-        if (!m_pattern.m_ignoreCase || isASCII(ch)) {
+        if (!m_pattern.ignoreCase() || (isASCII(ch) && !m_pattern.unicode())) {
             m_alternative->m_terms.append(PatternTerm(ch));
             return;
         }
 
-        UCS2CanonicalizationRange* info = rangeInfoFor(ch);
+        const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2);
         if (info->type == CanonicalizeUnique) {
             m_alternative->m_terms.append(PatternTerm(ch));
             return;
         }
 
         m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
-        OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+        auto newCharacterClass = m_characterClassConstructor.charClass();
         m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), false));
-        m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
+        m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass));
     }
 
     void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
     {
         switch (classID) {
-        case DigitClassID:
+        case BuiltInCharacterClassID::DigitClassID:
             m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
             break;
-        case SpaceClassID:
+        case BuiltInCharacterClassID::SpaceClassID:
             m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
             break;
-        case WordClassID:
-            m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+        case BuiltInCharacterClassID::WordClassID:
+            if (m_pattern.unicode() && m_pattern.ignoreCase())
+                m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), invert));
+            else
+                m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+            break;
+        case BuiltInCharacterClassID::DotClassID:
+            ASSERT(!invert);
+            if (m_pattern.dotAll())
+                m_alternative->m_terms.append(PatternTerm(m_pattern.anyCharacterClass(), false));
+            else
+                m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), true));
             break;
-        case NewlineClassID:
-            m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
+        default:
+            m_alternative->m_terms.append(PatternTerm(m_pattern.unicodeCharacterClassFor(classID), invert));
             break;
         }
     }
@@ -359,64 +529,83 @@ public:
         m_invertCharacterClass = invert;
     }
 
-    void atomCharacterClassAtom(UChar ch)
+    void atomCharacterClassAtom(UChar32 ch)
     {
         m_characterClassConstructor.putChar(ch);
     }
 
-    void atomCharacterClassRange(UChar begin, UChar end)
+    void atomCharacterClassRange(UChar32 begin, UChar32 end)
     {
         m_characterClassConstructor.putRange(begin, end);
     }
 
     void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
     {
-        ASSERT(classID != NewlineClassID);
+        ASSERT(classID != BuiltInCharacterClassID::DotClassID);
 
         switch (classID) {
-        case DigitClassID:
+        case BuiltInCharacterClassID::DigitClassID:
             m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
             break;
         
-        case SpaceClassID:
+        case BuiltInCharacterClassID::SpaceClassID:
             m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
             break;
         
-        case WordClassID:
-            m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+        case BuiltInCharacterClassID::WordClassID:
+            if (m_pattern.unicode() && m_pattern.ignoreCase())
+                m_characterClassConstructor.append(invert ? m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass() : m_pattern.wordUnicodeIgnoreCaseCharCharacterClass());
+            else
+                m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
             break;
         
         default:
-            RELEASE_ASSERT_NOT_REACHED();
+            if (!invert)
+                m_characterClassConstructor.append(m_pattern.unicodeCharacterClassFor(classID));
+            else
+                m_characterClassConstructor.appendInverted(m_pattern.unicodeCharacterClassFor(classID));
         }
     }
 
     void atomCharacterClassEnd()
     {
-        OwnPtr<CharacterClass> newCharacterClass = m_characterClassConstructor.charClass();
+        auto newCharacterClass = m_characterClassConstructor.charClass();
+
+        if (!m_invertCharacterClass && newCharacterClass.get()->m_anyCharacter) {
+            m_alternative->m_terms.append(PatternTerm(m_pattern.anyCharacterClass(), false));
+            return;
+        }
         m_alternative->m_terms.append(PatternTerm(newCharacterClass.get(), m_invertCharacterClass));
-        m_pattern.m_userCharacterClasses.append(newCharacterClass.release());
+        m_pattern.m_userCharacterClasses.append(WTFMove(newCharacterClass));
     }
 
-    void atomParenthesesSubpatternBegin(bool capture = true)
+    void atomParenthesesSubpatternBegin(bool capture = true, std::optional<String> optGroupName = std::nullopt)
     {
         unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
-        if (capture)
+        if (capture) {
             m_pattern.m_numSubpatterns++;
+            if (optGroupName) {
+                while (m_pattern.m_captureGroupNames.size() < subpatternId)
+                    m_pattern.m_captureGroupNames.append(String());
+                m_pattern.m_captureGroupNames.append(optGroupName.value());
+                m_pattern.m_namedGroupToParenIndex.add(optGroupName.value(), subpatternId);
+            }
+        } else
+            ASSERT(!optGroupName);
 
-        OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+        auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative);
         m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, false));
         m_alternative = parenthesesDisjunction->addNewAlternative();
-        m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
+        m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction));
     }
 
     void atomParentheticalAssertionBegin(bool invert = false)
     {
-        OwnPtr<PatternDisjunction> parenthesesDisjunction = adoptPtr(new PatternDisjunction(m_alternative));
+        auto parenthesesDisjunction = std::make_unique<PatternDisjunction>(m_alternative);
         m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction.get(), false, invert));
         m_alternative = parenthesesDisjunction->addNewAlternative();
         m_invertParentheticalAssertion = invert;
-        m_pattern.m_disjunctions.append(parenthesesDisjunction.release());
+        m_pattern.m_disjunctions.append(WTFMove(parenthesesDisjunction));
     }
 
     void atomParenthesesEnd()
@@ -429,8 +618,7 @@ public:
 
         PatternTerm& lastTerm = m_alternative->lastTerm();
 
-        ASSERT(parenthesesDisjunction->m_alternatives.size() <= UINT_MAX);
-        unsigned numParenAlternatives = static_cast<unsigned>(parenthesesDisjunction->m_alternatives.size());
+        unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
         unsigned numBOLAnchoredAlts = 0;
 
         for (unsigned i = 0; i < numParenAlternatives; i++) {
@@ -478,16 +666,22 @@ public:
         m_alternative->m_terms.append(PatternTerm(subpatternId));
     }
 
-    // deep copy the argument disjunction.  If filterStartsWithBOL is true, 
+    void atomNamedBackReference(String subpatternName)
+    {
+        ASSERT(m_pattern.m_namedGroupToParenIndex.find(subpatternName) != m_pattern.m_namedGroupToParenIndex.end());
+        atomBackReference(m_pattern.m_namedGroupToParenIndex.get(subpatternName));
+    }
+
+    // deep copy the argument disjunction.  If filterStartsWithBOL is true,
     // skip alternatives with m_startsWithBOL set true.
     PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
     {
-        OwnPtr<PatternDisjunction> newDisjunction;
+        std::unique_ptr<PatternDisjunction> newDisjunction;
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
             if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
                 if (!newDisjunction) {
-                    newDisjunction = adoptPtr(new PatternDisjunction());
+                    newDisjunction = std::make_unique<PatternDisjunction>();
                     newDisjunction->m_parent = disjunction->m_parent;
                 }
                 PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
@@ -501,7 +695,7 @@ public:
             return 0;
 
         PatternDisjunction* copiedDisjunction = newDisjunction.get();
-        m_pattern.m_disjunctions.append(newDisjunction.release());
+        m_pattern.m_disjunctions.append(WTFMove(newDisjunction));
         return copiedDisjunction;
     }
     
@@ -512,6 +706,7 @@ public:
         
         PatternTerm termCopy = term;
         termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
+        m_pattern.m_hasCopiedParenSubexpressions = true;
         return termCopy;
     }
     
@@ -527,7 +722,7 @@ public:
 
         PatternTerm& term = m_alternative->lastTerm();
         ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
-        ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
+        ASSERT(term.quantityMinCount == 1 && term.quantityMaxCount == 1 && term.quantityType == QuantifierFixedCount);
 
         if (term.type == PatternTerm::TypeParentheticalAssertion) {
             // If an assertion is quantified with a minimum count of zero, it can simply be removed.
@@ -549,12 +744,12 @@ public:
             return;
         }
 
-        if (min == 0)
-            term.quantify(max, greedy   ? QuantifierGreedy : QuantifierNonGreedy);
-        else if (min == max)
-            term.quantify(min, QuantifierFixedCount);
+        if (min == max)
+            term.quantify(min, max, QuantifierFixedCount);
+        else if (!min || (term.type == PatternTerm::TypeParenthesesSubpattern && m_pattern.m_hasCopiedParenSubexpressions))
+            term.quantify(min, max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
         else {
-            term.quantify(min, QuantifierFixedCount);
+            term.quantify(min, min, QuantifierFixedCount);
             m_alternative->m_terms.append(copyTerm(term));
             // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
             m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
@@ -568,10 +763,14 @@ public:
         m_alternative = m_alternative->m_parent->addNewAlternative();
     }
 
-    unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
+    ErrorCode setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition, unsigned& newCallFrameSize) WARN_UNUSED_RETURN
     {
+        if (UNLIKELY(!isSafeToRecurse()))
+            return ErrorCode::TooManyDisjunctions;
+
+        ErrorCode error = ErrorCode::NoError;
         alternative->m_hasFixedSize = true;
-        Checked<unsigned> currentInputPosition = initialInputPosition;
+        Checked<unsigned, RecordOverflow> currentInputPosition = initialInputPosition;
 
         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
             PatternTerm& term = alternative->m_terms[i];
@@ -599,8 +798,14 @@ public:
                     term.frameLocation = currentCallFrameSize;
                     currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
                     alternative->m_hasFixedSize = false;
+                } else if (m_pattern.unicode()) {
+                    Checked<unsigned, RecordOverflow> tempCount = term.quantityMaxCount;
+                    tempCount *= U16_LENGTH(term.patternCharacter);
+                    if (tempCount.hasOverflowed())
+                        return ErrorCode::OffsetTooLarge;
+                    currentInputPosition += tempCount;
                 } else
-                    currentInputPosition += term.quantityCount;
+                    currentInputPosition += term.quantityMaxCount;
                 break;
 
             case PatternTerm::TypeCharacterClass:
@@ -609,29 +814,39 @@ public:
                     term.frameLocation = currentCallFrameSize;
                     currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
                     alternative->m_hasFixedSize = false;
+                } else if (m_pattern.unicode()) {
+                    term.frameLocation = currentCallFrameSize;
+                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
+                    currentInputPosition += term.quantityMaxCount;
+                    alternative->m_hasFixedSize = false;
                 } else
-                    currentInputPosition += term.quantityCount;
+                    currentInputPosition += term.quantityMaxCount;
                 break;
 
             case PatternTerm::TypeParenthesesSubpattern:
                 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
                 term.frameLocation = currentCallFrameSize;
-                if (term.quantityCount == 1 && !term.parentheses.isCopy) {
-                    if (term.quantityType != QuantifierFixedCount)
-                        currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
-                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet());
+                if (term.quantityMaxCount == 1 && !term.parentheses.isCopy) {
+                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+                    error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
+                    if (hasError(error))
+                        return error;
                     // If quantity is fixed, then pre-check its minimum size.
                     if (term.quantityType == QuantifierFixedCount)
                         currentInputPosition += term.parentheses.disjunction->m_minimumSize;
                     term.inputPosition = currentInputPosition.unsafeGet();
                 } else if (term.parentheses.isTerminal) {
                     currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
-                    currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet());
+                    error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
+                    if (hasError(error))
+                        return error;
                     term.inputPosition = currentInputPosition.unsafeGet();
                 } else {
                     term.inputPosition = currentInputPosition.unsafeGet();
-                    setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition.unsafeGet());
                     currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
+                    error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition.unsafeGet(), currentCallFrameSize);
+                    if (hasError(error))
+                        return error;
                 }
                 // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
                 alternative->m_hasFixedSize = false;
@@ -640,35 +855,53 @@ public:
             case PatternTerm::TypeParentheticalAssertion:
                 term.inputPosition = currentInputPosition.unsafeGet();
                 term.frameLocation = currentCallFrameSize;
-                currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition.unsafeGet());
+                error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition.unsafeGet(), currentCallFrameSize);
+                if (hasError(error))
+                    return error;
                 break;
 
             case PatternTerm::TypeDotStarEnclosure:
+                ASSERT(!m_pattern.m_saveInitialStartValue);
                 alternative->m_hasFixedSize = false;
                 term.inputPosition = initialInputPosition;
+                m_pattern.m_initialStartValueFrameLocation = currentCallFrameSize;
+                currentCallFrameSize += YarrStackSpaceForDotStarEnclosure;
+                m_pattern.m_saveInitialStartValue = true;
                 break;
             }
+            if (currentInputPosition.hasOverflowed())
+                return ErrorCode::OffsetTooLarge;
         }
 
         alternative->m_minimumSize = (currentInputPosition - initialInputPosition).unsafeGet();
-        return currentCallFrameSize;
+        newCallFrameSize = currentCallFrameSize;
+        return error;
     }
 
-    unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
+    ErrorCode setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition, unsigned& callFrameSize)
     {
+        if (UNLIKELY(!isSafeToRecurse()))
+            return ErrorCode::TooManyDisjunctions;
+
         if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
             initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative;
 
         unsigned minimumInputSize = UINT_MAX;
         unsigned maximumCallFrameSize = 0;
         bool hasFixedSize = true;
+        ErrorCode error = ErrorCode::NoError;
 
         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
             PatternAlternative* alternative = disjunction->m_alternatives[alt].get();
-            unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
+            unsigned currentAlternativeCallFrameSize;
+            error = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition, currentAlternativeCallFrameSize);
+            if (hasError(error))
+                return error;
             minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
             maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
             hasFixedSize &= alternative->m_hasFixedSize;
+            if (alternative->m_minimumSize > INT_MAX)
+                m_pattern.m_containsUnsignedLengthPattern = true;
         }
         
         ASSERT(minimumInputSize != UINT_MAX);
@@ -677,12 +910,15 @@ public:
         disjunction->m_hasFixedSize = hasFixedSize;
         disjunction->m_minimumSize = minimumInputSize;
         disjunction->m_callFrameSize = maximumCallFrameSize;
-        return maximumCallFrameSize;
+        callFrameSize = maximumCallFrameSize;
+        return error;
     }
 
-    void setupOffsets()
+    ErrorCode setupOffsets()
     {
-        setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
+        // FIXME: Yarr should not use the stack to handle subpatterns (rdar://problem/26436314).
+        unsigned ignoredCallFrameSize;
+        return setupDisjunctionOffsets(m_pattern.m_body, 0, 0, ignoredCallFrameSize);
     }
 
     // This optimization identifies sets of parentheses that we will never need to backtrack.
@@ -699,14 +935,15 @@ public:
         if (m_pattern.m_numSubpatterns)
             return;
 
-        Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
+        Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
         for (size_t i = 0; i < alternatives.size(); ++i) {
             Vector<PatternTerm>& terms = alternatives[i]->m_terms;
             if (terms.size()) {
                 PatternTerm& term = terms.last();
                 if (term.type == PatternTerm::TypeParenthesesSubpattern
                     && term.quantityType == QuantifierGreedy
-                    && term.quantityCount == quantifyInfinite
+                    && term.quantityMinCount == 0
+                    && term.quantityMaxCount == quantifyInfinite
                     && !term.capture())
                     term.parentheses.isTerminal = true;
             }
@@ -722,7 +959,7 @@ public:
         // At this point, this is only valid for non-multiline expressions.
         PatternDisjunction* disjunction = m_pattern.m_body;
         
-        if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
+        if (!m_pattern.m_containsBOL || m_pattern.multiline())
             return;
         
         PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
@@ -740,11 +977,12 @@ public:
         }
     }
 
-    bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
+    bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t endIndex)
     {
         Vector<PatternTerm>& terms = alternative->m_terms;
 
-        for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
+        ASSERT(endIndex <= terms.size());
+        for (size_t termIndex = firstTermIndex; termIndex < endIndex; ++termIndex) {
             PatternTerm& term = terms[termIndex];
 
             if (term.m_capture)
@@ -753,7 +991,7 @@ public:
             if (term.type == PatternTerm::TypeParenthesesSubpattern) {
                 PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
                 for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
-                    if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size() - 1))
+                    if (containsCapturingTerms(nestedDisjunction->m_alternatives[alt].get(), 0, nestedDisjunction->m_alternatives[alt]->m_terms.size()))
                         return true;
                 }
             }
@@ -769,16 +1007,17 @@ public:
     // beginning and the end of the match.
     void optimizeDotStarWrappedExpressions()
     {
-        Vector<OwnPtr<PatternAlternative> >& alternatives = m_pattern.m_body->m_alternatives;
+        Vector<std::unique_ptr<PatternAlternative>>& alternatives = m_pattern.m_body->m_alternatives;
         if (alternatives.size() != 1)
             return;
 
+        CharacterClass* dotCharacterClass = m_pattern.dotAll() ? m_pattern.anyCharacterClass() : m_pattern.newlineCharacterClass();
         PatternAlternative* alternative = alternatives[0].get();
         Vector<PatternTerm>& terms = alternative->m_terms;
         if (terms.size() >= 3) {
             bool startsWithBOL = false;
             bool endsWithEOL = false;
-            size_t termIndex, firstExpressionTerm, lastExpressionTerm;
+            size_t termIndex, firstExpressionTerm;
 
             termIndex = 0;
             if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
@@ -787,7 +1026,10 @@ public:
             }
             
             PatternTerm& firstNonAnchorTerm = terms[termIndex];
-            if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
+            if (firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass
+                || firstNonAnchorTerm.characterClass != dotCharacterClass
+                || firstNonAnchorTerm.quantityMinCount
+                || firstNonAnchorTerm.quantityMaxCount != quantifyInfinite)
                 return;
             
             firstExpressionTerm = termIndex + 1;
@@ -799,16 +1041,19 @@ public:
             }
             
             PatternTerm& lastNonAnchorTerm = terms[termIndex];
-            if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
+            if (lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass
+                || lastNonAnchorTerm.characterClass != dotCharacterClass
+                || lastNonAnchorTerm.quantityType != QuantifierGreedy
+                || lastNonAnchorTerm.quantityMinCount
+                || lastNonAnchorTerm.quantityMaxCount != quantifyInfinite)
                 return;
-            
-            lastExpressionTerm = termIndex - 1;
 
-            if (firstExpressionTerm > lastExpressionTerm)
+            size_t endIndex = termIndex;
+            if (firstExpressionTerm >= endIndex)
                 return;
 
-            if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
-                for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
+            if (!containsCapturingTerms(alternative, firstExpressionTerm, endIndex)) {
+                for (termIndex = terms.size() - 1; termIndex >= endIndex; --termIndex)
                     terms.remove(termIndex);
 
                 for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
@@ -822,62 +1067,445 @@ public:
     }
 
 private:
+    bool isSafeToRecurse() const
+    {
+        if (!m_stackLimit)
+            return true;
+        int8_t* curr = reinterpret_cast<int8_t*>(&curr);
+        int8_t* limit = reinterpret_cast<int8_t*>(m_stackLimit);
+        return curr >= limit;
+    }
+
     YarrPattern& m_pattern;
     PatternAlternative* m_alternative;
     CharacterClassConstructor m_characterClassConstructor;
+    void* m_stackLimit;
     bool m_invertCharacterClass;
-    bool m_invertParentheticalAssertion;
+    bool m_invertParentheticalAssertion { false };
 };
 
-const char* YarrPattern::compile(const String& patternString)
+ErrorCode YarrPattern::compile(const String& patternString, void* stackLimit)
 {
-    YarrPatternConstructor constructor(*this);
+    YarrPatternConstructor constructor(*this, stackLimit);
 
-    if (const char* error = parse(constructor, patternString))
-        return error;
+    if (m_flags == InvalidFlags)
+        return ErrorCode::InvalidRegularExpressionFlags;
+
+    {
+        ErrorCode error = parse(constructor, patternString, unicode());
+        if (hasError(error))
+            return error;
+    }
     
     // If the pattern contains illegal backreferences reset & reparse.
     // Quoting Netscape's "What's new in JavaScript 1.2",
     //      "Note: if the number of left parentheses is less than the number specified
     //       in \#, the \# is taken as an octal escape as described in the next row."
     if (containsIllegalBackReference()) {
+        if (unicode())
+            return ErrorCode::InvalidBackreference;
+
         unsigned numSubpatterns = m_numSubpatterns;
 
         constructor.reset();
-#if !ASSERT_DISABLED
-        const char* error =
-#endif
-            parse(constructor, patternString, numSubpatterns);
-
-        ASSERT(!error);
+        ErrorCode error = parse(constructor, patternString, unicode(), numSubpatterns);
+        ASSERT_UNUSED(error, !hasError(error));
         ASSERT(numSubpatterns == m_numSubpatterns);
     }
 
     constructor.checkForTerminalParentheses();
     constructor.optimizeDotStarWrappedExpressions();
     constructor.optimizeBOL();
-        
-    constructor.setupOffsets();
 
-    return 0;
+    {
+        ErrorCode error = constructor.setupOffsets();
+        if (hasError(error))
+            return error;
+    }
+
+    if (Options::dumpCompiledRegExpPatterns())
+        dumpPattern(patternString);
+
+    return ErrorCode::NoError;
 }
 
-YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)
-    : m_ignoreCase(ignoreCase)
-    , m_multiline(multiline)
-    , m_containsBackreferences(false)
+YarrPattern::YarrPattern(const String& pattern, RegExpFlags flags, ErrorCode& error, void* stackLimit)
+    : m_containsBackreferences(false)
     , m_containsBOL(false)
-    , m_numSubpatterns(0)
-    , m_maxBackReference(0)
-    , newlineCached(0)
-    , digitsCached(0)
-    , spacesCached(0)
-    , wordcharCached(0)
-    , nondigitsCached(0)
-    , nonspacesCached(0)
-    , nonwordcharCached(0)
+    , m_containsUnsignedLengthPattern(false)
+    , m_hasCopiedParenSubexpressions(false)
+    , m_saveInitialStartValue(false)
+    , m_flags(flags)
+{
+    error = compile(pattern, stackLimit);
+}
+
+void indentForNestingLevel(PrintStream& out, unsigned nestingDepth)
+{
+    out.print("    ");
+    for (; nestingDepth; --nestingDepth)
+        out.print("  ");
+}
+
+void dumpUChar32(PrintStream& out, UChar32 c)
+{
+    if (c >= ' '&& c <= 0xff)
+        out.printf("'%c'", static_cast<char>(c));
+    else
+        out.printf("0x%04x", c);
+}
+
+void dumpCharacterClass(PrintStream& out, YarrPattern* pattern, CharacterClass* characterClass)
+{
+    if (characterClass == pattern->anyCharacterClass())
+        out.print("<any character>");
+    else if (characterClass == pattern->newlineCharacterClass())
+        out.print("<newline>");
+    else if (characterClass == pattern->digitsCharacterClass())
+        out.print("<digits>");
+    else if (characterClass == pattern->spacesCharacterClass())
+        out.print("<whitespace>");
+    else if (characterClass == pattern->wordcharCharacterClass())
+        out.print("<word>");
+    else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode ignore case>");
+    else if (characterClass == pattern->nondigitsCharacterClass())
+        out.print("<non-digits>");
+    else if (characterClass == pattern->nonspacesCharacterClass())
+        out.print("<non-whitespace>");
+    else if (characterClass == pattern->nonwordcharCharacterClass())
+        out.print("<non-word>");
+    else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
+        out.print("<unicode non-ignore case>");
+    else {
+        bool needMatchesRangesSeperator = false;
+
+        auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
+            size_t matchesSize = matches.size();
+            if (matchesSize) {
+                if (needMatchesRangesSeperator)
+                    out.print(",");
+                needMatchesRangesSeperator = true;
+
+                out.print(prefix, ":(");
+                for (size_t i = 0; i < matchesSize; ++i) {
+                    if (i)
+                        out.print(",");
+                    dumpUChar32(out, matches[i]);
+                }
+                out.print(")");
+            }
+        };
+
+        auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
+            size_t rangeSize = ranges.size();
+            if (rangeSize) {
+                if (needMatchesRangesSeperator)
+                    out.print(",");
+                needMatchesRangesSeperator = true;
+
+                out.print(prefix, " ranges:(");
+                for (size_t i = 0; i < rangeSize; ++i) {
+                    if (i)
+                        out.print(",");
+                    CharacterRange range = ranges[i];
+                    out.print("(");
+                    dumpUChar32(out, range.begin);
+                    out.print("..");
+                    dumpUChar32(out, range.end);
+                    out.print(")");
+                }
+                out.print(")");
+            }
+        };
+
+        out.print("[");
+        dumpMatches("ASCII", characterClass->m_matches);
+        dumpRanges("ASCII", characterClass->m_ranges);
+        dumpMatches("Unicode", characterClass->m_matchesUnicode);
+        dumpRanges("Unicode", characterClass->m_rangesUnicode);
+        out.print("]");
+    }
+}
+
+void PatternAlternative::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
+{
+    out.print("minimum size: ", m_minimumSize);
+    if (m_hasFixedSize)
+        out.print(",fixed size");
+    if (m_onceThrough)
+        out.print(",once through");
+    if (m_startsWithBOL)
+        out.print(",starts with ^");
+    if (m_containsBOL)
+        out.print(",contains ^");
+    out.print("\n");
+
+    for (size_t i = 0; i < m_terms.size(); ++i)
+        m_terms[i].dump(out, thisPattern, nestingDepth);
+}
+
+void PatternTerm::dumpQuantifier(PrintStream& out)
+{
+    if (quantityType == QuantifierFixedCount && quantityMinCount == 1 && quantityMaxCount == 1)
+        return;
+    out.print(" {", quantityMinCount.unsafeGet());
+    if (quantityMinCount != quantityMaxCount) {
+        if (quantityMaxCount == UINT_MAX)
+            out.print(",...");
+        else
+            out.print(",", quantityMaxCount.unsafeGet());
+    }
+    out.print("}");
+    if (quantityType == QuantifierGreedy)
+        out.print(" greedy");
+    else if (quantityType == QuantifierNonGreedy)
+        out.print(" non-greedy");
+}
+
+void PatternTerm::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth)
+{
+    indentForNestingLevel(out, nestingDepth);
+
+    if (type != TypeParenthesesSubpattern && type != TypeParentheticalAssertion) {
+        if (invert())
+            out.print("not ");
+    }
+
+    switch (type) {
+    case TypeAssertionBOL:
+        out.println("BOL");
+        break;
+    case TypeAssertionEOL:
+        out.println("EOL");
+        break;
+    case TypeAssertionWordBoundary:
+        out.println("word boundary");
+        break;
+    case TypePatternCharacter:
+        out.printf("character ");
+        out.printf("inputPosition %u ", inputPosition);
+        if (thisPattern->ignoreCase() && isASCIIAlpha(patternCharacter)) {
+            dumpUChar32(out, toASCIIUpper(patternCharacter));
+            out.print("/");
+            dumpUChar32(out, toASCIILower(patternCharacter));
+        } else
+            dumpUChar32(out, patternCharacter);
+        dumpQuantifier(out);
+        if (quantityType != QuantifierFixedCount)
+            out.print(",frame location ", frameLocation);
+        out.println();
+        break;
+    case TypeCharacterClass:
+        out.print("character class ");
+        if (characterClass->m_anyCharacter)
+            out.print("<any character>");
+        else if (characterClass == thisPattern->newlineCharacterClass())
+            out.print("<newline>");
+        else if (characterClass == thisPattern->digitsCharacterClass())
+            out.print("<digits>");
+        else if (characterClass == thisPattern->spacesCharacterClass())
+            out.print("<whitespace>");
+        else if (characterClass == thisPattern->wordcharCharacterClass())
+            out.print("<word>");
+        else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass())
+            out.print("<unicode ignore case>");
+        else if (characterClass == thisPattern->nondigitsCharacterClass())
+            out.print("<non-digits>");
+        else if (characterClass == thisPattern->nonspacesCharacterClass())
+            out.print("<non-whitespace>");
+        else if (characterClass == thisPattern->nonwordcharCharacterClass())
+            out.print("<non-word>");
+        else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
+            out.print("<unicode non-ignore case>");
+        else {
+            bool needMatchesRangesSeperator = false;
+
+            auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
+                size_t matchesSize = matches.size();
+                if (matchesSize) {
+                    if (needMatchesRangesSeperator)
+                        out.print(",");
+                    needMatchesRangesSeperator = true;
+
+                    out.print(prefix, ":(");
+                    for (size_t i = 0; i < matchesSize; ++i) {
+                        if (i)
+                            out.print(",");
+                        dumpUChar32(out, matches[i]);
+                    }
+                    out.print(")");
+                }
+            };
+
+            auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
+                size_t rangeSize = ranges.size();
+                if (rangeSize) {
+                    if (needMatchesRangesSeperator)
+                        out.print(",");
+                    needMatchesRangesSeperator = true;
+
+                    out.print(prefix, " ranges:(");
+                    for (size_t i = 0; i < rangeSize; ++i) {
+                        if (i)
+                            out.print(",");
+                        CharacterRange range = ranges[i];
+                        out.print("(");
+                        dumpUChar32(out, range.begin);
+                        out.print("..");
+                        dumpUChar32(out, range.end);
+                        out.print(")");
+                    }
+                    out.print(")");
+                }
+            };
+
+            out.print("[");
+            dumpMatches("ASCII", characterClass->m_matches);
+            dumpRanges("ASCII", characterClass->m_ranges);
+            dumpMatches("Unicode", characterClass->m_matchesUnicode);
+            dumpRanges("Unicode", characterClass->m_rangesUnicode);
+            out.print("]");
+        }
+        dumpQuantifier(out);
+        if (quantityType != QuantifierFixedCount || thisPattern->unicode())
+            out.print(",frame location ", frameLocation);
+        out.println();
+        break;
+    case TypeBackReference:
+        out.print("back reference to subpattern #", backReferenceSubpatternId);
+        out.println(",frame location ", frameLocation);
+        break;
+    case TypeForwardReference:
+        out.println("forward reference");
+        break;
+    case TypeParenthesesSubpattern:
+        if (m_capture)
+            out.print("captured ");
+        else
+            out.print("non-captured ");
+
+        FALLTHROUGH;
+    case TypeParentheticalAssertion:
+        if (m_invert)
+            out.print("inverted ");
+
+        if (type == TypeParenthesesSubpattern)
+            out.print("subpattern");
+        else if (type == TypeParentheticalAssertion)
+            out.print("assertion");
+
+        if (m_capture)
+            out.print(" #", parentheses.subpatternId);
+
+        dumpQuantifier(out);
+
+        if (parentheses.isCopy)
+            out.print(",copy");
+
+        if (parentheses.isTerminal)
+            out.print(",terminal");
+
+        out.println(",frame location ", frameLocation);
+
+        if (parentheses.disjunction->m_alternatives.size() > 1) {
+            indentForNestingLevel(out, nestingDepth + 1);
+            unsigned alternativeFrameLocation = frameLocation;
+            if (quantityMaxCount == 1 && !parentheses.isCopy)
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+            else if (parentheses.isTerminal)
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
+            else
+                alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
+            out.println("alternative list,frame location ", alternativeFrameLocation);
+        }
+
+        parentheses.disjunction->dump(out, thisPattern, nestingDepth + 1);
+        break;
+    case TypeDotStarEnclosure:
+        out.println(".* enclosure,frame location ", thisPattern->m_initialStartValueFrameLocation);
+        break;
+    }
+}
+
+void PatternDisjunction::dump(PrintStream& out, YarrPattern* thisPattern, unsigned nestingDepth = 0)
+{
+    unsigned alternativeCount = m_alternatives.size();
+    for (unsigned i = 0; i < alternativeCount; ++i) {
+        indentForNestingLevel(out, nestingDepth);
+        if (alternativeCount > 1)
+            out.print("alternative #", i, ": ");
+        m_alternatives[i].get()->dump(out, thisPattern, nestingDepth + (alternativeCount > 1));
+    }
+}
+
+void YarrPattern::dumpPattern(const String& patternString)
+{
+    dumpPattern(WTF::dataFile(), patternString);
+}
+
+void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
+{
+    out.print("RegExp pattern for /");
+    out.print(patternString);
+    out.print("/");
+    if (global())
+        out.print("g");
+    if (ignoreCase())
+        out.print("i");
+    if (multiline())
+        out.print("m");
+    if (unicode())
+        out.print("u");
+    if (sticky())
+        out.print("y");
+    if (m_flags != NoFlags) {
+        bool printSeperator = false;
+        out.print(" (");
+        if (global()) {
+            out.print("global");
+            printSeperator = true;
+        }
+        if (ignoreCase()) {
+            if (printSeperator)
+                out.print("|");
+            out.print("ignore case");
+            printSeperator = true;
+        }
+        if (multiline()) {
+            if (printSeperator)
+                out.print("|");
+            out.print("multiline");
+            printSeperator = true;
+        }
+        if (unicode()) {
+            if (printSeperator)
+                out.print("|");
+            out.print("unicode");
+            printSeperator = true;
+        }
+        if (sticky()) {
+            if (printSeperator)
+                out.print("|");
+            out.print("sticky");
+            printSeperator = true;
+        }
+        out.print(")");
+    }
+    out.print(":\n");
+    if (m_body->m_callFrameSize)
+        out.print("    callframe size: ", m_body->m_callFrameSize, "\n");
+    m_body->dump(out, this);
+}
+
+std::unique_ptr<CharacterClass> anycharCreate()
 {
-    *error = compile(pattern);
+    auto characterClass = std::make_unique<CharacterClass>();
+    characterClass->m_ranges.append(CharacterRange(0x00, 0x7f));
+    characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x10ffff));
+    characterClass->m_hasNonBMPCharacters = true;
+    characterClass->m_anyCharacter = true;
+    return characterClass;
 }
 
-} }
+} } // namespace JSC::Yarr