From 886d463061ba34802bf844133396e3706d6912a4 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Tue, 7 Aug 2018 22:27:23 +0200 Subject: Enable unicode regular expressions Add support for the 'u' flag for regular expressions. Change-Id: I409054eaa9c50183619752d14f2638f5a38c0ea7 Reviewed-by: Simon Hausmann --- src/qml/compiler/qv4compileddata.cpp | 5 ++++- src/qml/compiler/qv4compileddata_p.h | 3 ++- src/qml/compiler/qv4compiler.cpp | 2 ++ src/qml/jsruntime/qv4engine.cpp | 5 ++++- src/qml/jsruntime/qv4regexp.cpp | 9 ++++++--- src/qml/jsruntime/qv4regexp_p.h | 5 +++-- src/qml/jsruntime/qv4regexpobject.cpp | 3 +++ src/qml/jsruntime/qv4regexpobject_p.h | 4 +++- src/qml/parser/qqmljslexer.cpp | 1 + src/qml/parser/qqmljslexer_p.h | 3 ++- tests/auto/qml/ecmascripttests/TestExpectations | 14 -------------- 11 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/qml/compiler/qv4compileddata.cpp b/src/qml/compiler/qv4compileddata.cpp index 8e46ebf230..1c311267b9 100644 --- a/src/qml/compiler/qv4compileddata.cpp +++ b/src/qml/compiler/qv4compileddata.cpp @@ -152,13 +152,16 @@ QV4::Function *CompilationUnit::linkToEngine(ExecutionEngine *engine) bool global = false; bool multiline = false; bool ignoreCase = false; + bool unicode = false; if (re->flags & CompiledData::RegExp::RegExp_Global) global = true; if (re->flags & CompiledData::RegExp::RegExp_IgnoreCase) ignoreCase = true; if (re->flags & CompiledData::RegExp::RegExp_Multiline) multiline = true; - runtimeRegularExpressions[i] = QV4::RegExp::create(engine, stringAt(re->stringIndex), ignoreCase, multiline, global); + if (re->flags & CompiledData::RegExp::RegExp_Unicode) + unicode = true; + runtimeRegularExpressions[i] = QV4::RegExp::create(engine, stringAt(re->stringIndex), ignoreCase, multiline, global, unicode); } if (data->lookupTableSize) { diff --git a/src/qml/compiler/qv4compileddata_p.h b/src/qml/compiler/qv4compileddata_p.h index e65c04ad69..f6face5f6e 100644 --- a/src/qml/compiler/qv4compileddata_p.h +++ b/src/qml/compiler/qv4compileddata_p.h @@ -139,7 +139,8 @@ struct RegExp enum Flags : unsigned int { RegExp_Global = 0x01, RegExp_IgnoreCase = 0x02, - RegExp_Multiline = 0x04 + RegExp_Multiline = 0x04, + RegExp_Unicode = 0x08 }; union { quint32 _dummy; diff --git a/src/qml/compiler/qv4compiler.cpp b/src/qml/compiler/qv4compiler.cpp index 4e902eca65..d7c7563315 100644 --- a/src/qml/compiler/qv4compiler.cpp +++ b/src/qml/compiler/qv4compiler.cpp @@ -178,6 +178,8 @@ int QV4::Compiler::JSUnitGenerator::registerRegExp(QQmlJS::AST::RegExpLiteral *r re.flags |= CompiledData::RegExp::RegExp_IgnoreCase; if (regexp->flags & QQmlJS::Lexer::RegExp_Multiline) re.flags |= CompiledData::RegExp::RegExp_Multiline; + if (regexp->flags & QQmlJS::Lexer::RegExp_Unicode) + re.flags |= CompiledData::RegExp::RegExp_Unicode; regexps.append(re); return regexps.size() - 1; diff --git a/src/qml/jsruntime/qv4engine.cpp b/src/qml/jsruntime/qv4engine.cpp index 69b23484a8..6d5a43dd1f 100644 --- a/src/qml/jsruntime/qv4engine.cpp +++ b/src/qml/jsruntime/qv4engine.cpp @@ -377,6 +377,8 @@ ExecutionEngine::ExecutionEngine(QJSEngine *jsEngine) Q_ASSERT(index == RegExpObject::Index_IgnoreCase); ic = ic->addMember((str = newIdentifier(QStringLiteral("multiline")))->propertyKey(), Attr_ReadOnly, &index); Q_ASSERT(index == RegExpObject::Index_Multiline); + ic = ic->addMember((str = newIdentifier(QStringLiteral("unicode")))->propertyKey(), Attr_ReadOnly, &index); + Q_ASSERT(index == RegExpObject::Index_Unicode); jsObjects[RegExpProto] = memoryManager->allocObject(ic->d()); classes[Class_RegExpObject] = ic->changePrototype(regExpPrototype()->d()); @@ -787,9 +789,10 @@ Heap::RegExpObject *ExecutionEngine::newRegExpObject(const QString &pattern, int bool global = (flags & QV4::CompiledData::RegExp::RegExp_Global); bool ignoreCase = (flags & QV4::CompiledData::RegExp::RegExp_IgnoreCase); bool multiline = (flags & QV4::CompiledData::RegExp::RegExp_Multiline); + bool unicode = (flags & QV4::CompiledData::RegExp::RegExp_Unicode); Scope scope(this); - Scoped re(scope, RegExp::create(this, pattern, ignoreCase, multiline, global)); + Scoped re(scope, RegExp::create(this, pattern, ignoreCase, multiline, global, unicode)); return newRegExpObject(re); } diff --git a/src/qml/jsruntime/qv4regexp.cpp b/src/qml/jsruntime/qv4regexp.cpp index 89fd9fc233..e562482395 100644 --- a/src/qml/jsruntime/qv4regexp.cpp +++ b/src/qml/jsruntime/qv4regexp.cpp @@ -70,7 +70,7 @@ uint RegExp::match(const QString &string, int start, uint *matchOffsets) return JSC::Yarr::interpret(byteCode(), s.characters16(), string.length(), start, matchOffsets); } -Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, bool ignoreCase, bool multiline, bool global) +Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, bool ignoreCase, bool multiline, bool global, bool unicode) { RegExpCacheKey key(pattern, ignoreCase, multiline, global); @@ -83,7 +83,7 @@ Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, bo return result->d(); Scope scope(engine); - Scoped result(scope, engine->memoryManager->alloc(engine, pattern, ignoreCase, multiline, global)); + Scoped result(scope, engine->memoryManager->alloc(engine, pattern, ignoreCase, multiline, global, unicode)); result->d()->cache = cache; cachedValue.set(engine, result); @@ -91,12 +91,13 @@ Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, bo return result->d(); } -void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, bool ignoreCase, bool multiline, bool global) +void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, bool ignoreCase, bool multiline, bool global, bool unicode) { Base::init(); this->pattern = new QString(pattern); this->ignoreCase = ignoreCase; this->multiLine = multiline; + this->unicode = unicode; this->global = global; valid = false; @@ -109,6 +110,8 @@ void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, bool ig flags = static_cast(flags | JSC::FlagMultiline); if (global) flags = static_cast(flags | JSC::FlagGlobal); + if (unicode) + flags = static_cast(flags | JSC::FlagUnicode); JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), flags, error); if (error != JSC::Yarr::ErrorCode::NoError) diff --git a/src/qml/jsruntime/qv4regexp_p.h b/src/qml/jsruntime/qv4regexp_p.h index 597e42538a..9090aaa7d5 100644 --- a/src/qml/jsruntime/qv4regexp_p.h +++ b/src/qml/jsruntime/qv4regexp_p.h @@ -76,7 +76,7 @@ struct RegExpCacheKey; namespace Heap { struct RegExp : Base { - void init(ExecutionEngine *engine, const QString& pattern, bool ignoreCase, bool multiline, bool global); + void init(ExecutionEngine *engine, const QString& pattern, bool ignoreCase, bool multiline, bool global, bool unicode); void destroy(); QString *pattern; @@ -96,6 +96,7 @@ struct RegExp : Base { bool ignoreCase; bool multiLine; bool global; + bool unicode; bool valid; int captureCount() const { return subPatternCount + 1; } @@ -122,7 +123,7 @@ struct RegExp : public Managed bool multiLine() const { return d()->multiLine; } bool global() const { return d()->global; } - static Heap::RegExp *create(ExecutionEngine* engine, const QString& pattern, bool ignoreCase = false, bool multiline = false, bool global = false); + static Heap::RegExp *create(ExecutionEngine* engine, const QString& pattern, bool ignoreCase = false, bool multiline = false, bool global = false, bool unicode = false); bool isValid() const { return d()->valid; } diff --git a/src/qml/jsruntime/qv4regexpobject.cpp b/src/qml/jsruntime/qv4regexpobject.cpp index 8429b96baa..f8caf404e9 100644 --- a/src/qml/jsruntime/qv4regexpobject.cpp +++ b/src/qml/jsruntime/qv4regexpobject.cpp @@ -151,6 +151,7 @@ void RegExpObject::initProperties() setProperty(Index_Global, Primitive::fromBoolean(global())); setProperty(Index_IgnoreCase, Primitive::fromBoolean(value()->ignoreCase)); setProperty(Index_Multiline, Primitive::fromBoolean(value()->multiLine)); + setProperty(Index_Unicode, Primitive::fromBoolean(value()->unicode)); } // Converts a JS RegExp to a QRegExp. @@ -191,6 +192,8 @@ uint RegExpObject::flags() const f |= QV4::RegExpObject::RegExp_IgnoreCase; if (value()->multiLine) f |= QV4::RegExpObject::RegExp_Multiline; + if (value()->unicode) + f |= QV4::RegExpObject::RegExp_Unicode; return f; } diff --git a/src/qml/jsruntime/qv4regexpobject_p.h b/src/qml/jsruntime/qv4regexpobject_p.h index 0d4fe760eb..e52220c257 100644 --- a/src/qml/jsruntime/qv4regexpobject_p.h +++ b/src/qml/jsruntime/qv4regexpobject_p.h @@ -108,7 +108,8 @@ struct RegExpObject: Object { enum Flags { RegExp_Global = 0x01, RegExp_IgnoreCase = 0x02, - RegExp_Multiline = 0x04 + RegExp_Multiline = 0x04, + RegExp_Unicode = 0x08 }; enum { @@ -117,6 +118,7 @@ struct RegExpObject: Object { Index_Global = 2, Index_IgnoreCase = 3, Index_Multiline = 4, + Index_Unicode = 5, Index_ArrayIndex = Heap::ArrayObject::LengthPropertyIndex + 1, Index_ArrayInput = Index_ArrayIndex + 1 }; diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp index 86c8ac714c..e93dda942a 100644 --- a/src/qml/parser/qqmljslexer.cpp +++ b/src/qml/parser/qqmljslexer.cpp @@ -58,6 +58,7 @@ static inline int regExpFlagFromChar(const QChar &ch) case 'g': return Lexer::RegExp_Global; case 'i': return Lexer::RegExp_IgnoreCase; case 'm': return Lexer::RegExp_Multiline; + case 'u': return Lexer::RegExp_Unicode; } return 0; } diff --git a/src/qml/parser/qqmljslexer_p.h b/src/qml/parser/qqmljslexer_p.h index a6ac8cb354..64db62625a 100644 --- a/src/qml/parser/qqmljslexer_p.h +++ b/src/qml/parser/qqmljslexer_p.h @@ -113,7 +113,8 @@ public: enum RegExpFlag { RegExp_Global = 0x01, RegExp_IgnoreCase = 0x02, - RegExp_Multiline = 0x04 + RegExp_Multiline = 0x04, + RegExp_Unicode = 0x08 }; enum ParseModeFlags { diff --git a/tests/auto/qml/ecmascripttests/TestExpectations b/tests/auto/qml/ecmascripttests/TestExpectations index f7b9065c15..b3fdcbb249 100644 --- a/tests/auto/qml/ecmascripttests/TestExpectations +++ b/tests/auto/qml/ecmascripttests/TestExpectations @@ -707,7 +707,6 @@ built-ins/RegExp/S15.10.4.1_A2_T2.js fails built-ins/RegExp/call_with_non_regexp_same_constructor.js fails built-ins/RegExp/call_with_regexp_match_falsy.js fails built-ins/RegExp/call_with_regexp_not_same_constructor.js fails -built-ins/RegExp/dotall/without-dotall-unicode.js fails built-ins/RegExp/from-regexp-like-flag-override.js fails built-ins/RegExp/from-regexp-like-get-source-err.js fails built-ins/RegExp/from-regexp-like-short-circuit.js fails @@ -874,9 +873,7 @@ built-ins/RegExp/prototype/Symbol.split/u-lastindex-adv-thru-match.js fails built-ins/RegExp/prototype/exec/S15.10.6.2_A5_T3.js fails built-ins/RegExp/prototype/exec/failure-lastindex-access.js fails built-ins/RegExp/prototype/exec/success-lastindex-access.js fails -built-ins/RegExp/prototype/exec/u-captured-value.js fails built-ins/RegExp/prototype/exec/u-lastindex-adv.js fails -built-ins/RegExp/prototype/exec/u-lastindex-value.js fails built-ins/RegExp/prototype/exec/y-fail-lastindex-no-write.js fails built-ins/RegExp/prototype/exec/y-fail-lastindex.js fails built-ins/RegExp/prototype/exec/y-fail-return.js fails @@ -913,7 +910,6 @@ built-ins/RegExp/prototype/source/name.js fails built-ins/RegExp/prototype/source/prop-desc.js fails built-ins/RegExp/prototype/source/this-val-regexp-prototype.js fails built-ins/RegExp/prototype/source/value-line-terminator.js fails -built-ins/RegExp/prototype/source/value-u.js fails built-ins/RegExp/prototype/sticky/length.js fails built-ins/RegExp/prototype/sticky/name.js fails built-ins/RegExp/prototype/sticky/prop-desc.js fails @@ -930,12 +926,9 @@ built-ins/RegExp/prototype/test/y-set-lastindex.js fails built-ins/RegExp/prototype/unicode/length.js fails built-ins/RegExp/prototype/unicode/name.js fails built-ins/RegExp/prototype/unicode/prop-desc.js fails -built-ins/RegExp/prototype/unicode/this-val-invalid-obj.js fails -built-ins/RegExp/prototype/unicode/this-val-non-obj.js fails built-ins/RegExp/prototype/unicode/this-val-regexp-prototype.js fails built-ins/RegExp/prototype/unicode/this-val-regexp.js fails built-ins/RegExp/u180e.js fails -built-ins/RegExp/unicode_identity_escape.js fails built-ins/RegExp/valid-flags-y.js fails built-ins/Set/proto-from-ctor-realm.js fails built-ins/Set/prototype/add/does-not-have-setdata-internal-slot-weakset.js fails @@ -2162,14 +2155,7 @@ language/global-code/script-decl-var.js fails language/identifiers/other_id_continue.js fails language/identifiers/other_id_start-escaped.js fails language/identifiers/other_id_start.js fails -language/literals/regexp/u-astral.js fails language/literals/regexp/u-case-mapping.js fails -language/literals/regexp/u-surrogate-pairs-atom-char-class.js fails -language/literals/regexp/u-surrogate-pairs-atom-dot.js fails -language/literals/regexp/u-surrogate-pairs-atom-escape-char-class.js fails -language/literals/regexp/u-surrogate-pairs-atom-escape-decimal.js fails -language/literals/regexp/u-surrogate-pairs.js fails -language/literals/regexp/u-unicode-esc.js fails language/literals/regexp/y-assertion-start.js fails language/module-code/eval-export-dflt-cls-anon.js strictFails language/module-code/eval-export-dflt-cls-name-meth.js strictFails -- cgit v1.2.3