From b7887f9b4faad2227691a2af589e9d7680d6ae08 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 19 Sep 2018 00:05:54 -0500 Subject: Linux: Remove our use of syscall() for statx(2) and renameat2(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those system calls are present in glibc 2.28. Instead of using syscall(3) to place the system calls directly, let's use only the glibc functions. That also means we no longer accept ENOSYS from either function, if they were detected in glibc. Change-Id: I44e7d800c68141bdaae0fffd1555b4b8fe63786b Reviewed-by: Oswald Buddenhagen Reviewed-by: Lars Knoll Reviewed-by: Jüri Valdmann --- src/corelib/global/minimum-linux_p.h | 7 ++++- src/corelib/io/qfilesystemengine_unix.cpp | 45 +++---------------------------- 2 files changed, 9 insertions(+), 43 deletions(-) diff --git a/src/corelib/global/minimum-linux_p.h b/src/corelib/global/minimum-linux_p.h index bad2488b4d..9c074e13ba 100644 --- a/src/corelib/global/minimum-linux_p.h +++ b/src/corelib/global/minimum-linux_p.h @@ -75,9 +75,14 @@ QT_BEGIN_NAMESPACE * - accept4 2.6.28 * - renameat2 3.16 QT_CONFIG(renameat2) * - getrandom 3.17 QT_CONFIG(getentropy) + * - statx 4.11 QT_CONFIG(statx) */ -#if QT_CONFIG(getentropy) +#if QT_CONFIG(statx) +# define MINLINUX_MAJOR 4 +# define MINLINUX_MINOR 11 +# define MINLINUX_PATCH 0 +#elif QT_CONFIG(getentropy) # define MINLINUX_MAJOR 3 # define MINLINUX_MINOR 17 # define MINLINUX_PATCH 0 diff --git a/src/corelib/io/qfilesystemengine_unix.cpp b/src/corelib/io/qfilesystemengine_unix.cpp index deb4a9f220..40e8f82a80 100644 --- a/src/corelib/io/qfilesystemengine_unix.cpp +++ b/src/corelib/io/qfilesystemengine_unix.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2017 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Copyright (C) 2016 The Qt Company Ltd. 
** Copyright (C) 2013 Samuel Gaist ** Contact: https://www.qt.io/licensing/ @@ -88,7 +88,6 @@ extern "C" NSString *NSTemporaryDirectory(); #if defined(Q_OS_LINUX) # include -# include # include # include @@ -96,28 +95,6 @@ extern "C" NSString *NSTemporaryDirectory(); #ifndef FICLONE # define FICLONE _IOW(0x94, 9, int) #endif - -# if defined(Q_OS_ANDROID) -// renameat2() and statx() are disabled on Android because quite a few systems -// come with sandboxes that kill applications that make system calls outside a -// whitelist and several Android vendors can't be bothered to update the list. -# undef SYS_renameat2 -# undef SYS_statx -# undef STATX_BASIC_STATS -# else -# if !QT_CONFIG(renameat2) && defined(SYS_renameat2) -static int renameat2(int oldfd, const char *oldpath, int newfd, const char *newpath, unsigned flags) -{ return syscall(SYS_renameat2, oldfd, oldpath, newfd, newpath, flags); } -# endif - -# if !QT_CONFIG(statx) && defined(SYS_statx) -# include -static int statx(int dirfd, const char *pathname, int flag, unsigned mask, struct statx *statxbuf) -{ return syscall(SYS_statx, dirfd, pathname, flag, mask, statxbuf); } -# elif !QT_CONFIG(statx) && !defined(SYS_statx) -# undef STATX_BASIC_STATS -# endif -# endif // !Q_OS_ANDROID #endif #ifndef STATX_ALL @@ -331,22 +308,8 @@ mtime(const T &statBuffer, int) #ifdef STATX_BASIC_STATS static int qt_real_statx(int fd, const char *pathname, int flags, struct statx *statxBuffer) { -#ifdef Q_ATOMIC_INT8_IS_SUPPORTED - static QBasicAtomicInteger statxTested = Q_BASIC_ATOMIC_INITIALIZER(0); -#else - static QBasicAtomicInt statxTested = Q_BASIC_ATOMIC_INITIALIZER(0); -#endif - - if (statxTested.load() == -1) - return -ENOSYS; - unsigned mask = STATX_BASIC_STATS | STATX_BTIME; int ret = statx(fd, pathname, flags, mask, statxBuffer); - if (ret == -1 && errno == ENOSYS) { - statxTested.store(-1); - return -ENOSYS; - } - statxTested.store(1); return ret == -1 ? 
-errno : 0; } @@ -1282,14 +1245,12 @@ bool QFileSystemEngine::renameFile(const QFileSystemEntry &source, const QFileSy if (Q_UNLIKELY(srcPath.isEmpty() || tgtPath.isEmpty())) return emptyFileEntryWarning(), false; -#if defined(RENAME_NOREPLACE) && (QT_CONFIG(renameat2) || defined(SYS_renameat2)) +#if defined(RENAME_NOREPLACE) && QT_CONFIG(renameat2) if (renameat2(AT_FDCWD, srcPath, AT_FDCWD, tgtPath, RENAME_NOREPLACE) == 0) return true; - // If we're using syscall(), check for ENOSYS; - // if renameat2 came from libc, we don't accept ENOSYS. // We can also get EINVAL for some non-local filesystems. - if ((QT_CONFIG(renameat2) || errno != ENOSYS) && errno != EINVAL) { + if (errno != EINVAL) { error = QSystemError(errno, QSystemError::StandardLibraryError); return false; } -- cgit v1.2.3 From fc4b0769a5d65960eea959730d5cd20d3496d40b Mon Sep 17 00:00:00 2001 From: Eirik Aavitsland Date: Fri, 12 Oct 2018 11:45:13 +0200 Subject: Fix pdf printing in static builds The pdf engine uses a resource file, but Q_INIT_RESOURCE() was lacking. Task-number: QTBUG-71070 Change-Id: I685961b3f2eea0ffe6b5313c72d504a8ad9a98e5 Reviewed-by: Lars Knoll --- src/gui/painting/qpdf.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gui/painting/qpdf.cpp b/src/gui/painting/qpdf.cpp index e58f9cee4c..8a0020bd2d 100644 --- a/src/gui/painting/qpdf.cpp +++ b/src/gui/painting/qpdf.cpp @@ -71,6 +71,11 @@ static const bool do_compress = true; // Can't use it though, as gs generates completely wrong images if this is true. 
static const bool interpolateImages = false; +static void initResources() +{ + Q_INIT_RESOURCE(qpdf); +} + QT_BEGIN_NAMESPACE inline QPaintEngine::PaintEngineFeatures qt_pdf_decide_features() @@ -1445,6 +1450,7 @@ QPdfEnginePrivate::QPdfEnginePrivate() grayscale(false), m_pageLayout(QPageSize(QPageSize::A4), QPageLayout::Portrait, QMarginsF(10, 10, 10, 10)) { + initResources(); resolution = 1200; currentObject = 1; currentPage = 0; -- cgit v1.2.3 From 1cd2955173e2248b92f44c9d52d81447ff87906c Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Mon, 15 Oct 2018 14:57:59 +0200 Subject: Fix enum passed to QFontDatabase::findFont The script taken here is a QChar::Script, not a QFontDatabase::WritingSystem. This means it was passing QChar::Unknown. Change-Id: I919ae7187ba277346a7719116a94776dce24dd84 Reviewed-by: Eskil Abrahamsen Blomfeldt --- src/gui/text/qfontdatabase.h | 4 ++-- src/gui/text/qfontengine.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gui/text/qfontdatabase.h b/src/gui/text/qfontdatabase.h index e6aef493bd..80b092f177 100644 --- a/src/gui/text/qfontdatabase.h +++ b/src/gui/text/qfontdatabase.h @@ -160,8 +160,8 @@ private: static void createDatabase(); static void parseFontName(const QString &name, QString &foundry, QString &family); static QString resolveFontFamilyAlias(const QString &family); - static QFontEngine *findFont(const QFontDef &request, int script); - static void load(const QFontPrivate *d, int script); + static QFontEngine *findFont(const QFontDef &request, int script /* QChar::Script */); + static void load(const QFontPrivate *d, int script /* QChar::Script */); friend struct QFontDef; friend class QFontPrivate; diff --git a/src/gui/text/qfontengine.cpp b/src/gui/text/qfontengine.cpp index 3b64ee0136..80fd288c04 100644 --- a/src/gui/text/qfontengine.cpp +++ b/src/gui/text/qfontengine.cpp @@ -1849,7 +1849,7 @@ QFontEngine *QFontEngineMulti::loadEngine(int at) // info about the actual script of the 
characters may have been discarded, // so we do not check for writing system support, but instead just load // the family indiscriminately. - if (QFontEngine *engine = QFontDatabase::findFont(request, QFontDatabase::Any)) { + if (QFontEngine *engine = QFontDatabase::findFont(request, QChar::Script_Common)) { engine->fontDef.weight = request.weight; if (request.style > QFont::StyleNormal) engine->fontDef.style = request.style; -- cgit v1.2.3 From 2624676b5731a9d93a1e46429d2c597f1e4bae38 Mon Sep 17 00:00:00 2001 From: Andy Shaw Date: Tue, 2 Oct 2018 14:02:01 +0200 Subject: qmake: Remove the extra space before -MT Removing the extra space before -MT ensures that the vcxproj generator gets valid input. Change-Id: Iccf88c5fc4473db406d714b646185a4fb60a3418 Reviewed-by: Oswald Buddenhagen --- mkspecs/features/static_runtime.prf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkspecs/features/static_runtime.prf b/mkspecs/features/static_runtime.prf index 3275e6e2e2..1af3236189 100644 --- a/mkspecs/features/static_runtime.prf +++ b/mkspecs/features/static_runtime.prf @@ -1,7 +1,7 @@ msvc { # -MD becomes -MT, -MDd becomes -MTd - QMAKE_CFLAGS ~= s,^-MD(d?)$, -MT\1,g - QMAKE_CXXFLAGS ~= s,^-MD(d?)$, -MT\1,g + QMAKE_CFLAGS ~= s,^-MD(d?)$,-MT\1,g + QMAKE_CXXFLAGS ~= s,^-MD(d?)$,-MT\1,g } else: mingw { QMAKE_LFLAGS += -static } -- cgit v1.2.3 From 7f60940fbedef17984e283da41eae94f29fef428 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 17 Oct 2018 07:59:25 -0700 Subject: Re-disable statx() on Android Commit b7887f9b4faad2227691a2af589e9d7680d6ae08 removed this explicit disabling because it shouldn't be needed anymore. Turns out it was, as new Android SDK do include modern Linux headers and those define the structs and constants needed for statx(). Repeat of 8eb3944dac81b8c51d7bac7784204d457551b50c. 
Task-number: QTBUG-64490 Fixes: QTBUG-71200 Change-Id: If7e743cf8476463880ccfffd155e6d5c2b5a3da9 Reviewed-by: Simon Hausmann Reviewed-by: BogDan Vatra --- src/corelib/io/qfilesystemengine_unix.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/corelib/io/qfilesystemengine_unix.cpp b/src/corelib/io/qfilesystemengine_unix.cpp index 40e8f82a80..964dcebeb2 100644 --- a/src/corelib/io/qfilesystemengine_unix.cpp +++ b/src/corelib/io/qfilesystemengine_unix.cpp @@ -97,6 +97,13 @@ extern "C" NSString *NSTemporaryDirectory(); #endif #endif +#if defined(Q_OS_ANDROID) +// statx() is disabled on Android because quite a few systems +// come with sandboxes that kill applications that make system calls outside a +// whitelist and several Android vendors can't be bothered to update the list. +# undef STATX_BASIC_STATS +#endif + #ifndef STATX_ALL struct statx { mode_t stx_mode; }; // dummy #endif -- cgit v1.2.3 From 6599c1f75832cc9286a3bf88c6b179d006dbb96e Mon Sep 17 00:00:00 2001 From: Eirik Aavitsland Date: Wed, 17 Oct 2018 16:22:11 +0200 Subject: QPicture: fix crash for malformed picture A file with the correct QPicture magic bytes, but shorter than a full QPicture file header, could cause the header decoder to access memory out of bounds. Add a size check to avoid. As a driveby, generally harden the parsing against malformed files. [ChangeLog][QtGui][QPicture] Fix crash reading malformed picture file Task-number: QTBUG-71208 Change-Id: I86eb1f915ca9b3a4b91c7433036d76ed6061e2f0 Reviewed-by: Lars Knoll --- src/gui/image/qpicture.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gui/image/qpicture.cpp b/src/gui/image/qpicture.cpp index 7aa221948e..7eede5ee26 100644 --- a/src/gui/image/qpicture.cpp +++ b/src/gui/image/qpicture.cpp @@ -636,7 +636,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) if (d->formatMajor <= 5) { s >> ia >> i_8; painter->drawPolygon(ia, i_8 ? 
Qt::WindingFill : Qt::OddEvenFill); - a.clear(); + ia.clear(); } else { s >> a >> i_8; painter->drawPolygon(a, i_8 ? Qt::WindingFill : Qt::OddEvenFill); @@ -647,10 +647,10 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) s >> ia; QPainterPath path; Q_ASSERT(ia.size() == 4); - path.moveTo(ia.at(0)); - path.cubicTo(ia.at(1), ia.at(2), ia.at(3)); + path.moveTo(ia.value(0)); + path.cubicTo(ia.value(1), ia.value(2), ia.value(3)); painter->strokePath(path, painter->pen()); - a.clear(); + ia.clear(); } break; case QPicturePrivate::PdcDrawText: @@ -730,7 +730,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) int index; s >> r >> index >> sr; Q_ASSERT(index < d->pixmap_list.size()); - pixmap = d->pixmap_list.at(index); + pixmap = d->pixmap_list.value(index); } else { s >> r >> pixmap >> sr; } @@ -744,7 +744,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) int index; s >> r >> index >> p; Q_ASSERT(index < d->pixmap_list.size()); - pixmap = d->pixmap_list.at(index); + pixmap = d->pixmap_list.value(index); } else { s >> r >> pixmap >> p; } @@ -765,7 +765,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) int index; s >> r >> index >> sr >> ul; Q_ASSERT(index < d->image_list.size()); - image = d->image_list.at(index); + image = d->image_list.value(index); } else { s >> r >> image >> sr >> ul; } @@ -817,7 +817,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) int index; s >> index; Q_ASSERT(index < d->pen_list.size()); - pen = d->pen_list.at(index); + pen = d->pen_list.value(index); } else { s >> pen; } @@ -828,7 +828,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) int index; s >> index; Q_ASSERT(index < d->brush_list.size()); - brush = d->brush_list.at(index); + brush = d->brush_list.value(index); } else { s >> brush; } @@ -910,7 +910,7 @@ bool QPicture::exec(QPainter *painter, QDataStream &s, int nrecords) break; default: 
qWarning("QPicture::play: Invalid command %d", c); - if (len) // skip unknown command + if (len > 0) // skip unknown command s.device()->seek(s.device()->pos()+len); } #if defined(QT_DEBUG) @@ -1075,7 +1075,8 @@ bool QPicturePrivate::checkFormat() char mf_id[4]; // picture header tag s.readRawData(mf_id, 4); // read actual tag - if (memcmp(mf_id, qt_mfhdr_tag, 4) != 0) { // wrong header id + int bufSize = pictb.buffer().size(); + if (memcmp(mf_id, qt_mfhdr_tag, 4) != 0 || bufSize < 12) { // wrong header id or size qWarning("QPicturePaintEngine::checkFormat: Incorrect header"); pictb.close(); return false; -- cgit v1.2.3 From 44eeeb8e816fbdcd77ad734cfe7a7ec28da1c5ed Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Mon, 1 Oct 2018 12:24:08 +0200 Subject: Upgrade PCRE2 to 10.32 [ChangeLog][Third-Party Code] PCRE2 was updated to version 10.32. Change-Id: Id3bf7df0003f626cd1135d0508a5a489ff02f1e5 Reviewed-by: Edward Welbourne --- src/3rdparty/pcre2/LICENCE | 14 +- src/3rdparty/pcre2/qt_attribution.json | 6 +- src/3rdparty/pcre2/src/pcre2.h | 30 +- src/3rdparty/pcre2/src/pcre2_auto_possess.c | 7 +- src/3rdparty/pcre2/src/pcre2_chartables.c | 50 +- src/3rdparty/pcre2/src/pcre2_compile.c | 336 +- src/3rdparty/pcre2/src/pcre2_dfa_match.c | 271 +- src/3rdparty/pcre2/src/pcre2_error.c | 15 +- src/3rdparty/pcre2/src/pcre2_extuni.c | 8 +- src/3rdparty/pcre2/src/pcre2_find_bracket.c | 3 +- src/3rdparty/pcre2/src/pcre2_internal.h | 131 +- src/3rdparty/pcre2/src/pcre2_intmodedep.h | 17 + src/3rdparty/pcre2/src/pcre2_jit_compile.c | 37 +- src/3rdparty/pcre2/src/pcre2_maketables.c | 9 +- src/3rdparty/pcre2/src/pcre2_match.c | 54 +- src/3rdparty/pcre2/src/pcre2_pattern_info.c | 3 +- src/3rdparty/pcre2/src/pcre2_serialize.c | 22 +- src/3rdparty/pcre2/src/pcre2_string_utils.c | 38 +- src/3rdparty/pcre2/src/pcre2_study.c | 6 +- src/3rdparty/pcre2/src/pcre2_substitute.c | 47 +- src/3rdparty/pcre2/src/pcre2_tables.c | 378 +- src/3rdparty/pcre2/src/pcre2_ucd.c | 6727 
++++++++++---------- src/3rdparty/pcre2/src/pcre2_ucp.h | 48 +- src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h | 29 +- src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c | 9 +- src/3rdparty/pcre2/src/sljit/sljitLir.c | 10 +- src/3rdparty/pcre2/src/sljit/sljitLir.h | 20 +- src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c | 250 +- .../pcre2/src/sljit/sljitNativeARM_T2_32.c | 112 +- src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c | 4 +- src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c | 4 +- .../pcre2/src/sljit/sljitNativeMIPS_common.c | 48 +- src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c | 79 +- src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c | 120 +- .../pcre2/src/sljit/sljitNativeX86_common.c | 17 - 35 files changed, 4892 insertions(+), 4067 deletions(-) diff --git a/src/3rdparty/pcre2/LICENCE b/src/3rdparty/pcre2/LICENCE index bfe3c8d528..b0f8804fff 100644 --- a/src/3rdparty/pcre2/LICENCE +++ b/src/3rdparty/pcre2/LICENCE @@ -4,11 +4,11 @@ PCRE2 LICENCE PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. -Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as -specified below, with one exemption for certain binary redistributions. The -documentation for PCRE2, supplied in the "doc" directory, is distributed under -the same terms as the software itself. The data in the testdata directory is -not copyrighted and is in the public domain. +Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD" +licence, as specified below, with one exemption for certain binary +redistributions. The documentation for PCRE2, supplied in the "doc" directory, +is distributed under the same terms as the software itself. The data in the +testdata directory is not copyrighted and is in the public domain. The basic library functions are written in C and are freestanding. 
Also included in the distribution is a just-in-time compiler that can be used to @@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT Written by: Zoltan Herczeg Email local part: hzmester -Emain domain: freemail.hu +Email domain: freemail.hu Copyright(c) 2010-2018 Zoltan Herczeg All rights reserved. @@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER Written by: Zoltan Herczeg Email local part: hzmester -Emain domain: freemail.hu +Email domain: freemail.hu Copyright(c) 2009-2018 Zoltan Herczeg All rights reserved. diff --git a/src/3rdparty/pcre2/qt_attribution.json b/src/3rdparty/pcre2/qt_attribution.json index 4b635cafee..828c4e8314 100644 --- a/src/3rdparty/pcre2/qt_attribution.json +++ b/src/3rdparty/pcre2/qt_attribution.json @@ -7,7 +7,7 @@ "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Homepage": "http://www.pcre.org/", - "Version": "10.31", + "Version": "10.32", "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.31.tar.bz2", "License": "BSD 3-clause \"New\" or \"Revised\" License", "LicenseId": "BSD-3-Clause", @@ -26,7 +26,7 @@ Copyright (c) 2013-2013 Tilera Corporation (jiwang@tilera.com)" "Path": "src/sljit", "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Homepage": "http://www.pcre.org/", - "Version": "10.31", + "Version": "10.32", "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.31.tar.bz2", "License": "BSD 2-clause \"Simplified\" License", "LicenseId": "BSD-2-Clause", @@ -34,4 +34,4 @@ Copyright (c) 2013-2013 Tilera Corporation (jiwang@tilera.com)" "Copyright": "Copyright (c) Zoltan Herczeg Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com)" } -] \ No newline at end of file +] diff --git a/src/3rdparty/pcre2/src/pcre2.h b/src/3rdparty/pcre2/src/pcre2.h index fffcc307d0..3d2feb7a6b 100644 --- 
a/src/3rdparty/pcre2/src/pcre2.h +++ b/src/3rdparty/pcre2/src/pcre2.h @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, second API, to be #included by applications that call PCRE2 functions. - Copyright (c) 2016-2017 University of Cambridge + Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ -#define PCRE2_MAJOR 10 -#define PCRE2_MINOR 31 -#define PCRE2_PRERELEASE -#define PCRE2_DATE 2018-02-12 +#define PCRE2_MAJOR 10 +#define PCRE2_MINOR 32 +#define PCRE2_PRERELEASE +#define PCRE2_DATE 2018-09-10 + +/* For the benefit of systems without stdint.h, an alternative is to use +inttypes.h. The existence of these headers is checked by configure or CMake. */ + +#define PCRE2_HAVE_STDINT_H 1 +#define PCRE2_HAVE_INTTYPES_H 1 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */ #define PCRE2_CALL_CONVENTION #endif -/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and -uint8_t, UCHAR_MAX, etc are defined. */ +/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure +that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither +header, the relevant values must be provided by some other means. */ #include #include + +#if PCRE2_HAVE_STDINT_H #include +#elif PCRE2_HAVE_INTTYPES_H +#include +#endif /* Allow for C++ users compiling this directly. */ @@ -269,6 +281,7 @@ pcre2_pattern_convert(). 
*/ #define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 #define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 #define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 +/* Error 159 is obsolete and should now never occur */ #define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 #define PCRE2_ERROR_VERB_UNKNOWN 160 #define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 @@ -303,6 +316,8 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 #define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 #define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 +#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 +#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 /* "Expected" matching error codes: no match and partial match. */ @@ -387,6 +402,7 @@ released, the numbers must not be changed. */ #define PCRE2_ERROR_BADSERIALIZEDDATA (-62) #define PCRE2_ERROR_HEAPLIMIT (-63) #define PCRE2_ERROR_CONVERT_SYNTAX (-64) +#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) /* Request types for pcre2_pattern_info() */ diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c index 23275a2e39..2ce152e952 100644 --- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c +++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -505,7 +505,7 @@ Arguments: utf TRUE in UTF mode cb compile data block base_list the data list of the base opcode - base_end the end of the data list + base_end the end of the base opcode rec_limit points to recursion depth counter Returns: TRUE if the auto-possessification is possible @@ -730,7 +730,7 @@ for(;;) if ((*xclass_flags & XCL_MAP) == 0) { /* No bits are set for characters < 256. */ - if (list[1] == 0) return TRUE; + if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; /* Might be an empty repeat. */ continue; } @@ -1235,6 +1235,7 @@ for (;;) #endif case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: diff --git a/src/3rdparty/pcre2/src/pcre2_chartables.c b/src/3rdparty/pcre2/src/pcre2_chartables.c index 203cb1a4ab..4046500c00 100644 --- a/src/3rdparty/pcre2/src/pcre2_chartables.c +++ b/src/3rdparty/pcre2/src/pcre2_chartables.c @@ -2,23 +2,24 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* This file contains character tables that are used when no external tables -are passed to PCRE2 by the application that calls it. The tables are used only -for characters whose code values are less than 256. - -This is a default version of the tables that assumes ASCII encoding. A program -called dftables (which is distributed with PCRE2) can be used to build -alternative versions of this file. This is necessary if you are running in an -EBCDIC environment, or if you want to default to a different encoding, for -example ISO-8859-1. When dftables is run, it creates these tables in the -current locale. 
If PCRE2 is configured with --enable-rebuild-chartables, this -happens automatically. - -The following #includes are present because without them gcc 4.x may remove the -array definition from the final binary if PCRE2 is built into a static library -and dead code stripping is activated. This leads to link errors. Pulling in the -header ensures that the array gets flagged as "someone outside this compilation -unit might reference this" and so it will always be supplied to the linker. */ +/* This file was automatically written by the dftables auxiliary +program. It contains character tables that are used when no external +tables are passed to PCRE2 by the application that calls it. The tables +are used only for characters whose code values are less than 256. */ + +/*The dftables program (which is distributed with PCRE2) can be used to +build alternative versions of this file. This is necessary if you are +running in an EBCDIC environment, or if you want to default to a different +encoding, for example ISO-8859-1. When dftables is run, it creates these +tables in the current locale. This happens automatically if PCRE2 is +configured with --enable-rebuild-chartables. */ + +/* The following #include is present because without it gcc 4.x may remove +the array definition from the final binary if PCRE2 is built into a static +library and dead code stripping is activated. This leads to link errors. +Pulling in the header ensures that the array gets flagged as "someone +outside this compilation unit might reference this" and so it will always +be supplied to the linker. */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = { /* This table contains bit maps for various character classes. Each map is 32 bytes long and the bits run from the least significant end of each byte. The classes that have their own maps are: space, xdigit, digit, upper, lower, word, -graph, print, punct, and cntrl. 
Other classes are built from combinations. */ +graph print, punct, and cntrl. Other classes are built from combinations. */ 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, @@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */ 0x04 decimal digit 0x08 hexadecimal digit 0x10 alphanumeric or '_' - 0x80 regular expression metacharacter or binary zero */ - 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ - 0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ - 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ + 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? 
*/ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ + 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ - 0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ + 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c index 87530fb584..6bb1de3610 100644 --- a/src/3rdparty/pcre2/src/pcre2_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE. /* Other debugging code can be enabled by these defines. */ -// #define DEBUG_SHOW_CAPTURES -// #define DEBUG_SHOW_PARSED +/* #define DEBUG_SHOW_CAPTURES */ +/* #define DEBUG_SHOW_PARSED */ /* There are a few things that vary with different code unit sizes. Handle them by defining macros in order to minimize #if usage. */ @@ -250,34 +250,35 @@ is present where expected in a conditional group. */ #define META_LOOKBEHINDNOT 0x80250000u /* (? 
0 => must have an argument */ { 4, META_MARK, +1 }, - { 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */ - { 6, META_COMMIT, -1 }, + { 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */ { 1, META_FAIL, -1 }, { 4, META_FAIL, -1 }, - { 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */ + { 6, META_COMMIT, 0 }, + { 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */ { 4, META_SKIP, 0 }, { 4, META_THEN, 0 } }; @@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem); /* Verb opcodes, indexed by their META code offset from META_MARK. */ static const uint32_t verbops[] = { - OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, - OP_SKIP_ARG, OP_THEN, OP_THEN_ARG }; + OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE, + OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG }; /* Offsets from OP_STAR for case-independent and negative repeat opcodes. */ @@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, - ERR91, ERR92}; + ERR91, ERR92, ERR93, ERR94 }; /* This is a table of start-of-pattern options such as (*UTF) and settings such as (*LIMIT_MATCH=nnnn) and (*CRLF). 
For completeness and backward @@ -976,8 +978,8 @@ for (;;) case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break; case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break; - case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break; case META_FAIL: fprintf(stderr, "META (*FAIL)"); break; + case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break; case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break; case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; case META_THEN: fprintf(stderr, "META (*THEN)"); break; @@ -1067,6 +1069,10 @@ for (;;) fprintf(stderr, "META (*MARK:"); goto SHOWARG; + case META_COMMIT_ARG: + fprintf(stderr, "META (*COMMIT:"); + goto SHOWARG; + case META_PRUNE_ARG: fprintf(stderr, "META (*PRUNE:"); goto SHOWARG; @@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) escape = -i; /* Else return a special escape */ if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X)) cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ + + /* Perl supports \N{name} for character names and \N{U+dddd} for numerical + Unicode code points, as well as plain \N for "not newline". PCRE does not + support \N{name}. However, it does support quantification such as \N{2,3}, + so if \N{ is not followed by U+dddd we check for a quantifier. */ + + if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) + { + PCRE2_SPTR p = ptr + 1; + + /* \N{U+ can be handled by the \x{ code. However, this construction is + not valid in EBCDIC environments because it specifies a Unicode + character, not a codepoint in the local code. For example \N{U+0041} + must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode + casing semantics for the entire pattern, so allow it only in UTF (i.e. + Unicode) mode. 
*/ + + if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) + { +#ifdef EBCDIC + *errorcodeptr = ERR93; +#else + if (utf) + { + ptr = p + 1; + escape = 0; /* Not a fancy escape after all */ + goto COME_FROM_NU; + } + else *errorcodeptr = ERR93; +#endif + } + + /* Give an error if what follows is not a quantifier, but don't override + an error set by the quantifier reader (e.g. number overflow). */ + + else + { + if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && + *errorcodeptr == 0) + *errorcodeptr = ERR37; + } + } } } @@ -1462,6 +1510,7 @@ else /* A number of Perl escapes are not handled by PCRE. We give an explicit error. */ + case CHAR_F: case CHAR_l: case CHAR_L: *errorcodeptr = ERR37; @@ -1719,6 +1768,9 @@ else { if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) { +#ifndef EBCDIC + COME_FROM_NU: +#endif if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) { *errorcodeptr = ERR78; @@ -1852,19 +1904,6 @@ else } } -/* Perl supports \N{name} for character names, as well as plain \N for "not -newline". PCRE does not support \N{name}. However, it does support -quantification such as \N{2,3}. */ - -if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET && - ptrend - ptr > 2) - { - PCRE2_SPTR p = ptr + 1; - if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && - *errorcodeptr == 0) - *errorcodeptr = ERR37; - } - /* Set the pointer to the next character before returning. */ *ptrptr = ptr; @@ -2251,11 +2290,14 @@ typedef struct nest_save { #define NSF_RESET 0x0001u #define NSF_CONDASSERT 0x0002u -/* Of the options that are changeable within the pattern, these are tracked -during parsing. The rest are used from META_OPTIONS items when compiling. */ +/* Options that are changeable within the pattern must be tracked during +parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing, +but all must be tracked so that META_OPTIONS items set the correct values for +the main compiling phase. 
*/ -#define PARSE_TRACKED_OPTIONS \ - (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) +#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ + PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ + PCRE2_UNGREEDY) /* States used for analyzing ranges in character classes. The two OK values must be last. */ @@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL; uint32_t *parsed_pattern = cb->parsed_pattern; uint32_t *parsed_pattern_end = cb->parsed_pattern_end; uint32_t meta_quantifier = 0; +uint32_t add_after_mark = 0; uint16_t nest_depth = 0; int after_manual_callout = 0; int expect_cond_assert = 0; @@ -2434,11 +2477,17 @@ while (ptr < ptrend) /* EITHER: not both options set */ ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || - /* OR: character > 255 */ - c > 255 || - /* OR: not a # comment or white space */ - (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) - )) +#ifdef SUPPORT_UNICODE + /* OR: character > 255 AND not Unicode Pattern White Space */ + (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || +#endif + /* OR: not a # comment or isspace() white space */ + (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 +#ifdef SUPPORT_UNICODE + /* and not CHAR_NEL when Unicode is supported */ + && c != CHAR_NEL +#endif + ))) { PCRE2_SIZE verbnamelength; @@ -2461,6 +2510,16 @@ while (ptr < ptrend) goto FAILED; } *verblengthptr = (uint32_t)verbnamelength; + + /* If this name was on a verb such as (*ACCEPT) which does not continue, + a (*MARK) was generated for the name. We now add the original verb as the + next item. */ + + if (add_after_mark != 0) + { + *parsed_pattern++ = add_after_mark; + add_after_mark = 0; + } break; case CHAR_BACKSLASH: @@ -2510,11 +2569,18 @@ while (ptr < ptrend) /* Skip over whitespace and # comments in extended mode. 
Note that c is a character, not a code unit, so we must not use MAX_255 to test its size - because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */ + because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The + whitespace characters are those designated as "Pattern White Space" by + Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is + U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a + subset of space characters that match \h and \v. */ if ((options & PCRE2_EXTENDED) != 0) { if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; +#ifdef SUPPORT_UNICODE + if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; +#endif if (c == CHAR_NUMBER_SIGN) { while (ptr < ptrend) @@ -3206,7 +3272,6 @@ while (ptr < ptrend) tempptr = ptr; escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options, TRUE, cb); - if (errorcode != 0) { CLASS_ESCAPE_FAILED: @@ -3454,13 +3519,25 @@ while (ptr < ptrend) if (*ptr++ == CHAR_COLON) /* Skip past : or ) */ { - if (verbs[i].has_arg < 0) /* Argument is forbidden */ + /* Some optional arguments can be treated as a preceding (*MARK) */ + + if (verbs[i].has_arg < 0) { - errorcode = ERR59; - goto FAILED; + add_after_mark = verbs[i].meta; + *parsed_pattern++ = META_MARK; } - *parsed_pattern++ = verbs[i].meta + - ((verbs[i].meta != META_MARK)? 0x00010000u:0); + + /* The remaining verbs with arguments (except *MARK) need a different + opcode. */ + + else + { + *parsed_pattern++ = verbs[i].meta + + ((verbs[i].meta != META_MARK)? 0x00010000u:0); + } + + /* Set up for reading the name in the main loop. 
*/ + verblengthptr = parsed_pattern++; verbnamestart = ptr; inverbname = TRUE; @@ -3521,17 +3598,39 @@ while (ptr < ptrend) else { + BOOL hyphenok = TRUE; + uint32_t oldoptions = options; + top_nest->reset_group = 0; top_nest->max_group = 0; set = unset = 0; optset = &set; + /* ^ at the start unsets imnsx and disables the subsequent use of - */ + + if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT) + { + options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| + PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); + hyphenok = FALSE; + ptr++; + } + while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) { switch (*ptr++) { - case CHAR_MINUS: optset = &unset; break; + case CHAR_MINUS: + if (!hyphenok) + { + errorcode = ERR94; + ptr--; /* Correct the offset */ + goto FAILED; + } + optset = &unset; + hyphenok = FALSE; + break; case CHAR_J: /* Record that it changed in the external options */ *optset |= PCRE2_DUPNAMES; @@ -3591,7 +3690,7 @@ while (ptr < ptrend) /* If nothing changed, no need to record. */ - if (set != 0 || unset != 0) + if (options != oldoptions) { *parsed_pattern++ = META_OPTIONS; *parsed_pattern++ = options; @@ -3896,9 +3995,8 @@ while (ptr < ptrend) if (*ptr == CHAR_DOT) { if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; - if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode)) - goto FAILED; - if (minor < 10) minor *= 10; + minor = (*ptr++ - CHAR_0) * 10; + if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0; if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) goto BAD_VERSION_CONDITION; } @@ -4261,11 +4359,11 @@ goto FAILED; /************************************************* -* Find first significant op code * +* Find first significant opcode * *************************************************/ /* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. 
It skips over things +for a fixed first character, or an anchoring opcode etc. It skips over things that do not influence this. For some calls, it makes sense to skip negative forward and all backward assertions, and also the \b assertion; for others it does not. @@ -5472,7 +5570,7 @@ for (;; pptr++) set xclass = TRUE. Then, in the pre-compile phase, accumulate the length of the extra data and reset the pointer. This is so that very large classes that contain a zillion wide characters or Unicode property tests - do not overwrite the work space (which is on the stack). */ + do not overwrite the workspace (which is on the stack). */ if (class_uchardata > class_uchardata_base) { @@ -5563,7 +5661,7 @@ for (;; pptr++) if (class_has_8bitchar > 0) { *code++ |= XCL_MAP; - memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, + (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, CU2BYTES(class_uchardata - code)); if (negate_class && !xclass_has_prop) for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; @@ -5655,6 +5753,7 @@ for (;; pptr++) cb->had_pruneorskip = TRUE; /* Fall through */ case META_MARK: + case META_COMMIT_ARG: VERB_ARG: *code++ = verbops[(meta - META_MARK) >> 16]; /* The length is in characters. */ @@ -6509,7 +6608,7 @@ for (;; pptr++) /* Wrap the recursion call in OP_BRA brackets. 
*/ - memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); + (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); op_previous = *previous = OP_BRA; PUT(previous, 1, 2 + 2*LINK_SIZE); previous[2 + 2*LINK_SIZE] = OP_KET; @@ -6589,7 +6688,7 @@ for (;; pptr++) if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) { - memmove(previous + 1, previous, CU2BYTES(len)); + (void)memmove(previous + 1, previous, CU2BYTES(len)); code++; if (repeat_max == 0) { @@ -6610,7 +6709,7 @@ for (;; pptr++) else { int linkoffset; - memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); + (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRA; @@ -6811,7 +6910,7 @@ for (;; pptr++) if (*bracode == OP_COND || *bracode == OP_SCOND) { int nlen = (int)(code - bracode); - memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); + (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; @@ -7082,7 +7181,7 @@ for (;; pptr++) else { - memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); + (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; tempcode[0] = OP_ONCE; @@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the beginning. We accumulate in a local variable to save frequent testing of lengthptr for NULL. We cannot do this by looking at the value of 'code' at the start and end of each alternative, because compiled items are discarded during -the pre-compile phase so that the work space is not exceeded. */ +the pre-compile phase so that the workspace is not exceeded. 
*/ length = 2 + 2*LINK_SIZE + skipunits; @@ -7622,7 +7721,7 @@ for (;;) { if (cb->open_caps->flag) { - memmove(start_bracket + 1 + LINK_SIZE, start_bracket, + (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket, CU2BYTES(code - start_bracket)); *start_bracket = OP_ONCE; code += 1 + LINK_SIZE; @@ -7765,10 +7864,11 @@ do { if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; } - /* Condition */ + /* Condition. If there is no second branch, it can't be anchored. */ - else if (op == OP_COND) + else if (op == OP_COND || op == OP_SCOND) { + if (scode[GET(scode,1)] != OP_ALT) return FALSE; if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) return FALSE; } @@ -8003,6 +8103,7 @@ for (;;) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: @@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++) if (crc < 0) { - memmove(slot + cb->name_entry_size, slot, + (void)memmove(slot + cb->name_entry_size, slot, CU2BYTES((tablecount - i) * cb->name_entry_size)); break; } @@ -8311,6 +8412,7 @@ for (;; pptr++) break; case META_MARK: /* Add the length of the name. */ + case META_COMMIT_ARG: case META_PRUNE_ARG: case META_SKIP_ARG: case META_THEN_ARG: @@ -8501,6 +8603,7 @@ for (;; pptr++) goto EXIT; case META_MARK: + case META_COMMIT_ARG: case META_PRUNE_ARG: case META_SKIP_ARG: case META_THEN_ARG: @@ -8572,6 +8675,32 @@ for (;; pptr++) case META_LOOKAHEADNOT: pptr = parsed_skip(pptr + 1, PSKIP_KET); if (pptr == NULL) goto PARSED_SKIP_FAILED; + + /* Also ignore any qualifiers that follow a lookahead assertion. 
*/ + + switch (pptr[1]) + { + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + pptr++; + break; + + case META_MINMAX: + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + pptr += 3; + break; + + default: + break; + } break; /* Lookbehinds can be ignored, but must themselves be checked. */ @@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++) break; case META_MARK: + case META_COMMIT_ARG: case META_PRUNE_ARG: case META_SKIP_ARG: case META_THEN_ARG: diff --git a/src/3rdparty/pcre2/src/pcre2_dfa_match.c b/src/3rdparty/pcre2/src/pcre2_dfa_match.c index c6184ff5e9..9b43237da7 100644 --- a/src/3rdparty/pcre2/src/pcre2_dfa_match.c +++ b/src/3rdparty/pcre2/src/pcre2_dfa_match.c @@ -181,7 +181,8 @@ static const uint8_t coptable[] = { 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ - 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ + 0, 0, /* COMMIT, COMMIT_ARG */ + 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ }; @@ -254,7 +255,8 @@ static const uint8_t poptable[] = { 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ - 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ + 0, 0, /* COMMIT, COMMIT_ARG */ + 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ }; @@ -292,6 +294,35 @@ typedef struct stateblock { #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) +/* Before version 10.32 the recursive calls of internal_dfa_match() were passed +local working space and output vectors that were created on the stack. This has +caused issues for some patterns, especially in small-stack environments such as +Windows. 
A new scheme is now in use which sets up a vector on the stack, but if +this is too small, heap memory is used, up to the heap_limit. The main +parameters are all numbers of ints because the workspace is a vector of ints. + +The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is +defined in pcre2_internal.h so as to be available to pcre2test when it is +finding the minimum heap requirement for a match. */ + +#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int)) + +#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */ +#define RWS_RSIZE 1000 /* Work size for recursion */ +#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */ +#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */ + +/* This structure is at the start of each workspace block. */ + +typedef struct RWS_anchor { + struct RWS_anchor *next; + unsigned int size; /* Number of ints */ + unsigned int free; /* Number of ints */ +} RWS_anchor; + +#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int)) + + /************************************************* * Process a callout * @@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data); +/************************************************* +* Expand local workspace memory * +*************************************************/ + +/* This function is called when internal_dfa_match() is about to be called +recursively and there is insufficient working space left in the current +workspace block. If there's an existing next block, use it; otherwise get a new +block unless the heap limit is reached. 
+ +Arguments: + rwsptr pointer to block pointer (updated) + ovecsize space needed for an ovector + mb the match block + +Returns: 0 rwsptr has been updated + !0 an error code +*/ + +static int +more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb) +{ +RWS_anchor *rws = *rwsptr; +RWS_anchor *new; + +if (rws->next != NULL) + { + new = rws->next; + } + +/* All sizes are in units of sizeof(int), except for mb->heaplimit, which is in +kibibytes. */ + +else + { + unsigned int newsize = rws->size * 2; + unsigned int heapleft = (unsigned int) + (((1024/sizeof(int))*mb->heap_limit - mb->heap_used)); + if (newsize > heapleft) newsize = heapleft; + if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE) + return PCRE2_ERROR_HEAPLIMIT; + new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data); + if (new == NULL) return PCRE2_ERROR_NOMEMORY; + mb->heap_used += newsize; + new->next = NULL; + new->size = newsize; + rws->next = new; + } + +new->free = new->size - RWS_ANCHOR_SIZE; +*rwsptr = new; +return 0; +} + + + /************************************************* * Match a Regular Expression - DFA engine * *************************************************/ @@ -431,7 +517,8 @@ internal_dfa_match( uint32_t offsetcount, int *workspace, int wscount, - uint32_t rlevel) + uint32_t rlevel, + int *RWS) { stateblock *active_states, *new_states, *temp_states; stateblock *next_active_state, *next_new_state; @@ -788,7 +875,7 @@ for (;;) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) match_count = 0; count = ((match_count == 0)? 
(int)offsetcount : match_count * 2) - 2; - if (count > 0) memmove(offsets + 2, offsets, + if (count > 0) (void)memmove(offsets + 2, offsets, (size_t)count * sizeof(PCRE2_SIZE)); if (offsetcount >= 2) { @@ -2587,10 +2674,22 @@ for (;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: { - PCRE2_SPTR endasscode = code + GET(code, 1); - PCRE2_SIZE local_offsets[2]; int rc; - int local_workspace[1000]; + int *local_workspace; + PCRE2_SIZE *local_offsets; + PCRE2_SPTR endasscode = code + GET(code, 1); + RWS_anchor *rws = (RWS_anchor *)RWS; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); @@ -2600,10 +2699,13 @@ for (;;) ptr, /* where we currently are */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) @@ -2615,8 +2717,6 @@ for (;;) case OP_COND: case OP_SCOND: { - PCRE2_SIZE local_offsets[1000]; - int local_workspace[1000]; int codelink = (int)GET(code, 1); PCRE2_UCHAR condcode; @@ -2673,8 +2773,22 @@ for (;;) else { int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; PCRE2_SPTR asscode = code + LINK_SIZE + 1; PCRE2_SPTR endasscode = asscode + GET(asscode, 1); + RWS_anchor *rws = 
(RWS_anchor *)RWS; + + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); @@ -2684,10 +2798,13 @@ for (;;) ptr, /* where we currently are */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if ((rc >= 0) == @@ -2702,13 +2819,25 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_RECURSE: { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + RWS_anchor *rws = (RWS_anchor *)RWS; dfa_recursion_info *ri; - PCRE2_SIZE local_offsets[1000]; - int local_workspace[1000]; PCRE2_SPTR callpat = start_code + GET(code, 1); uint32_t recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE); - int rc; + + if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE; /* Check for repeating a recursion without advancing the subject pointer. This should catch convoluted mutual recursions. 
(Some simple @@ -2732,11 +2861,13 @@ for (;;) ptr, /* where we currently are */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ + RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */ local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + rws->free += RWS_RSIZE + RWS_OVEC_RSIZE; mb->recursive = new_recursive.prevrec; /* Done this recursion */ /* Ran out of internal offsets */ @@ -2782,10 +2913,25 @@ for (;;) case OP_SCBRAPOS: case OP_BRAPOSZERO: { + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; PCRE2_SIZE charcount, matched_count; PCRE2_SPTR local_ptr = ptr; + RWS_anchor *rws = (RWS_anchor *)RWS; BOOL allow_zero; + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + if (codevalue == OP_BRAPOSZERO) { allow_zero = TRUE; @@ -2798,19 +2944,17 @@ for (;;) for (matched_count = 0;; matched_count++) { - PCRE2_SIZE local_offsets[2]; - int local_workspace[1000]; - - int rc = internal_dfa_match( + rc = internal_dfa_match( mb, /* fixed match data */ code, /* this subexpression's code */ local_ptr, /* where we currently are */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ + RWS_RSIZE, /* size of same */ + rlevel, /* function 
recursion level */ + RWS); /* recursion workspace */ /* Failed to match */ @@ -2827,6 +2971,8 @@ for (;;) local_ptr += charcount; /* Advance temporary position ptr */ } + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; + /* At this point we have matched the subpattern matched_count times, and local_ptr is pointing to the character after the end of the last match. */ @@ -2869,19 +3015,35 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_ONCE: { - PCRE2_SIZE local_offsets[2]; - int local_workspace[1000]; + int rc; + int *local_workspace; + PCRE2_SIZE *local_offsets; + RWS_anchor *rws = (RWS_anchor *)RWS; - int rc = internal_dfa_match( + if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE) + { + rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb); + if (rc != 0) return rc; + RWS = (int *)rws; + } + + local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free); + local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE; + rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE; + + rc = internal_dfa_match( mb, /* fixed match data */ code, /* this subexpression's code */ ptr, /* where we currently are */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */ local_offsets, /* offset vector */ - sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ + RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */ local_workspace, /* workspace vector */ - sizeof(local_workspace)/sizeof(int), /* size of same */ - rlevel); /* function recursion level */ + RWS_RSIZE, /* size of same */ + rlevel, /* function recursion level */ + RWS); /* recursion workspace */ + + rws->free += RWS_RSIZE + RWS_OVEC_OSIZE; if (rc >= 0) { @@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount) { +int rc; const pcre2_real_code *re = (const pcre2_real_code *)code; PCRE2_SPTR start_match; @@ -3071,9 +3234,9 @@ PCRE2_SPTR 
bumpalong_limit; PCRE2_SPTR req_cu_ptr; BOOL utf, anchored, startline, firstline; - BOOL has_first_cu = FALSE; BOOL has_req_cu = FALSE; + PCRE2_UCHAR first_cu = 0; PCRE2_UCHAR first_cu2 = 0; PCRE2_UCHAR req_cu = 0; @@ -3088,6 +3251,17 @@ pcre2_callout_block cb; dfa_match_block actual_match_block; dfa_match_block *mb = &actual_match_block; +/* Set up a starting block of memory for use during recursive calls to +internal_dfa_match(). By putting this on the stack, it minimizes resource use +in the case when it is not needed. If this is too small, more memory is +obtained from the heap. At the start of each block is an anchor structure.*/ + +int base_recursion_workspace[RWS_BASE_SIZE]; +RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace; +rws->next = NULL; +rws->size = RWS_BASE_SIZE; +rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; + /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated subject string. */ @@ -3184,6 +3358,7 @@ if (mcontext == NULL) mb->memctl = re->memctl; mb->match_limit = PRIV(default_match_context).match_limit; mb->match_limit_depth = PRIV(default_match_context).depth_limit; + mb->heap_limit = PRIV(default_match_context).heap_limit; } else { @@ -3198,6 +3373,7 @@ else mb->memctl = mcontext->memctl; mb->match_limit = mcontext->match_limit; mb->match_limit_depth = mcontext->depth_limit; + mb->heap_limit = mcontext->heap_limit; } if (mb->match_limit > re->limit_match) @@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match) if (mb->match_limit_depth > re->limit_depth) mb->match_limit_depth = re->limit_depth; +if (mb->heap_limit > re->limit_heap) + mb->heap_limit = re->limit_heap; + mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + re->name_count * re->name_entry_size; mb->tables = re->tables; @@ -3215,6 +3394,7 @@ mb->start_offset = start_offset; mb->moptions = options; mb->poptions = re->overall_options; mb->match_call_count = 0; +mb->heap_used = 0; /* Process the \R and newline settings. 
*/ @@ -3351,8 +3531,6 @@ a match. */ for (;;) { - int rc; - /* ----------------- Start of match optimizations ---------------- */ /* There are some optimizations that avoid running the match if a known @@ -3544,7 +3722,7 @@ for (;;) in characters, we treat it as code units to avoid spending too much time in this optimization. */ - if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH; + if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT; /* If req_cu is set, we know that that code unit must appear in the subject for the match to succeed. If the first code unit is set, req_cu @@ -3621,7 +3799,8 @@ for (;;) (uint32_t)match_data->oveccount * 2, /* actual size of same */ workspace, /* workspace vector */ (int)wscount, /* size of same */ - 0); /* function recurse level */ + 0, /* function recurse level */ + base_recursion_workspace); /* initial workspace for recursion */ /* Anything other than "no match" means we are done, always; otherwise, carry on only if not anchored. */ @@ -3637,7 +3816,7 @@ for (;;) match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject); match_data->rc = rc; - return rc; + goto EXIT; } /* Advance to the next subject character unless we are at the end of a line @@ -3668,8 +3847,18 @@ for (;;) } /* "Bumpalong" loop */ +NOMATCH_EXIT: +rc = PCRE2_ERROR_NOMATCH; + +EXIT: +while (rws->next != NULL) + { + RWS_anchor *next = rws->next; + rws->next = next->next; + mb->memctl.free(next, mb->memctl.memory_data); + } -return PCRE2_ERROR_NOMATCH; +return rc; } /* End of pcre2_dfa_match.c */ diff --git a/src/3rdparty/pcre2/src/pcre2_error.c b/src/3rdparty/pcre2/src/pcre2_error.c index d98cae9963..4b3b3f1bc0 100644 --- a/src/3rdparty/pcre2/src/pcre2_error.c +++ b/src/3rdparty/pcre2/src/pcre2_error.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] = /* 35 */ "lookbehind is too complicated\0" "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" - "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" + "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0" "number after (?C is greater than 255\0" "closing parenthesis for (?C expected\0" /* 40 */ @@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] = "internal error: unknown newline setting\0" "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" "(?R (recursive pattern call) must be followed by a closing parenthesis\0" - "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" + /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */ + "obsolete error (should not occur)\0" /* Was the above */ /* 60 */ "(*VERB) not recognized or malformed\0" "group number is too big\0" @@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] = "using UCP is disabled by the application\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character code point value in \\u.... 
sequence is too large\0" - "digits missing in \\x{} or \\o{}\0" + "digits missing in \\x{} or \\o{} or \\N{U+}\0" "syntax error or number too big in (?(VERSION condition\0" /* 80 */ "internal error: unknown opcode in auto_possessify()\0" @@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] = "internal error: bad code value in parsed_skip()\0" "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" "invalid option bits with PCRE2_LITERAL\0" + "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" + "invalid hyphen in option setting\0" ; /* Match-time and UTF error texts are in the same format. */ @@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] = "expected closing curly bracket in replacement string\0" "bad substitution in replacement string\0" /* 60 */ - "match with end before start is not supported\0" + "match with end before start or start moved backwards is not supported\0" "too many replacements (more than INT_MAX)\0" "bad serialized data\0" "heap limit exceeded\0" "invalid syntax\0" + /* 65 */ + "internal error - duplicate substitution match\0" ; diff --git a/src/3rdparty/pcre2/src/pcre2_extuni.c b/src/3rdparty/pcre2/src/pcre2_extuni.c index 11a0bfbdd6..237211abf7 100644 --- a/src/3rdparty/pcre2/src/pcre2_extuni.c +++ b/src/3rdparty/pcre2/src/pcre2_extuni.c @@ -129,11 +129,11 @@ while (eptr < end_subject) if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend follows E_Base[_GAZ] do not update lgb; this allows - any number of Extend before a following E_Modifier. */ + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. 
*/ - if (rgb != ucp_gbExtend || - (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) lgb = rgb; eptr += len; diff --git a/src/3rdparty/pcre2/src/pcre2_find_bracket.c b/src/3rdparty/pcre2/src/pcre2_find_bracket.c index 357385a11c..70baa1394f 100644 --- a/src/3rdparty/pcre2/src/pcre2_find_bracket.c +++ b/src/3rdparty/pcre2/src/pcre2_find_bracket.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -131,6 +131,7 @@ for (;;) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: diff --git a/src/3rdparty/pcre2/src/pcre2_internal.h b/src/3rdparty/pcre2/src/pcre2_internal.h index 3db9d604f4..8750f2f174 100644 --- a/src/3rdparty/pcre2/src/pcre2_internal.h +++ b/src/3rdparty/pcre2/src/pcre2_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -165,6 +165,16 @@ by "configure". */ #define INT64_OR_DOUBLE double #endif +/* External (in the C sense) functions and tables that are private to the +libraries are always referenced using the PRIV macro. 
This makes it possible +for pcre2test.c to include some of the source files from the libraries using a +different PRIV definition to avoid name clashes. It also makes it clear in the +code that a non-static object is being referenced. */ + +#ifndef PRIV +#define PRIV(name) _pcre2_##name +#endif + /* When compiling for use with the Virtual Pascal compiler, these functions need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT option on the command line. */ @@ -178,50 +188,15 @@ option on the command line. */ #define memset(s,c,n) _memset(s,c,n) #else /* VPCOMPAT */ -/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), -define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY -is set. Otherwise, include an emulating function for those systems that have -neither (there some non-Unix environments where this is the case). */ +/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define +a macro that calls an emulating function. */ #ifndef HAVE_MEMMOVE -#undef memmove /* some systems may have a macro */ -#ifdef HAVE_BCOPY -#define memmove(a, b, c) bcopy(b, a, c) -#else /* HAVE_BCOPY */ -static void * -pcre2_memmove(void *d, const void *s, size_t n) -{ -size_t i; -unsigned char *dest = (unsigned char *)d; -const unsigned char *src = (const unsigned char *)s; -if (dest > src) - { - dest += n; - src += n; - for (i = 0; i < n; ++i) *(--dest) = *(--src); - return (void *)dest; - } -else - { - for (i = 0; i < n; ++i) *dest++ = *src++; - return (void *)(dest - n); - } -} -#define memmove(a, b, c) pcre2_memmove(a, b, c) -#endif /* not HAVE_BCOPY */ +#undef memmove /* Some systems may have a macro */ +#define memmove(a, b, c) PRIV(memmove)(a, b, c) #endif /* not HAVE_MEMMOVE */ #endif /* not VPCOMPAT */ -/* External (in the C sense) functions and tables that are private to the -libraries are always referenced using the PRIV macro. 
This makes it possible -for pcre2test.c to include some of the source files from the libraries using a -different PRIV definition to avoid name clashes. It also makes it clear in the -code that a non-static object is being referenced. */ - -#ifndef PRIV -#define PRIV(name) _pcre2_##name -#endif - /* This is an unsigned int value that no UTF character can ever have, as Unicode doesn't go beyond 0x0010ffff. */ @@ -247,12 +222,17 @@ not rely on this. */ pcre2_match() is allocated on the system stack, of this size (bytes). The size must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends -on the number of capturing parentheses) so 20K handles quite a few frames. A +on the number of capturing parentheses) so 20KiB handles quite a few frames. A larger vector on the heap is obtained for patterns that need more frames. The maximum size of this can be limited. */ #define START_FRAMES_SIZE 20480 +/* Similarly, for DFA matching, an initial internal workspace vector is +allocated on the stack. */ + +#define DFA_START_RWS_SIZE 30720 + /* Define the default BSR convention. */ #ifdef BSR_ANYCRLF @@ -585,14 +565,15 @@ these tables. */ #define cbit_cntrl 288 /* [:cntrl:] */ #define cbit_length 320 /* Length of the cbits table */ -/* Bit definitions for entries in the ctypes table. */ +/* Bit definitions for entries in the ctypes table. Do not change these values +without checking pcre2_jit_compile.c, which has an assertion to ensure that +ctype_word has the value 16. */ #define ctype_space 0x01 #define ctype_letter 0x02 #define ctype_digit 0x04 -#define ctype_xdigit 0x08 +#define ctype_xdigit 0x08 /* not actually used any more */ #define ctype_word 0x10 /* alphanumeric or '_' */ -#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */ /* Offsets of the various tables from the base tables pointer, and total length of the tables. 
*/ @@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ -/* Escape items that are just an encoding of a particular data value. These -appear in the escapes[] table in pcre2_compile.c as positive numbers. */ - -#ifndef ESC_a -#define ESC_a CHAR_BEL -#endif - -#ifndef ESC_e -#define ESC_e CHAR_ESC -#endif - -#ifndef ESC_f -#define ESC_f CHAR_FF -#endif - -#ifndef ESC_n -#define ESC_n CHAR_LF -#endif - -#ifndef ESC_r -#define ESC_r CHAR_CR -#endif - -/* We can't officially use ESC_t because it is a POSIX reserved identifier -(presumably because of all the others like size_t). */ - -#ifndef ESC_tee -#define ESC_tee CHAR_HT -#endif - /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 for a data character. In the escapes[] table in pcre2_compile.c their values @@ -1578,23 +1529,26 @@ enum { OP_THEN, /* 155 */ OP_THEN_ARG, /* 156 same, but with argument */ OP_COMMIT, /* 157 */ + OP_COMMIT_ARG, /* 158 same, but with argument */ - /* These are forced failure and success verbs */ + /* These are forced failure and success verbs. FAIL and ACCEPT do accept an + argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL) + without the need for a special opcode. 
*/ - OP_FAIL, /* 158 */ - OP_ACCEPT, /* 159 */ - OP_ASSERT_ACCEPT, /* 160 Used inside assertions */ - OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 159 */ + OP_ACCEPT, /* 160 */ + OP_ASSERT_ACCEPT, /* 161 Used inside assertions */ + OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 162 */ + OP_SKIPZERO, /* 163 */ /* This is used to identify a DEFINE group during compilation so that it can be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - OP_DEFINE, /* 163 */ + OP_DEFINE, /* 164 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */ "Cond false", "Cond true", \ "Brazero", "Braminzero", "Braposzero", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ - "*THEN", "*THEN", "*COMMIT", "*FAIL", \ + "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \ "*ACCEPT", "*ASSERT_ACCEPT", \ "Close", "Skip zero", "Define" @@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. 
*/ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \ - 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ + 1, 3, /* COMMIT, COMMIT_ARG */ \ + 1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ 1 /* DEFINE */ @@ -1896,7 +1851,7 @@ extern const ucd_record PRIV(ucd_records)[]; #if PCRE2_CODE_UNIT_WIDTH == 32 extern const ucd_record PRIV(dummy_ucd_record)[]; #endif -extern const uint8_t PRIV(ucd_stage1)[]; +extern const uint16_t PRIV(ucd_stage1)[]; extern const uint16_t PRIV(ucd_stage2)[]; extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gentype)[]; @@ -1976,6 +1931,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); + +/* This function is needed only when memmove() is not available. */ + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove) +extern void * _pcre2_memmove(void *, const void *, size_t); +#endif + #endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ diff --git a/src/3rdparty/pcre2/src/pcre2_intmodedep.h b/src/3rdparty/pcre2/src/pcre2_intmodedep.h index c4c4c3adb9..62626d0a8a 100644 --- a/src/3rdparty/pcre2/src/pcre2_intmodedep.h +++ b/src/3rdparty/pcre2/src/pcre2_intmodedep.h @@ -793,11 +793,23 @@ typedef struct heapframe { uint8_t return_id; /* Where to go on in internal "return" */ uint8_t op; /* Processing opcode */ + /* At this point, the structure is 16-bit aligned. On most architectures + the alignment requirement for a pointer will ensure that the eptr field below + is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer + that is 16-bit aligned. We must therefore ensure that what comes between here + and eptr is an odd multiple of 16 bits so as to get back into 32-bit + alignment. 
This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs + fudges in the other cases. In the 32-bit case the padding comes first so that + the occu field itself is 32-bit aligned. Without the padding, this structure + is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */ + #if PCRE2_CODE_UNIT_WIDTH == 8 PCRE2_UCHAR occu[6]; /* Used for other case code units */ #elif PCRE2_CODE_UNIT_WIDTH == 16 PCRE2_UCHAR occu[2]; /* Used for other case code units */ + uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ #else + uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */ PCRE2_UCHAR occu[1]; /* Used for other case code units */ #endif @@ -818,6 +830,9 @@ typedef struct heapframe { PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ } heapframe; +/* This typedef is a check that the size of the heapframe structure is a +multiple of PCRE2_SIZE. See various comments above. */ + typedef char check_heapframe_size[ ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; @@ -881,6 +896,8 @@ typedef struct dfa_match_block { PCRE2_SPTR last_used_ptr; /* Latest consulted character */ const uint8_t *tables; /* Character tables */ PCRE2_SIZE start_offset; /* The start offset value */ + PCRE2_SIZE heap_limit; /* As it says */ + PCRE2_SIZE heap_used; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_depth; /* As it says */ uint32_t match_call_count; /* Number of calls of internal function */ diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c index 80ed1c4ca6..32e985b793 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -839,6 +839,7 @@ switch(*cc) #endif case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: @@ -939,6 +940,7 @@ while (cc < ccend) common->control_head_ptr = 1; /* Fall through. */ + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_MARK: if (common->mark_ptr == 0) @@ -1553,6 +1555,7 @@ while (cc < ccend) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); @@ -1733,6 +1736,7 @@ while (cc < ccend) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); @@ -2041,6 +2045,7 @@ while (cc < ccend) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); @@ -2428,6 +2433,7 @@ while (cc < ccend) break; case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); @@ -3666,7 +3672,8 @@ if (!common->utf) #endif OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); @@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++) } } +if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. 
*/ + i = 0; j = 0; @@ -6627,7 +6636,8 @@ if (needstype || needsscript) #endif OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); - OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); @@ -7254,10 +7264,11 @@ while (cc < end_subject) if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend follows E_Base[_GAZ] do not update lgb; this allows - any number of Extend before a following E_Modifier. */ + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. */ - if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) lgb = rgb; prevcc = cc; @@ -7309,10 +7320,11 @@ while (cc < end_subject) if ((ricount & 1) != 0) break; /* Grapheme break required */ } - /* If Extend follows E_Base[_GAZ] do not update lgb; this allows - any number of Extend before a following E_Modifier. */ + /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this + allows any number of them before a following Extended_Pictographic. 
*/ - if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) + if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || + lgb != ucp_gbExtended_Pictographic) lgb = rgb; cc++; @@ -10346,7 +10358,8 @@ backtrack_common *backtrack; PCRE2_UCHAR opcode = *cc; PCRE2_SPTR ccend = cc + 1; -if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || + opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) ccend += 2 + cc[1]; PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); @@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP) return ccend; } -if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) +if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) { OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); @@ -10677,6 +10690,7 @@ while (cc < ccend) case OP_THEN: case OP_THEN_ARG: case OP_COMMIT: + case OP_COMMIT_ARG: cc = compile_control_verb_matchingpath(common, cc, parent); break; @@ -11751,6 +11765,7 @@ while (current) break; case OP_COMMIT: + case OP_COMMIT_ARG: if (!common->local_quit_available) OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); if (common->quit_label == NULL) diff --git a/src/3rdparty/pcre2/src/pcre2_maketables.c b/src/3rdparty/pcre2/src/pcre2_maketables.c index 2c7ae84d86..537edba8c3 100644 --- a/src/3rdparty/pcre2/src/pcre2_maketables.c +++ b/src/3rdparty/pcre2/src/pcre2_maketables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -141,13 +141,6 @@ for (i = 0; i < 256; i++) if (isdigit(i)) x += ctype_digit; if (isxdigit(i)) x += ctype_xdigit; if (isalnum(i) || i == '_') x += ctype_word; - - /* Note: strchr includes the terminating zero in the characters it considers. - In this instance, that is ok because we want binary zero to be flagged as a - meta-character, which in this sense is any character that terminates a run - of data characters. */ - - if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; } diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c index 79cc93f918..8741e1432d 100644 --- a/src/3rdparty/pcre2/src/pcre2_match.c +++ b/src/3rdparty/pcre2/src/pcre2_match.c @@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif -/* These defines enables debugging code */ +/* These defines enable debugging code */ -//#define DEBUG_FRAMES_DISPLAY -//#define DEBUG_SHOW_OPS -//#define DEBUG_SHOW_RMATCH +/* #define DEBUG_FRAMES_DISPLAY */ +/* #define DEBUG_SHOW_OPS */ +/* #define DEBUG_SHOW_RMATCH */ #ifdef DEBUG_FRAME_DISPLAY #include @@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. 
*/ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35 }; + RM31, RM32, RM33, RM34, RM35, RM36 }; #ifdef SUPPORT_WIDE_CHARS enum { RM100=100, RM101 }; @@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ /* Real or forced end of the pattern, assertion, or recursion. In an assertion ACCEPT, update the last used pointer and remember the current - frame so that the captures can be fished out of it. */ + frame so that the captures and mark can be fished out of it. */ case OP_ASSERT_ACCEPT: if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; @@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ - /* Match a bit-mapped character class, possibly repeatedly. These op codes + /* Match a bit-mapped character class, possibly repeatedly. These opcodes are used when all the characters in the class have values in the range 0-255, and either the matching is caseful, or the characters are in the range 0-127 when UTF processing is enabled. The only difference between @@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (reptype == REPTYPE_POS) continue; /* No backtracking */ + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. 
*/ + for (;;) { RMATCH(Fecode, RM201); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ BACKCHAR(Feptr); } } @@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (reptype == REPTYPE_POS) continue; /* No backtracking */ + /* After \C in UTF mode, Lstart_eptr might be in the middle of a + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ + for(;;) { RMATCH(Fecode, RM101); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ + if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ #ifdef SUPPORT_UNICODE if (utf) BACKCHAR(Feptr); #endif @@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ /* Match a single character type repeatedly. Note that the property type - does not need to be in a stack frame as it not used within an RMATCH() + does not need to be in a stack frame as it is not used within an RMATCH() loop. */ #define Lstart_eptr F->temp_sptr[0] @@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (reptype == REPTYPE_POS) continue; /* No backtracking */ /* After \C in UTF mode, Lstart_eptr might be in the middle of a - Unicode character. Use <= pp to ensure backtracking doesn't go too far. - */ + Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't + go too far. */ for(;;) { @@ -4135,7 +4143,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } break; - /* The "byte" (i.e. "code unit") case is the same as non-UTF */ + /* The "byte" (i.e. 
"code unit") case is the same as non-UTF */ case OP_ANYBYTE: fc = Lmax - Lmin; @@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* Positive assertions are like other groups except that PCRE doesn't allow the effect of (*THEN) to escape beyond an assertion; it is therefore treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its - captures retained. Any other return is an error. */ + captures and mark retained. Any other return is an error. */ #define Lframe_type F->temp_32[0] @@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); (char *)assert_accept_frame + offsetof(heapframe, ovector), assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); Foffset_top = assert_accept_frame->offset_top; + Fmark = assert_accept_frame->mark; break; } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); @@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); Feptr -= number; } - /* Save the earliest consulted character, then skip to next op code */ + /* Save the earliest consulted character, then skip to next opcode */ if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr; Fecode += 1 + LINK_SIZE; @@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); frame so that it points to the final branch. */ case OP_ONCE: - Fback_frame = ((char *)F - (char *)P) + frame_size; + Fback_frame = ((char *)F - (char *)P); for (;;) { uint32_t y = GET(P->ecode,1); @@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode); mb->verb_current_recurse = Fcurrent_recurse; RRETURN(MATCH_COMMIT); + case OP_COMMIT_ARG: + Fmark = mb->nomatch_mark = Fecode + 2; + RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + mb->verb_current_recurse = Fcurrent_recurse; + RRETURN(MATCH_COMMIT); + case OP_PRUNE: RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14); if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -5921,7 +5937,7 @@ in rrc. 
*/ RETURN_SWITCH: if (Frdepth == 0) return rrc; /* Exit from the top level */ -F = (heapframe *)((char *)F - Fback_frame); /* Back track */ +F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */ mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ #ifdef DEBUG_SHOW_RMATCH @@ -5934,7 +5950,7 @@ switch (Freturn_id) LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) - LBL(33) LBL(34) LBL(35) + LBL(33) LBL(34) LBL(35) LBL(36) #ifdef SUPPORT_WIDE_CHARS LBL(100) LBL(101) @@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? /* If a pattern has very many capturing parentheses, the frame size may be very large. Ensure that there are at least 10 available frames by getting an initial vector on the heap if necessary, except when the heap limit prevents this. Get -fewer if possible. (The heap limit is in kilobytes.) */ +fewer if possible. (The heap limit is in kibibytes.) */ if (frame_size <= START_FRAMES_SIZE/10) { diff --git a/src/3rdparty/pcre2/src/pcre2_pattern_info.c b/src/3rdparty/pcre2/src/pcre2_pattern_info.c index 906e9198f5..a29f5eff67 100644 --- a/src/3rdparty/pcre2/src/pcre2_pattern_info.c +++ b/src/3rdparty/pcre2/src/pcre2_pattern_info.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -390,6 +390,7 @@ while (TRUE) #endif case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: diff --git a/src/3rdparty/pcre2/src/pcre2_serialize.c b/src/3rdparty/pcre2/src/pcre2_serialize.c index d2cc603cbb..cec1a035d1 100644 --- a/src/3rdparty/pcre2/src/pcre2_serialize.c +++ b/src/3rdparty/pcre2/src/pcre2_serialize.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -127,7 +127,25 @@ dst_bytes += tables_length; for (i = 0; i < number_of_codes; i++) { re = (const pcre2_real_code *)(codes[i]); - memcpy(dst_bytes, (char *)re, re->blocksize); + (void)memcpy(dst_bytes, (char *)re, re->blocksize); + + /* Certain fields in the compiled code block are re-set during + deserialization. In order to ensure that the serialized data stream is always + the same for the same pattern, set them to zero here. We can't assume the + copy of the pattern is correctly aligned for accessing the fields as part of + a structure. Note the use of sizeof(void *) in the second of these, to + specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a + pointer to uint8_t), gcc gives a warning because the first argument is also a + pointer to uint8_t. 
Casting the first argument to (void *) can stop this, but + it didn't stop Coverity giving the same complaint. */ + + (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, + sizeof(pcre2_memctl)); + (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, + sizeof(void *)); + (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0, + sizeof(void *)); + dst_bytes += re->blocksize; } diff --git a/src/3rdparty/pcre2/src/pcre2_string_utils.c b/src/3rdparty/pcre2/src/pcre2_string_utils.c index 2a1f282629..d6be01acf5 100644 --- a/src/3rdparty/pcre2/src/pcre2_string_utils.c +++ b/src/3rdparty/pcre2/src/pcre2_string_utils.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -50,6 +50,42 @@ functions work only on 8-bit data. */ #include "pcre2_internal.h" +/************************************************* +* Emulated memmove() for systems without it * +*************************************************/ + +/* This function can make use of bcopy() if it is available. Otherwise do it by +steam, as there some non-Unix environments that lack both memmove() and +bcopy(). 
*/ + +#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) +void * +PRIV(memmove)(void *d, const void *s, size_t n) +{ +#ifdef HAVE_BCOPY +bcopy(s, d, n); +return d; +#else +size_t i; +unsigned char *dest = (unsigned char *)d; +const unsigned char *src = (const unsigned char *)s; +if (dest > src) + { + dest += n; + src += n; + for (i = 0; i < n; ++i) *(--dest) = *(--src); + return (void *)dest; + } +else + { + for (i = 0; i < n; ++i) *dest++ = *src++; + return (void *)(dest - n); + } +#endif /* not HAVE_BCOPY */ +} +#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ + + /************************************************* * Compare two zero-terminated PCRE2 strings * *************************************************/ diff --git a/src/3rdparty/pcre2/src/pcre2_study.c b/src/3rdparty/pcre2/src/pcre2_study.c index b92686759d..acbf98b41b 100644 --- a/src/3rdparty/pcre2/src/pcre2_study.c +++ b/src/3rdparty/pcre2/src/pcre2_study.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -707,6 +707,7 @@ for (;;) /* Skip these, but we need to add in the name length. */ case OP_MARK: + case OP_COMMIT_ARG: case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: @@ -956,6 +957,7 @@ do case OP_CIRCM: case OP_CLOSE: case OP_COMMIT: + case OP_COMMIT_ARG: case OP_COND: case OP_CREF: case OP_FALSE: @@ -1274,7 +1276,7 @@ do break; /* Single character types set the bits and stop. Note that if PCRE2_UCP - is set, we do not see these op codes because \d etc are converted to + is set, we do not see these opcodes because \d etc are converted to properties. 
Therefore, these apply in the case when only characters less than 256 are recognized to match the types. */ diff --git a/src/3rdparty/pcre2/src/pcre2_substitute.c b/src/3rdparty/pcre2/src/pcre2_substitute.c index 8da951fc6e..ab8d10908a 100644 --- a/src/3rdparty/pcre2/src/pcre2_substitute.c +++ b/src/3rdparty/pcre2/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -238,10 +238,12 @@ PCRE2_SPTR repend; PCRE2_SIZE extra_needed = 0; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; +PCRE2_SIZE ovecsave[3]; buff_offset = 0; lengthleft = buff_length = *blength; *blength = PCRE2_UNSET; +ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; /* Partial matching is not valid. */ @@ -361,13 +363,33 @@ do } /* Handle a successful match. Matches that use \K to end before they start - are not supported. */ - - if (ovector[1] < ovector[0]) + or start before the current point in the subject are not supported. */ + + if (ovector[1] < ovector[0] || ovector[0] < start_offset) { rc = PCRE2_ERROR_BADSUBSPATTERN; goto EXIT; } + + /* Check for the same match as previous. This is legitimate after matching an + empty string that starts after the initial match offset. We have tried again + at the match point in case the pattern is one like /(?<=\G.)/ which can never + match at its starting point, so running the match achieves the bumpalong. If + we do get the same (null) match at the original match point, it isn't such a + pattern, so we now do the empty string magic. In all other cases, a repeat + match should never occur. 
*/ + + if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) + { + if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) + { + goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + ovecsave[2] = start_offset; + continue; /* Back to the top of the loop */ + } + rc = PCRE2_ERROR_INTERNAL_DUPMATCH; + goto EXIT; + } /* Count substitutions with a paranoid check for integer overflow; surely no real call to this function would ever hit this! */ @@ -799,13 +821,18 @@ do } /* End handling a literal code unit */ } /* End of loop for scanning the replacement. */ - /* The replacement has been copied to the output. Update the start offset to - point to the rest of the subject string. If we matched an empty string, - do the magic for global matches. */ - - start_offset = ovector[1]; - goptions = (ovector[0] != ovector[1])? 0 : + /* The replacement has been copied to the output. Save the details of this + match. See above for how this data is used. If we matched an empty string, do + the magic for global matches. Finally, update the start offset to point to + the rest of the subject string. */ + + ovecsave[0] = ovector[0]; + ovecsave[1] = ovector[1]; + ovecsave[2] = start_offset; + + goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; + start_offset = ovector[1]; } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ /* Copy the rest of the subject. */ diff --git a/src/3rdparty/pcre2/src/pcre2_tables.c b/src/3rdparty/pcre2/src/pcre2_tables.c index 9f8dc293aa..83d6f9de55 100644 --- a/src/3rdparty/pcre2/src/pcre2_tables.c +++ b/src/3rdparty/pcre2/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2017 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = { /* This table encodes the rules for finding the end of an extended grapheme cluster. Every code point has a grapheme break property which is one of the -ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by -the properties of two adjacent code points. The left property selects a word -from the table, and the right property selects a bit from that word like this: +ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions +10 and 11. The 2-dimensional table is indexed by the properties of two adjacent +code points. The left property selects a word from the table, and the right +property selects a bit from that word like this: PRIV(ucp_gbtable)[left-property] & (1 << right-property) @@ -166,49 +167,41 @@ are implementing). 6. Do not break after Prepend characters. -7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed - by E_Modifier). Extend characters are allowed before the modifier; this - cannot be represented in this table, the code has to deal with it. +7. Do not break within emoji modifier sequences or emoji zwj sequences. That + is, do not break between characters with the Extended_Pictographic property. + Extend and ZWJ characters are allowed between the characters; this cannot be + represented in this table, the code has to deal with it. -8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or - E_Base_GAZ). - -9. Do not break within emoji flag sequences. That is, do not break between +8. Do not break within emoji flag sequences. 
That is, do not break between regional indicator (RI) symbols if there are an odd number of RI characters before the break point. This table encodes "join RI characters"; the code has to deal with checking for previous adjoining RIs. -10. Otherwise, break everywhere. +9. Otherwise, break everywhere. */ #define ESZ (1<= 7 && defined(_M_ARMT)) || defined(__thumb2__) +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif (defined(_M_ARM) && _M_ARM >= 7) +#define SLJIT_CONFIG_ARM_V7 1 #elif defined(_ARM_) #define SLJIT_CONFIG_ARM_V5 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 #else #define SLJIT_CONFIG_X86_32 1 #endif -#endif /* !WIN32 */ +#endif /* !_WIN32 */ #endif /* SLJIT_CONFIG_AUTO */ #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) @@ -324,6 +330,11 @@ sparc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 +#elif defined _WIN32 + +#define SLJIT_CACHE_FLUSH(from, to) \ + FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + #else /* Calls __ARM_NR_cacheflush on ARM-Linux. 
*/ @@ -371,12 +382,18 @@ typedef int sljit_sw; #define SLJIT_64BIT_ARCHITECTURE 1 #define SLJIT_WORD_SHIFT 3 #ifdef _WIN32 +#ifdef __GNUC__ +/* These types do not require windows.h */ +typedef unsigned long long sljit_uw; +typedef long long sljit_sw; +#else typedef unsigned __int64 sljit_uw; typedef __int64 sljit_sw; -#else +#endif +#else /* !_WIN32 */ typedef unsigned long int sljit_uw; typedef long int sljit_sw; -#endif +#endif /* _WIN32 */ #endif typedef sljit_uw sljit_p; @@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 -#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c index f5009788f6..7c18578618 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) void *retval; #ifdef MAP_ANON - retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); + + int flags = MAP_PRIVATE | MAP_ANON; + +#ifdef MAP_JIT + flags |= MAP_JIT; +#endif + + retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0); #else if (dev_zero < 0) { if (open_dev_zero()) diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.c b/src/3rdparty/pcre2/src/sljit/sljitLir.c index 5e435f0154..5bdddc10cf 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitLir.c +++ b/src/3rdparty/pcre2/src/sljit/sljitLir.c @@ -26,6 +26,13 @@ #include "sljitLir.h" +#ifdef _WIN32 + +/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */ +#include + +#endif /* _WIN32 */ + #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) /* These libraries are needed for the macros below. 
*/ @@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil #endif -#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.h b/src/3rdparty/pcre2/src/sljit/sljitLir.h index 920f6d4f78..e71890cf7b 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitLir.h +++ b/src/3rdparty/pcre2/src/sljit/sljitLir.h @@ -138,7 +138,7 @@ of sljitConfigInternal.h */ be specified as scratch registers and the fifth one as saved register on the CPU above and any user code which requires four scratch registers can run unmodified. The SLJIT compiler automatically saves - the content of the two extra scrath register on the stack. Scratch + the content of the two extra scratch register on the stack. Scratch registers can also be preserved by saving their value on the stack but this needs to be done manually. @@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source register can hold any 32 or 64 bit value, and it is converted to a 32 bit compatible format first. This conversion is free (no instructions are - emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit + emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension). Note: memory addressing always uses 64 bit values on 64 bit systems so @@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler */ #define SLJIT_F32_OP SLJIT_I32_OP -/* Many CPUs (x86, ARM, PPC) has status flags which can be set according - to the result of an operation. 
Other CPUs (MIPS) does not have status +/* Many CPUs (x86, ARM, PPC) have status flags which can be set according + to the result of an operation. Other CPUs (MIPS) do not have status flags, and results must be stored in registers. To cover both architecture types efficiently only two flags are defined by SLJIT: @@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler Using these flags can reduce the number of emitted instructions. E.g. a fast loop can be implemented by decreasing a counter register and set the - zero flag to jump back if the counter register is not reached zero. + zero flag to jump back if the counter register has not reached zero. Motivation: although CPUs can set a large number of flags, usually their values are ignored or only one of them is used. Emulating a large number of flags on systems without flag register is complicated so SLJIT instructions must specify the flag they want to use and only that flag will be emulated. The last arithmetic instruction can be repeated if - multiple flags needs to be checked. + multiple flags need to be checked. */ /* Set Zero status flag. */ @@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile /* Starting index of opcodes for sljit_emit_op1. */ #define SLJIT_OP1_BASE 32 -/* The MOV instruction transfer data from source to destination. +/* The MOV instruction transfers data from source to destination. MOV instruction suffixes: @@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_FAST_CALL 25 /* Called function must be declared with the SLJIT_FUNC attribute. */ #define SLJIT_CALL 26 - /* Called function must be decalred with cdecl attribute. + /* Called function must be declared with cdecl attribute. This is the default attribute for C functions. 
*/ #define SLJIT_CALL_CDECL 27 @@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl /* Set the destination address of the jump to this label. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); -/* Emit an indirect jump or fast call. Both direct and indirect form +/* Emit an indirect jump or fast call. Direct form: set src to SLJIT_IMM() and srcw to the address Indirect form: any other valid addressing mode type must be between SLJIT_JUMP and SLJIT_FAST_CALL @@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #define SLJIT_MEM_POST 0x1000 /* Emit a single memory load or store with update instruction. When the - requested instruction from is not supported by the CPU, it returns + requested instruction form is not supported by the CPU, it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This allows specializing tight loops based on the supported instruction forms (see SLJIT_MEM_SUPP flag). 
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c index 8a437bd6a0..27af741487 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins; #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) -#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) /* r18 - platform register, currently not used */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { - 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31 + 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29 }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { @@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ADC 0x9a000000 #define ADD 0x8b000000 +#define ADDE 0x8b200000 #define ADDI 0x91000000 #define AND 0x8a000000 #define ANDI 0x92000000 @@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FSUB 0x1e603800 #define LDRI 0xf9400000 #define LDP 0xa9400000 -#define LDP_PST 0xa8c00000 +#define LDP_PRE 0xa9c00000 +#define LDR_PRE 0xf8400c00 #define LSLV 0x9ac02000 #define LSRV 0x9ac02400 #define MADD 0x9b000000 @@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - saved_regs_size = 
GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); - local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + if (saved_regs_size & 0x8) + saved_regs_size += sizeof(sljit_sw); + local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size; - - if (local_size <= (63 * sizeof(sljit_sw))) { - FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) - | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); - offs = (local_size - saved_regs_size) << (15 - 3); - } else { - offs = 0 << 15; - if (saved_regs_size & 0x8) { - offs = 1 << 15; - saved_regs_size += sizeof(sljit_sw); - } - local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; - if (saved_regs_size > 0) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); - } + compiler->local_size = local_size + saved_regs_size; + + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); + +#ifdef _WIN32 + if (local_size >= 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + else if (local_size > 256) + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10))); +#endif tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; + offs = 2 << 15; for (i = SLJIT_S0; i >= tmp; i--) { if (prev == -1) { - if (!(offs & (1 << 15))) { - prev = i; - continue; - } - FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); - offs += 1 << 15; + prev = i; continue; } - FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); offs += 2 << 15; prev = -1; } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { if (prev == -1) { - if (!(offs & (1 << 15))) { - prev = i; - continue; - } - FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); - offs += 1 << 15; + prev = i; continue; } - FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); offs += 2 << 15; prev = -1; } - SLJIT_ASSERT(prev == -1); + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); - if (compiler->local_size > (63 * sizeof(sljit_sw))) { - /* The local_size is already adjusted by the saved registers. 
*/ - if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); - local_size &= 0xfff; - } - if (local_size) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); - FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) - | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15))); - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); - } + + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10))); args = get_arg_count(arg_types); @@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (args >= 3) FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); +#ifdef _WIN32 + if (local_size >= 4096) { + if (local_size < 4 * 4096) { + /* No need for a loop. */ + if (local_size >= 2 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); + local_size -= 4096; + } + + if (local_size >= 2 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); + local_size -= 4096; + } + + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + local_size -= 4096; + } + else { + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10))); + FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + + local_size &= 0xfff; + } + + if (local_size > 256) { + FAIL_IF(push_inst(compiler, 
SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + } + else if (local_size > 0) + FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12))); + + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + } + else if (local_size > 256) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + } + else if (local_size > 0) + FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12))); + +#else /* !_WIN32 */ + + /* The local_size does not include saved registers size. */ + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); + +#endif /* _WIN32 */ + return SLJIT_SUCCESS; } @@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { + sljit_s32 saved_regs_size; + CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + if (saved_regs_size & 0x8) + saved_regs_size += sizeof(sljit_sw); + + compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf); return SLJIT_SUCCESS; } @@ -977,71 +1019,59 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - local_size = compiler->local_size; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2); + if (saved_regs_size & 0x8) + saved_regs_size += sizeof(sljit_sw); - saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); - if (local_size <= (63 * sizeof(sljit_sw))) - offs = (local_size - saved_regs_size) << (15 - 3); + local_size = compiler->local_size - saved_regs_size; + + /* Load LR as early as possible. */ + if (local_size == 0) + FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + else if (local_size < 63 * sizeof(sljit_sw)) { + FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (local_size << (15 - 3)))); + } else { - FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) - | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15))); - offs = 0 << 15; - if (saved_regs_size & 0x8) { - offs = 1 << 15; - saved_regs_size += sizeof(sljit_sw); - } - local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; } if (local_size) - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); + + FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); } tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; + offs = 2 << 15; for (i = SLJIT_S0; i >= tmp; i--) { if (prev == -1) { - if (!(offs & (1 << 15))) { - prev = i; - continue; - } - FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); - offs += 1 << 15; + prev = i; continue; } - FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); offs += 2 << 15; prev = -1; } for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { if (prev == -1) { - if (!(offs & (1 << 15))) { - prev = i; - continue; - } - FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); - offs += 1 << 15; + prev = i; continue; } - FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); offs += 2 << 15; prev = -1; } - SLJIT_ASSERT(prev == -1); + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); - if (compiler->local_size <= (63 * sizeof(sljit_sw))) { - FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) - | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); - } else if (saved_regs_size > 0) { - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); - } - - FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); - return SLJIT_SUCCESS; + /* These two can be executed in parallel. 
*/ + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10))); + return push_inst(compiler, RET | RN(TMP_LR)); } /* --------------------------------------------------------------------- */ @@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) +{ + sljit_s32 dst_reg; + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0); + + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (offset <= 0xffffff && offset >= -0xffffff) { + ins = ADDI; + if (offset < 0) { + offset = -offset; + ins = SUBI; + } + + if (offset <= 0xfff) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10))); + else { + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + + offset &= 0xfff; + if (offset != 0) + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10))); + } + } + else { + FAIL_IF(load_immediate (compiler, dst_reg, offset)); + /* Add extended register form. 
*/ + FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg))); + } + + if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c index 75e7a38b5f..d7024b6d7d 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ASRSI 0x1000 #define ASR_W 0xfa40f000 #define ASR_WI 0xea4f0020 +#define BCC 0xd000 #define BICI 0xf0200000 #define BKPT 0xbe00 #define BLX 0x4780 @@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define EORS 0x4040 #define EOR_W 0xea800000 #define IT 0xbf00 +#define LDRI 0xf8500800 #define LSLS 0x4080 #define LSLSI 0x0000 #define LSL_W 0xfa00f000 @@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SBCI 0xf1600000 #define SBCS 0x4180 #define SBC_W 0xeb600000 +#define SDIV 0xfb90f0f0 #define SMULL 0xfb800000 #define STR_SP 0x9000 #define SUBS 0x1a00 @@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define UDIV 0xfbb0f0f0 #define UMULL 0xfba00000 #define UXTB 0xb2c0 #define UXTB_W 0xfa5ff080 @@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw /* Really complex instruction form for branches. 
*/ s = (diff >> 23) & 0x1; - j1 = (~(diff >> 21) ^ s) & 0x1; - j2 = (~(diff >> 22) ^ s) & 0x1; + j1 = (~(diff >> 22) ^ s) & 0x1; + j2 = (~(diff >> 21) ^ s) & 0x1; jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); @@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, { sljit_uw tmp; + /* MOVS cannot be used since it destroy flags. */ + if (imm >= 0x10000) { tmp = get_imm(imm); if (tmp != INVALID_IMM) @@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_s32 args, size, i, tmp; sljit_ins push = 0; +#ifdef _WIN32 + sljit_uw imm; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; + +#ifdef _WIN32 + if (local_size >= 256) { + if (local_size > 4096) + imm = get_imm(4096); + else + imm = get_imm(local_size & ~0xff); + + SLJIT_ASSERT(imm != INVALID_IMM); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm)); + } +#else if (local_size > 0) { if (local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); else FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); } +#endif args = get_arg_count(arg_types); @@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (args >= 3) FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); +#ifdef _WIN32 + if (local_size >= 256) { + if (local_size > 4096) { + imm = get_imm(4096); + SLJIT_ASSERT(imm != INVALID_IMM); + + if (local_size < 4 * 4096) { + if (local_size > 2 * 4096) { + 
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); + local_size -= 4096; + } + + if (local_size > 2 * 4096) { + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); + local_size -= 4096; + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + local_size -= 4096; + + SLJIT_ASSERT(local_size > 0); + } + else { + FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1)); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); + SLJIT_ASSERT(reg_map[SLJIT_R3] < 7); + FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1)); + FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff))); + + local_size &= 0xfff; + + if (local_size != 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + } + + if (local_size >= 256) { + imm = get_imm(local_size & ~0xff); + SLJIT_ASSERT(imm != INVALID_IMM); + + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); + } + } + + local_size &= 0xff; + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 
0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size)); + + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1))); + } + else if (local_size > 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size)); +#endif + return SLJIT_SUCCESS; } @@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp /* Operators */ /* --------------------------------------------------------------------- */ +#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) + #ifdef __cplusplus extern "C" { #endif -#if defined(__GNUC__) +#ifdef _WIN32 +extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator); +extern long long __rt_sdiv(int denominator, int numerator); +#elif defined(__GNUC__) extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); extern int __aeabi_idivmod(int numerator, int denominator); #else @@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator); } #endif +#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { +#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) sljit_sw saved_reg_list[3]; sljit_sw saved_reg_count; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 16) | reg_map[SLJIT_R1]); +#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__) + case SLJIT_DIVMOD_UW: + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? 
UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1))); + return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1)); + case SLJIT_DIV_UW: + case SLJIT_DIV_SW: + return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); +#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */ case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: @@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile } } -#if defined(__GNUC__) +#ifdef _WIN32 + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1))); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv)))); +#elif defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else @@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */ } return SLJIT_SUCCESS; diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c index 9f9e157a05..094c9923bc 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins ins = NOP; sljit_u8 offsets[4]; - SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_DEF_SHIFT; @@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t else if (arg_count != word_arg_count) ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); else if (arg_count == 1) - ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); + ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); arg_count--; word_arg_count--; diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c index ff6f048659..f841aef5dd 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_ins prev_ins = NOP; sljit_ins ins = NOP; - SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); + SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); arg_types >>= SLJIT_DEF_SHIFT; @@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t if (arg_count != 
word_arg_count) ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); else if (arg_count == 1) - ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); + ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4); arg_count--; word_arg_count--; break; diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c index e108433f70..894e21304b 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins; #define RETURN_ADDR_REG 31 /* Flags are kept in volatile registers. */ -#define EQUAL_FLAG 31 +#define EQUAL_FLAG 3 #define OTHER_FLAG 1 #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31 }; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi /* Frequent case. 
*/ FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); base = S(SLJIT_SP); + offs = local_size - (sljit_sw)sizeof(sljit_sw); } else { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); + FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size)); FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); local_size = 0; + offs = -(sljit_sw)sizeof(sljit_sw); } - offs = local_size - (sljit_sw)(sizeof(sljit_sw)); FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; @@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { tmp_ar = reg_ar; delay_slot = reg_ar; - } else { + } + else { tmp_ar = DR(TMP_REG1); delay_slot = MOVABLE_INS; } @@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { + sljit_s32 tmp_ar, base, delay_slot; + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } + else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, 
SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar)); + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar)); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + + FAIL_IF(load_immediate(compiler, tmp_ar, argw)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar)); + + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c index 8a83e273a4..074e64b9f2 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c @@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; + inst += 2; } if (args > 1) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; + inst += 2; } if (args > 2) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; - *inst++ = 0x24; - *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. 
*/ + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; + inst[2] = 0x24; + inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ } #else if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; - *inst++ = sizeof(sljit_sw) * 2; + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 2; + inst += 3; } if (args > 1) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; - *inst++ = sizeof(sljit_sw) * 3; + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 3; + inst += 3; } if (args > 2) { - *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; - *inst++ = sizeof(sljit_sw) * 4; + inst[0] = MOV_r_rm; + inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; + inst[2] = sizeof(sljit_sw) * 4; } #endif @@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi compiler->local_size = local_size; #ifdef _WIN32 - if (local_size > 1024) { -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); -#else - /* Space for a single argument. This amount is excluded when the stack is allocated below. 
*/ - local_size -= sizeof(sljit_sw); - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw))); -#endif - FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); + + SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_s8) -16; + } + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); } #endif diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c index 635ebd087c..8506565614 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c @@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + compiler->mode32 = 0; + #ifdef _WIN64 /* Two/four register 
slots for parameters plus space for xmm6 register if needed. */ if (fscratches >= 6 || fsaveds >= 1) @@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #ifndef _WIN64 if (args > 0) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; + inst += 3; } if (args > 1) { - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; + inst[0] = REX_W | REX_R; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; + inst += 3; } if (args > 2) { - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; + inst[0] = REX_W | REX_R; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; } #else if (args > 0) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; + inst += 3; } if (args > 1) { - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; + inst[0] = REX_W; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; + inst += 3; } if (args > 2) { - *inst++ = REX_W | REX_B; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; + inst[0] = REX_W | REX_B; + inst[1] = MOV_r_rm; + inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; } #endif } @@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi compiler->local_size = local_size; #ifdef _WIN64 - if (local_size > 1024) { - /* Allocate stack for the callback, which grows the stack. 
*/ - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32))); - FAIL_IF(!inst); - INC_SIZE(4 + (3 + sizeof(sljit_s32))); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; - /* Allocated size for registers must be divisible by 8. */ - SLJIT_ASSERT(!(saved_register_size & 0x7)); - /* Aligned to 16 byte. */ - if (saved_register_size & 0x8) { - *inst++ = 5 * sizeof(sljit_sw); - local_size -= 5 * sizeof(sljit_sw); - } else { - *inst++ = 4 * sizeof(sljit_sw); - local_size -= 4 * sizeof(sljit_sw); - } - /* Second instruction */ - SLJIT_ASSERT(reg_map[SLJIT_R0] < 8); - *inst++ = REX_W; - *inst++ = MOV_rm_i32; - *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; - sljit_unaligned_store_s32(inst, local_size); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); - } -#endif - if (local_size > 0) { - if (local_size <= 127) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; - *inst++ = local_size; + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); } else { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12); + + SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); + + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); + 
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); - INC_SIZE(7); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; - sljit_unaligned_store_s32(inst, local_size); - inst += sizeof(sljit_s32); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_s8) -19; } + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif + + if (local_size > 0) { + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); } #ifdef _WIN64 diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c index ab7b36adb2..6f02ee3e8b 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c @@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); -#ifdef _WIN32 -#include - -static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size) -{ - /* Workaround for calling the internal _chkstk() function on Windows. - This function touches all 4k pages belongs to the requested stack space, - which size is passed in local_size. This is necessary on Windows where - the stack can only grow in 4k steps. However, this function just burn - CPU cycles if the stack is large enough. However, you don't know it in - advance, so it must always be called. I think this is a bad design in - general even if it has some reasons. 
*/ - *(volatile sljit_s32*)alloca(local_size) = 0; -} - -#endif - #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #include "sljitNativeX86_32.c" #else -- cgit v1.2.3 From 948f8ce2ecb2d6d2713279311d6090268321f0fb Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 28 Aug 2018 17:51:51 -0700 Subject: QWinEventNotifier: fix crash on application shutdown The event dispatcher can be null already but we may have outstanding QWinEventNotifier objects (like in a QProcess). Patch-By: Tamas Karpati Task-number: QTBUG-70214 Change-Id: I5e432e273def425ea334fffd154f34abfd6cb11a Reviewed-by: Friedemann Kleint --- src/corelib/kernel/qwineventnotifier.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/corelib/kernel/qwineventnotifier.cpp b/src/corelib/kernel/qwineventnotifier.cpp index 24de491326..fe086acb5d 100644 --- a/src/corelib/kernel/qwineventnotifier.cpp +++ b/src/corelib/kernel/qwineventnotifier.cpp @@ -256,6 +256,14 @@ static void CALLBACK wfsoCallback(void *context, BOOLEAN /*ignore*/) { QWinEventNotifierPrivate *nd = reinterpret_cast(context); QAbstractEventDispatcher *eventDispatcher = nd->threadData->eventDispatcher.load(); + + // Happens when Q(Core)Application is destroyed before QWinEventNotifier. + // https://bugreports.qt.io/browse/QTBUG-70214 + if (!eventDispatcher) { // perhaps application is shutting down + qWarning("QWinEventNotifier: no event dispatcher, application shutting down? Cannot deliver event."); + return; + } + QEventDispatcherWin32Private *edp = QEventDispatcherWin32Private::get( static_cast(eventDispatcher)); ++nd->signaledCount; -- cgit v1.2.3 From 2708c6c11d685ab25c12d558961d924c9a4533d2 Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Mon, 1 Oct 2018 13:43:44 +0200 Subject: OpenSSL: force the "1.0.0" soname when loading OpenSSL 1.0 Some Linux distributions patch OpenSSL's soname, making builds on such distributions not deployable elsewhere. 
The problem is that the code loading OpenSSL symbols would attempt to use the soname of the build machine, and would therefore not find the OpenSSL libraries on the deploy system. The binary builds of Qt for Linux are affected by this problem, as they build under RHEL7.4 which changes the soname of OpenSSL to a non-standard string. This makes the binary builds not pick up OpenSSL 1.0 from the machine on which the build gets installed. Given that in the pre-1.1 versions only the 1.0 series is supported, bump the minimum requirement of Qt to that. The 1.0.x releases (up to 1.0.2, at the time of this writing) have kept binary compatibility, and advertise a soname of "1.0.0", which is used by most distributions. So, if loading of OpenSSL with the build-time soname fails, try to load them with the "1.0.0" hardcoded soname. [ChangeLog][QtNetwork][SSL] OpenSSL >= 1.0 is now required to build Qt with OpenSSL support. Task-number: QTBUG-68156 Change-Id: Ieff1561a3c1d278b511f09fef06580f034f188c6 Reviewed-by: Timur Pocheptsov --- config.tests/openssl/openssl.cpp | 4 ++-- src/network/doc/src/ssl.qdoc | 5 ++--- src/network/ssl/qsslsocket_openssl_symbols.cpp | 23 +++++++++++++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/config.tests/openssl/openssl.cpp b/config.tests/openssl/openssl.cpp index d33b62389c..9188fb008f 100644 --- a/config.tests/openssl/openssl.cpp +++ b/config.tests/openssl/openssl.cpp @@ -39,8 +39,8 @@ #include -#if !defined(OPENSSL_VERSION_NUMBER) || OPENSSL_VERSION_NUMBER-0 < 0x0090700fL -# error "OpenSSL >= 0.9.7 is required" +#if !defined(OPENSSL_VERSION_NUMBER) || OPENSSL_VERSION_NUMBER-0 < 0x10000000L +# error "OpenSSL >= 1.0.0 is required" #endif #include diff --git a/src/network/doc/src/ssl.qdoc b/src/network/doc/src/ssl.qdoc index e4948c393c..58589f8479 100644 --- a/src/network/doc/src/ssl.qdoc +++ b/src/network/doc/src/ssl.qdoc @@ -36,9 +36,8 @@ the Secure Sockets Layer (SSL) protocol, using the \l{OpenSSL Toolkit} to perform 
encryption and protocol handling. - From Qt version 5.2 onwards, the officially supported version for OpenSSL - is 1.0.0 or later. Versions >= 0.9.7 and < 1.0.0 might work, but are not - guaranteed to work. + From Qt version 5.6 onwards, the officially supported version for OpenSSL + is 1.0.0 or later. \annotatedlist ssl diff --git a/src/network/ssl/qsslsocket_openssl_symbols.cpp b/src/network/ssl/qsslsocket_openssl_symbols.cpp index 466eba0bd0..59c93677dd 100644 --- a/src/network/ssl/qsslsocket_openssl_symbols.cpp +++ b/src/network/ssl/qsslsocket_openssl_symbols.cpp @@ -771,6 +771,17 @@ static QPair loadOpenSsl() // reason, we will search a few common paths (see findAllLibSsl() above) in hopes // we find one that works. // + // If that fails, for OpenSSL 1.0 we also try a fallback -- just look up + // libssl.so with a hardcoded soname. The reason is QTBUG-68156: the binary + // builds of Qt happen (at the time of this writing) on RHEL machines, + // which change SHLIB_VERSION_NUMBER to a non-portable string. When running + // those binaries on the target systems, this code won't pick up + // libssl.so.MODIFIED_SHLIB_VERSION_NUMBER because it doesn't exist there. + // Given that the only 1.0 supported release (at the time of this writing) + // is 1.0.2, with soname "1.0.0", give that a try too. Note that we mandate + // OpenSSL >= 1.0.0 with a configure-time check, and OpenSSL has kept binary + // compatibility between 1.0.0 and 1.0.2. + // // It is important, however, to try the canonical name and the unversioned name // without going through the loop. By not specifying a path, we let the system // dlopen(3) function determine it for us. This will include any DT_RUNPATH or @@ -791,6 +802,18 @@ static QPair loadOpenSsl() libssl->unload(); libcrypto->unload(); } + +#if !QT_CONFIG(opensslv11) + // first-and-half attempt: for OpenSSL 1.0 try to load an hardcoded soname. 
+ libssl->setFileNameAndVersion(QLatin1String("ssl"), QLatin1String("1.0.0")); + libcrypto->setFileNameAndVersion(QLatin1String("crypto"), QLatin1String("1.0.0")); + if (libcrypto->load() && libssl->load()) { + return pair; + } else { + libssl->unload(); + libcrypto->unload(); + } +#endif #endif #ifndef Q_OS_DARWIN -- cgit v1.2.3