58 files changed, 2250 insertions, 940 deletions
diff --git a/src/3rdparty/libjpeg/LICENSE b/src/3rdparty/libjpeg/LICENSE
index 99c9aadcc4..a1cdad52fa 100644
--- a/src/3rdparty/libjpeg/LICENSE
+++ b/src/3rdparty/libjpeg/LICENSE
@@ -91,7 +91,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================
 
-Copyright (C)2009-2020 D. R. Commander.  All Rights Reserved.
+Copyright (C)2009-2021 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh b/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh
index 9b7e1fff82..2153eca35a 100755
--- a/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh
+++ b/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh
@@ -165,4 +165,4 @@ for i in $FILES; do
     copy_file "$i" "src/$i"
 done
 
-echo Done. $TARGET_DIR/jconfig.h and jconfigint.h may need manual updating.
+echo Done. $TARGET_DIR/src/jconfig.h and jconfigint.h may need manual updating.
diff --git a/src/3rdparty/libjpeg/libjpeg.pro b/src/3rdparty/libjpeg/libjpeg.pro
index 5c3563aae3..d478719af8 100644
--- a/src/3rdparty/libjpeg/libjpeg.pro
+++ b/src/3rdparty/libjpeg/libjpeg.pro
@@ -12,7 +12,7 @@ MODULE_EXT_HEADERS = $$PWD/src/jpeglib.h \
                      $$PWD/src/jconfig.h \
                      $$PWD/src/jmorecfg.h
 
-INCLUDEPATH += $$PWD $$PWD/src
+INCLUDEPATH += $$PWD/src
 
 load(qt_helper_lib)
 
diff --git a/src/3rdparty/libjpeg/qt_attribution.json b/src/3rdparty/libjpeg/qt_attribution.json
index a1dd9990c4..0940636da3 100644
--- a/src/3rdparty/libjpeg/qt_attribution.json
+++ b/src/3rdparty/libjpeg/qt_attribution.json
@@ -6,11 +6,11 @@
 
     "Description": "The Independent JPEG Group's JPEG software",
     "Homepage": "http://libjpeg-turbo.virtualgl.org/",
-    "Version": "2.0.6",
+    "Version": "2.1.0",
     "License": "Independent JPEG Group License",
     "LicenseId": "IJG",
     "LicenseFile": "LICENSE",
-    "Copyright": "Copyright (C) 2009-2020 D. R. Commander
+    "Copyright": "Copyright (C) 2009-2021 D. R. Commander
 Copyright (C) 2015, 2020 Google, Inc.
 Copyright (C) 2019 Arm Limited
 Copyright (C) 2015-2016, 2018 Matthieu Darbois
diff --git a/src/3rdparty/libjpeg/src/ChangeLog.md b/src/3rdparty/libjpeg/src/ChangeLog.md
index 7b5f875464..5ecbf3b510 100644
--- a/src/3rdparty/libjpeg/src/ChangeLog.md
+++ b/src/3rdparty/libjpeg/src/ChangeLog.md
@@ -1,3 +1,208 @@
+2.1.1
+=====
+
+### Significant changes relative to 2.1.0
+
+1. Fixed a regression introduced in 2.1.0 that caused build failures with
+non-GCC-compatible compilers for Un*x/Arm platforms.
+
+
+2.1.0
+=====
+
+### Significant changes relative to 2.1 beta1
+
+1. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to
+decompress certain progressive JPEG images with one or more component planes of
+width 8 or less caused a buffer overrun.
+
+2. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to
+decompress a specially-crafted malformed progressive JPEG image caused the
+block smoothing algorithm to read from uninitialized memory.
+
+3. Fixed an issue in the Arm Neon SIMD Huffman encoders that caused the
+encoders to generate incorrect results when using the Clang compiler with
+Visual Studio.
+
+4. Fixed a floating point exception (CVE-2021-20205) that occurred when
+attempting to compress a specially-crafted malformed GIF image with a specified
+image width of 0 using cjpeg.
+
+5. Fixed a regression introduced by 2.0 beta1[15] whereby attempting to
+generate a progressive JPEG image on an SSE2-capable CPU using a scan script
+containing one or more scans with lengths divisible by 32 and non-zero
+successive approximation low bit positions would, under certain circumstances,
+result in an error ("Missing Huffman code table entry") and an invalid JPEG
+image.
+
+6. Introduced a new flag (`TJFLAG_LIMITSCANS` in the TurboJPEG C API and
+`TJ.FLAG_LIMIT_SCANS` in the TurboJPEG Java API) and a corresponding TJBench
+command-line argument (`-limitscans`) that causes the TurboJPEG decompression
+and transform functions/operations to return/throw an error if a progressive
+JPEG image contains an unreasonably large number of scans.  This allows
+applications that use the TurboJPEG API to guard against an exploit of the
+progressive JPEG format described in the report
+["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf).
+
+7. The PPM reader now throws an error, rather than segfaulting (due to a buffer
+overrun) or generating incorrect pixels, if an application attempts to use the
+`tjLoadImage()` function to load a 16-bit binary PPM file (a binary PPM file
+with a maximum value greater than 255) into a grayscale image buffer or to load
+a 16-bit binary PGM file into an RGB image buffer.
+
+8. Fixed an issue in the PPM reader that caused incorrect pixels to be
+generated when using the `tjLoadImage()` function to load a 16-bit binary PPM
+file into an extended RGB image buffer.
+
+9. Fixed an issue whereby, if a JPEG buffer was automatically re-allocated by
+one of the TurboJPEG compression or transform functions and an error
+subsequently occurred during compression or transformation, the JPEG buffer
+pointer passed by the application was not updated when the function returned.
+
+
+2.0.90 (2.1 beta1)
+==================
+
+### Significant changes relative to 2.0.6:
+
+1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now
+support the x32 ABI on Linux, which allows for using x86-64 instructions with
+32-bit pointers.  The x32 ABI is generally enabled by adding `-mx32` to the
+compiler flags.
+
+     Caveats:
+     - CMake 3.9.0 or later is required in order for the build system to
+automatically detect an x32 build.
+     - Java does not support the x32 ABI, and thus the TurboJPEG Java API will
+automatically be disabled with x32 builds.
+
+2. Added Loongson MMI SIMD implementations of the RGB-to-grayscale, 4:2:2 fancy
+chroma upsampling, 4:2:2 and 4:2:0 merged chroma upsampling/color conversion,
+and fast integer DCT/IDCT algorithms.  Relative to libjpeg-turbo 2.0.x, this
+speeds up:
+
+     - the compression of RGB source images into grayscale JPEG images by
+approximately 20%
+     - the decompression of 4:2:2 JPEG images by approximately 40-60% when
+using fancy upsampling
+     - the decompression of 4:2:2 and 4:2:0 JPEG images by approximately
+15-20% when using merged upsampling
+     - the compression of RGB source images by approximately 30-45% when using
+the fast integer DCT
+     - the decompression of JPEG images into RGB destination images by
+approximately 2x when using the fast integer IDCT
+
+    The overall decompression speedup for RGB images is now approximately
+2.3-3.7x (compared to 2-3.5x with libjpeg-turbo 2.0.x.)
+
+3. 32-bit (Armv7 or Armv7s) iOS builds of libjpeg-turbo are no longer
+supported, and the libjpeg-turbo build system can no longer be used to package
+such builds.  32-bit iOS apps cannot run in iOS 11 and later, and the App Store
+no longer allows them.
+
+4. 32-bit (i386) OS X/macOS builds of libjpeg-turbo are no longer supported,
+and the libjpeg-turbo build system can no longer be used to package such
+builds.  32-bit Mac applications cannot run in macOS 10.15 "Catalina" and
+later, and the App Store no longer allows them.
+
+5. The SSE2 (x86 SIMD) and C Huffman encoding algorithms have been
+significantly optimized, resulting in a measured average overall compression
+speedup of 12-28% for 64-bit code and 22-52% for 32-bit code on various Intel
+and AMD CPUs, as well as a measured average overall compression speedup of
+0-23% on platforms that do not have a SIMD-accelerated Huffman encoding
+implementation.
+
+6. The block smoothing algorithm that is applied by default when decompressing
+progressive Huffman-encoded JPEG images has been improved in the following
+ways:
+
+     - The algorithm is now more fault-tolerant.  Previously, if a particular
+scan was incomplete, then the smoothing parameters for the incomplete scan
+would be applied to the entire output image, including the parts of the image
+that were generated by the prior (complete) scan.  Visually, this had the
+effect of removing block smoothing from lower-frequency scans if they were
+followed by an incomplete higher-frequency scan.  libjpeg-turbo now applies
+block smoothing parameters to each iMCU row based on which scan generated the
+pixels in that row, rather than always using the block smoothing parameters for
+the most recent scan.
+     - When applying block smoothing to DC scans, a Gaussian-like kernel with a
+5x5 window is used to reduce the "blocky" appearance.
+
+7. Added SIMD acceleration for progressive Huffman encoding on Arm platforms.
+This speeds up the compression of full-color progressive JPEGs by about 30-40%
+on average (relative to libjpeg-turbo 2.0.x) when using modern Arm CPUs.
+
+8. Added configure-time and run-time auto-detection of Loongson MMI SIMD
+instructions, so that the Loongson MMI SIMD extensions can be included in any
+MIPS64 libjpeg-turbo build.
+
+9. Added fault tolerance features to djpeg and jpegtran, mainly to demonstrate
+methods by which applications can guard against the exploits of the JPEG format
+described in the report
+["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf).
+
+     - Both programs now accept a `-maxscans` argument, which can be used to
+limit the number of allowable scans in the input file.
+     - Both programs now accept a `-strict` argument, which can be used to
+treat all warnings as fatal.
+
+10. CMake package config files are now included for both the libjpeg and
+TurboJPEG API libraries.  This facilitates using libjpeg-turbo with CMake's
+`find_package()` function.  For example:
+
+        find_package(libjpeg-turbo CONFIG REQUIRED)
+
+        add_executable(libjpeg_program libjpeg_program.c)
+        target_link_libraries(libjpeg_program PUBLIC libjpeg-turbo::jpeg)
+
+        add_executable(libjpeg_program_static libjpeg_program.c)
+        target_link_libraries(libjpeg_program_static PUBLIC
+          libjpeg-turbo::jpeg-static)
+
+        add_executable(turbojpeg_program turbojpeg_program.c)
+        target_link_libraries(turbojpeg_program PUBLIC
+          libjpeg-turbo::turbojpeg)
+
+        add_executable(turbojpeg_program_static turbojpeg_program.c)
+        target_link_libraries(turbojpeg_program_static PUBLIC
+          libjpeg-turbo::turbojpeg-static)
+
+11. Since the Unisys LZW patent has long expired, cjpeg and djpeg can now
+read/write both LZW-compressed and uncompressed GIF files (feature ported from
+jpeg-6a and jpeg-9d.)
+
+12. jpegtran now includes the `-wipe` and `-drop` options from jpeg-9a and
+jpeg-9d, as well as the ability to expand the image size using the `-crop`
+option.  Refer to jpegtran.1 or usage.txt for more details.
+
+13. Added a complete intrinsics implementation of the Arm Neon SIMD extensions,
+thus providing SIMD acceleration on Arm platforms for all of the algorithms
+that are SIMD-accelerated on x86 platforms.  This new implementation is
+significantly faster in some cases than the old GAS implementation--
+depending on the algorithms used, the type of CPU core, and the compiler.  GCC,
+as of this writing, does not provide a full or optimal set of Neon intrinsics,
+so for performance reasons, the default when building libjpeg-turbo with GCC is
+to continue using the GAS implementation of the following algorithms:
+
+     - 32-bit RGB-to-YCbCr color conversion
+     - 32-bit fast and accurate inverse DCT
+     - 64-bit RGB-to-YCbCr and YCbCr-to-RGB color conversion
+     - 64-bit accurate forward and inverse DCT
+     - 64-bit Huffman encoding
+
+    A new CMake variable (`NEON_INTRINSICS`) can be used to override this
+default.
+
+    Since the new intrinsics implementation includes SIMD acceleration
+for merged upsampling/color conversion, 1.5.1[5] is no longer necessary and has
+been reverted.
+
+14. The Arm Neon SIMD extensions can now be built using Visual Studio.
+
+15. The build system can now be used to generate a universal x86-64 + Armv8
+libjpeg-turbo SDK package for both iOS and macOS.
+
+
 2.0.6
 =====
 
diff --git a/src/3rdparty/libjpeg/src/README.ijg b/src/3rdparty/libjpeg/src/README.ijg
index d681cf1273..9453c19501 100644
--- a/src/3rdparty/libjpeg/src/README.ijg
+++ b/src/3rdparty/libjpeg/src/README.ijg
@@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
 
-This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding.
+This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 
 Permission is hereby granted to use, copy, modify, and distribute this
@@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are
 assumed by the product vendor.
 
 
-The IJG distribution formerly included code to read and write GIF files.
-To avoid entanglement with the Unisys LZW patent (now expired), GIF reading
-support has been removed altogether, and the GIF writer has been simplified
-to produce "uncompressed GIFs".  This technique does not use the LZW
-algorithm; the resulting GIF files are larger than usual, but are readable
-by all standard GIF decoders.
-
-We are required to state that
-    "The Graphics Interchange Format(c) is the Copyright property of
-    CompuServe Incorporated.  GIF(sm) is a Service Mark property of
-    CompuServe Incorporated."
-
-
 REFERENCES
 ==========
 
diff --git a/src/3rdparty/libjpeg/src/README.md b/src/3rdparty/libjpeg/src/README.md
index 90a4a43ee1..01e391ea7c 100644
--- a/src/3rdparty/libjpeg/src/README.md
+++ b/src/3rdparty/libjpeg/src/README.md
@@ -3,7 +3,7 @@ Background
 
 libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
 baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and
-MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm
 systems.  On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
 all else being equal.  On other types of systems, libjpeg-turbo can still
 outperform libjpeg by a significant amount, by virtue of its highly-optimized
diff --git a/src/3rdparty/libjpeg/src/change.log b/src/3rdparty/libjpeg/src/change.log
index f090d7788c..e4d0ddc4bd 100644
--- a/src/3rdparty/libjpeg/src/change.log
+++ b/src/3rdparty/libjpeg/src/change.log
@@ -6,6 +6,25 @@ reference.  Please see ChangeLog.md for information specific to libjpeg-turbo.
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 9d  12-Jan-2020
+-----------------------
+
+Restore GIF read and write support from libjpeg version 6a.
+Thank to Wolfgang Werner (W.W.) Heinz for suggestion.
+
+Add jpegtran -drop option; add options to the crop extension and wipe
+to fill the extra area with content from the source image region,
+instead of gray out.
+
+
+Version 9c  14-Jan-2018
+-----------------------
+
+jpegtran: add an option to the -wipe switch to fill the region
+with the average of adjacent blocks, instead of gray out.
+Thank to Caitlyn Feddock and Maddie Ziegler for inspiration.
+
+
 Version 9b  17-Jan-2016
 -----------------------
 
@@ -13,6 +32,13 @@ Document 'f' specifier for jpegtran -crop specification.
 Thank to Michele Martone for suggestion.
 
 
+Version 9a  19-Jan-2014
+-----------------------
+
+Add jpegtran -wipe option and extension for -crop.
+Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
+
+
 Version 9  13-Jan-2013
 ----------------------
 
@@ -138,11 +164,6 @@ Huffman tables being used.
 
 Huffman tables are checked for validity much more carefully than before.
 
-To avoid the Unisys LZW patent, djpeg's GIF output capability has been
-changed to produce "uncompressed GIFs", and cjpeg's GIF input capability
-has been removed altogether.  We're not happy about it either, but there
-seems to be no good alternative.
-
 The configure script now supports building libjpeg as a shared library
 on many flavors of Unix (all the ones that GNU libtool knows how to
 build shared libraries for).  Use "./configure --enable-shared" to
diff --git a/src/3rdparty/libjpeg/src/jccolext.c b/src/3rdparty/libjpeg/src/jccolext.c
index 19c955c9d6..303b322ce6 100644
--- a/src/3rdparty/libjpeg/src/jccolext.c
+++ b/src/3rdparty/libjpeg/src/jccolext.c
@@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr2 = output_buf[2][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      r = inptr[RGB_RED];
+      g = inptr[RGB_GREEN];
+      b = inptr[RGB_BLUE];
       inptr += RGB_PIXELSIZE;
       /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
@@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      r = inptr[RGB_RED];
+      g = inptr[RGB_GREEN];
+      b = inptr[RGB_BLUE];
       inptr += RGB_PIXELSIZE;
       /* Y */
       outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
@@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr2 = output_buf[2][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      outptr0[col] = GETJSAMPLE(inptr[RGB_RED]);
-      outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]);
-      outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]);
+      outptr0[col] = inptr[RGB_RED];
+      outptr1[col] = inptr[RGB_GREEN];
+      outptr2[col] = inptr[RGB_BLUE];
       inptr += RGB_PIXELSIZE;
     }
   }
diff --git a/src/3rdparty/libjpeg/src/jccolor.c b/src/3rdparty/libjpeg/src/jccolor.c
index 036f6016d1..bdc563c723 100644
--- a/src/3rdparty/libjpeg/src/jccolor.c
+++ b/src/3rdparty/libjpeg/src/jccolor.c
@@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr3 = output_buf[3][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
-      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
-      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+      r = MAXJSAMPLE - inptr[0];
+      g = MAXJSAMPLE - inptr[1];
+      b = MAXJSAMPLE - inptr[2];
       /* K passes through as-is */
-      outptr3[col] = inptr[3];  /* don't need GETJSAMPLE here */
+      outptr3[col] = inptr[3];
       inptr += 4;
       /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
@@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      outptr[col] = inptr[0];   /* don't need GETJSAMPLE() here */
+      outptr[col] = inptr[0];
       inptr += instride;
     }
   }
@@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
         inptr = *input_buf;
         outptr = output_buf[ci][output_row];
         for (col = 0; col < num_cols; col++) {
-          outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+          outptr[col] = inptr[ci];
           inptr += nc;
         }
       }
diff --git a/src/3rdparty/libjpeg/src/jcdctmgr.c b/src/3rdparty/libjpeg/src/jcdctmgr.c
index c04058e6ce..7dae17a6e1 100644
--- a/src/3rdparty/libjpeg/src/jcdctmgr.c
+++ b/src/3rdparty/libjpeg/src/jcdctmgr.c
@@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
     elemptr = sample_data[elemr] + start_col;
 
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
 #else
     {
       register int elemc;
       for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+        *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
     }
 #endif
   }
@@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
   for (elemr = 0; elemr < DCTSIZE; elemr++) {
     elemptr = sample_data[elemr] + start_col;
 #if DCTSIZE == 8                /* unroll the inner loop */
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
 #else
     {
       register int elemc;
       for (elemc = DCTSIZE; elemc > 0; elemc--)
-        *workspaceptr++ = (FAST_FLOAT)
-                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
     }
 #endif
   }
diff --git a/src/3rdparty/libjpeg/src/jchuff.c b/src/3rdparty/libjpeg/src/jchuff.c
index db85ce114f..8ff817b151 100644
--- a/src/3rdparty/libjpeg/src/jchuff.c
+++ b/src/3rdparty/libjpeg/src/jchuff.c
@@ -4,8 +4,10 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
  * Copyright (C) 2015, Matthieu Darbois.
+ * Copyright (C) 2018, Matthias Räncker.
+ * Copyright (C) 2020, Arm Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -43,14 +45,19 @@
  */
 
 /* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+    defined(_M_ARM) || defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
@@ -65,31 +72,42 @@
  * but must not be updated permanently until we complete the MCU.
  */
 
-typedef struct {
-  size_t put_buffer;                    /* current bit-accumulation buffer */
-  int put_bits;                         /* # of bits now in it */
-  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
-} savable_state;
+#if defined(__x86_64__) && defined(__ILP32__)
+typedef unsigned long long bit_buf_type;
+#else
+typedef size_t bit_buf_type;
+#endif
 
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
+/* NOTE: The more optimal Huffman encoding algorithm is only used by the
+ * intrinsics implementation of the Arm Neon SIMD extensions, which is why we
+ * retain the old Huffman encoder behavior when using the GAS implementation.
  */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src)  ((dest) = (src))
+#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
+                            defined(_M_ARM) || defined(_M_ARM64))
+typedef unsigned long long simd_bit_buf_type;
 #else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
-  ((dest).put_buffer = (src).put_buffer, \
-   (dest).put_bits = (src).put_bits, \
-   (dest).last_dc_val[0] = (src).last_dc_val[0], \
-   (dest).last_dc_val[1] = (src).last_dc_val[1], \
-   (dest).last_dc_val[2] = (src).last_dc_val[2], \
-   (dest).last_dc_val[3] = (src).last_dc_val[3])
+typedef bit_buf_type simd_bit_buf_type;
 #endif
+
+#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \
+    (defined(__x86_64__) && defined(__ILP32__))
+#define BIT_BUF_SIZE  64
+#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)
+#define BIT_BUF_SIZE  32
+#else
+#error Cannot determine word size
 #endif
+#define SIMD_BIT_BUF_SIZE  (sizeof(simd_bit_buf_type) * 8)
 
+typedef struct {
+  union {
+    bit_buf_type c;
+    simd_bit_buf_type simd;
+  } put_buffer;                         /* current bit accumulation buffer */
+  int free_bits;                        /* # of bits available in it */
+                                        /* (Neon GAS: # of bits now in it) */
+  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
+} savable_state;
 
 typedef struct {
   struct jpeg_entropy_encoder pub; /* public fields */
@@ -123,6 +141,7 @@ typedef struct {
   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
   savable_state cur;            /* Current bit buffer & DC state */
   j_compress_ptr cinfo;         /* dump_buffer needs access to this */
+  int simd;
 } working_state;
 
 
@@ -201,8 +220,17 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
   }
 
   /* Initialize bit buffer to empty */
-  entropy->saved.put_buffer = 0;
-  entropy->saved.put_bits = 0;
+  if (entropy->simd) {
+    entropy->saved.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    entropy->saved.free_bits = 0;
+#else
+    entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+  } else {
+    entropy->saved.put_buffer.c = 0;
+    entropy->saved.free_bits = BIT_BUF_SIZE;
+  }
 
   /* Initialize restart stuff */
   entropy->restarts_to_go = cinfo->restart_interval;
@@ -287,6 +315,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
    * this lets us detect duplicate VAL entries here, and later
    * allows emit_bits to detect any attempt to emit such symbols.
    */
+  MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
   MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
 
   /* This is also a convenient place to check for out-of-range
@@ -334,94 +363,94 @@ dump_buffer(working_state *state)
 
 /* Outputting bits to the file */
 
-/* These macros perform the same task as the emit_bits() function in the
- * original libjpeg code.  In addition to reducing overhead by explicitly
- * inlining the code, additional performance is achieved by taking into
- * account the size of the bit buffer and waiting until it is almost full
- * before emptying it.  This mostly benefits 64-bit platforms, since 6
- * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
+/* Output byte b and, speculatively, an additional 0 byte.  0xFF must be
+ * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
+ * byte is 0xFF.  Otherwise, the output buffer pointer is advanced by 1, and
+ * the speculative 0 byte will be overwritten by the next byte.
  */
-
-#define EMIT_BYTE() { \
-  JOCTET c; \
-  put_bits -= 8; \
-  c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
-  *buffer++ = c; \
-  if (c == 0xFF)  /* need to stuff a zero byte? */ \
-    *buffer++ = 0; \
+#define EMIT_BYTE(b) { \
+  buffer[0] = (JOCTET)(b); \
+  buffer[1] = 0; \
+  buffer -= -2 + ((JOCTET)(b) < 0xFF); \
 }
 
-#define PUT_BITS(code, size) { \
-  put_bits += size; \
-  put_buffer = (put_buffer << size) | code; \
-}
-
-#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
-
-#define CHECKBUF15() { \
-  if (put_bits > 15) { \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
+/* Output the entire bit buffer.  If there are no 0xFF bytes in it, then write
+ * directly to the output buffer.  Otherwise, use the EMIT_BYTE() macro to
+ * encode 0xFF as 0xFF 0x00.
+ */
+#if BIT_BUF_SIZE == 64
+
+#define FLUSH() { \
+  if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
+    EMIT_BYTE(put_buffer >> 56) \
+    EMIT_BYTE(put_buffer >> 48) \
+    EMIT_BYTE(put_buffer >> 40) \
+    EMIT_BYTE(put_buffer >> 32) \
+    EMIT_BYTE(put_buffer >> 24) \
+    EMIT_BYTE(put_buffer >> 16) \
+    EMIT_BYTE(put_buffer >>  8) \
+    EMIT_BYTE(put_buffer      ) \
+  } else { \
+    buffer[0] = (JOCTET)(put_buffer >> 56); \
+    buffer[1] = (JOCTET)(put_buffer >> 48); \
+    buffer[2] = (JOCTET)(put_buffer >> 40); \
+    buffer[3] = (JOCTET)(put_buffer >> 32); \
+    buffer[4] = (JOCTET)(put_buffer >> 24); \
+    buffer[5] = (JOCTET)(put_buffer >> 16); \
+    buffer[6] = (JOCTET)(put_buffer >> 8); \
+    buffer[7] = (JOCTET)(put_buffer); \
+    buffer += 8; \
   } \
 }
 
-#endif
-
-#define CHECKBUF31() { \
-  if (put_bits > 31) { \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-  } \
-}
+#else
 
-#define CHECKBUF47() { \
-  if (put_bits > 47) { \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
-    EMIT_BYTE() \
+#define FLUSH() { \
+  if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
+    EMIT_BYTE(put_buffer >> 24) \
+    EMIT_BYTE(put_buffer >> 16) \
+    EMIT_BYTE(put_buffer >>  8) \
+    EMIT_BYTE(put_buffer      ) \
+  } else { \
+    buffer[0] = (JOCTET)(put_buffer >> 24); \
+    buffer[1] = (JOCTET)(put_buffer >> 16); \
+    buffer[2] = (JOCTET)(put_buffer >> 8); \
+    buffer[3] = (JOCTET)(put_buffer); \
+    buffer += 4; \
   } \
 }
 
-#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
-#error Cannot determine word size
 #endif
 
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
-
-#define EMIT_BITS(code, size) { \
-  CHECKBUF47() \
-  PUT_BITS(code, size) \
-}
-
-#define EMIT_CODE(code, size) { \
-  temp2 &= (((JLONG)1) << nbits) - 1; \
-  CHECKBUF31() \
-  PUT_BITS(code, size) \
-  PUT_BITS(temp2, nbits) \
+/* Fill the bit buffer to capacity with the leading bits from code, then output
+ * the bit buffer and put the remaining bits from code into the bit buffer.
+ */
+#define PUT_AND_FLUSH(code, size) { \
+  put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \
+  FLUSH() \
+  free_bits += BIT_BUF_SIZE; \
+  put_buffer = code; \
 }
 
-#else
-
-#define EMIT_BITS(code, size) { \
-  PUT_BITS(code, size) \
-  CHECKBUF15() \
+/* Insert code into the bit buffer and output the bit buffer if needed.
+ * NOTE: We can't flush with free_bits == 0, since the left shift in
+ * PUT_AND_FLUSH() would have undefined behavior.
+ */
+#define PUT_BITS(code, size) { \
+  free_bits -= size; \
+  if (free_bits < 0) \
+    PUT_AND_FLUSH(code, size) \
+  else \
+    put_buffer = (put_buffer << size) | code; \
 }
 
-#define EMIT_CODE(code, size) { \
-  temp2 &= (((JLONG)1) << nbits) - 1; \
-  PUT_BITS(code, size) \
-  CHECKBUF15() \
-  PUT_BITS(temp2, nbits) \
-  CHECKBUF15() \
+#define PUT_CODE(code, size) { \
+  temp &= (((JLONG)1) << nbits) - 1; \
+  temp |= code << nbits; \
+  nbits += size; \
+  PUT_BITS(temp, nbits) \
 }
 
-#endif
-
 
 /* Although it is exceedingly rare, it is possible for a Huffman-encoded
  * coefficient block to be larger than the 128-byte unencoded block.  For each
@@ -444,6 +473,7 @@ dump_buffer(working_state *state)
 
 #define STORE_BUFFER() { \
   if (localbuf) { \
+    size_t bytes, bytestocopy; \
     bytes = buffer - _buffer; \
     buffer = _buffer; \
     while (bytes > 0) { \
@@ -466,20 +496,46 @@ dump_buffer(working_state *state)
 LOCAL(boolean)
 flush_bits(working_state *state)
 {
-  JOCTET _buffer[BUFSIZE], *buffer;
-  size_t put_buffer;  int put_bits;
-  size_t bytes, bytestocopy;  int localbuf = 0;
+  JOCTET _buffer[BUFSIZE], *buffer, temp;
+  simd_bit_buf_type put_buffer;  int put_bits;
+  int localbuf = 0;
+
+  if (state->simd) {
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    put_bits = state->cur.free_bits;
+#else
+    put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
+#endif
+    put_buffer = state->cur.put_buffer.simd;
+  } else {
+    put_bits = BIT_BUF_SIZE - state->cur.free_bits;
+    put_buffer = state->cur.put_buffer.c;
+  }
 
-  put_buffer = state->cur.put_buffer;
-  put_bits = state->cur.put_bits;
   LOAD_BUFFER()
 
-  /* fill any partial byte with ones */
-  PUT_BITS(0x7F, 7)
-  while (put_bits >= 8) EMIT_BYTE()
+  while (put_bits >= 8) {
+    put_bits -= 8;
+    temp = (JOCTET)(put_buffer >> put_bits);
+    EMIT_BYTE(temp)
+  }
+  if (put_bits) {
+    /* fill partial byte with ones */
+    temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
+    EMIT_BYTE(temp)
+  }
 
-  state->cur.put_buffer = 0;    /* and reset bit-buffer to empty */
-  state->cur.put_bits = 0;
+  if (state->simd) {                    /* and reset bit buffer to empty */
+    state->cur.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    state->cur.free_bits = 0;
+#else
+    state->cur.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+  } else {
+    state->cur.put_buffer.c = 0;
+    state->cur.free_bits = BIT_BUF_SIZE;
+  }
   STORE_BUFFER()
 
   return TRUE;
@@ -493,7 +549,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
                       c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
   JOCTET _buffer[BUFSIZE], *buffer;
-  size_t bytes, bytestocopy;  int localbuf = 0;
+  int localbuf = 0;
 
   LOAD_BUFFER()
 
@@ -509,53 +565,41 @@ LOCAL(boolean)
 encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
                  c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
-  int temp, temp2, temp3;
-  int nbits;
-  int r, code, size;
+  int temp, nbits, free_bits;
+  bit_buf_type put_buffer;
   JOCTET _buffer[BUFSIZE], *buffer;
-  size_t put_buffer;  int put_bits;
-  int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
-  size_t bytes, bytestocopy;  int localbuf = 0;
+  int localbuf = 0;
 
-  put_buffer = state->cur.put_buffer;
-  put_bits = state->cur.put_bits;
+  free_bits = state->cur.free_bits;
+  put_buffer = state->cur.put_buffer.c;
   LOAD_BUFFER()
 
   /* Encode the DC coefficient difference per section F.1.2.1 */
 
-  temp = temp2 = block[0] - last_dc_val;
+  temp = block[0] - last_dc_val;
 
   /* This is a well-known technique for obtaining the absolute value without a
    * branch.  It is derived from an assembly language technique presented in
    * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
-   * Agner Fog.
+   * Agner Fog.  This code assumes we are on a two's complement machine.
    */
-  temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
-  temp ^= temp3;
-  temp -= temp3;
-
-  /* For a negative input, want temp2 = bitwise complement of abs(input) */
-  /* This code assumes we are on a two's complement machine */
-  temp2 += temp3;
+  nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
+  temp += nbits;
+  nbits ^= temp;
 
   /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = JPEG_NBITS(temp);
-
-  /* Emit the Huffman-coded symbol for the number of bits */
-  code = dctbl->ehufco[nbits];
-  size = dctbl->ehufsi[nbits];
-  EMIT_BITS(code, size)
+  nbits = JPEG_NBITS(nbits);
 
-  /* Mask off any extra bits in code */
-  temp2 &= (((JLONG)1) << nbits) - 1;
-
-  /* Emit that number of bits of the value, if positive, */
-  /* or the complement of its magnitude, if negative. */
-  EMIT_BITS(temp2, nbits)
+  /* Emit the Huffman-coded symbol for the number of bits.
+   * Emit that number of bits of the value, if positive,
+   * or the complement of its magnitude, if negative.
+   */
+  PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
 
   /* Encode the AC coefficients per section F.1.2.2 */
 
-  r = 0;                        /* r = run length of zeros */
+  {
+    int r = 0;                  /* r = run length of zeros */
 
 /* Manually unroll the k loop to eliminate the counter variable.  This
  * improves performance greatly on systems with a limited number of
@@ -563,51 +607,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
  */
 #define kloop(jpeg_natural_order_of_k) { \
   if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
-    r++; \
+    r += 16; \
   } else { \
-    temp2 = temp; \
     /* Branch-less absolute value, bitwise complement, etc., same as above */ \
-    temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
-    temp ^= temp3; \
-    temp -= temp3; \
-    temp2 += temp3; \
-    nbits = JPEG_NBITS_NONZERO(temp); \
+    nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp += nbits; \
+    nbits ^= temp; \
+    nbits = JPEG_NBITS_NONZERO(nbits); \
     /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
-    while (r > 15) { \
-      EMIT_BITS(code_0xf0, size_0xf0) \
-      r -= 16; \
+    while (r >= 16 * 16) { \
+      r -= 16 * 16; \
+      PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
     } \
     /* Emit Huffman symbol for run length / number of bits */ \
-    temp3 = (r << 4) + nbits; \
-    code = actbl->ehufco[temp3]; \
-    size = actbl->ehufsi[temp3]; \
-    EMIT_CODE(code, size) \
+    r += nbits; \
+    PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
     r = 0; \
   } \
 }
 
-  /* One iteration for each value in jpeg_natural_order[] */
-  kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
-  kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
-  kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
-  kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
-  kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
-  kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
-  kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
-  kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
-  kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
-  kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
-  kloop(55);  kloop(62);  kloop(63);
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0) {
-    code = actbl->ehufco[0];
-    size = actbl->ehufsi[0];
-    EMIT_BITS(code, size)
+    /* One iteration for each value in jpeg_natural_order[] */
+    kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
+    kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
+    kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
+    kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
+    kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
+    kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
+    kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
+    kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
+    kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
+    kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
+    kloop(55);  kloop(62);  kloop(63);
+
+    /* If the last coef(s) were zero, emit an end-of-block code */
+    if (r > 0) {
+      PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
+    }
   }
 
-  state->cur.put_buffer = put_buffer;
-  state->cur.put_bits = put_bits;
+  state->cur.put_buffer.c = put_buffer;
+  state->cur.free_bits = free_bits;
   STORE_BUFFER()
 
   return TRUE;
@@ -654,8 +693,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Load up working state */
   state.next_output_byte = cinfo->dest->next_output_byte;
   state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cur = entropy->saved;
   state.cinfo = cinfo;
+  state.simd = entropy->simd;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -694,7 +734,7 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Completed MCU, so update state */
   cinfo->dest->next_output_byte = state.next_output_byte;
   cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
+  entropy->saved = state.cur;
 
   /* Update restart-interval state too */
   if (cinfo->restart_interval) {
@@ -723,8 +763,9 @@ finish_pass_huff(j_compress_ptr cinfo)
   /* Load up working state ... flush_bits needs it */
   state.next_output_byte = cinfo->dest->next_output_byte;
   state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cur = entropy->saved;
   state.cinfo = cinfo;
+  state.simd = entropy->simd;
 
   /* Flush out the last data */
   if (!flush_bits(&state))
@@ -733,7 +774,7 @@ finish_pass_huff(j_compress_ptr cinfo)
   /* Update state */
   cinfo->dest->next_output_byte = state.next_output_byte;
   cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
+  entropy->saved = state.cur;
 }
 
 
diff --git a/src/3rdparty/libjpeg/src/jconfig.h b/src/3rdparty/libjpeg/src/jconfig.h
index 74a74f754c..fa82022edc 100644
--- a/src/3rdparty/libjpeg/src/jconfig.h
+++ b/src/3rdparty/libjpeg/src/jconfig.h
@@ -2,9 +2,9 @@
 
 #define JPEG_LIB_VERSION 80
 
-#define LIBJPEG_TURBO_VERSION 2.0.6
+#define LIBJPEG_TURBO_VERSION 2.1.0
 
-#define LIBJPEG_TURBO_VERSION_NUMBER 2000006
+#define LIBJPEG_TURBO_VERSION_NUMBER 2001000
 
 #define C_ARITH_CODING_SUPPORTED 1
 
diff --git a/src/3rdparty/libjpeg/src/jconfig.h.in b/src/3rdparty/libjpeg/src/jconfig.h.in
index 18a69a4814..d4284d97b8 100644
--- a/src/3rdparty/libjpeg/src/jconfig.h.in
+++ b/src/3rdparty/libjpeg/src/jconfig.h.in
@@ -61,11 +61,6 @@
    unsigned. */
 #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
 
-/* Define to 1 if type `char' is unsigned and you are not using gcc.  */
-#ifndef __CHAR_UNSIGNED__
-  #cmakedefine __CHAR_UNSIGNED__ 1
-#endif
-
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
 
diff --git a/src/3rdparty/libjpeg/jconfigint.h b/src/3rdparty/libjpeg/src/jconfigint.h
index 40d7748e10..cfcac904c5 100644
--- a/src/3rdparty/libjpeg/jconfigint.h
+++ b/src/3rdparty/libjpeg/src/jconfigint.h
@@ -8,7 +8,7 @@
 
 #define PACKAGE_NAME "libjpeg-turbo"
 
-#define VERSION "2.0.6"
+#define VERSION "2.1.0"
 
 #if SIZE_MAX == 0xffffffff
 #define SIZEOF_SIZE_T 4
diff --git a/src/3rdparty/libjpeg/src/jcphuff.c b/src/3rdparty/libjpeg/src/jcphuff.c
index a8b94bed84..9bf96124b4 100644
--- a/src/3rdparty/libjpeg/src/jcphuff.c
+++ b/src/3rdparty/libjpeg/src/jcphuff.c
@@ -4,8 +4,9 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
  * Copyright (C) 2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -52,14 +53,19 @@
  */
 
 /* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+    defined(_M_ARM) || defined(_M_ARM64)
 #if !defined(__thumb__) || defined(__thumb2__)
 #define USE_CLZ_INTRINSIC
 #endif
 #endif
 
 #ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
 #else
 #include "jpeg_nbits_table.h"
@@ -169,24 +175,26 @@ INLINE
 METHODDEF(int)
 count_zeroes(size_t *x)
 {
-  int result;
 #if defined(HAVE_BUILTIN_CTZL)
+  int result;
   result = __builtin_ctzl(*x);
   *x >>= result;
 #elif defined(HAVE_BITSCANFORWARD64)
+  unsigned long result;
   _BitScanForward64(&result, *x);
   *x >>= result;
 #elif defined(HAVE_BITSCANFORWARD)
+  unsigned long result;
   _BitScanForward(&result, *x);
   *x >>= result;
 #else
-  result = 0;
+  int result = 0;
   while ((*x & 1) == 0) {
     ++result;
     *x >>= 1;
   }
 #endif
-  return result;
+  return (int)result;
 }
 
 
@@ -860,7 +868,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block,
 
 #define ENCODE_COEFS_AC_REFINE(label) { \
   while (zerobits) { \
-    int idx = count_zeroes(&zerobits); \
+    idx = count_zeroes(&zerobits); \
     r += idx; \
     cabsvalue += idx; \
     signbits >>= idx; \
@@ -917,7 +925,7 @@ METHODDEF(boolean)
 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
-  register int temp, r;
+  register int temp, r, idx;
   char *BR_buffer;
   unsigned int BR;
   int Sl = cinfo->Se - cinfo->Ss + 1;
@@ -968,7 +976,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 
   if (zerobits) {
     int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
-    int idx = count_zeroes(&zerobits);
+    idx = count_zeroes(&zerobits);
     signbits >>= idx;
     idx += diff;
     r += idx;
diff --git a/src/3rdparty/libjpeg/src/jcsample.c b/src/3rdparty/libjpeg/src/jcsample.c
index bd27b84e06..e8515ebf0f 100644
--- a/src/3rdparty/libjpeg/src/jcsample.c
+++ b/src/3rdparty/libjpeg/src/jcsample.c
@@ -6,7 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
- * Copyright (C) 2015, D. R. Commander.
+ * Copyright (C) 2015, 2019, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
   if (numcols > 0) {
     for (row = 0; row < num_rows; row++) {
       ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];         /* don't need GETJSAMPLE() here */
+      pixval = ptr[-1];
       for (count = numcols; count > 0; count--)
         *ptr++ = pixval;
     }
@@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
       for (v = 0; v < v_expand; v++) {
         inptr = input_data[inrow + v] + outcol_h;
         for (h = 0; h < h_expand; h++) {
-          outvalue += (JLONG)GETJSAMPLE(*inptr++);
+          outvalue += (JLONG)(*inptr++);
         }
       }
       *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
@@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     inptr = input_data[outrow];
     bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ =
-        (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1);
+      *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
       bias ^= 1;                /* 0=>1, 1=>0 */
       inptr += 2;
     }
@@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
       *outptr++ =
-        (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-                   GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2);
+        (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
       bias ^= 3;                /* 1=>2, 2=>1 */
       inptr0 += 2;  inptr1 += 2;
     }
@@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     below_ptr = input_data[inrow + 2];
 
     /* Special case for first column: pretend column -1 is same as column 0 */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-               GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-               GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+               inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
     neighsum += neighsum;
-    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-                GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
     inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
 
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
       /* sum of pixels directly mapped to this output element */
-      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-                  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
       /* sum of edge-neighbor pixels */
-      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-                 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-                 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-                 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+                 inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
       /* The edge-neighbors count twice as much as corner-neighbors */
       neighsum += neighsum;
       /* Add in the corner-neighbors */
-      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-                  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
       /* form final output scaled up by 2^16 */
       membersum = membersum * memberscale + neighsum * neighscale;
       /* round, descale and output it */
@@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     }
 
     /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-               GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-               GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+               inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
     neighsum += neighsum;
-    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-                GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr = (JSAMPLE)((membersum + 32768) >> 16);
 
@@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     below_ptr = input_data[outrow + 1];
 
     /* Special case for first column */
-    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-             GETJSAMPLE(*inptr);
-    membersum = GETJSAMPLE(*inptr++);
-    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-                 GETJSAMPLE(*inptr);
+    colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
+    membersum = *inptr++;
+    nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
     neighsum = colsum + (colsum - membersum) + nextcolsum;
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
     lastcolsum = colsum;  colsum = nextcolsum;
 
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      membersum = GETJSAMPLE(*inptr++);
+      membersum = *inptr++;
       above_ptr++;  below_ptr++;
-      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-                   GETJSAMPLE(*inptr);
+      nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
       neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
       membersum = membersum * memberscale + neighsum * neighscale;
       *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
@@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
     }
 
     /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr);
+    membersum = *inptr;
     neighsum = lastcolsum + (colsum - membersum) + colsum;
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr = (JSAMPLE)((membersum + 32768) >> 16);
diff --git a/src/3rdparty/libjpeg/src/jdapistd.c b/src/3rdparty/libjpeg/src/jdapistd.c
index 38bd1110d9..695a620099 100644
--- a/src/3rdparty/libjpeg/src/jdapistd.c
+++ b/src/3rdparty/libjpeg/src/jdapistd.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
 {
   JDIMENSION n;
   my_master_ptr master = (my_master_ptr)cinfo->master;
+  JSAMPLE dummy_sample[1] = { 0 };
+  JSAMPROW dummy_row = dummy_sample;
   JSAMPARRAY scanlines = NULL;
   void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                          JDIMENSION input_row, JSAMPARRAY output_buf,
@@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
   if (cinfo->cconvert && cinfo->cconvert->color_convert) {
     color_convert = cinfo->cconvert->color_convert;
     cinfo->cconvert->color_convert = noop_convert;
+    /* This just prevents UBSan from complaining about adding 0 to a NULL
+     * pointer.  The pointer isn't actually used.
+     */
+    scanlines = &dummy_row;
   }
 
   if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
@@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
          * decoded coefficients.  This is ~5% faster for large subsets, but
          * it's tough to tell a difference for smaller images.
          */
+        if (!cinfo->entropy->insufficient_data)
+          cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
         (*cinfo->entropy->decode_mcu) (cinfo, NULL);
       }
     }
diff --git a/src/3rdparty/libjpeg/src/jdarith.c b/src/3rdparty/libjpeg/src/jdarith.c
index 6002481e24..7f0d3a785c 100644
--- a/src/3rdparty/libjpeg/src/jdarith.c
+++ b/src/3rdparty/libjpeg/src/jdarith.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Developed 1997-2015 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2018, D. R. Commander.
+ * Copyright (C) 2015-2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo)
     if (!(*src->fill_input_buffer) (cinfo))
       ERREXIT(cinfo, JERR_CANT_SUSPEND);
   src->bytes_in_buffer--;
-  return GETJOCTET(*src->next_input_byte++);
+  return *src->next_input_byte++;
 }
 
 
@@ -665,8 +665,16 @@ bad:
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
       int *coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+      int *prev_coef_bit_ptr =
+        &cinfo->coef_bits[cindex + cinfo->num_components][0];
       if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
         WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+        if (cinfo->input_scan_number > 1)
+          prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+        else
+          prev_coef_bit_ptr[coefi] = 0;
+      }
       for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
         int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
         if (cinfo->Ah != expected)
@@ -727,6 +735,7 @@ bad:
   entropy->c = 0;
   entropy->a = 0;
   entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
+  entropy->pub.insufficient_data = FALSE;
 
   /* Initialize restart counter */
   entropy->restarts_to_go = cinfo->restart_interval;
@@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo)
     int *coef_bit_ptr, ci;
     cinfo->coef_bits = (int (*)[DCTSIZE2])
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                  cinfo->num_components * DCTSIZE2 *
+                                  cinfo->num_components * 2 * DCTSIZE2 *
                                   sizeof(int));
     coef_bit_ptr = &cinfo->coef_bits[0][0];
     for (ci = 0; ci < cinfo->num_components; ci++)
diff --git a/src/3rdparty/libjpeg/src/jdcoefct.c b/src/3rdparty/libjpeg/src/jdcoefct.c
index 2ba6aa11e4..15e6cded62 100644
--- a/src/3rdparty/libjpeg/src/jdcoefct.c
+++ b/src/3rdparty/libjpeg/src/jdcoefct.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander.
  * Copyright (C) 2015, 2020, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
       /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
       jzero_far((void *)coef->MCU_buffer[0],
                 (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK)));
+      if (!cinfo->entropy->insufficient_data)
+        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
       if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
         /* Suspension forced; update state counters and exit */
         coef->MCU_vert_offset = yoffset;
@@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo)
           }
         }
       }
+      if (!cinfo->entropy->insufficient_data)
+        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
       /* Try to fetch the MCU. */
       if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
         /* Suspension forced; update state counters and exit */
@@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
 #ifdef BLOCK_SMOOTHING_SUPPORTED
 
 /*
- * This code applies interblock smoothing as described by section K.8
- * of the JPEG standard: the first 5 AC coefficients are estimated from
- * the DC values of a DCT block and its 8 neighboring blocks.
+ * This code applies interblock smoothing; the first 9 AC coefficients are
+ * estimated from the DC values of a DCT block and its 24 neighboring blocks.
  * We apply smoothing only for progressive JPEG decoding, and only if
  * the coefficients it can estimate are not yet known to full precision.
  */
 
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
+/* Natural-order array positions of the first 9 zigzag-order coefficients */
 #define Q01_POS  1
 #define Q10_POS  8
 #define Q20_POS  16
 #define Q11_POS  9
 #define Q02_POS  2
+#define Q03_POS  3
+#define Q12_POS  10
+#define Q21_POS  17
+#define Q30_POS  24
 
 /*
  * Determine whether block smoothing is applicable and safe.
@@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo)
   int ci, coefi;
   jpeg_component_info *compptr;
   JQUANT_TBL *qtable;
-  int *coef_bits;
-  int *coef_bits_latch;
+  int *coef_bits, *prev_coef_bits;
+  int *coef_bits_latch, *prev_coef_bits_latch;
 
   if (!cinfo->progressive_mode || cinfo->coef_bits == NULL)
     return FALSE;
@@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo)
   if (coef->coef_bits_latch == NULL)
     coef->coef_bits_latch = (int *)
       (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                  cinfo->num_components *
+                                  cinfo->num_components * 2 *
                                   (SAVED_COEFS * sizeof(int)));
   coef_bits_latch = coef->coef_bits_latch;
+  prev_coef_bits_latch =
+    &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS];
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     /* All components' quantization values must already be latched. */
     if ((qtable = compptr->quant_table) == NULL)
       return FALSE;
-    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */
     if (qtable->quantval[0] == 0 ||
         qtable->quantval[Q01_POS] == 0 ||
         qtable->quantval[Q10_POS] == 0 ||
         qtable->quantval[Q20_POS] == 0 ||
         qtable->quantval[Q11_POS] == 0 ||
-        qtable->quantval[Q02_POS] == 0)
+        qtable->quantval[Q02_POS] == 0 ||
+        qtable->quantval[Q03_POS] == 0 ||
+        qtable->quantval[Q12_POS] == 0 ||
+        qtable->quantval[Q21_POS] == 0 ||
+        qtable->quantval[Q30_POS] == 0)
       return FALSE;
     /* DC values must be at least partly known for all components. */
     coef_bits = cinfo->coef_bits[ci];
+    prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
     if (coef_bits[0] < 0)
       return FALSE;
+    coef_bits_latch[0] = coef_bits[0];
     /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
-    for (coefi = 1; coefi <= 5; coefi++) {
+    for (coefi = 1; coefi < SAVED_COEFS; coefi++) {
+      if (cinfo->input_scan_number > 1)
+        prev_coef_bits_latch[coefi] = prev_coef_bits[coefi];
+      else
+        prev_coef_bits_latch[coefi] = -1;
       coef_bits_latch[coefi] = coef_bits[coefi];
       if (coef_bits[coefi] != 0)
         smoothing_useful = TRUE;
     }
     coef_bits_latch += SAVED_COEFS;
+    prev_coef_bits_latch += SAVED_COEFS;
   }
 
   return smoothing_useful;
@@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
   JDIMENSION block_num, last_block_column;
   int ci, block_row, block_rows, access_rows;
   JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
+  JBLOCKROW next_block_row, next_next_block_row;
   JSAMPARRAY output_ptr;
   JDIMENSION output_col;
   jpeg_component_info *compptr;
   inverse_DCT_method_ptr inverse_DCT;
-  boolean first_row, last_row;
+  boolean change_dc;
   JCOEF *workspace;
   int *coef_bits;
   JQUANT_TBL *quanttbl;
-  JLONG Q00, Q01, Q02, Q10, Q11, Q20, num;
-  int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9;
+  JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
+  int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
+      DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
+      DC25;
   int Al, pred;
 
   /* Keep a local variable to avoid looking it up more than once */
@@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
     if (cinfo->input_scan_number == cinfo->output_scan_number) {
       /* If input is working on current scan, we ordinarily want it to
        * have completed the current row.  But if input scan is DC,
-       * we want it to keep one row ahead so that next block row's DC
+       * we want it to keep two rows ahead so that next two block rows' DC
        * values are up to date.
        */
-      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0;
       if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta)
         break;
     }
@@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
     if (!compptr->component_needed)
       continue;
     /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row) {
+    if (cinfo->output_iMCU_row < last_iMCU_row - 1) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 3; /* this and next two iMCU rows */
+    } else if (cinfo->output_iMCU_row < last_iMCU_row) {
       block_rows = compptr->v_samp_factor;
       access_rows = block_rows * 2; /* this and next iMCU row */
-      last_row = FALSE;
     } else {
       /* NB: can't use last_row_height here; it is input-side-dependent! */
       block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
       if (block_rows == 0) block_rows = compptr->v_samp_factor;
       access_rows = block_rows; /* this iMCU row only */
-      last_row = TRUE;
     }
     /* Align the virtual buffer for this component. */
-    if (cinfo->output_iMCU_row > 0) {
-      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+    if (cinfo->output_iMCU_row > 1) {
+      access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
+      buffer = (*cinfo->mem->access_virt_barray)
+        ((j_common_ptr)cinfo, coef->whole_image[ci],
+         (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
+         (JDIMENSION)access_rows, FALSE);
+      buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
+    } else if (cinfo->output_iMCU_row > 0) {
       buffer = (*cinfo->mem->access_virt_barray)
         ((j_common_ptr)cinfo, coef->whole_image[ci],
          (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
          (JDIMENSION)access_rows, FALSE);
       buffer += compptr->v_samp_factor; /* point to current iMCU row */
-      first_row = FALSE;
     } else {
       buffer = (*cinfo->mem->access_virt_barray)
         ((j_common_ptr)cinfo, coef->whole_image[ci],
          (JDIMENSION)0, (JDIMENSION)access_rows, FALSE);
-      first_row = TRUE;
     }
-    /* Fetch component-dependent info */
-    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+    /* Fetch component-dependent info.
+     * If the current scan is incomplete, then we use the component-dependent
+     * info from the previous scan.
+     */
+    if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row)
+      coef_bits =
+        coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS);
+    else
+      coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+
+    /* We only do DC interpolation if no AC coefficient data is available. */
+    change_dc =
+      coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 &&
+      coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 &&
+      coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1;
+
     quanttbl = compptr->quant_table;
     Q00 = quanttbl->quantval[0];
     Q01 = quanttbl->quantval[Q01_POS];
@@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
     Q20 = quanttbl->quantval[Q20_POS];
     Q11 = quanttbl->quantval[Q11_POS];
     Q02 = quanttbl->quantval[Q02_POS];
+    if (change_dc) {
+      Q03 = quanttbl->quantval[Q03_POS];
+      Q12 = quanttbl->quantval[Q12_POS];
+      Q21 = quanttbl->quantval[Q21_POS];
+      Q30 = quanttbl->quantval[Q30_POS];
+    }
     inverse_DCT = cinfo->idct->inverse_DCT[ci];
     output_ptr = output_buf[ci];
     /* Loop over all DCT blocks to be processed. */
     for (block_row = 0; block_row < block_rows; block_row++) {
       buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
-      if (first_row && block_row == 0)
+
+      if (block_row > 0 || cinfo->output_iMCU_row > 0)
+        prev_block_row =
+          buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
+      else
         prev_block_row = buffer_ptr;
+
+      if (block_row > 1 || cinfo->output_iMCU_row > 1)
+        prev_prev_block_row =
+          buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
+      else
+        prev_prev_block_row = prev_block_row;
+
+      if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
+        next_block_row =
+          buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
       else
-        prev_block_row = buffer[block_row - 1] +
-                         cinfo->master->first_MCU_col[ci];
-      if (last_row && block_row == block_rows - 1)
         next_block_row = buffer_ptr;
+
+      if (block_row < block_rows - 2 ||
+          cinfo->output_iMCU_row < last_iMCU_row - 1)
+        next_next_block_row =
+          buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
       else
-        next_block_row = buffer[block_row + 1] +
-                         cinfo->master->first_MCU_col[ci];
+        next_next_block_row = next_block_row;
+
       /* We fetch the surrounding DC values using a sliding-register approach.
-       * Initialize all nine here so as to do the right thing on narrow pics.
+       * Initialize all 25 here so as to do the right thing on narrow pics.
        */
-      DC1 = DC2 = DC3 = (int)prev_block_row[0][0];
-      DC4 = DC5 = DC6 = (int)buffer_ptr[0][0];
-      DC7 = DC8 = DC9 = (int)next_block_row[0][0];
+      DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0];
+      DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0];
+      DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0];
+      DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0];
+      DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0];
       output_col = 0;
       last_block_column = compptr->width_in_blocks - 1;
       for (block_num = cinfo->master->first_MCU_col[ci];
@@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         /* Fetch current DCT block into workspace so we can modify it. */
         jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
         /* Update DC values */
-        if (block_num < last_block_column) {
-          DC3 = (int)prev_block_row[1][0];
-          DC6 = (int)buffer_ptr[1][0];
-          DC9 = (int)next_block_row[1][0];
+        if (block_num == cinfo->master->first_MCU_col[ci] &&
+            block_num < last_block_column) {
+          DC04 = (int)prev_prev_block_row[1][0];
+          DC09 = (int)prev_block_row[1][0];
+          DC14 = (int)buffer_ptr[1][0];
+          DC19 = (int)next_block_row[1][0];
+          DC24 = (int)next_next_block_row[1][0];
         }
-        /* Compute coefficient estimates per K.8.
-         * An estimate is applied only if coefficient is still zero,
-         * and is not known to be fully accurate.
+        if (block_num + 1 < last_block_column) {
+          DC05 = (int)prev_prev_block_row[2][0];
+          DC10 = (int)prev_block_row[2][0];
+          DC15 = (int)buffer_ptr[2][0];
+          DC20 = (int)next_block_row[2][0];
+          DC25 = (int)next_next_block_row[2][0];
+        }
+        /* If DC interpolation is enabled, compute coefficient estimates using
+         * a Gaussian-like kernel, keeping the averages of the DC values.
+         *
+         * If DC interpolation is disabled, compute coefficient estimates using
+         * an algorithm similar to the one described in Section K.8 of the JPEG
+         * standard, except applied to a 5x5 window rather than a 3x3 window.
+         *
+         * An estimate is applied only if the coefficient is still zero and is
+         * not known to be fully accurate.
          */
         /* AC01 */
         if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) {
-          num = 36 * Q00 * (DC4 - DC6);
+          num = Q00 * (change_dc ?
+                (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 -
+                 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 +
+                 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 -
+                 DC21 - DC22 + DC24 + DC25) :
+                (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15));
           if (num >= 0) {
             pred = (int)(((Q01 << 7) + num) / (Q01 << 8));
             if (Al > 0 && pred >= (1 << Al))
@@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         }
         /* AC10 */
         if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) {
-          num = 36 * Q00 * (DC2 - DC8);
+          num = Q00 * (change_dc ?
+                (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 +
+                 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 -
+                 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 +
+                 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) :
+                (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23));
           if (num >= 0) {
             pred = (int)(((Q10 << 7) + num) / (Q10 << 8));
             if (Al > 0 && pred >= (1 << Al))
@@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         }
         /* AC20 */
         if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) {
-          num = 9 * Q00 * (DC2 + DC8 - 2 * DC5);
+          num = Q00 * (change_dc ?
+                (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 -
+                 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) :
+                (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23));
           if (num >= 0) {
             pred = (int)(((Q20 << 7) + num) / (Q20 << 8));
             if (Al > 0 && pred >= (1 << Al))
@@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         }
         /* AC11 */
         if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) {
-          num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+          num = Q00 * (change_dc ?
+                (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 +
+                 9 * DC19 + DC21 - DC25) :
+                (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 -
+                 DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09));
           if (num >= 0) {
             pred = (int)(((Q11 << 7) + num) / (Q11 << 8));
             if (Al > 0 && pred >= (1 << Al))
@@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
         }
         /* AC02 */
         if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) {
-          num = 9 * Q00 * (DC4 + DC6 - 2 * DC5);
+          num = Q00 * (change_dc ?
+                (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 +
+                 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) :
+                (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15));
           if (num >= 0) {
             pred = (int)(((Q02 << 7) + num) / (Q02 << 8));
             if (Al > 0 && pred >= (1 << Al))
@@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
           }
           workspace[2] = (JCOEF)pred;
         }
+        if (change_dc) {
+          /* AC03 */
+          if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) {
+            num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q03 << 7) + num) / (Q03 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q03 << 7) - num) / (Q03 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[3] = (JCOEF)pred;
+          }
+          /* AC12 */
+          if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) {
+            num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q12 << 7) + num) / (Q12 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q12 << 7) - num) / (Q12 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[10] = (JCOEF)pred;
+          }
+          /* AC21 */
+          if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) {
+            num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q21 << 7) + num) / (Q21 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q21 << 7) - num) / (Q21 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[17] = (JCOEF)pred;
+          }
+          /* AC30 */
+          if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) {
+            num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q30 << 7) + num) / (Q30 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q30 << 7) - num) / (Q30 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[24] = (JCOEF)pred;
+          }
+          /* coef_bits[0] is non-negative.  Otherwise this function would not
+           * be called.
+           */
+          num = Q00 *
+                (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 -
+                 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 -
+                 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 -
+                 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 -
+                 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25);
+          if (num >= 0) {
+            pred = (int)(((Q00 << 7) + num) / (Q00 << 8));
+          } else {
+            pred = (int)(((Q00 << 7) - num) / (Q00 << 8));
+            pred = -pred;
+          }
+          workspace[0] = (JCOEF)pred;
+        }  /* change_dc */
+
         /* OK, do the IDCT */
         (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr,
                         output_col);
         /* Advance for next column */
-        DC1 = DC2;  DC2 = DC3;
-        DC4 = DC5;  DC5 = DC6;
-        DC7 = DC8;  DC8 = DC9;
-        buffer_ptr++, prev_block_row++, next_block_row++;
+        DC01 = DC02;  DC02 = DC03;  DC03 = DC04;  DC04 = DC05;
+        DC06 = DC07;  DC07 = DC08;  DC08 = DC09;  DC09 = DC10;
+        DC11 = DC12;  DC12 = DC13;  DC13 = DC14;  DC14 = DC15;
+        DC16 = DC17;  DC17 = DC18;  DC18 = DC19;  DC19 = DC20;
+        DC21 = DC22;  DC22 = DC23;  DC23 = DC24;  DC24 = DC25;
+        buffer_ptr++, prev_block_row++, next_block_row++,
+          prev_prev_block_row++, next_next_block_row++;
         output_col += compptr->_DCT_scaled_size;
       }
       output_ptr += compptr->_DCT_scaled_size;
@@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
 #ifdef BLOCK_SMOOTHING_SUPPORTED
       /* If block smoothing could be used, need a bigger window */
       if (cinfo->progressive_mode)
-        access_rows *= 3;
+        access_rows *= 5;
 #endif
       coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
         ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,
diff --git a/src/3rdparty/libjpeg/src/jdcoefct.h b/src/3rdparty/libjpeg/src/jdcoefct.h
index c4d1943dd4..9a0e780663 100644
--- a/src/3rdparty/libjpeg/src/jdcoefct.h
+++ b/src/3rdparty/libjpeg/src/jdcoefct.h
@@ -5,6 +5,7 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2020, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  */
@@ -51,7 +52,7 @@ typedef struct {
 #ifdef BLOCK_SMOOTHING_SUPPORTED
   /* When doing block smoothing, we latch coefficient Al values here */
   int *coef_bits_latch;
-#define SAVED_COEFS  6          /* we save coef_bits[0..5] */
+#define SAVED_COEFS  10         /* we save coef_bits[0..9] */
 #endif
 } my_coef_controller;
 
diff --git a/src/3rdparty/libjpeg/src/jdcol565.c b/src/3rdparty/libjpeg/src/jdcol565.c
index 40068ef84f..53c7bd9187 100644
--- a/src/3rdparty/libjpeg/src/jdcol565.c
+++ b/src/3rdparty/libjpeg/src/jdcol565.c
@@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr = *output_buf++;
 
     if (PACK_NEED_ALIGNMENT(outptr)) {
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[y + Crrtab[cr]];
       g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                             SCALEBITS))];
@@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       num_cols--;
     }
     for (col = 0; col < (num_cols >> 1); col++) {
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[y + Crrtab[cr]];
       g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                             SCALEBITS))];
       b = range_limit[y + Cbbtab[cb]];
       rgb = PACK_SHORT_565(r, g, b);
 
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[y + Crrtab[cr]];
       g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                             SCALEBITS))];
@@ -80,9 +80,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       outptr += 4;
     }
     if (num_cols & 1) {
-      y  = GETJSAMPLE(*inptr0);
-      cb = GETJSAMPLE(*inptr1);
-      cr = GETJSAMPLE(*inptr2);
+      y  = *inptr0;
+      cb = *inptr1;
+      cr = *inptr2;
       r = range_limit[y + Crrtab[cr]];
       g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
                                             SCALEBITS))];
@@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     if (PACK_NEED_ALIGNMENT(outptr)) {
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
       g = range_limit[DITHER_565_G(y +
                                    ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       num_cols--;
     }
     for (col = 0; col < (num_cols >> 1); col++) {
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
       g = range_limit[DITHER_565_G(y +
                                    ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       d0 = DITHER_ROTATE(d0);
       rgb = PACK_SHORT_565(r, g, b);
 
-      y  = GETJSAMPLE(*inptr0++);
-      cb = GETJSAMPLE(*inptr1++);
-      cr = GETJSAMPLE(*inptr2++);
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
       r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
       g = range_limit[DITHER_565_G(y +
                                    ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       outptr += 4;
     }
     if (num_cols & 1) {
-      y  = GETJSAMPLE(*inptr0);
-      cb = GETJSAMPLE(*inptr1);
-      cr = GETJSAMPLE(*inptr2);
+      y  = *inptr0;
+      cb = *inptr1;
+      cr = *inptr2;
       r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
       g = range_limit[DITHER_565_G(y +
                                    ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     if (PACK_NEED_ALIGNMENT(outptr)) {
-      r = GETJSAMPLE(*inptr0++);
-      g = GETJSAMPLE(*inptr1++);
-      b = GETJSAMPLE(*inptr2++);
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
       rgb = PACK_SHORT_565(r, g, b);
       *(INT16 *)outptr = (INT16)rgb;
       outptr += 2;
       num_cols--;
     }
     for (col = 0; col < (num_cols >> 1); col++) {
-      r = GETJSAMPLE(*inptr0++);
-      g = GETJSAMPLE(*inptr1++);
-      b = GETJSAMPLE(*inptr2++);
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
       rgb = PACK_SHORT_565(r, g, b);
 
-      r = GETJSAMPLE(*inptr0++);
-      g = GETJSAMPLE(*inptr1++);
-      b = GETJSAMPLE(*inptr2++);
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
       rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
 
       WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
       outptr += 4;
     }
     if (num_cols & 1) {
-      r = GETJSAMPLE(*inptr0);
-      g = GETJSAMPLE(*inptr1);
-      b = GETJSAMPLE(*inptr2);
+      r = *inptr0;
+      g = *inptr1;
+      b = *inptr2;
       rgb = PACK_SHORT_565(r, g, b);
       *(INT16 *)outptr = (INT16)rgb;
     }
@@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     if (PACK_NEED_ALIGNMENT(outptr)) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
       rgb = PACK_SHORT_565(r, g, b);
       *(INT16 *)outptr = (INT16)rgb;
       outptr += 2;
       num_cols--;
     }
     for (col = 0; col < (num_cols >> 1); col++) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
       d0 = DITHER_ROTATE(d0);
       rgb = PACK_SHORT_565(r, g, b);
 
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
       d0 = DITHER_ROTATE(d0);
       rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
 
@@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
       outptr += 4;
     }
     if (num_cols & 1) {
-      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
-      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
-      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
+      r = range_limit[DITHER_565_R(*inptr0, d0)];
+      g = range_limit[DITHER_565_G(*inptr1, d0)];
+      b = range_limit[DITHER_565_B(*inptr2, d0)];
       rgb = PACK_SHORT_565(r, g, b);
       *(INT16 *)outptr = (INT16)rgb;
     }
diff --git a/src/3rdparty/libjpeg/src/jdcolext.c b/src/3rdparty/libjpeg/src/jdcolext.c
index 72a5301070..863c7a2fbc 100644
--- a/src/3rdparty/libjpeg/src/jdcolext.c
+++ b/src/3rdparty/libjpeg/src/jdcolext.c
@@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
+      y  = inptr0[col];
+      cb = inptr1[col];
+      cr = inptr2[col];
       /* Range-limiting is essential due to noise introduced by DCT losses. */
       outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
       outptr[RGB_GREEN] = range_limit[y +
@@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     inptr = input_buf[0][input_row++];
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
       outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
       /* Set unused byte to 0xFF so it can be interpreted as an opaque */
       /* alpha channel value */
@@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
       outptr[RGB_RED] = inptr0[col];
       outptr[RGB_GREEN] = inptr1[col];
       outptr[RGB_BLUE] = inptr2[col];
diff --git a/src/3rdparty/libjpeg/src/jdcolor.c b/src/3rdparty/libjpeg/src/jdcolor.c
index d3ae40c7da..8da2b4eaf2 100644
--- a/src/3rdparty/libjpeg/src/jdcolor.c
+++ b/src/3rdparty/libjpeg/src/jdcolor.c
@@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
+      r = inptr0[col];
+      g = inptr1[col];
+      b = inptr2[col];
       /* Y */
       outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
                                ctab[b + B_Y_OFF]) >> SCALEBITS);
@@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     input_row++;
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
+      y  = inptr0[col];
+      cb = inptr1[col];
+      cr = inptr2[col];
       /* Range-limiting is essential due to noise introduced by DCT losses. */
       outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
       outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
@@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
                                                  SCALEBITS)))];
       outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
       /* K passes through unchanged */
-      outptr[3] = inptr3[col];  /* don't need GETJSAMPLE here */
+      outptr[3] = inptr3[col];
       outptr += 4;
     }
   }
diff --git a/src/3rdparty/libjpeg/src/jdhuff.c b/src/3rdparty/libjpeg/src/jdhuff.c
index a1128178b0..f786c10547 100644
--- a/src/3rdparty/libjpeg/src/jdhuff.c
+++ b/src/3rdparty/libjpeg/src/jdhuff.c
@@ -5,6 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -39,24 +40,6 @@ typedef struct {
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 } savable_state;
 
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
-  ((dest).last_dc_val[0] = (src).last_dc_val[0], \
-   (dest).last_dc_val[1] = (src).last_dc_val[1], \
-   (dest).last_dc_val[2] = (src).last_dc_val[2], \
-   (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
 typedef struct {
   struct jpeg_entropy_decoder pub; /* public fields */
 
@@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
         bytes_in_buffer = cinfo->src->bytes_in_buffer;
       }
       bytes_in_buffer--;
-      c = GETJOCTET(*next_input_byte++);
+      c = *next_input_byte++;
 
       /* If it's 0xFF, check and discard stuffed zero byte */
       if (c == 0xFF) {
@@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
             bytes_in_buffer = cinfo->src->bytes_in_buffer;
           }
           bytes_in_buffer--;
-          c = GETJOCTET(*next_input_byte++);
+          c = *next_input_byte++;
         } while (c == 0xFF);
 
         if (c == 0) {
@@ -405,8 +388,8 @@ no_more_bytes:
 
 #define GET_BYTE { \
   register int c0, c1; \
-  c0 = GETJOCTET(*buffer++); \
-  c1 = GETJOCTET(*buffer); \
+  c0 = *buffer++; \
+  c1 = *buffer; \
   /* Pre-execute most common case */ \
   get_buffer = (get_buffer << 8) | c0; \
   bits_left += 8; \
@@ -423,7 +406,7 @@ no_more_bytes:
   } \
 }
 
-#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
 
 /* Pre-fetch 48 bytes, because the holding register is 64-bit */
 #define FILL_BIT_BUFFER_FAST \
@@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo)
 }
 
 
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("signed-integer-overflow"),
+               no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
 LOCAL(boolean)
 decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
@@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
   /* Load up working state */
   BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(state, entropy->saved);
+  state = entropy->saved;
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
     JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     if (entropy->dc_needed[blkn]) {
       /* Convert DC difference to actual value, update last_dc_val */
       int ci = cinfo->MCU_membership[blkn];
-      /* This is really just
-       *   s += state.last_dc_val[ci];
-       * It is written this way in order to shut up UBSan.
+      /* Certain malformed JPEG images produce repeated DC coefficient
+       * differences of 2047 or -2047, which causes state.last_dc_val[ci] to
+       * grow until it overflows or underflows a 32-bit signed integer.  This
+       * behavior is, to the best of our understanding, innocuous, and it is
+       * unclear how to work around it without potentially affecting
+       * performance.  Thus, we (hopefully temporarily) suppress UBSan integer
+       * overflow errors for this function.
        */
-      s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+      s += state.last_dc_val[ci];
       state.last_dc_val[ci] = s;
       if (block) {
         /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
   /* Completed MCU, so update state */
   BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(entropy->saved, state);
+  entropy->saved = state;
   return TRUE;
 }
 
@@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Load up working state */
   BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
   buffer = (JOCTET *)br_state.next_input_byte;
-  ASSIGN_STATE(state, entropy->saved);
+  state = entropy->saved;
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
     JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
@@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
     if (entropy->dc_needed[blkn]) {
       int ci = cinfo->MCU_membership[blkn];
-      s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
+      s += state.last_dc_val[ci];
       state.last_dc_val[ci] = s;
       if (block)
         (*block)[0] = (JCOEF)s;
@@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
   br_state.next_input_byte = buffer;
   BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-  ASSIGN_STATE(entropy->saved, state);
+  entropy->saved = state;
   return TRUE;
 }
 
@@ -795,7 +788,8 @@ use_slow:
   }
 
   /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
 
   return TRUE;
 }
diff --git a/src/3rdparty/libjpeg/src/jdhuff.h b/src/3rdparty/libjpeg/src/jdhuff.h
index 6a8d90f402..cfa0b7f558 100644
--- a/src/3rdparty/libjpeg/src/jdhuff.h
+++ b/src/3rdparty/libjpeg/src/jdhuff.h
@@ -4,7 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010-2011, 2015-2016, D. R. Commander.
+ * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
 typedef size_t bit_buf_type;            /* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  64                /* size of buffer in bits */
 
+#elif defined(__x86_64__) && defined(__ILP32__)
+
+typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64                 /* size of buffer in bits */
+
 #else
 
 typedef unsigned long bit_buf_type;     /* type of bit-extraction buffer */
@@ -228,7 +234,10 @@ slowlabel: \
       s |= GET_BITS(1); \
       nb++; \
     } \
-    s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
+    if (nb > 16) \
+      s = 0; \
+    else \
+      s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
   }
 
 /* Out-of-line case for Huffman code fetching */
diff --git a/src/3rdparty/libjpeg/src/jdmarker.c b/src/3rdparty/libjpeg/src/jdmarker.c
index c9c7ef6399..b964c3a1a6 100644
--- a/src/3rdparty/libjpeg/src/jdmarker.c
+++ b/src/3rdparty/libjpeg/src/jdmarker.c
@@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr;
 #define INPUT_BYTE(cinfo, V, action) \
   MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
             bytes_in_buffer--; \
-            V = GETJOCTET(*next_input_byte++); )
+            V = *next_input_byte++; )
 
 /* As above, but read two bytes interpreted as an unsigned 16-bit integer.
  * V should be declared unsigned int or perhaps JLONG.
@@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr;
 #define INPUT_2BYTES(cinfo, V, action) \
   MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
             bytes_in_buffer--; \
-            V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \
+            V = ((unsigned int)(*next_input_byte++)) << 8; \
             MAKE_BYTE_AVAIL(cinfo, action); \
             bytes_in_buffer--; \
-            V += GETJOCTET(*next_input_byte++); )
+            V += *next_input_byte++; )
 
 
 /*
@@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
   JLONG totallen = (JLONG)datalen + remaining;
 
   if (datalen >= APP0_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x49 &&
-      GETJOCTET(data[3]) == 0x46 &&
-      GETJOCTET(data[4]) == 0) {
+      data[0] == 0x4A &&
+      data[1] == 0x46 &&
+      data[2] == 0x49 &&
+      data[3] == 0x46 &&
+      data[4] == 0) {
     /* Found JFIF APP0 marker: save info */
     cinfo->saw_JFIF_marker = TRUE;
-    cinfo->JFIF_major_version = GETJOCTET(data[5]);
-    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
-    cinfo->density_unit = GETJOCTET(data[7]);
-    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
-    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    cinfo->JFIF_major_version = data[5];
+    cinfo->JFIF_minor_version = data[6];
+    cinfo->density_unit = data[7];
+    cinfo->X_density = (data[8] << 8) + data[9];
+    cinfo->Y_density = (data[10] << 8) + data[11];
     /* Check version.
      * Major version must be 1, anything else signals an incompatible change.
      * (We used to treat this as an error, but now it's a nonfatal warning,
@@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
              cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
              cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
     /* Validate thumbnail dimensions and issue appropriate messages */
-    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
-      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-               GETJOCTET(data[12]), GETJOCTET(data[13]));
+    if (data[12] | data[13])
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]);
     totallen -= APP0_DATA_LEN;
-    if (totallen !=
-        ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3))
+    if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3))
       TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen);
   } else if (datalen >= 6 &&
-             GETJOCTET(data[0]) == 0x4A &&
-             GETJOCTET(data[1]) == 0x46 &&
-             GETJOCTET(data[2]) == 0x58 &&
-             GETJOCTET(data[3]) == 0x58 &&
-             GETJOCTET(data[4]) == 0) {
+             data[0] == 0x4A &&
+             data[1] == 0x46 &&
+             data[2] == 0x58 &&
+             data[3] == 0x58 &&
+             data[4] == 0) {
     /* Found JFIF "JFXX" extension APP0 marker */
     /* The library doesn't actually do anything with these,
      * but we try to produce a helpful trace message.
      */
-    switch (GETJOCTET(data[5])) {
+    switch (data[5]) {
     case 0x10:
       TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen);
       break;
@@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
       TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen);
       break;
     default:
-      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-               GETJOCTET(data[5]), (int)totallen);
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen);
       break;
     }
   } else {
@@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
   unsigned int version, flags0, flags1, transform;
 
   if (datalen >= APP14_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x41 &&
-      GETJOCTET(data[1]) == 0x64 &&
-      GETJOCTET(data[2]) == 0x6F &&
-      GETJOCTET(data[3]) == 0x62 &&
-      GETJOCTET(data[4]) == 0x65) {
+      data[0] == 0x41 &&
+      data[1] == 0x64 &&
+      data[2] == 0x6F &&
+      data[3] == 0x62 &&
+      data[4] == 0x65) {
     /* Found Adobe APP14 marker */
-    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
-    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
-    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
-    transform = GETJOCTET(data[11]);
+    version = (data[5] << 8) + data[6];
+    flags0 = (data[7] << 8) + data[8];
+    flags1 = (data[9] << 8) + data[10];
+    transform = data[11];
     TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
     cinfo->saw_Adobe_marker = TRUE;
     cinfo->Adobe_transform = (UINT8)transform;
diff --git a/src/3rdparty/libjpeg/src/jdmaster.c b/src/3rdparty/libjpeg/src/jdmaster.c
index b20906438e..cbc8774b1f 100644
--- a/src/3rdparty/libjpeg/src/jdmaster.c
+++ b/src/3rdparty/libjpeg/src/jdmaster.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2002-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -22,7 +22,6 @@
 #include "jpeglib.h"
 #include "jpegcomp.h"
 #include "jdmaster.h"
-#include "jsimd.h"
 
 
 /*
@@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo)
       cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
       cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
     return FALSE;
-#ifdef WITH_SIMD
-  /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling
-     isn't, then disabling merged upsampling is likely to be faster when
-     decompressing YCbCr JPEG images. */
-  if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() &&
-      jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr &&
-      (cinfo->out_color_space == JCS_RGB ||
-       (cinfo->out_color_space >= JCS_EXT_RGB &&
-        cinfo->out_color_space <= JCS_EXT_ARGB)))
-    return FALSE;
-#endif
   /* ??? also need to test for upsample-time rescaling, when & if supported */
   return TRUE;                  /* by golly, it'll work... */
 #else
@@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo)
    */
   cinfo->master->first_iMCU_col = 0;
   cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1;
+  cinfo->master->last_good_iMCU_row = 0;
 
 #ifdef D_MULTISCAN_FILES_SUPPORTED
   /* If jpeg_start_decompress will read the whole file, initialize
diff --git a/src/3rdparty/libjpeg/src/jdmrg565.c b/src/3rdparty/libjpeg/src/jdmrg565.c
index 53f1e16700..980a4e216e 100644
--- a/src/3rdparty/libjpeg/src/jdmrg565.c
+++ b/src/3rdparty/libjpeg/src/jdmrg565.c
@@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   /* Loop for each pair of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
     /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
@@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
@@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
   /* Loop for each pair of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
     /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
     d0 = DITHER_ROTATE(d0);
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
 
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   /* Loop for each group of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
     /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
@@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     WRITE_TWO_PIXELS(outptr0, rgb);
     outptr0 += 4;
 
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
@@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
 
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
     rgb = PACK_SHORT_565(r, g, b);
     *(INT16 *)outptr0 = (INT16)rgb;
 
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
     r = range_limit[y + cred];
     g = range_limit[y + cgreen];
     b = range_limit[y + cblue];
@@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
   /* Loop for each group of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
     /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
     d0 = DITHER_ROTATE(d0);
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
@@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
     WRITE_TWO_PIXELS(outptr0, rgb);
     outptr0 += 4;
 
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     r = range_limit[DITHER_565_R(y + cred, d1)];
     g = range_limit[DITHER_565_G(y + cgreen, d1)];
     b = range_limit[DITHER_565_B(y + cblue, d1)];
     d1 = DITHER_ROTATE(d1);
     rgb = PACK_SHORT_565(r, g, b);
 
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     r = range_limit[DITHER_565_R(y + cred, d1)];
     g = range_limit[DITHER_565_G(y + cgreen, d1)];
     b = range_limit[DITHER_565_B(y + cblue, d1)];
@@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
 
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
 
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
     r = range_limit[DITHER_565_R(y + cred, d0)];
     g = range_limit[DITHER_565_G(y + cgreen, d0)];
     b = range_limit[DITHER_565_B(y + cblue, d0)];
     rgb = PACK_SHORT_565(r, g, b);
     *(INT16 *)outptr0 = (INT16)rgb;
 
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
     r = range_limit[DITHER_565_R(y + cred, d1)];
     g = range_limit[DITHER_565_G(y + cgreen, d1)];
     b = range_limit[DITHER_565_B(y + cblue, d1)];
diff --git a/src/3rdparty/libjpeg/src/jdmrgext.c b/src/3rdparty/libjpeg/src/jdmrgext.c
index c9a44d8219..9bf4f1a307 100644
--- a/src/3rdparty/libjpeg/src/jdmrgext.c
+++ b/src/3rdparty/libjpeg/src/jdmrgext.c
@@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   /* Loop for each pair of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
     /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
@@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr[RGB_ALPHA] = 0xFF;
 #endif
     outptr += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr0++);
+    y  = *inptr0++;
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
@@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   }
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
+    y  = *inptr0;
     outptr[RGB_RED] =   range_limit[y + cred];
     outptr[RGB_GREEN] = range_limit[y + cgreen];
     outptr[RGB_BLUE] =  range_limit[y + cblue];
@@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   /* Loop for each group of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
+    cb = *inptr1++;
+    cr = *inptr2++;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
     /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
@@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr0[RGB_ALPHA] = 0xFF;
 #endif
     outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
+    y  = *inptr00++;
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
@@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr0[RGB_ALPHA] = 0xFF;
 #endif
     outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
@@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
     outptr1[RGB_ALPHA] = 0xFF;
 #endif
     outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
+    y  = *inptr01++;
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
@@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
   }
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
+    cb = *inptr1;
+    cr = *inptr2;
     cred = Crrtab[cr];
     cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
+    y  = *inptr00;
     outptr0[RGB_RED] =   range_limit[y + cred];
     outptr0[RGB_GREEN] = range_limit[y + cgreen];
     outptr0[RGB_BLUE] =  range_limit[y + cblue];
 #ifdef RGB_ALPHA
     outptr0[RGB_ALPHA] = 0xFF;
 #endif
-    y  = GETJSAMPLE(*inptr01);
+    y  = *inptr01;
     outptr1[RGB_RED] =   range_limit[y + cred];
     outptr1[RGB_GREEN] = range_limit[y + cgreen];
     outptr1[RGB_BLUE] =  range_limit[y + cblue];
diff --git a/src/3rdparty/libjpeg/src/jdphuff.c b/src/3rdparty/libjpeg/src/jdphuff.c
index 9e82636bbd..c6d82ca14b 100644
--- a/src/3rdparty/libjpeg/src/jdphuff.c
+++ b/src/3rdparty/libjpeg/src/jdphuff.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2018, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -41,25 +41,6 @@ typedef struct {
   int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
 } savable_state;
 
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest, src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest, src) \
-  ((dest).EOBRUN = (src).EOBRUN, \
-   (dest).last_dc_val[0] = (src).last_dc_val[0], \
-   (dest).last_dc_val[1] = (src).last_dc_val[1], \
-   (dest).last_dc_val[2] = (src).last_dc_val[2], \
-   (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
 typedef struct {
   struct jpeg_entropy_decoder pub; /* public fields */
 
@@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
   boolean is_DC_band, bad;
   int ci, coefi, tbl;
   d_derived_tbl **pdtbl;
-  int *coef_bit_ptr;
+  int *coef_bit_ptr, *prev_coef_bit_ptr;
   jpeg_component_info *compptr;
 
   is_DC_band = (cinfo->Ss == 0);
@@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     int cindex = cinfo->cur_comp_info[ci]->component_index;
     coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+    prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0];
     if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
       WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+    for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+      if (cinfo->input_scan_number > 1)
+        prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+      else
+        prev_coef_bit_ptr[coefi] = 0;
+    }
     for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
       int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
       if (cinfo->Ah != expected)
@@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
     /* Load up working state */
     BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
+    state = entropy->saved;
 
     /* Outer loop handles each block in the MCU */
 
@@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
     /* Completed MCU, so update state */
     BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
+    entropy->saved = state;
   }
 
   /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
 
   return TRUE;
 }
@@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   }
 
   /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
 
   return TRUE;
 }
@@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
 
   /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
 
   return TRUE;
 }
@@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   }
 
   /* Account for restart interval (no-op if not using restarts) */
-  entropy->restarts_to_go--;
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
 
   return TRUE;
 
@@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
   /* Create progression status table */
   cinfo->coef_bits = (int (*)[DCTSIZE2])
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
-                                cinfo->num_components * DCTSIZE2 *
+                                cinfo->num_components * 2 * DCTSIZE2 *
                                 sizeof(int));
   coef_bit_ptr = &cinfo->coef_bits[0][0];
   for (ci = 0; ci < cinfo->num_components; ci++)
diff --git a/src/3rdparty/libjpeg/src/jdsample.c b/src/3rdparty/libjpeg/src/jdsample.c
index 50a68b3013..eaad72a030 100644
--- a/src/3rdparty/libjpeg/src/jdsample.c
+++ b/src/3rdparty/libjpeg/src/jdsample.c
@@ -8,7 +8,7 @@
  * Copyright (C) 2010, 2015-2016, D. R. Commander.
  * Copyright (C) 2014, MIPS Technologies, Inc., California.
  * Copyright (C) 2015, Google, Inc.
- * Copyright (C) 2019, Arm Limited.
+ * Copyright (C) 2019-2020, Arm Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
       for (h = h_expand; h > 0; h--) {
         *outptr++ = invalue;
       }
@@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     outptr = output_data[inrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
@@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
+      invalue = *inptr++;
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
@@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     inptr = input_data[inrow];
     outptr = output_data[inrow];
     /* Special case for first column */
-    invalue = GETJSAMPLE(*inptr++);
+    invalue = *inptr++;
     *outptr++ = (JSAMPLE)invalue;
-    *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
 
     for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
       /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
-      invalue = GETJSAMPLE(*inptr++) * 3;
-      *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2);
-      *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2);
+      invalue = (*inptr++) * 3;
+      *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
+      *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
     }
 
     /* Special case for last column */
-    invalue = GETJSAMPLE(*inptr);
-    *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+    invalue = *inptr;
+    *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
     *outptr++ = (JSAMPLE)invalue;
   }
 }
@@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
       outptr = output_data[outrow++];
 
       for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
-        thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        thiscolsum = (*inptr0++) * 3 + (*inptr1++);
         *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
       }
     }
@@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
       outptr = output_data[outrow++];
 
       /* Special case for first column */
-      thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
-      nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      thiscolsum = (*inptr0++) * 3 + (*inptr1++);
+      nextcolsum = (*inptr0++) * 3 + (*inptr1++);
       *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
       *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
       lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
@@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
       for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
         /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
         /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
-        nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        nextcolsum = (*inptr0++) * 3 + (*inptr1++);
         *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
         *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
         lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
@@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo)
     } else if (h_in_group == h_out_group &&
                v_in_group * 2 == v_out_group && do_fancy) {
       /* Non-fancy upsampling is handled by the generic method */
-      upsample->methods[ci] = h1v2_fancy_upsample;
+#if defined(__arm__) || defined(__aarch64__) || \
+    defined(_M_ARM) || defined(_M_ARM64)
+      if (jsimd_can_h1v2_fancy_upsample())
+        upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
+      else
+#endif
+        upsample->methods[ci] = h1v2_fancy_upsample;
       upsample->pub.need_context_rows = TRUE;
     } else if (h_in_group * 2 == h_out_group &&
                v_in_group * 2 == v_out_group) {
diff --git a/src/3rdparty/libjpeg/src/jerror.h b/src/3rdparty/libjpeg/src/jerror.h
index 933a3690fd..4476df2c93 100644
--- a/src/3rdparty/libjpeg/src/jerror.h
+++ b/src/3rdparty/libjpeg/src/jerror.h
@@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
 #endif
 #endif
 JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
 
 #ifdef JMAKE_ENUM_LIST
 
@@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
    (cinfo)->err->msg_parm.i[2] = (p3), \
    (cinfo)->err->msg_parm.i[3] = (p4), \
    (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (cinfo)->err->msg_parm.i[4] = (p5), \
+   (cinfo)->err->msg_parm.i[5] = (p6), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
 #define ERREXITS(cinfo, code, str) \
   ((cinfo)->err->msg_code = (code), \
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
diff --git a/src/3rdparty/libjpeg/src/jidctint.c b/src/3rdparty/libjpeg/src/jidctint.c
index 50f385da33..bb08748019 100644
--- a/src/3rdparty/libjpeg/src/jidctint.c
+++ b/src/3rdparty/libjpeg/src/jidctint.c
@@ -3,7 +3,7 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
+ * Modification developed 2002-2018 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2015, 2020, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
@@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 /*
  * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x7 output block.
+ * producing a reduced-size 7x7 output block.
  *
  * Optimized algorithm with 12 multiplications in the 1-D kernel.
  * cK represents sqrt(2) * cos(K*pi/14).
@@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 
 /*
  * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 11x11 output block.
+ * producing an 11x11 output block.
  *
  * Optimized algorithm with 24 multiplications in the 1-D kernel.
  * cK represents sqrt(2) * cos(K*pi/22).
@@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
     tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
     /* Add fudge factor here for final descale. */
-    tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1);
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
 
     z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
diff --git a/src/3rdparty/libjpeg/src/jmorecfg.h b/src/3rdparty/libjpeg/src/jmorecfg.h
index aa29f0f9f1..fb3a9cf411 100644
--- a/src/3rdparty/libjpeg/src/jmorecfg.h
+++ b/src/3rdparty/libjpeg/src/jmorecfg.h
@@ -43,25 +43,11 @@
 
 #if BITS_IN_JSAMPLE == 8
 /* JSAMPLE should be the smallest type that will hold the values 0..255.
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
  */
 
-#ifdef HAVE_UNSIGNED_CHAR
-
 typedef unsigned char JSAMPLE;
 #define GETJSAMPLE(value)  ((int)(value))
 
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JSAMPLE;
-#ifdef __CHAR_UNSIGNED__
-#define GETJSAMPLE(value)  ((int)(value))
-#else
-#define GETJSAMPLE(value)  ((int)(value) & 0xFF)
-#endif /* __CHAR_UNSIGNED__ */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
 #define MAXJSAMPLE      255
 #define CENTERJSAMPLE   128
 
@@ -97,22 +83,9 @@ typedef short JCOEF;
  * managers, this is also the data type passed to fread/fwrite.
  */
 
-#ifdef HAVE_UNSIGNED_CHAR
-
 typedef unsigned char JOCTET;
 #define GETJOCTET(value)  (value)
 
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JOCTET;
-#ifdef __CHAR_UNSIGNED__
-#define GETJOCTET(value)  (value)
-#else
-#define GETJOCTET(value)  ((value) & 0xFF)
-#endif /* __CHAR_UNSIGNED__ */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
 
 /* These typedefs are used for various table entries and so forth.
  * They must be at least as wide as specified; but making them too big
@@ -123,15 +96,7 @@ typedef char JOCTET;
 
 /* UINT8 must hold at least the values 0..255. */
 
-#ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char UINT8;
-#else /* not HAVE_UNSIGNED_CHAR */
-#ifdef __CHAR_UNSIGNED__
-typedef char UINT8;
-#else /* not __CHAR_UNSIGNED__ */
-typedef short UINT8;
-#endif /* __CHAR_UNSIGNED__ */
-#endif /* HAVE_UNSIGNED_CHAR */
 
 /* UINT16 must hold at least the values 0..65535. */
 
diff --git a/src/3rdparty/libjpeg/src/jpegint.h b/src/3rdparty/libjpeg/src/jpegint.h
index ad36ca8b56..195fbcb9b6 100644
--- a/src/3rdparty/libjpeg/src/jpegint.h
+++ b/src/3rdparty/libjpeg/src/jpegint.h
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 1997-2009 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, D. R. Commander.
+ * Copyright (C) 2015-2016, 2019, D. R. Commander.
  * Copyright (C) 2015, Google, Inc.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -158,6 +158,9 @@ struct jpeg_decomp_master {
   JDIMENSION first_MCU_col[MAX_COMPONENTS];
   JDIMENSION last_MCU_col[MAX_COMPONENTS];
   boolean jinit_upsampler_no_alloc;
+
+  /* Last iMCU row that was successfully decoded */
+  JDIMENSION last_good_iMCU_row;
 };
 
 /* Input control module */
diff --git a/src/3rdparty/libjpeg/src/jquant1.c b/src/3rdparty/libjpeg/src/jquant1.c
index 40bbb28cc7..73b83e16e5 100644
--- a/src/3rdparty/libjpeg/src/jquant1.c
+++ b/src/3rdparty/libjpeg/src/jquant1.c
@@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
     for (col = width; col > 0; col--) {
       pixcode = 0;
       for (ci = 0; ci < nc; ci++) {
-        pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+        pixcode += colorindex[ci][*ptrin++];
       }
       *ptrout++ = (JSAMPLE)pixcode;
     }
@@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
     ptrin = input_buf[row];
     ptrout = output_buf[row];
     for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+      pixcode  = colorindex0[*ptrin++];
+      pixcode += colorindex1[*ptrin++];
+      pixcode += colorindex2[*ptrin++];
       *ptrout++ = (JSAMPLE)pixcode;
     }
   }
@@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
          * required amount of padding.
          */
         *output_ptr +=
-          colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]];
+          colorindex_ci[*input_ptr + dither[col_index]];
         input_ptr += nc;
         output_ptr++;
         col_index = (col_index + 1) & ODITHER_MASK;
@@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
     col_index = 0;
 
     for (col = width; col > 0; col--) {
-      pixcode  =
-        GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]);
-      pixcode +=
-        GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]);
-      pixcode +=
-        GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]);
+      pixcode  = colorindex0[(*input_ptr++) + dither0[col_index]];
+      pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
+      pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
       *output_ptr++ = (JSAMPLE)pixcode;
       col_index = (col_index + 1) & ODITHER_MASK;
     }
@@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
          * The maximum error is +- MAXJSAMPLE; this sets the required size
          * of the range_limit array.
          */
-        cur += GETJSAMPLE(*input_ptr);
-        cur = GETJSAMPLE(range_limit[cur]);
+        cur += *input_ptr;
+        cur = range_limit[cur];
         /* Select output value, accumulate into output code for this pixel */
-        pixcode = GETJSAMPLE(colorindex_ci[cur]);
+        pixcode = colorindex_ci[cur];
         *output_ptr += (JSAMPLE)pixcode;
         /* Compute actual representation error at this pixel */
         /* Note: we can do this even though we don't have the final */
         /* pixel code, because the colormap is orthogonal. */
-        cur -= GETJSAMPLE(colormap_ci[pixcode]);
+        cur -= colormap_ci[pixcode];
         /* Compute error fractions to be propagated to adjacent pixels.
          * Add these into the running sums, and simultaneously shift the
          * next-line error sums left by 1 column.
diff --git a/src/3rdparty/libjpeg/src/jquant2.c b/src/3rdparty/libjpeg/src/jquant2.c
index 6570613bb9..44efb18cad 100644
--- a/src/3rdparty/libjpeg/src/jquant2.c
+++ b/src/3rdparty/libjpeg/src/jquant2.c
@@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
     ptr = input_buf[row];
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the histogram */
-      histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-                        [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-                        [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+      histp = &histogram[ptr[0] >> C0_SHIFT]
+                        [ptr[1] >> C1_SHIFT]
+                        [ptr[2] >> C2_SHIFT];
       /* increment, check for overflow and undo increment if so. */
       if (++(*histp) <= 0)
         (*histp)--;
@@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 
   for (i = 0; i < numcolors; i++) {
     /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    x = cinfo->colormap[0][i];
     if (x < minc0) {
       tdist = (x - minc0) * C0_SCALE;
       min_dist = tdist * tdist;
@@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
       }
     }
 
-    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    x = cinfo->colormap[1][i];
     if (x < minc1) {
       tdist = (x - minc1) * C1_SCALE;
       min_dist += tdist * tdist;
@@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
       }
     }
 
-    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    x = cinfo->colormap[2][i];
     if (x < minc2) {
       tdist = (x - minc2) * C2_SCALE;
       min_dist += tdist * tdist;
@@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 #define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
 
   for (i = 0; i < numcolors; i++) {
-    icolor = GETJSAMPLE(colorlist[i]);
+    icolor = colorlist[i];
     /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
     dist0 = inc0 * inc0;
-    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
     dist0 += inc1 * inc1;
-    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
     dist0 += inc2 * inc2;
     /* Form the initial difference increments */
     inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
@@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
     for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
       cachep = &histogram[c0 + ic0][c1 + ic1][c2];
       for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-        *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1);
+        *cachep++ = (histcell)((*cptr++) + 1);
       }
     }
   }
@@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
     outptr = output_buf[row];
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
-      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
-      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+      c0 = (*inptr++) >> C0_SHIFT;
+      c1 = (*inptr++) >> C1_SHIFT;
+      c2 = (*inptr++) >> C2_SHIFT;
       cachep = &histogram[c0][c1][c2];
       /* If we have not seen this color before, find nearest colormap entry */
       /* and update the cache */
@@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
        * The maximum error is +- MAXJSAMPLE (or less with error limiting);
        * this sets the required size of the range_limit array.
        */
-      cur0 += GETJSAMPLE(inptr[0]);
-      cur1 += GETJSAMPLE(inptr[1]);
-      cur2 += GETJSAMPLE(inptr[2]);
-      cur0 = GETJSAMPLE(range_limit[cur0]);
-      cur1 = GETJSAMPLE(range_limit[cur1]);
-      cur2 = GETJSAMPLE(range_limit[cur2]);
+      cur0 += inptr[0];
+      cur1 += inptr[1];
+      cur2 += inptr[2];
+      cur0 = range_limit[cur0];
+      cur1 = range_limit[cur1];
+      cur2 = range_limit[cur2];
       /* Index into the cache with adjusted pixel value */
       cachep =
         &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT];
@@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
         register int pixcode = *cachep - 1;
         *outptr = (JSAMPLE)pixcode;
         /* Compute representation error for this pixel */
-        cur0 -= GETJSAMPLE(colormap0[pixcode]);
-        cur1 -= GETJSAMPLE(colormap1[pixcode]);
-        cur2 -= GETJSAMPLE(colormap2[pixcode]);
+        cur0 -= colormap0[pixcode];
+        cur1 -= colormap1[pixcode];
+        cur2 -= colormap2[pixcode];
       }
       /* Compute error fractions to be propagated to adjacent pixels.
        * Add these into the running sums, and simultaneously shift the
diff --git a/src/3rdparty/libjpeg/src/jsimd.h b/src/3rdparty/libjpeg/src/jsimd.h
index 51e2b8c89d..6c203655ef 100644
--- a/src/3rdparty/libjpeg/src/jsimd.h
+++ b/src/3rdparty/libjpeg/src/jsimd.h
@@ -4,6 +4,7 @@
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2011, 2014, D. R. Commander.
  * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
 
 EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
 EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
+EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
 
 EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
                                        jpeg_component_info *compptr,
@@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
                                        jpeg_component_info *compptr,
                                        JSAMPARRAY input_data,
                                        JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
+                                       jpeg_component_info *compptr,
+                                       JSAMPARRAY input_data,
+                                       JSAMPARRAY *output_data_ptr);
 
 EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
 EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
diff --git a/src/3rdparty/libjpeg/src/jsimd_none.c b/src/3rdparty/libjpeg/src/jsimd_none.c
index 3cb6c80f8a..5b38a9fb5c 100644
--- a/src/3rdparty/libjpeg/src/jsimd_none.c
+++ b/src/3rdparty/libjpeg/src/jsimd_none.c
@@ -4,6 +4,7 @@
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2009-2011, 2014, D. R. Commander.
  * Copyright (C) 2015-2016, 2018, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
  *
  * Based on the x86 SIMD extension for IJG JPEG library,
  * Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void)
   return 0;
 }
 
+GLOBAL(int)
+jsimd_can_h1v2_fancy_upsample(void)
+{
+  return 0;
+}
+
 GLOBAL(void)
 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
@@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
 {
 }
 
+GLOBAL(void)
+jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                          JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
+{
+}
+
 GLOBAL(int)
 jsimd_can_h2v2_merged_upsample(void)
 {
diff --git a/src/3rdparty/libjpeg/src/jversion.h b/src/3rdparty/libjpeg/src/jversion.h
index 4462b94104..2ab534af41 100644
--- a/src/3rdparty/libjpeg/src/jversion.h
+++ b/src/3rdparty/libjpeg/src/jversion.h
@@ -2,9 +2,9 @@
  * jversion.h
  *
  * This file was part of the Independent JPEG Group's software:
- * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2012-2020, D. R. Commander.
+ * Copyright (C) 2010, 2012-2021, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -37,9 +37,9 @@
  */
 
 #define JCOPYRIGHT \
-  "Copyright (C) 2009-2020 D. R. Commander\n" \
+  "Copyright (C) 2009-2021 D. R. Commander\n" \
   "Copyright (C) 2015, 2020 Google, Inc.\n" \
-  "Copyright (C) 2019 Arm Limited\n" \
+  "Copyright (C) 2019-2020 Arm Limited\n" \
   "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
   "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
   "Copyright (C) 2015 Intel Corporation\n" \
@@ -48,7 +48,7 @@
   "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
   "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
   "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-  "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding"
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
 
 #define JCOPYRIGHT_SHORT \
-  "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others"
+  "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others"
diff --git a/src/3rdparty/pcre2/AUTHORS b/src/3rdparty/pcre2/AUTHORS
index f001cb770e..c61b5f3aff 100644
--- a/src/3rdparty/pcre2/AUTHORS
+++ b/src/3rdparty/pcre2/AUTHORS
@@ -8,7 +8,7 @@ Email domain:     gmail.com
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2020 University of Cambridge
+Copyright (c) 1997-2021 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu
 
-Copyright(c) 2010-2020 Zoltan Herczeg
+Copyright(c) 2010-2021 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu
 
-Copyright(c) 2009-2020 Zoltan Herczeg
+Copyright(c) 2009-2021 Zoltan Herczeg
 All rights reserved.
 
 ####
diff --git a/src/3rdparty/pcre2/LICENCE b/src/3rdparty/pcre2/LICENCE
index 155d073127..18684ceaa9 100644
--- a/src/3rdparty/pcre2/LICENCE
+++ b/src/3rdparty/pcre2/LICENCE
@@ -26,7 +26,7 @@ Email domain:     gmail.com
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2020 University of Cambridge
+Copyright (c) 1997-2021 University of Cambridge
 All rights reserved.
 
 
@@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2020 Zoltan Herczeg
+Copyright(c) 2010-2021 Zoltan Herczeg
 All rights reserved.
 
 
@@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2020 Zoltan Herczeg
+Copyright(c) 2009-2021 Zoltan Herczeg
 All rights reserved.
 
 
diff --git a/src/3rdparty/pcre2/qt_attribution.json b/src/3rdparty/pcre2/qt_attribution.json
index e7abb56762..04bdb6cc6b 100644
--- a/src/3rdparty/pcre2/qt_attribution.json
+++ b/src/3rdparty/pcre2/qt_attribution.json
@@ -7,12 +7,12 @@
 
     "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.",
     "Homepage": "http://www.pcre.org/",
-    "Version": "10.36",
-    "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.36.tar.bz2",
+    "Version": "10.37",
+    "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.37.tar.bz2",
     "License": "BSD 3-clause \"New\" or \"Revised\" License",
     "LicenseId": "BSD-3-Clause",
     "LicenseFile": "LICENCE",
-    "Copyright": "Copyright (c) 1997-2020 University of Cambridge
+    "Copyright": "Copyright (c) 1997-2021 University of Cambridge
 Copyright (c) 2010-2020 Zoltan Herczeg"
 },
 {
@@ -24,11 +24,11 @@ Copyright (c) 2010-2020 Zoltan Herczeg"
     "Path": "src/sljit",
     "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.",
     "Homepage": "http://www.pcre.org/",
-    "Version": "10.36",
-    "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.36.tar.bz2",
+    "Version": "10.37",
+    "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.37.tar.bz2",
     "License": "BSD 2-clause \"Simplified\" License",
     "LicenseId": "BSD-2-Clause",
     "LicenseFile": "LICENCE-SLJIT",
-    "Copyright": "Copyright (c) 2009-2020 Zoltan Herczeg"
+    "Copyright": "Copyright (c) 2009-2021 Zoltan Herczeg"
 }
 ]
diff --git a/src/3rdparty/pcre2/src/pcre2.h b/src/3rdparty/pcre2/src/pcre2.h
index f204ec8180..7ab6b39aeb 100644
--- a/src/3rdparty/pcre2/src/pcre2.h
+++ b/src/3rdparty/pcre2/src/pcre2.h
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           36
+#define PCRE2_MINOR           37
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2020-12-04
+#define PCRE2_DATE            2021-05-26
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c
index c64cf856d1..e5e0895682 100644
--- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c
+++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -490,6 +490,7 @@ switch(c)
   list[2] = (uint32_t)(end - code);
   return end;
   }
+
 return NULL;    /* Opcode not accepted */
 }
 
@@ -1186,12 +1187,16 @@ for (;;)
     c = *repeat_opcode;
     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
       {
-      /* end must not be NULL. */
-      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
+      /* The return from get_chr_property_list() will never be NULL when
+      *code (aka c) is one of the three class opcodes. However, gcc with
+      -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
+      put in a check. */
 
+      end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
       list[1] = (c & 1) == 0;
 
-      if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
+      if (end != NULL &&
+          compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
         {
         switch (c)
           {
diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c
index e811f12f02..da449ae9ed 100644
--- a/src/3rdparty/pcre2/src/pcre2_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_compile.c
@@ -1398,32 +1398,47 @@ static BOOL
 read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
   uint32_t *maxp, int *errorcodeptr)
 {
-PCRE2_SPTR p = *ptrptr;
+PCRE2_SPTR p;
 BOOL yield = FALSE;
+BOOL had_comma = FALSE;
 int32_t min = 0;
 int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */
 
-/* NB read_number() initializes the error code to zero. The only error is for a
-number that is too big. */
+/* Check the syntax */
 
+*errorcodeptr = 0;
+for (p = *ptrptr;; p++)
+  {
+  uint32_t c;
+  if (p >= ptrend) return FALSE;
+  c = *p;
+  if (IS_DIGIT(c)) continue;
+  if (c == CHAR_RIGHT_CURLY_BRACKET) break;
+  if (c == CHAR_COMMA)
+    {
+    if (had_comma) return FALSE;
+    had_comma = TRUE;
+    }
+  else return FALSE;
+  }
+
+/* The only error from read_number() is for a number that is too big. */
+
+p = *ptrptr;
 if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
   goto EXIT;
 
-if (p >= ptrend) goto EXIT;
-
 if (*p == CHAR_RIGHT_CURLY_BRACKET)
   {
   p++;
   max = min;
   }
-
 else
   {
-  if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
-  if (*p != CHAR_RIGHT_CURLY_BRACKET)
+  if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
-        errorcodeptr) || p >= ptrend ||  *p != CHAR_RIGHT_CURLY_BRACKET)
+        errorcodeptr))
       goto EXIT;
     if (max < min)
       {
@@ -1438,11 +1453,10 @@ yield = TRUE;
 if (minp != NULL) *minp = (uint32_t)min;
 if (maxp != NULL) *maxp = (uint32_t)max;
 
-/* Update the pattern pointer on success, or after an error, but not when
-the result is "not a repeat quantifier". */
+/* Update the pattern pointer */
 
 EXIT:
-if (yield || *errorcodeptr != 0) *ptrptr = p;
+*ptrptr = p;
 return yield;
 }
 
@@ -1776,19 +1790,23 @@ else
       {
       oldptr = ptr;
       ptr--;   /* Back to the digit */
-      if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
-          errorcodeptr))
-        break;
 
-      /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
+      /* As we know we are at a digit, the only possible error from
+      read_number() is a number that is too large to be a group number. In this
+      case we fall through handle this as not a group reference. If we have
+      read a small enough number, check for a back reference.
+
+      \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
       are octal escapes if there are not that many previous captures. */
 
-      if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
+      if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) &&
+          (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount))
         {
         if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
           else escape = -s;     /* Indicates a back reference */
         break;
         }
+
       ptr = oldptr;      /* Put the pointer back and fall through */
       }
 
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
index 1977d28aa5..f3a26aeee0 100644
--- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
@@ -1226,7 +1226,7 @@ while (cc < ccend)
 return TRUE;
 }
 
-#define EARLY_FAIL_ENHANCE_MAX (1 + 1)
+#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
 
 /*
 start:
@@ -1238,6 +1238,7 @@ return: current number of iterators enhanced with fast fail
 */
 static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
 {
+PCRE2_SPTR begin = cc;
 PCRE2_SPTR next_alt;
 PCRE2_SPTR end;
 PCRE2_SPTR accelerated_start;
@@ -1475,31 +1476,19 @@ do
       case OP_CBRA:
       end = cc + GET(cc, 1);
 
-      if (*end == OP_KET && PRIVATE_DATA(end) == 0)
-        {
-        if (*cc == OP_CBRA)
-          {
-          if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
-            break;
-          cc += IMM2_SIZE;
-          }
-
-        cc += 1 + LINK_SIZE;
-        continue;
-        }
-
       fast_forward_allowed = FALSE;
       if (depth >= 4)
         break;
 
       end = bracketend(cc) - (1 + LINK_SIZE);
-      if (*end != OP_KET || PRIVATE_DATA(end) != 0)
-        break;
-
-      if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
         break;
 
       count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
+
+      if (PRIVATE_DATA(cc) != 0)
+        common->private_data_ptrs[begin - common->start] = 1;
+
       if (count < EARLY_FAIL_ENHANCE_MAX)
         {
         cc = end + (1 + LINK_SIZE);
@@ -1555,6 +1544,8 @@ do
           return EARLY_FAIL_ENHANCE_MAX;
         }
 
+      /* Cannot be part of a repeat. */
+      common->private_data_ptrs[begin - common->start] = 1;
       count++;
 
       if (count < EARLY_FAIL_ENHANCE_MAX)
@@ -1620,11 +1611,12 @@ sljit_sw length = end - begin;
 sljit_s32 min, max, i;
 
 /* Detect fixed iterations first. */
-if (end[-(1 + LINK_SIZE)] != OP_KET)
+if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
   return FALSE;
 
-/* Already detected repeat. */
-if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
+/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
+ * Skip the check of the second part. */
+if (PRIVATE_DATA(end - LINK_SIZE) == 0)
   return TRUE;
 
 next = end;
@@ -1763,6 +1755,7 @@ while (cc < ccend)
   if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
     break;
 
+  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
   if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
     {
     if (detect_repeat(common, cc))
@@ -1813,6 +1806,7 @@ while (cc < ccend)
 
     case OP_COND:
     /* Might be a hidden SCOND. */
+    common->private_data_ptrs[cc - common->start] = 0;
     alternative = cc + GET(cc, 1);
     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
       {
@@ -13661,10 +13655,12 @@ if (!common->private_data_ptrs)
 memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
 
 private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
-set_private_data_ptrs(common, &private_data_size, ccend);
+
 if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
   detect_early_fail(common, common->start, &private_data_size, 0, 0);
 
+set_private_data_ptrs(common, &private_data_size, ccend);
+
 SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
 
 if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h
index 5673d338c0..5fd97b15bd 100644
--- a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h
+++ b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h
@@ -39,7 +39,29 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
+#if !(defined SUPPORT_VALGRIND)
+
+#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+     || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X))
+
+typedef enum {
+  vector_compare_match1,
+  vector_compare_match1i,
+  vector_compare_match2,
+} vector_compare_type;
+
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return 15;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return 7;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+return 3;
+#else
+#error "Unsupported unit width"
+#endif
+}
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
@@ -56,6 +78,10 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
 }
 #endif
 
+#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
 static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
 {
 sljit_u32 value = chr;
@@ -97,13 +123,7 @@ instruction[4] = (sljit_u8)offset;
 sljit_emit_op_custom(compiler, instruction, 5);
 }
 
-typedef enum {
-    sse2_compare_match1,
-    sse2_compare_match1i,
-    sse2_compare_match2,
-} sse2_compare_type;
-
-static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
   int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
 {
 sljit_u8 instruction[4];
@@ -112,11 +132,11 @@ instruction[1] = 0x0f;
 
 SLJIT_ASSERT(step >= 0 && step <= 3);
 
-if (compare_type != sse2_compare_match2)
+if (compare_type != vector_compare_match2)
   {
   if (step == 0)
     {
-    if (compare_type == sse2_compare_match1i)
+    if (compare_type == vector_compare_match1i)
       {
       /* POR xmm1, xmm2/m128 */
       /* instruction[0] = 0x66; */
@@ -185,14 +205,14 @@ switch (step)
 static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
 {
 DEFINE_COMPILER;
+sljit_u8 instruction[8];
 struct sljit_label *start;
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 struct sljit_label *restart;
 #endif
 struct sljit_jump *quit;
 struct sljit_jump *partial_quit[2];
-sse2_compare_type compare_type = sse2_compare_match1;
-sljit_u8 instruction[8];
+vector_compare_type compare_type = vector_compare_match1;
 sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
 sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
 sljit_s32 data_ind = 0;
@@ -207,12 +227,12 @@ SLJIT_UNUSED_ARG(offset);
 if (char1 != char2)
   {
   bit = char1 ^ char2;
-  compare_type = sse2_compare_match1i;
+  compare_type = vector_compare_match1i;
 
   if (!is_powerof2(bit))
     {
     bit = 0;
-    compare_type = sse2_compare_match2;
+    compare_type = vector_compare_match2;
     }
   }
 
@@ -349,11 +369,11 @@ if (common->utf && offset > 0)
 static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
 {
 DEFINE_COMPILER;
+sljit_u8 instruction[8];
 struct sljit_label *start;
 struct sljit_jump *quit;
 jump_list *not_found = NULL;
-sse2_compare_type compare_type = sse2_compare_match1;
-sljit_u8 instruction[8];
+vector_compare_type compare_type = vector_compare_match1;
 sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
 sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
 sljit_s32 data_ind = 0;
@@ -366,12 +386,12 @@ int i;
 if (char1 != char2)
   {
   bit = char1 ^ char2;
-  compare_type = sse2_compare_match1i;
+  compare_type = vector_compare_match1i;
 
   if (!is_powerof2(bit))
     {
     bit = 0;
-    compare_type = sse2_compare_match2;
+    compare_type = vector_compare_match2;
     }
   }
 
@@ -476,27 +496,15 @@ return not_found;
 
 #ifndef _WIN64
 
-static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
-{
-#if PCRE2_CODE_UNIT_WIDTH == 8
-return 15;
-#elif PCRE2_CODE_UNIT_WIDTH == 16
-return 7;
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-return 3;
-#else
-#error "Unsupported unit width"
-#endif
-}
-
 #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
 
 static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
   PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
 {
 DEFINE_COMPILER;
-sse2_compare_type compare1_type = sse2_compare_match1;
-sse2_compare_type compare2_type = sse2_compare_match1;
+sljit_u8 instruction[8];
+vector_compare_type compare1_type = vector_compare_match1;
+vector_compare_type compare2_type = vector_compare_match1;
 sljit_u32 bit1 = 0;
 sljit_u32 bit2 = 0;
 sljit_u32 diff = IN_UCHARS(offs1 - offs2);
@@ -516,7 +524,6 @@ struct sljit_label *start;
 struct sljit_label *restart;
 #endif
 struct sljit_jump *jump[2];
-sljit_u8 instruction[8];
 int i;
 
 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
@@ -549,13 +556,13 @@ else
   bit1 = char1a ^ char1b;
   if (is_powerof2(bit1))
     {
-    compare1_type = sse2_compare_match1i;
+    compare1_type = vector_compare_match1i;
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
     }
   else
     {
-    compare1_type = sse2_compare_match2;
+    compare1_type = vector_compare_match2;
     bit1 = 0;
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
@@ -578,13 +585,13 @@ else
   bit2 = char2a ^ char2b;
   if (is_powerof2(bit2))
     {
-    compare2_type = sse2_compare_match1i;
+    compare2_type = vector_compare_match1i;
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
     }
   else
     {
-    compare2_type = sse2_compare_match2;
+    compare2_type = vector_compare_match2;
     bit2 = 0;
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
@@ -731,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
 
 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
 
-if (common->match_end_ptr != 0)
-  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
-
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 if (common->utf)
   {
@@ -760,7 +764,7 @@ if (common->match_end_ptr != 0)
 
 #undef SSE2_COMPARE_TYPE_INDEX
 
-#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
+#endif /* SLJIT_CONFIG_X86 */
 
 #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
 
@@ -1121,3 +1125,743 @@ JUMPHERE(partial_quit);
 }
 
 #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
+
+#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define VECTOR_ELEMENT_SIZE 0
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+#define VECTOR_ELEMENT_SIZE 1
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+#define VECTOR_ELEMENT_SIZE 2
+#else
+#error "Unsupported unit width"
+#endif
+
+static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
+  sljit_s32 base_reg, sljit_s32 index_reg)
+{
+sljit_u16 instruction[3];
+
+instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
+instruction[1] = (sljit_u16)(base_reg << 12);
+instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06));
+
+sljit_emit_op_custom(compiler, instruction, 6);
+}
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+
+static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
+  PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
+{
+sljit_u16 instruction[3];
+
+SLJIT_ASSERT(step >= 0 && step <= 1);
+
+if (chr < 0x7fff)
+  {
+  if (step == 1)
+    return;
+
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
+  instruction[1] = (sljit_u16)chr;
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  return;
+  }
+
+if (step == 0)
+  {
+  OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
+
+  /* VLVG */
+  instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg));
+  instruction[1] = 0;
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  return;
+  }
+
+/* VREP */
+instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
+instruction[1] = 0;
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
+sljit_emit_op_custom(compiler, instruction, 6);
+}
+
+#endif
+
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
+  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
+{
+sljit_u16 instruction[3];
+
+SLJIT_ASSERT(step >= 0 && step <= 2);
+
+if (step == 1)
+  {
+  /* VCEQ */
+  instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+  instruction[1] = (sljit_u16)(cmp1_ind << 12);
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  return;
+  }
+
+if (compare_type != vector_compare_match2)
+  {
+  if (step == 0 && compare_type == vector_compare_match1i)
+    {
+    /* VO */
+    instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+    instruction[1] = (sljit_u16)(cmp2_ind << 12);
+    instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
+    sljit_emit_op_custom(compiler, instruction, 6);
+    }
+  return;
+  }
+
+switch (step)
+  {
+  case 0:
+  /* VCEQ */
+  instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
+  instruction[1] = (sljit_u16)(cmp2_ind << 12);
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  return;
+
+  case 2:
+  /* VO */
+  instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+  instruction[1] = (sljit_u16)(tmp_ind << 12);
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  return;
+  }
+}
+
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
+
+static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *partial_quit[2];
+vector_compare_type compare_type = vector_compare_match1;
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_s32 zero_ind = 4;
+sljit_u32 bit = 0;
+int i;
+
+SLJIT_UNUSED_ARG(offset);
+
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = vector_compare_match2;
+    }
+  }
+
+partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[0]);
+
+/* First part (unaligned start) */
+
+OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
+instruction[1] = (sljit_u16)(char1 | bit);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1 != char2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
+  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
+  /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+for (int i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
+
+  if (char1 != char2)
+    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
+  }
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+if (compare_type == vector_compare_match2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+  instruction[1] = 0;
+  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[1]);
+
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 16, start);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+
+JUMPHERE(quit);
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+  {
+  JUMPHERE(partial_quit[0]);
+  JUMPHERE(partial_quit[1]);
+  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+  }
+else
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf && offset > 0)
+  {
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+
+  quit = jump_if_utf_char_start(compiler, TMP1);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
+  JUMPTO(SLJIT_JUMP, restart);
+
+  JUMPHERE(quit);
+  }
+#endif
+}
+
+#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
+
+static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+struct sljit_jump *quit;
+jump_list *not_found = NULL;
+vector_compare_type compare_type = vector_compare_match1;
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3);
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_s32 zero_ind = 4;
+sljit_u32 bit = 0;
+int i;
+
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = vector_compare_match2;
+    }
+  }
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+/* First part (unaligned start) */
+
+OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
+instruction[1] = (sljit_u16)(char1 | bit);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1 != char2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
+  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
+  /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+for (int i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
+
+  if (char1 != char2)
+    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
+  }
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+if (compare_type == vector_compare_match2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+  instruction[1] = 0;
+  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
+quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+CMPTO(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, 16, start);
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
+
+JUMPHERE(quit);
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+return not_found;
+}
+
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
+
+static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
+  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *jump[2];
+vector_compare_type compare1_type = vector_compare_match1;
+vector_compare_type compare2_type = vector_compare_match1;
+sljit_u32 bit1 = 0;
+sljit_u32 bit2 = 0;
+sljit_s32 diff = IN_UCHARS(offs2 - offs1);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data1_ind = 0;
+sljit_s32 data2_ind = 1;
+sljit_s32 tmp1_ind = 2;
+sljit_s32 tmp2_ind = 3;
+sljit_s32 cmp1a_ind = 4;
+sljit_s32 cmp1b_ind = 5;
+sljit_s32 cmp2a_ind = 6;
+sljit_s32 cmp2b_ind = 7;
+sljit_s32 zero_ind = 8;
+int i;
+
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
+
+if (char1a != char1b)
+  {
+  bit1 = char1a ^ char1b;
+  compare1_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit1))
+    {
+    bit1 = 0;
+    compare1_type = vector_compare_match2;
+    }
+  }
+
+if (char2a != char2b)
+  {
+  bit2 = char2a ^ char2b;
+  compare2_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit2))
+    {
+    bit2 = 0;
+    compare2_type = vector_compare_match2;
+    }
+  }
+
+/* Initialize. */
+if (common->match_end_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
+
+  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
+  }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
+instruction[1] = (sljit_u16)(char1a | bit1);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1a != char1b)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
+  instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
+  /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
+instruction[1] = (sljit_u16)(char2a | bit2);
+/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char2a != char2b)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
+  instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
+  /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+for (int i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
+
+  if (char1a != char1b)
+    replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
+
+  replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
+
+  if (char2a != char2b)
+    replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
+  }
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+instruction[1] = 0;
+instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
+load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
+jump[1] = JUMP(SLJIT_JUMP);
+JUMPHERE(jump[0]);
+load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
+JUMPHERE(jump[1]);
+
+load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
+
+for (i = 0; i < 3; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+  }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
+
+/* Main loop. */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
+load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
+
+for (i = 0; i < 3; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+  }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+CMPTO(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 16, start);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+JUMPHERE(quit);
+
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf)
+  {
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
+
+  quit = jump_if_utf_char_start(compiler, TMP1);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+  /* TMP1 contains diff. */
+  OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+  JUMPTO(SLJIT_JUMP, restart);
+
+  JUMPHERE(quit);
+  }
+#endif
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+
+if (common->match_end_ptr != 0)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+#endif /* SLJIT_CONFIG_S390X */
+
+#endif /* !SUPPORT_VALGRIND */
diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c
index e3f78c2ca3..ed60517131 100644
--- a/src/3rdparty/pcre2/src/pcre2_match.c
+++ b/src/3rdparty/pcre2/src/pcre2_match.c
@@ -818,10 +818,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
       /* N is now the frame of the recursion; the previous frame is at the
       OP_RECURSE position. Go back there, copying the current subject position
-      and mark, and move on past the OP_RECURSE. */
+      and mark, and the start_match position (\K might have changed it), and
+      then move on past the OP_RECURSE. */
 
       P->eptr = Feptr;
       P->mark = Fmark;
+      P->start_match = Fstart_match;
       F = P;
       Fecode += 1 + LINK_SIZE;
       continue;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
index eb1132db30..ff36e5b7c6 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
@@ -158,6 +158,8 @@ extern "C" {
 #define SLJIT_CONFIG_MIPS_64 1
 #elif defined(__sparc__) || defined(__sparc)
 #define SLJIT_CONFIG_SPARC_32 1
+#elif defined(__s390x__)
+#define SLJIT_CONFIG_S390X 1
 #else
 /* Unsupported architecture */
 #define SLJIT_CONFIG_UNSUPPORTED 1
diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
index 61a32f23e9..6e5bf78e45 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
@@ -79,6 +79,7 @@
 */
 
 #ifdef _WIN32
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
 
 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
@@ -91,96 +92,108 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 	VirtualFree(chunk, 0, MEM_RELEASE);
 }
 
-#else
-
-#ifdef __APPLE__
-#ifdef MAP_ANON
-/* Configures TARGET_OS_OSX when appropriate */
-#include <TargetConditionals.h>
-
-#if TARGET_OS_OSX && defined(MAP_JIT)
-#include <sys/utsname.h>
-#endif /* TARGET_OS_OSX && MAP_JIT */
-
-#ifdef MAP_JIT
+#else /* POSIX */
 
+#if defined(__APPLE__) && defined(MAP_JIT)
 /*
    On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a
-   version where it's OK to have more than one JIT block.
+   version where it's OK to have more than one JIT block or where MAP_JIT is
+   required.
    On non-macOS systems, returns MAP_JIT if it is defined.
 */
+#include <TargetConditionals.h>
+#if TARGET_OS_OSX
+#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86
+#ifdef MAP_ANON
+#include <sys/utsname.h>
+#include <stdlib.h>
+
+#define SLJIT_MAP_JIT	(get_map_jit_flag())
+
 static SLJIT_INLINE int get_map_jit_flag()
 {
-#if TARGET_OS_OSX
-	sljit_sw page_size = get_page_alignment() + 1;
+	sljit_sw page_size;
 	void *ptr;
+	struct utsname name;
 	static int map_jit_flag = -1;
 
-	/*
-	  The following code is thread safe because multiple initialization
-	  sets map_jit_flag to the same value and the code has no side-effects.
-	  Changing the kernel version witout system restart is (very) unlikely.
-	*/
-	if (map_jit_flag == -1) {
-		struct utsname name;
-
+	if (map_jit_flag < 0) {
 		map_jit_flag = 0;
 		uname(&name);
 
-		/* Kernel version for 10.14.0 (Mojave) */
+		/* Kernel version for 10.14.0 (Mojave) or later */
 		if (atoi(name.release) >= 18) {
+			page_size = get_page_alignment() + 1;
 			/* Only use MAP_JIT if a hardened runtime is used */
+			ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC,
+					MAP_PRIVATE | MAP_ANON, -1, 0);
 
-			ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
-
-			if (ptr == MAP_FAILED) {
-				map_jit_flag = MAP_JIT;
-			} else {
+			if (ptr != MAP_FAILED)
 				munmap(ptr, page_size);
-			}
+			else
+				map_jit_flag = MAP_JIT;
 		}
 	}
-
 	return map_jit_flag;
-#else /* !TARGET_OS_OSX */
-	return MAP_JIT;
-#endif /* TARGET_OS_OSX */
 }
-
-#endif /* MAP_JIT */
 #endif /* MAP_ANON */
-#endif /* __APPLE__ */
+#else /* !SLJIT_CONFIG_X86 */
+#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
+#error Unsupported architecture
+#endif /* SLJIT_CONFIG_ARM */
+#include <pthread.h>
+
+#define SLJIT_MAP_JIT	(MAP_JIT)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
+                        apple_update_wx_flags(enable_exec)
+
+static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
+{
+	pthread_jit_write_protect_np(enable_exec);
+}
+#endif /* SLJIT_CONFIG_X86 */
+#else /* !TARGET_OS_OSX */
+#define SLJIT_MAP_JIT	(MAP_JIT)
+#endif /* TARGET_OS_OSX */
+#endif /* __APPLE__ && MAP_JIT */
+#ifndef SLJIT_UPDATE_WX_FLAGS
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+#endif /* !SLJIT_UPDATE_WX_FLAGS */
+#ifndef SLJIT_MAP_JIT
+#define SLJIT_MAP_JIT	(0)
+#endif /* !SLJIT_MAP_JIT */
 
 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
 	void *retval;
-	const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
-
-#ifdef MAP_ANON
+	int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+	int flags = MAP_PRIVATE;
+	int fd = -1;
 
-	int flags = MAP_PRIVATE | MAP_ANON;
-
-#ifdef MAP_JIT
-	flags |= get_map_jit_flag();
+#ifdef PROT_MAX
+	prot |= PROT_MAX(prot);
 #endif
 
-	retval = mmap(NULL, size, prot, flags, -1, 0);
+#ifdef MAP_ANON
+	flags |= MAP_ANON | SLJIT_MAP_JIT;
 #else /* !MAP_ANON */
 	if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero()))
 		return NULL;
 
-	retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0);
+	fd = dev_zero;
 #endif /* MAP_ANON */
 
+	retval = mmap(NULL, size, prot, flags, fd, 0);
 	if (retval == MAP_FAILED)
-		retval = NULL;
-	else {
-		if (mprotect(retval, size, prot) < 0) {
-			munmap(retval, size);
-			retval = NULL;
-		}
+		return NULL;
+
+	if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
+		munmap(retval, size);
+		return NULL;
 	}
 
+	SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0);
+
 	return retval;
 }
 
@@ -189,7 +202,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 	munmap(chunk, size);
 }
 
-#endif
+#endif /* windows */
 
 /* --------------------------------------------------------------------- */
 /*  Common functions                                                     */
@@ -261,6 +274,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 	while (free_block) {
 		if (free_block->size >= size) {
 			chunk_size = free_block->size;
+			SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
 			if (chunk_size > size + 64) {
 				/* We just cut a block from the end of the free block. */
 				chunk_size -= size;
@@ -326,6 +340,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
 	allocated_size -= header->size;
 
 	/* Connecting free blocks together if possible. */
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
 
 	/* If header->prev_size == 0, free_block will equal to header.
 	   In this case, free_block->header.size will be > 0. */
@@ -358,6 +373,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
 		}
 	}
 
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
 	SLJIT_ALLOCATOR_UNLOCK();
 }
 
@@ -367,6 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 	struct free_block* next_free_block;
 
 	SLJIT_ALLOCATOR_LOCK();
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
 
 	free_block = free_blocks;
 	while (free_block) {
@@ -381,5 +398,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 	}
 
 	SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
 	SLJIT_ALLOCATOR_UNLOCK();
 }
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
index a8b65112d4..3d007fe8a1 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
@@ -42,7 +42,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
 typedef sljit_uw sljit_ins;
 
 /* Instruction tags (most significant halfword). */
-const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
+static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
 	14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
@@ -66,22 +66,22 @@ typedef sljit_uw sljit_gpr;
  * will be retired ASAP (TODO: carenas)
  */
 
-const sljit_gpr r0 = 0;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
-const sljit_gpr r1 = 1;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
-const sljit_gpr r2 = 2;	/* reg_map[1]: 1st argument */
-const sljit_gpr r3 = 3;	/* reg_map[2]: 2nd argument */
-const sljit_gpr r4 = 4;	/* reg_map[3]: 3rd argument */
-const sljit_gpr r5 = 5;	/* reg_map[4]: 4th argument */
-const sljit_gpr r6 = 6;	/* reg_map[5]: 5th argument; 1st saved register */
-const sljit_gpr r7 = 7;	/* reg_map[6] */
-const sljit_gpr r8 = 8;	/* reg_map[7] */
-const sljit_gpr r9 = 9;	/* reg_map[8] */
-const sljit_gpr r10 = 10;	/* reg_map[9] */
-const sljit_gpr r11 = 11;	/* reg_map[10] */
-const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
-const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
-const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
-const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
+static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
+static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
+static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
+static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
+static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
+static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
+static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
+static const sljit_gpr r7 = 7;		/* reg_map[6] */
+static const sljit_gpr r8 = 8;		/* reg_map[7] */
+static const sljit_gpr r9 = 9;		/* reg_map[8] */
+static const sljit_gpr r10 = 10;	/* reg_map[9] */
+static const sljit_gpr r11 = 11;	/* reg_map[10] */
+static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
+static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
+static const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
+static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
 
 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
@@ -100,8 +100,8 @@ const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack point
 /* Link registers. The normal link register is r14, but since
    we use that for flags we need to use r0 instead to do fast
    calls so that flags are preserved. */
-const sljit_gpr link_r = 14;     /* r14 */
-const sljit_gpr fast_link_r = 0; /* r0 */
+static const sljit_gpr link_r = 14;     /* r14 */
+static const sljit_gpr fast_link_r = 0; /* r0 */
 
 /* Flag register layout:
 
@@ -110,7 +110,7 @@ const sljit_gpr fast_link_r = 0; /* r0 */
    |      ZERO     | 0 | 0 |  C C  |///////|
    +---------------+---+---+-------+-------+
 */
-const sljit_gpr flag_r = 14; /* r14 */
+static const sljit_gpr flag_r = 14; /* r14 */
 
 struct sljit_s390x_const {
 	struct sljit_const const_; /* must be first */
@@ -1465,7 +1465,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 	op = GET_OPCODE(op) | (op & SLJIT_I32_OP);
 	switch (op) {
 	case SLJIT_BREAKPOINT:
-		/* TODO(mundaym): insert real breakpoint? */
+		/* The following invalid instruction is emitted by gdb. */
+		return push_inst(compiler, 0x0001 /* 2-byte trap */);
 	case SLJIT_NOP:
 		return push_inst(compiler, 0x0700 /* 2-byte nop */);
 	case SLJIT_LMUL_UW:
diff --git a/src/3rdparty/pcre2/src/sljit/sljitUtils.c b/src/3rdparty/pcre2/src/sljit/sljitUtils.c
index 08ca35cf37..9bce714735 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitUtils.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitUtils.c
@@ -48,7 +48,7 @@ static HANDLE allocator_lock;
 static SLJIT_INLINE void allocator_grab_lock(void)
 {
 	HANDLE lock;
-	if (SLJIT_UNLIKELY(!allocator_lock)) {
+	if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) {
 		lock = CreateMutex(NULL, FALSE, NULL);
 		if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL))
 			CloseHandle(lock);
@@ -146,9 +146,13 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) {
 #include <unistd.h>
 
 static SLJIT_INLINE sljit_sw get_page_alignment(void) {
-	static sljit_sw sljit_page_align;
-	if (!sljit_page_align) {
+	static sljit_sw sljit_page_align = -1;
+	if (sljit_page_align < 0) {
+#ifdef _SC_PAGESIZE
 		sljit_page_align = sysconf(_SC_PAGESIZE);
+#else
+		sljit_page_align = getpagesize();
+#endif
 		/* Should never happen. */
 		if (sljit_page_align < 0)
 			sljit_page_align = 4096;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c
index 6ef71f7d83..72d5b8dd2b 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c
@@ -121,14 +121,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 	static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER;
 #endif
 	static int se_protected = !SLJIT_PROT_WX;
+	int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX;
 	sljit_uw* ptr;
 
 	if (SLJIT_UNLIKELY(se_protected < 0))
 		return NULL;
 
+#ifdef PROT_MAX
+	prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC);
+#endif
+
 	size += sizeof(sljit_uw);
-	ptr = (sljit_uw*)mmap(NULL, size, PROT_READ | PROT_WRITE | SLJIT_PROT_WX,
-				MAP_PRIVATE | MAP_ANON, -1, 0);
+	ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0);
 
 	if (ptr == MAP_FAILED)
 		return NULL;
diff --git a/src/3rdparty/sqlite/qt_attribution.json b/src/3rdparty/sqlite/qt_attribution.json
index 1c1d9b7746..1d8f96ff03 100644
--- a/src/3rdparty/sqlite/qt_attribution.json
+++ b/src/3rdparty/sqlite/qt_attribution.json
@@ -6,8 +6,8 @@
 
     "Description": "SQLite is a small C library that implements a self-contained, embeddable, zero-configuration SQL database engine.",
     "Homepage": "https://www.sqlite.org/",
-    "Version": "3.35.2",
-    "DownloadLocation": "https://www.sqlite.org/2020/sqlite-amalgamation-3350200.zip",
+    "Version": "3.35.5",
+    "DownloadLocation": "https://www.sqlite.org/2020/sqlite-amalgamation-3350500.zip",
     "License": "Public Domain",
     "Copyright": "The authors disclaim copyright to the source code. However, a license can be obtained if needed."
 }
diff --git a/src/3rdparty/sqlite/sqlite3.c b/src/3rdparty/sqlite/sqlite3.c
index 54f8277a71..df53e437ba 100644
--- a/src/3rdparty/sqlite/sqlite3.c
+++ b/src/3rdparty/sqlite/sqlite3.c
@@ -1,6 +1,6 @@
 /******************************************************************************
 ** This file is an amalgamation of many separate C source files from SQLite
-** version 3.35.2.  By combining all the individual C code files into this
+** version 3.35.5.  By combining all the individual C code files into this
 ** single large file, the entire code can be compiled as a single translation
 ** unit.  This allows many compilers to do optimizations that would not be
 ** possible if the files were compiled separately.  Performance improvements
@@ -1186,9 +1186,9 @@ extern "C" {
 ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
 ** [sqlite_version()] and [sqlite_source_id()].
 */
-#define SQLITE_VERSION        "3.35.2"
-#define SQLITE_VERSION_NUMBER 3035002
-#define SQLITE_SOURCE_ID      "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827"
+#define SQLITE_VERSION        "3.35.5"
+#define SQLITE_VERSION_NUMBER 3035005
+#define SQLITE_SOURCE_ID      "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -19764,6 +19764,7 @@ SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse*,ExprList*, Token*, int);
 SQLITE_PRIVATE void sqlite3ExprFunctionUsable(Parse*,Expr*,FuncDef*);
 SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse*, Expr*, u32);
 SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3*, Expr*);
+SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse*, Expr*);
 SQLITE_PRIVATE void sqlite3ExprUnmapAndDelete(Parse*, Expr*);
 SQLITE_PRIVATE ExprList *sqlite3ExprListAppend(Parse*,ExprList*,Expr*);
 SQLITE_PRIVATE ExprList *sqlite3ExprListAppendVector(Parse*,ExprList*,IdList*,Expr*);
@@ -21071,6 +21072,7 @@ struct VdbeCursor {
   Bool isEphemeral:1;     /* True for an ephemeral table */
   Bool useRandomRowid:1;  /* Generate new record numbers semi-randomly */
   Bool isOrdered:1;       /* True if the table is not BTREE_UNORDERED */
+  Bool hasBeenDuped:1;    /* This cursor was source or target of OP_OpenDup */
   u16 seekHit;            /* See the OP_SeekHit and OP_IfNoHope opcodes */
   Btree *pBtx;            /* Separate file holding temporary table */
   i64 seqCount;           /* Sequence counter */
@@ -40220,7 +40222,8 @@ static int unixBackupDir(const char *z, int *pJ){
   int j = *pJ;
   int i;
   if( j<=0 ) return 0;
-  for(i=j-1; ALWAYS(i>0) && z[i-1]!='/'; i--){}
+  for(i=j-1; i>0 && z[i-1]!='/'; i--){}
+  if( i==0 ) return 0;
   if( z[i]=='.' && i==j-2 && z[i+1]=='.' ) return 0;
   *pJ = i-1;
   return 1;
@@ -64277,7 +64280,7 @@ struct Btree {
   u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */
   int wantToLock;    /* Number of nested calls to sqlite3BtreeEnter() */
   int nBackup;       /* Number of backup operations reading this btree */
-  u32 iDataVersion;  /* Combines with pBt->pPager->iDataVersion */
+  u32 iBDataVersion; /* Combines with pBt->pPager->iDataVersion */
   Btree *pNext;      /* List of other sharable Btrees from the same db */
   Btree *pPrev;      /* Back pointer of the same list */
 #ifdef SQLITE_DEBUG
@@ -67690,19 +67693,23 @@ static void freeTempSpace(BtShared *pBt){
 */
 SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){
   BtShared *pBt = p->pBt;
-  BtCursor *pCur;
 
   /* Close all cursors opened via this handle.  */
   assert( sqlite3_mutex_held(p->db->mutex) );
   sqlite3BtreeEnter(p);
-  pCur = pBt->pCursor;
-  while( pCur ){
-    BtCursor *pTmp = pCur;
-    pCur = pCur->pNext;
-    if( pTmp->pBtree==p ){
-      sqlite3BtreeCloseCursor(pTmp);
+
+  /* Verify that no other cursors have this Btree open */
+#ifdef SQLITE_DEBUG
+  {
+    BtCursor *pCur = pBt->pCursor;
+    while( pCur ){
+      BtCursor *pTmp = pCur;
+      pCur = pCur->pNext;
+      assert( pTmp->pBtree!=p );
+
     }
   }
+#endif
 
   /* Rollback any active transaction and free the handle structure.
   ** The call to sqlite3BtreeRollback() drops any table-locks held by
@@ -69084,7 +69091,7 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){
       sqlite3BtreeLeave(p);
       return rc;
     }
-    p->iDataVersion--;  /* Compensate for pPager->iDataVersion++; */
+    p->iBDataVersion--;  /* Compensate for pPager->iDataVersion++; */
     pBt->inTransaction = TRANS_READ;
     btreeClearHasContent(pBt);
   }
@@ -69494,7 +69501,14 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){
     unlockBtreeIfUnused(pBt);
     sqlite3_free(pCur->aOverflow);
     sqlite3_free(pCur->pKey);
-    sqlite3BtreeLeave(pBtree);
+    if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){
+      /* Since the BtShared is not sharable, there is no need to
+      ** worry about the missing sqlite3BtreeLeave() call here.  */
+      assert( pBtree->sharable==0 );
+      sqlite3BtreeClose(pBtree);
+    }else{
+      sqlite3BtreeLeave(pBtree);
+    }
     pCur->pBtree = 0;
   }
   return SQLITE_OK;
@@ -74607,7 +74621,7 @@ SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
   assert( idx>=0 && idx<=15 );
 
   if( idx==BTREE_DATA_VERSION ){
-    *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion;
+    *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion;
   }else{
     *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
   }
@@ -80835,20 +80849,15 @@ SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
     return;
   }
   assert( pCx->pBtx==0 || pCx->eCurType==CURTYPE_BTREE );
+  assert( pCx->pBtx==0 || pCx->isEphemeral );
   switch( pCx->eCurType ){
     case CURTYPE_SORTER: {
       sqlite3VdbeSorterClose(p->db, pCx);
       break;
     }
     case CURTYPE_BTREE: {
-      if( pCx->isEphemeral ){
-        if( pCx->pBtx ) sqlite3BtreeClose(pCx->pBtx);
-        /* The pCx->pCursor will be close automatically, if it exists, by
-        ** the call above. */
-      }else{
-        assert( pCx->uc.pCursor!=0 );
-        sqlite3BtreeCloseCursor(pCx->uc.pCursor);
-      }
+      assert( pCx->uc.pCursor!=0 );
+      sqlite3BtreeCloseCursor(pCx->uc.pCursor);
       break;
     }
 #ifndef SQLITE_OMIT_VIRTUALTABLE
@@ -81932,6 +81941,7 @@ SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor **pp, u32 *piCol){
   assert( p->eCurType==CURTYPE_BTREE || p->eCurType==CURTYPE_PSEUDO );
   if( p->deferredMoveto ){
     u32 iMap;
+    assert( !p->isEphemeral );
     if( p->aAltMap && (iMap = p->aAltMap[1+*piCol])>0 && !p->nullRow ){
       *pp = p->pAltCursor;
       *piCol = iMap - 1;
@@ -86140,11 +86150,6 @@ static VdbeCursor *allocateCursor(
 
   assert( iCur>=0 && iCur<p->nCursor );
   if( p->apCsr[iCur] ){ /*OPTIMIZATION-IF-FALSE*/
-    /* Before calling sqlite3VdbeFreeCursor(), ensure the isEphemeral flag
-    ** is clear. Otherwise, if this is an ephemeral cursor created by
-    ** OP_OpenDup, the cursor will not be closed and will still be part
-    ** of a BtShared.pCursor list.  */
-    if( p->apCsr[iCur]->pBtx==0 ) p->apCsr[iCur]->isEphemeral = 0;
     sqlite3VdbeFreeCursor(p, p->apCsr[iCur]);
     p->apCsr[iCur] = 0;
   }
@@ -89830,7 +89835,7 @@ case OP_OpenDup: {
 
   pOrig = p->apCsr[pOp->p2];
   assert( pOrig );
-  assert( pOrig->pBtx!=0 );  /* Only ephemeral cursors can be duplicated */
+  assert( pOrig->isEphemeral );  /* Only ephemeral cursors can be duplicated */
 
   pCx = allocateCursor(p, pOp->p1, pOrig->nField, -1, CURTYPE_BTREE);
   if( pCx==0 ) goto no_mem;
@@ -89840,7 +89845,10 @@ case OP_OpenDup: {
   pCx->isTable = pOrig->isTable;
   pCx->pgnoRoot = pOrig->pgnoRoot;
   pCx->isOrdered = pOrig->isOrdered;
-  rc = sqlite3BtreeCursor(pOrig->pBtx, pCx->pgnoRoot, BTREE_WRCSR,
+  pCx->pBtx = pOrig->pBtx;
+  pCx->hasBeenDuped = 1;
+  pOrig->hasBeenDuped = 1;
+  rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR,
                           pCx->pKeyInfo, pCx->uc.pCursor);
   /* The sqlite3BtreeCursor() routine can only fail for the first cursor
   ** opened for a database.  Since there is already an open cursor when this
@@ -89906,9 +89914,10 @@ case OP_OpenEphemeral: {
     aMem[pOp->p3].z = "";
   }
   pCx = p->apCsr[pOp->p1];
-  if( pCx && ALWAYS(pCx->pBtx) ){
-    /* If the ephermeral table is already open, erase all existing content
-    ** so that the table is empty again, rather than creating a new table. */
+  if( pCx && !pCx->hasBeenDuped ){
+    /* If the ephermeral table is already open and has no duplicates from
+    ** OP_OpenDup, then erase all existing content so that the table is
+    ** empty again, rather than creating a new table. */
     assert( pCx->isEphemeral );
     pCx->seqCount = 0;
     pCx->cacheStatus = CACHE_STALE;
@@ -89922,33 +89931,36 @@ case OP_OpenEphemeral: {
                           vfsFlags);
     if( rc==SQLITE_OK ){
       rc = sqlite3BtreeBeginTrans(pCx->pBtx, 1, 0);
-    }
-    if( rc==SQLITE_OK ){
-      /* If a transient index is required, create it by calling
-      ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before
-      ** opening it. If a transient table is required, just use the
-      ** automatically created table with root-page 1 (an BLOB_INTKEY table).
-      */
-      if( (pCx->pKeyInfo = pKeyInfo = pOp->p4.pKeyInfo)!=0 ){
-        assert( pOp->p4type==P4_KEYINFO );
-        rc = sqlite3BtreeCreateTable(pCx->pBtx, &pCx->pgnoRoot,
-                                     BTREE_BLOBKEY | pOp->p5);
-        if( rc==SQLITE_OK ){
-          assert( pCx->pgnoRoot==SCHEMA_ROOT+1 );
-          assert( pKeyInfo->db==db );
-          assert( pKeyInfo->enc==ENC(db) );
-          rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR,
-                                  pKeyInfo, pCx->uc.pCursor);
+      if( rc==SQLITE_OK ){
+        /* If a transient index is required, create it by calling
+        ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before
+        ** opening it. If a transient table is required, just use the
+        ** automatically created table with root-page 1 (an BLOB_INTKEY table).
+        */
+        if( (pCx->pKeyInfo = pKeyInfo = pOp->p4.pKeyInfo)!=0 ){
+          assert( pOp->p4type==P4_KEYINFO );
+          rc = sqlite3BtreeCreateTable(pCx->pBtx, &pCx->pgnoRoot,
+              BTREE_BLOBKEY | pOp->p5);
+          if( rc==SQLITE_OK ){
+            assert( pCx->pgnoRoot==SCHEMA_ROOT+1 );
+            assert( pKeyInfo->db==db );
+            assert( pKeyInfo->enc==ENC(db) );
+            rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR,
+                pKeyInfo, pCx->uc.pCursor);
+          }
+          pCx->isTable = 0;
+        }else{
+          pCx->pgnoRoot = SCHEMA_ROOT;
+          rc = sqlite3BtreeCursor(pCx->pBtx, SCHEMA_ROOT, BTREE_WRCSR,
+              0, pCx->uc.pCursor);
+          pCx->isTable = 1;
         }
-        pCx->isTable = 0;
-      }else{
-        pCx->pgnoRoot = SCHEMA_ROOT;
-        rc = sqlite3BtreeCursor(pCx->pBtx, SCHEMA_ROOT, BTREE_WRCSR,
-                                0, pCx->uc.pCursor);
-        pCx->isTable = 1;
+      }
+      pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
+      if( rc ){
+        sqlite3BtreeClose(pCx->pBtx);
       }
     }
-    pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
   }
   if( rc ) goto abort_due_to_error;
   pCx->nullRow = 1;
@@ -90382,13 +90394,13 @@ seek_not_found:
 **
 ** There are three possible outcomes from this opcode:<ol>
 **
-** <li> If after This.P1 steps, the cursor is still point to a place that
-**      is earlier in the btree than the target row,
-**      then fall through into the subsquence OP_SeekGE opcode.
+** <li> If after This.P1 steps, the cursor is still pointing to a place that
+**      is earlier in the btree than the target row, then fall through
+**      into the subsquence OP_SeekGE opcode.
 **
 ** <li> If the cursor is successfully moved to the target row by 0 or more
 **      sqlite3BtreeNext() calls, then jump to This.P2, which will land just
-**      past the OP_IdxGT opcode that follows the OP_SeekGE.
+**      past the OP_IdxGT or OP_IdxGE opcode that follows the OP_SeekGE.
 **
 ** <li> If the cursor ends up past the target row (indicating the the target
 **      row does not exist in the btree) then jump to SeekOP.P2.
@@ -90405,7 +90417,8 @@ case OP_SeekScan: {
   /* pOp->p2 points to the first instruction past the OP_IdxGT that
   ** follows the OP_SeekGE.  */
   assert( pOp->p2>=(int)(pOp-aOp)+2 );
-  assert( aOp[pOp->p2-1].opcode==OP_IdxGT );
+  assert( aOp[pOp->p2-1].opcode==OP_IdxGT || aOp[pOp->p2-1].opcode==OP_IdxGE );
+  testcase( aOp[pOp->p2-1].opcode==OP_IdxGE );
   assert( pOp[1].p1==aOp[pOp->p2-1].p1 );
   assert( pOp[1].p2==aOp[pOp->p2-1].p2 );
   assert( pOp[1].p3==aOp[pOp->p2-1].p3 );
@@ -91940,6 +91953,8 @@ case OP_IdxRowid: {           /* out2 */
       pTabCur->deferredMoveto = 1;
       assert( pOp->p4type==P4_INTARRAY || pOp->p4.ai==0 );
       pTabCur->aAltMap = pOp->p4.ai;
+      assert( !pC->isEphemeral );
+      assert( !pTabCur->isEphemeral );
       pTabCur->pAltCursor = pC;
     }else{
       pOut = out2Prerelease(p, pOp);
@@ -98988,15 +99003,19 @@ static int lookupName(
       if( pParse->pTriggerTab!=0 ){
         int op = pParse->eTriggerOp;
         assert( op==TK_DELETE || op==TK_UPDATE || op==TK_INSERT );
-        if( op!=TK_DELETE && zTab && sqlite3StrICmp("new",zTab) == 0 ){
+        if( pParse->bReturning ){
+          if( (pNC->ncFlags & NC_UBaseReg)!=0
+           && (zTab==0 || sqlite3StrICmp(zTab,pParse->pTriggerTab->zName)==0)
+          ){
+            pExpr->iTable = op!=TK_DELETE;
+            pTab = pParse->pTriggerTab;
+          }
+        }else if( op!=TK_DELETE && zTab && sqlite3StrICmp("new",zTab) == 0 ){
           pExpr->iTable = 1;
           pTab = pParse->pTriggerTab;
         }else if( op!=TK_INSERT && zTab && sqlite3StrICmp("old",zTab)==0 ){
           pExpr->iTable = 0;
           pTab = pParse->pTriggerTab;
-        }else if( pParse->bReturning && (pNC->ncFlags & NC_UBaseReg)!=0 ){
-          pExpr->iTable = op!=TK_DELETE;
-          pTab = pParse->pTriggerTab;
         }
       }
 #endif /* SQLITE_OMIT_TRIGGER */
@@ -101591,8 +101610,8 @@ SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse *pParse, Expr *pLeft, Expr *pRight){
   }else if( (ExprAlwaysFalse(pLeft) || ExprAlwaysFalse(pRight))
          && !IN_RENAME_OBJECT
   ){
-    sqlite3ExprDelete(db, pLeft);
-    sqlite3ExprDelete(db, pRight);
+    sqlite3ExprDeferredDelete(pParse, pLeft);
+    sqlite3ExprDeferredDelete(pParse, pRight);
     return sqlite3Expr(db, TK_INTEGER, "0");
   }else{
     return sqlite3PExpr(pParse, TK_AND, pLeft, pRight);
@@ -101789,6 +101808,22 @@ SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3 *db, Expr *p){
   if( p ) sqlite3ExprDeleteNN(db, p);
 }
 
+
+/*
+** Arrange to cause pExpr to be deleted when the pParse is deleted.
+** This is similar to sqlite3ExprDelete() except that the delete is
+** deferred untilthe pParse is deleted.
+**
+** The pExpr might be deleted immediately on an OOM error.
+**
+** The deferred delete is (currently) implemented by adding the
+** pExpr to the pParse->pConstExpr list with a register number of 0.
+*/
+SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse *pParse, Expr *pExpr){
+  pParse->pConstExpr =
+      sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr);
+}
+
 /* Invoke sqlite3RenameExprUnmap() and sqlite3ExprDelete() on the
 ** expression.
 */
@@ -106414,8 +106449,7 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){
         pExpr = sqlite3ExprDup(db, pExpr, 0);
         if( pExpr ){
           pAggInfo->aCol[iAgg].pCExpr = pExpr;
-          pParse->pConstExpr =
-             sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr);
+          sqlite3ExprDeferredDelete(pParse, pExpr);
         }
       }
     }else{
@@ -106424,8 +106458,7 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){
         pExpr = sqlite3ExprDup(db, pExpr, 0);
         if( pExpr ){
           pAggInfo->aFunc[iAgg].pFExpr = pExpr;
-          pParse->pConstExpr =
-             sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr);
+          sqlite3ExprDeferredDelete(pParse, pExpr);
         }
       }
     }
@@ -108668,33 +108701,44 @@ SQLITE_PRIVATE void sqlite3AlterDropColumn(Parse *pParse, SrcList *pSrc, Token *
     sqlite3OpenTable(pParse, iCur, iDb, pTab, OP_OpenWrite);
     addr = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v);
     reg = ++pParse->nMem;
-    pParse->nMem += pTab->nCol;
     if( HasRowid(pTab) ){
       sqlite3VdbeAddOp2(v, OP_Rowid, iCur, reg);
+      pParse->nMem += pTab->nCol;
     }else{
       pPk = sqlite3PrimaryKeyIndex(pTab);
+      pParse->nMem += pPk->nColumn;
+      for(i=0; i<pPk->nKeyCol; i++){
+        sqlite3VdbeAddOp3(v, OP_Column, iCur, i, reg+i+1);
+      }
+      nField = pPk->nKeyCol;
     }
+    regRec = ++pParse->nMem;
     for(i=0; i<pTab->nCol; i++){
       if( i!=iCol && (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){
         int regOut;
         if( pPk ){
           int iPos = sqlite3TableColumnToIndex(pPk, i);
           int iColPos = sqlite3TableColumnToIndex(pPk, iCol);
+          if( iPos<pPk->nKeyCol ) continue;
           regOut = reg+1+iPos-(iPos>iColPos);
         }else{
           regOut = reg+1+nField;
         }
-        sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, i, regOut);
+        if( i==pTab->iPKey ){
+          sqlite3VdbeAddOp2(v, OP_Null, 0, regOut);
+        }else{
+          sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, i, regOut);
+        }
         nField++;
       }
     }
-    regRec = reg + pTab->nCol;
     sqlite3VdbeAddOp3(v, OP_MakeRecord, reg+1, nField, regRec);
     if( pPk ){
       sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iCur, regRec, reg+1, pPk->nKeyCol);
     }else{
       sqlite3VdbeAddOp3(v, OP_Insert, iCur, regRec, reg);
     }
+    sqlite3VdbeChangeP5(v, OPFLAG_SAVEPOSITION);
 
     sqlite3VdbeAddOp2(v, OP_Next, iCur, addr+1); VdbeCoverage(v);
     sqlite3VdbeJumpHere(v, addr);
@@ -115684,7 +115728,11 @@ SQLITE_PRIVATE void sqlite3CreateIndex(
   /* Clean up before exiting */
 exit_create_index:
   if( pIndex ) sqlite3FreeIndex(db, pIndex);
-  if( pTab ){  /* Ensure all REPLACE indexes are at the end of the list */
+  if( pTab ){
+    /* Ensure all REPLACE indexes on pTab are at the end of the pIndex list.
+    ** The list was already ordered when this routine was entered, so at this
+    ** point at most a single index (the newly added index) will be out of
+    ** order.  So we have to reorder at most one index. */
     Index **ppFrom = &pTab->pIndex;
     Index *pThis;
     for(ppFrom=&pTab->pIndex; (pThis = *ppFrom)!=0; ppFrom=&pThis->pNext){
@@ -115698,6 +115746,16 @@ exit_create_index:
       }
       break;
     }
+#ifdef SQLITE_DEBUG
+    /* Verify that all REPLACE indexes really are now at the end
+    ** of the index list.  In other words, no other index type ever
+    ** comes after a REPLACE index on the list. */
+    for(pThis = pTab->pIndex; pThis; pThis=pThis->pNext){
+      assert( pThis->onError!=OE_Replace
+           || pThis->pNext==0
+           || pThis->pNext->onError==OE_Replace );
+    }
+#endif
   }
   sqlite3ExprDelete(db, pPIWhere);
   sqlite3ExprListDelete(db, pList);
@@ -123114,7 +123172,9 @@ SQLITE_PRIVATE void sqlite3Insert(
       pNx->iDataCur = iDataCur;
       pNx->iIdxCur = iIdxCur;
       if( pNx->pUpsertTarget ){
-        sqlite3UpsertAnalyzeTarget(pParse, pTabList, pNx);
+        if( sqlite3UpsertAnalyzeTarget(pParse, pTabList, pNx) ){
+          goto insert_cleanup;
+        }
       }
       pNx = pNx->pNextUpsert;
     }while( pNx!=0 );
@@ -124536,7 +124596,7 @@ static void codeWithoutRowidPreupdate(
   Vdbe *v = pParse->pVdbe;
   int r = sqlite3GetTempReg(pParse);
   assert( !HasRowid(pTab) );
-  assert( 0==(pParse->db->mDbFlags & DBFLAG_Vacuum) );
+  assert( 0==(pParse->db->mDbFlags & DBFLAG_Vacuum) || CORRUPT_DB );
   sqlite3VdbeAddOp2(v, OP_Integer, 0, r);
   sqlite3VdbeAddOp4(v, OP_Insert, iCur, regData, r, (char*)pTab, P4_TABLE);
   sqlite3VdbeChangeP5(v, OPFLAG_ISNOOP);
@@ -133084,7 +133144,7 @@ SQLITE_PRIVATE int sqlite3ColumnsFromExprList(
     nCol = pEList->nExpr;
     aCol = sqlite3DbMallocZero(db, sizeof(aCol[0])*nCol);
     testcase( aCol==0 );
-    if( nCol>32767 ) nCol = 32767;
+    if( NEVER(nCol>32767) ) nCol = 32767;
   }else{
     nCol = 0;
     aCol = 0;
@@ -136158,16 +136218,24 @@ static int resolveFromTermToCte(
     pSavedWith = pParse->pWith;
     pParse->pWith = pWith;
     if( pSel->selFlags & SF_Recursive ){
+      int rc;
       assert( pRecTerm!=0 );
       assert( (pRecTerm->selFlags & SF_Recursive)==0 );
       assert( pRecTerm->pNext!=0 );
       assert( (pRecTerm->pNext->selFlags & SF_Recursive)!=0 );
       assert( pRecTerm->pWith==0 );
       pRecTerm->pWith = pSel->pWith;
-      sqlite3WalkSelect(pWalker, pRecTerm);
+      rc = sqlite3WalkSelect(pWalker, pRecTerm);
       pRecTerm->pWith = 0;
+      if( rc ){
+        pParse->pWith = pSavedWith;
+        return 2;
+      }
     }else{
-      sqlite3WalkSelect(pWalker, pSel);
+      if( sqlite3WalkSelect(pWalker, pSel) ){
+        pParse->pWith = pSavedWith;
+        return 2;
+      }
     }
     pParse->pWith = pWith;
 
@@ -137476,7 +137544,9 @@ SQLITE_PRIVATE int sqlite3Select(
       sqlite3VdbeAddOp2(v, OP_OpenDup, pItem->iCursor, pPrior->iCursor);
       pSub->nSelectRow = pPrior->pSelect->nSelectRow;
     }else{
-      /* Generate a subroutine that will materialize the view. */
+      /* Materalize the view.  If the view is not correlated, generate a
+      ** subroutine to do the materialization so that subsequent uses of
+      ** the same view can reuse the materialization. */
       int topAddr;
       int onceAddr = 0;
       int retAddr;
@@ -137503,7 +137573,7 @@ SQLITE_PRIVATE int sqlite3Select(
       VdbeComment((v, "end %s", pItem->pTab->zName));
       sqlite3VdbeChangeP1(v, topAddr, retAddr);
       sqlite3ClearTempRegCache(pParse);
-      if( pItem->fg.isCte ){
+      if( pItem->fg.isCte && pItem->fg.isCorrelated==0 ){
         CteUse *pCteUse = pItem->u2.pCteUse;
         pCteUse->addrM9e = pItem->addrFillSub;
         pCteUse->regRtn = pItem->regReturn;
@@ -139275,6 +139345,25 @@ SQLITE_PRIVATE SrcList *sqlite3TriggerStepSrc(
   return pSrc;
 }
 
+/*
+** Return true if the pExpr term from the RETURNING clause argument
+** list is of the form "*".  Raise an error if the terms if of the
+** form "table.*".
+*/
+static int isAsteriskTerm(
+  Parse *pParse,      /* Parsing context */
+  Expr *pTerm         /* A term in the RETURNING clause */
+){
+  assert( pTerm!=0 );
+  if( pTerm->op==TK_ASTERISK ) return 1;
+  if( pTerm->op!=TK_DOT ) return 0;
+  assert( pTerm->pRight!=0 );
+  assert( pTerm->pLeft!=0 );
+  if( pTerm->pRight->op!=TK_ASTERISK ) return 0;
+  sqlite3ErrorMsg(pParse, "RETURNING may not use \"TABLE.*\" wildcards");
+  return 1;
+}
+
 /* The input list pList is the list of result set terms from a RETURNING
 ** clause.  The table that we are returning from is pTab.
 **
@@ -139292,7 +139381,8 @@ static ExprList *sqlite3ExpandReturning(
 
   for(i=0; i<pList->nExpr; i++){
     Expr *pOldExpr = pList->a[i].pExpr;
-    if( ALWAYS(pOldExpr!=0) && pOldExpr->op==TK_ASTERISK ){
+    if( NEVER(pOldExpr==0) ) continue;
+    if( isAsteriskTerm(pParse, pOldExpr) ){
       int jj;
       for(jj=0; jj<pTab->nCol; jj++){
         Expr *pNewExpr;
@@ -146825,6 +146915,7 @@ static void whereCombineDisjuncts(
   int op;                /* Operator for the combined expression */
   int idxNew;            /* Index in pWC of the next virtual term */
 
+  if( (pOne->wtFlags | pTwo->wtFlags) & TERM_VNULL ) return;
   if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return;
   if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return;
   if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp
@@ -147530,6 +147621,7 @@ static void exprAnalyzeExists(
 #endif
   if( pSel->pPrior ) return;
   if( pSel->pWhere==0 ) return;
+  if( pSel->pLimit ) return;
   if( 0==exprAnalyzeExistsFindEq(pSel, 0, 0) ) return;
 
   pDup = sqlite3ExprDup(db, pExpr, 0);
@@ -155314,6 +155406,7 @@ static void windowCheckValue(Parse *pParse, int reg, int eCond){
     VdbeCoverageIf(v, eCond==2);
   }
   sqlite3VdbeAddOp3(v, aOp[eCond], regZero, sqlite3VdbeCurrentAddr(v)+2, reg);
+  sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC);
   VdbeCoverageNeverNullIf(v, eCond==0); /* NULL case captured by */
   VdbeCoverageNeverNullIf(v, eCond==1); /*   the OP_MustBeInt */
   VdbeCoverageNeverNullIf(v, eCond==2);
@@ -162308,7 +162401,7 @@ static const unsigned char aiClass[] = {
 #ifdef SQLITE_EBCDIC
 /*         x0  x1  x2  x3  x4  x5  x6  x7  x8  x9  xa  xb  xc  xd  xe  xf */
 /* 0x */   29, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28,  7,  7, 28, 28,
-/* 1x */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+/* 1x */   28, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
 /* 2x */   28, 28, 28, 28, 28,  7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
 /* 3x */   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
 /* 4x */    7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10,
@@ -229213,7 +229306,7 @@ static void fts5SourceIdFunc(
 ){
   assert( nArg==0 );
   UNUSED_PARAM2(nArg, apUnused);
-  sqlite3_result_text(pCtx, "fts5: 2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827", -1, SQLITE_TRANSIENT);
+  sqlite3_result_text(pCtx, "fts5: 2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886", -1, SQLITE_TRANSIENT);
 }
 
 /*
@@ -234139,9 +234232,9 @@ SQLITE_API int sqlite3_stmt_init(
 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_STMTVTAB) */
 
 /************** End of stmt.c ************************************************/
-#if __LINE__!=234142
+#if __LINE__!=234235
 #undef SQLITE_SOURCE_ID
-#define SQLITE_SOURCE_ID      "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50calt2"
+#define SQLITE_SOURCE_ID      "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98faalt2"
 #endif
 /* Return the source-id for this library */
 SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; }
diff --git a/src/3rdparty/sqlite/sqlite3.h b/src/3rdparty/sqlite/sqlite3.h
index f636b294d2..19ee767fe8 100644
--- a/src/3rdparty/sqlite/sqlite3.h
+++ b/src/3rdparty/sqlite/sqlite3.h
@@ -123,9 +123,9 @@ extern "C" {
 ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
 ** [sqlite_version()] and [sqlite_source_id()].
 */
-#define SQLITE_VERSION        "3.35.2"
-#define SQLITE_VERSION_NUMBER 3035002
-#define SQLITE_SOURCE_ID      "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827"
+#define SQLITE_VERSION        "3.35.5"
+#define SQLITE_VERSION_NUMBER 3035005
+#define SQLITE_SOURCE_ID      "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers