diff options
Diffstat (limited to 'src/3rdparty')
58 files changed, 2250 insertions, 940 deletions
diff --git a/src/3rdparty/libjpeg/LICENSE b/src/3rdparty/libjpeg/LICENSE index 99c9aadcc4..a1cdad52fa 100644 --- a/src/3rdparty/libjpeg/LICENSE +++ b/src/3rdparty/libjpeg/LICENSE @@ -91,7 +91,7 @@ best of our understanding. The Modified (3-clause) BSD License =================================== -Copyright (C)2009-2020 D. R. Commander. All Rights Reserved. +Copyright (C)2009-2021 D. R. Commander. All Rights Reserved.<br> Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. Redistribution and use in source and binary forms, with or without diff --git a/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh b/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh index 9b7e1fff82..2153eca35a 100755 --- a/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh +++ b/src/3rdparty/libjpeg/import_from_libjpeg_tarball.sh @@ -165,4 +165,4 @@ for i in $FILES; do copy_file "$i" "src/$i" done -echo Done. $TARGET_DIR/jconfig.h and jconfigint.h may need manual updating. +echo Done. $TARGET_DIR/src/jconfig.h and jconfigint.h may need manual updating. diff --git a/src/3rdparty/libjpeg/libjpeg.pro b/src/3rdparty/libjpeg/libjpeg.pro index 5c3563aae3..d478719af8 100644 --- a/src/3rdparty/libjpeg/libjpeg.pro +++ b/src/3rdparty/libjpeg/libjpeg.pro @@ -12,7 +12,7 @@ MODULE_EXT_HEADERS = $$PWD/src/jpeglib.h \ $$PWD/src/jconfig.h \ $$PWD/src/jmorecfg.h -INCLUDEPATH += $$PWD $$PWD/src +INCLUDEPATH += $$PWD/src load(qt_helper_lib) diff --git a/src/3rdparty/libjpeg/qt_attribution.json b/src/3rdparty/libjpeg/qt_attribution.json index a1dd9990c4..0940636da3 100644 --- a/src/3rdparty/libjpeg/qt_attribution.json +++ b/src/3rdparty/libjpeg/qt_attribution.json @@ -6,11 +6,11 @@ "Description": "The Independent JPEG Group's JPEG software", "Homepage": "http://libjpeg-turbo.virtualgl.org/", - "Version": "2.0.6", + "Version": "2.1.0", "License": "Independent JPEG Group License", "LicenseId": "IJG", "LicenseFile": "LICENSE", - "Copyright": "Copyright (C) 2009-2020 D. R. 
Commander + "Copyright": "Copyright (C) 2009-2021 D. R. Commander Copyright (C) 2015, 2020 Google, Inc. Copyright (C) 2019 Arm Limited Copyright (C) 2015-2016, 2018 Matthieu Darbois diff --git a/src/3rdparty/libjpeg/src/ChangeLog.md b/src/3rdparty/libjpeg/src/ChangeLog.md index 7b5f875464..5ecbf3b510 100644 --- a/src/3rdparty/libjpeg/src/ChangeLog.md +++ b/src/3rdparty/libjpeg/src/ChangeLog.md @@ -1,3 +1,208 @@ +2.1.1 +===== + +### Significant changes relative to 2.1.0 + +1. Fixed a regression introduced in 2.1.0 that caused build failures with +non-GCC-compatible compilers for Un*x/Arm platforms. + + +2.1.0 +===== + +### Significant changes relative to 2.1 beta1 + +1. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to +decompress certain progressive JPEG images with one or more component planes of +width 8 or less caused a buffer overrun. + +2. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to +decompress a specially-crafted malformed progressive JPEG image caused the +block smoothing algorithm to read from uninitialized memory. + +3. Fixed an issue in the Arm Neon SIMD Huffman encoders that caused the +encoders to generate incorrect results when using the Clang compiler with +Visual Studio. + +4. Fixed a floating point exception (CVE-2021-20205) that occurred when +attempting to compress a specially-crafted malformed GIF image with a specified +image width of 0 using cjpeg. + +5. Fixed a regression introduced by 2.0 beta1[15] whereby attempting to +generate a progressive JPEG image on an SSE2-capable CPU using a scan script +containing one or more scans with lengths divisible by 32 and non-zero +successive approximation low bit positions would, under certain circumstances, +result in an error ("Missing Huffman code table entry") and an invalid JPEG +image. + +6. 
Introduced a new flag (`TJFLAG_LIMITSCANS` in the TurboJPEG C API and +`TJ.FLAG_LIMIT_SCANS` in the TurboJPEG Java API) and a corresponding TJBench +command-line argument (`-limitscans`) that causes the TurboJPEG decompression +and transform functions/operations to return/throw an error if a progressive +JPEG image contains an unreasonably large number of scans. This allows +applications that use the TurboJPEG API to guard against an exploit of the +progressive JPEG format described in the report +["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf). + +7. The PPM reader now throws an error, rather than segfaulting (due to a buffer +overrun) or generating incorrect pixels, if an application attempts to use the +`tjLoadImage()` function to load a 16-bit binary PPM file (a binary PPM file +with a maximum value greater than 255) into a grayscale image buffer or to load +a 16-bit binary PGM file into an RGB image buffer. + +8. Fixed an issue in the PPM reader that caused incorrect pixels to be +generated when using the `tjLoadImage()` function to load a 16-bit binary PPM +file into an extended RGB image buffer. + +9. Fixed an issue whereby, if a JPEG buffer was automatically re-allocated by +one of the TurboJPEG compression or transform functions and an error +subsequently occurred during compression or transformation, the JPEG buffer +pointer passed by the application was not updated when the function returned. + + +2.0.90 (2.1 beta1) +================== + +### Significant changes relative to 2.0.6: + +1. The build system, x86-64 SIMD extensions, and accelerated Huffman codec now +support the x32 ABI on Linux, which allows for using x86-64 instructions with +32-bit pointers. The x32 ABI is generally enabled by adding `-mx32` to the +compiler flags. + + Caveats: + - CMake 3.9.0 or later is required in order for the build system to +automatically detect an x32 build. 
+ - Java does not support the x32 ABI, and thus the TurboJPEG Java API will +automatically be disabled with x32 builds. + +2. Added Loongson MMI SIMD implementations of the RGB-to-grayscale, 4:2:2 fancy +chroma upsampling, 4:2:2 and 4:2:0 merged chroma upsampling/color conversion, +and fast integer DCT/IDCT algorithms. Relative to libjpeg-turbo 2.0.x, this +speeds up: + + - the compression of RGB source images into grayscale JPEG images by +approximately 20% + - the decompression of 4:2:2 JPEG images by approximately 40-60% when +using fancy upsampling + - the decompression of 4:2:2 and 4:2:0 JPEG images by approximately +15-20% when using merged upsampling + - the compression of RGB source images by approximately 30-45% when using +the fast integer DCT + - the decompression of JPEG images into RGB destination images by +approximately 2x when using the fast integer IDCT + + The overall decompression speedup for RGB images is now approximately +2.3-3.7x (compared to 2-3.5x with libjpeg-turbo 2.0.x.) + +3. 32-bit (Armv7 or Armv7s) iOS builds of libjpeg-turbo are no longer +supported, and the libjpeg-turbo build system can no longer be used to package +such builds. 32-bit iOS apps cannot run in iOS 11 and later, and the App Store +no longer allows them. + +4. 32-bit (i386) OS X/macOS builds of libjpeg-turbo are no longer supported, +and the libjpeg-turbo build system can no longer be used to package such +builds. 32-bit Mac applications cannot run in macOS 10.15 "Catalina" and +later, and the App Store no longer allows them. + +5. The SSE2 (x86 SIMD) and C Huffman encoding algorithms have been +significantly optimized, resulting in a measured average overall compression +speedup of 12-28% for 64-bit code and 22-52% for 32-bit code on various Intel +and AMD CPUs, as well as a measured average overall compression speedup of +0-23% on platforms that do not have a SIMD-accelerated Huffman encoding +implementation. + +6. 
The block smoothing algorithm that is applied by default when decompressing +progressive Huffman-encoded JPEG images has been improved in the following +ways: + + - The algorithm is now more fault-tolerant. Previously, if a particular +scan was incomplete, then the smoothing parameters for the incomplete scan +would be applied to the entire output image, including the parts of the image +that were generated by the prior (complete) scan. Visually, this had the +effect of removing block smoothing from lower-frequency scans if they were +followed by an incomplete higher-frequency scan. libjpeg-turbo now applies +block smoothing parameters to each iMCU row based on which scan generated the +pixels in that row, rather than always using the block smoothing parameters for +the most recent scan. + - When applying block smoothing to DC scans, a Gaussian-like kernel with a +5x5 window is used to reduce the "blocky" appearance. + +7. Added SIMD acceleration for progressive Huffman encoding on Arm platforms. +This speeds up the compression of full-color progressive JPEGs by about 30-40% +on average (relative to libjpeg-turbo 2.0.x) when using modern Arm CPUs. + +8. Added configure-time and run-time auto-detection of Loongson MMI SIMD +instructions, so that the Loongson MMI SIMD extensions can be included in any +MIPS64 libjpeg-turbo build. + +9. Added fault tolerance features to djpeg and jpegtran, mainly to demonstrate +methods by which applications can guard against the exploits of the JPEG format +described in the report +["Two Issues with the JPEG Standard"](https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf). + + - Both programs now accept a `-maxscans` argument, which can be used to +limit the number of allowable scans in the input file. + - Both programs now accept a `-strict` argument, which can be used to +treat all warnings as fatal. + +10. CMake package config files are now included for both the libjpeg and +TurboJPEG API libraries. 
This facilitates using libjpeg-turbo with CMake's +`find_package()` function. For example: + + find_package(libjpeg-turbo CONFIG REQUIRED) + + add_executable(libjpeg_program libjpeg_program.c) + target_link_libraries(libjpeg_program PUBLIC libjpeg-turbo::jpeg) + + add_executable(libjpeg_program_static libjpeg_program.c) + target_link_libraries(libjpeg_program_static PUBLIC + libjpeg-turbo::jpeg-static) + + add_executable(turbojpeg_program turbojpeg_program.c) + target_link_libraries(turbojpeg_program PUBLIC + libjpeg-turbo::turbojpeg) + + add_executable(turbojpeg_program_static turbojpeg_program.c) + target_link_libraries(turbojpeg_program_static PUBLIC + libjpeg-turbo::turbojpeg-static) + +11. Since the Unisys LZW patent has long expired, cjpeg and djpeg can now +read/write both LZW-compressed and uncompressed GIF files (feature ported from +jpeg-6a and jpeg-9d.) + +12. jpegtran now includes the `-wipe` and `-drop` options from jpeg-9a and +jpeg-9d, as well as the ability to expand the image size using the `-crop` +option. Refer to jpegtran.1 or usage.txt for more details. + +13. Added a complete intrinsics implementation of the Arm Neon SIMD extensions, +thus providing SIMD acceleration on Arm platforms for all of the algorithms +that are SIMD-accelerated on x86 platforms. This new implementation is +significantly faster in some cases than the old GAS implementation-- +depending on the algorithms used, the type of CPU core, and the compiler. 
GCC, +as of this writing, does not provide a full or optimal set of Neon intrinsics, +so for performance reasons, the default when building libjpeg-turbo with GCC is +to continue using the GAS implementation of the following algorithms: + + - 32-bit RGB-to-YCbCr color conversion + - 32-bit fast and accurate inverse DCT + - 64-bit RGB-to-YCbCr and YCbCr-to-RGB color conversion + - 64-bit accurate forward and inverse DCT + - 64-bit Huffman encoding + + A new CMake variable (`NEON_INTRINSICS`) can be used to override this +default. + + Since the new intrinsics implementation includes SIMD acceleration +for merged upsampling/color conversion, 1.5.1[5] is no longer necessary and has +been reverted. + +14. The Arm Neon SIMD extensions can now be built using Visual Studio. + +15. The build system can now be used to generate a universal x86-64 + Armv8 +libjpeg-turbo SDK package for both iOS and macOS. + + 2.0.6 ===== diff --git a/src/3rdparty/libjpeg/src/README.ijg b/src/3rdparty/libjpeg/src/README.ijg index d681cf1273..9453c19501 100644 --- a/src/3rdparty/libjpeg/src/README.ijg +++ b/src/3rdparty/libjpeg/src/README.ijg @@ -128,7 +128,7 @@ with respect to this software, its quality, accuracy, merchantability, or fitness for a particular purpose. This software is provided "AS IS", and you, its user, assume the entire risk as to its quality and accuracy. -This software is copyright (C) 1991-2016, Thomas G. Lane, Guido Vollbeding. +This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. All Rights Reserved except as specified below. Permission is hereby granted to use, copy, modify, and distribute this @@ -159,19 +159,6 @@ commercial products, provided that all warranty or liability claims are assumed by the product vendor. -The IJG distribution formerly included code to read and write GIF files. 
-To avoid entanglement with the Unisys LZW patent (now expired), GIF reading -support has been removed altogether, and the GIF writer has been simplified -to produce "uncompressed GIFs". This technique does not use the LZW -algorithm; the resulting GIF files are larger than usual, but are readable -by all standard GIF decoders. - -We are required to state that - "The Graphics Interchange Format(c) is the Copyright property of - CompuServe Incorporated. GIF(sm) is a Service Mark property of - CompuServe Incorporated." - - REFERENCES ========== diff --git a/src/3rdparty/libjpeg/src/README.md b/src/3rdparty/libjpeg/src/README.md index 90a4a43ee1..01e391ea7c 100644 --- a/src/3rdparty/libjpeg/src/README.md +++ b/src/3rdparty/libjpeg/src/README.md @@ -3,7 +3,7 @@ Background libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate baseline JPEG compression and decompression on x86, x86-64, Arm, PowerPC, and -MIPS systems, as well as progressive JPEG compression on x86 and x86-64 +MIPS systems, as well as progressive JPEG compression on x86, x86-64, and Arm systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized diff --git a/src/3rdparty/libjpeg/src/change.log b/src/3rdparty/libjpeg/src/change.log index f090d7788c..e4d0ddc4bd 100644 --- a/src/3rdparty/libjpeg/src/change.log +++ b/src/3rdparty/libjpeg/src/change.log @@ -6,6 +6,25 @@ reference. Please see ChangeLog.md for information specific to libjpeg-turbo. CHANGE LOG for Independent JPEG Group's JPEG software +Version 9d 12-Jan-2020 +----------------------- + +Restore GIF read and write support from libjpeg version 6a. +Thank to Wolfgang Werner (W.W.) Heinz for suggestion. 
+ +Add jpegtran -drop option; add options to the crop extension and wipe +to fill the extra area with content from the source image region, +instead of gray out. + + +Version 9c 14-Jan-2018 +----------------------- + +jpegtran: add an option to the -wipe switch to fill the region +with the average of adjacent blocks, instead of gray out. +Thank to Caitlyn Feddock and Maddie Ziegler for inspiration. + + Version 9b 17-Jan-2016 ----------------------- @@ -13,6 +32,13 @@ Document 'f' specifier for jpegtran -crop specification. Thank to Michele Martone for suggestion. +Version 9a 19-Jan-2014 +----------------------- + +Add jpegtran -wipe option and extension for -crop. +Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion. + + Version 9 13-Jan-2013 ---------------------- @@ -138,11 +164,6 @@ Huffman tables being used. Huffman tables are checked for validity much more carefully than before. -To avoid the Unisys LZW patent, djpeg's GIF output capability has been -changed to produce "uncompressed GIFs", and cjpeg's GIF input capability -has been removed altogether. We're not happy about it either, but there -seems to be no good alternative. - The configure script now supports building libjpeg as a shared library on many flavors of Unix (all the ones that GNU libtool knows how to build shared libraries for). 
Use "./configure --enable-shared" to diff --git a/src/3rdparty/libjpeg/src/jccolext.c b/src/3rdparty/libjpeg/src/jccolext.c index 19c955c9d6..303b322ce6 100644 --- a/src/3rdparty/libjpeg/src/jccolext.c +++ b/src/3rdparty/libjpeg/src/jccolext.c @@ -48,9 +48,9 @@ rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr2 = output_buf[2][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); + r = inptr[RGB_RED]; + g = inptr[RGB_GREEN]; + b = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -100,9 +100,9 @@ rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr[RGB_RED]); - g = GETJSAMPLE(inptr[RGB_GREEN]); - b = GETJSAMPLE(inptr[RGB_BLUE]); + r = inptr[RGB_RED]; + g = inptr[RGB_GREEN]; + b = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; /* Y */ outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + @@ -135,9 +135,9 @@ rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr2 = output_buf[2][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr0[col] = GETJSAMPLE(inptr[RGB_RED]); - outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]); - outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]); + outptr0[col] = inptr[RGB_RED]; + outptr1[col] = inptr[RGB_GREEN]; + outptr2[col] = inptr[RGB_BLUE]; inptr += RGB_PIXELSIZE; } } diff --git a/src/3rdparty/libjpeg/src/jccolor.c b/src/3rdparty/libjpeg/src/jccolor.c index 036f6016d1..bdc563c723 100644 --- a/src/3rdparty/libjpeg/src/jccolor.c +++ b/src/3rdparty/libjpeg/src/jccolor.c @@ -392,11 +392,11 @@ cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr3 = output_buf[3][output_row]; output_row++; for (col = 
0; col < num_cols; col++) { - r = MAXJSAMPLE - GETJSAMPLE(inptr[0]); - g = MAXJSAMPLE - GETJSAMPLE(inptr[1]); - b = MAXJSAMPLE - GETJSAMPLE(inptr[2]); + r = MAXJSAMPLE - inptr[0]; + g = MAXJSAMPLE - inptr[1]; + b = MAXJSAMPLE - inptr[2]; /* K passes through as-is */ - outptr3[col] = inptr[3]; /* don't need GETJSAMPLE here */ + outptr3[col] = inptr[3]; inptr += 4; /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations * must be too; we do not need an explicit range-limiting operation. @@ -438,7 +438,7 @@ grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[0][output_row]; output_row++; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[0]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[0]; inptr += instride; } } @@ -497,7 +497,7 @@ null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf, inptr = *input_buf; outptr = output_buf[ci][output_row]; for (col = 0; col < num_cols; col++) { - outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */ + outptr[col] = inptr[ci]; inptr += nc; } } diff --git a/src/3rdparty/libjpeg/src/jcdctmgr.c b/src/3rdparty/libjpeg/src/jcdctmgr.c index c04058e6ce..7dae17a6e1 100644 --- a/src/3rdparty/libjpeg/src/jcdctmgr.c +++ b/src/3rdparty/libjpeg/src/jcdctmgr.c @@ -381,19 +381,19 @@ convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace) elemptr = sample_data[elemr] + start_col; #if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - 
CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; #else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; + *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE; } #endif } @@ -533,20 +533,19 @@ convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col; #if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); #else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) - *workspaceptr++ = (FAST_FLOAT) - 
(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); + *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE); } #endif } diff --git a/src/3rdparty/libjpeg/src/jchuff.c b/src/3rdparty/libjpeg/src/jchuff.c index db85ce114f..8ff817b151 100644 --- a/src/3rdparty/libjpeg/src/jchuff.c +++ b/src/3rdparty/libjpeg/src/jchuff.c @@ -4,8 +4,10 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander. + * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. + * Copyright (C) 2018, Matthias Räncker. + * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -43,14 +45,19 @@ */ /* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ + defined(_M_ARM) || defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" @@ -65,31 +72,42 @@ * but must not be updated permanently until we complete the MCU. */ -typedef struct { - size_t put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ -} savable_state; +#if defined(__x86_64__) && defined(__ILP32__) +typedef unsigned long long bit_buf_type; +#else +typedef size_t bit_buf_type; +#endif -/* This macro is to work around compilers with missing or broken - * structure assignment. 
You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. +/* NOTE: The more optimal Huffman encoding algorithm is only used by the + * intrinsics implementation of the Arm Neon SIMD extensions, which is why we + * retain the old Huffman encoder behavior when using the GAS implementation. */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) +#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \ + defined(_M_ARM) || defined(_M_ARM64)) +typedef unsigned long long simd_bit_buf_type; #else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) +typedef bit_buf_type simd_bit_buf_type; #endif + +#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \ + (defined(__x86_64__) && defined(__ILP32__)) +#define BIT_BUF_SIZE 64 +#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32) +#define BIT_BUF_SIZE 32 +#else +#error Cannot determine word size #endif +#define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8) +typedef struct { + union { + bit_buf_type c; + simd_bit_buf_type simd; + } put_buffer; /* current bit accumulation buffer */ + int free_bits; /* # of bits available in it */ + /* (Neon GAS: # of bits now in it) */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ +} savable_state; typedef struct { struct jpeg_entropy_encoder pub; /* public fields */ @@ -123,6 +141,7 @@ typedef struct { size_t free_in_buffer; /* # of byte spaces remaining in buffer */ savable_state cur; /* Current bit buffer & DC state */ j_compress_ptr cinfo; /* dump_buffer needs access to this */ + int simd; } working_state; @@ -201,8 +220,17 @@ start_pass_huff(j_compress_ptr 
cinfo, boolean gather_statistics) } /* Initialize bit buffer to empty */ - entropy->saved.put_buffer = 0; - entropy->saved.put_bits = 0; + if (entropy->simd) { + entropy->saved.put_buffer.simd = 0; +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + entropy->saved.free_bits = 0; +#else + entropy->saved.free_bits = SIMD_BIT_BUF_SIZE; +#endif + } else { + entropy->saved.put_buffer.c = 0; + entropy->saved.free_bits = BIT_BUF_SIZE; + } /* Initialize restart stuff */ entropy->restarts_to_go = cinfo->restart_interval; @@ -287,6 +315,7 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno, * this lets us detect duplicate VAL entries here, and later * allows emit_bits to detect any attempt to emit such symbols. */ + MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco)); MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); /* This is also a convenient place to check for out-of-range @@ -334,94 +363,94 @@ dump_buffer(working_state *state) /* Outputting bits to the file */ -/* These macros perform the same task as the emit_bits() function in the - * original libjpeg code. In addition to reducing overhead by explicitly - * inlining the code, additional performance is achieved by taking into - * account the size of the bit buffer and waiting until it is almost full - * before emptying it. This mostly benefits 64-bit platforms, since 6 - * bytes can be stored in a 64-bit bit buffer before it has to be emptied. +/* Output byte b and, speculatively, an additional 0 byte. 0xFF must be + * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the + * byte is 0xFF. Otherwise, the output buffer pointer is advanced by 1, and + * the speculative 0 byte will be overwritten by the next byte. */ - -#define EMIT_BYTE() { \ - JOCTET c; \ - put_bits -= 8; \ - c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \ - *buffer++ = c; \ - if (c == 0xFF) /* need to stuff a zero byte? 
*/ \ - *buffer++ = 0; \ +#define EMIT_BYTE(b) { \ + buffer[0] = (JOCTET)(b); \ + buffer[1] = 0; \ + buffer -= -2 + ((JOCTET)(b) < 0xFF); \ } -#define PUT_BITS(code, size) { \ - put_bits += size; \ - put_buffer = (put_buffer << size) | code; \ -} - -#if SIZEOF_SIZE_T != 8 && !defined(_WIN64) - -#define CHECKBUF15() { \ - if (put_bits > 15) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ +/* Output the entire bit buffer. If there are no 0xFF bytes in it, then write + * directly to the output buffer. Otherwise, use the EMIT_BYTE() macro to + * encode 0xFF as 0xFF 0x00. + */ +#if BIT_BUF_SIZE == 64 + +#define FLUSH() { \ + if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \ + EMIT_BYTE(put_buffer >> 56) \ + EMIT_BYTE(put_buffer >> 48) \ + EMIT_BYTE(put_buffer >> 40) \ + EMIT_BYTE(put_buffer >> 32) \ + EMIT_BYTE(put_buffer >> 24) \ + EMIT_BYTE(put_buffer >> 16) \ + EMIT_BYTE(put_buffer >> 8) \ + EMIT_BYTE(put_buffer ) \ + } else { \ + buffer[0] = (JOCTET)(put_buffer >> 56); \ + buffer[1] = (JOCTET)(put_buffer >> 48); \ + buffer[2] = (JOCTET)(put_buffer >> 40); \ + buffer[3] = (JOCTET)(put_buffer >> 32); \ + buffer[4] = (JOCTET)(put_buffer >> 24); \ + buffer[5] = (JOCTET)(put_buffer >> 16); \ + buffer[6] = (JOCTET)(put_buffer >> 8); \ + buffer[7] = (JOCTET)(put_buffer); \ + buffer += 8; \ } \ } -#endif - -#define CHECKBUF31() { \ - if (put_bits > 31) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - } \ -} +#else -#define CHECKBUF47() { \ - if (put_bits > 47) { \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ - EMIT_BYTE() \ +#define FLUSH() { \ + if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \ + EMIT_BYTE(put_buffer >> 24) \ + EMIT_BYTE(put_buffer >> 16) \ + EMIT_BYTE(put_buffer >> 8) \ + EMIT_BYTE(put_buffer ) \ + } else { \ + buffer[0] = (JOCTET)(put_buffer >> 24); \ + buffer[1] = (JOCTET)(put_buffer >> 16); \ + buffer[2] = (JOCTET)(put_buffer >> 8); \ + buffer[3] = (JOCTET)(put_buffer); 
\ + buffer += 4; \ } \ } -#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T) -#error Cannot determine word size #endif -#if SIZEOF_SIZE_T == 8 || defined(_WIN64) - -#define EMIT_BITS(code, size) { \ - CHECKBUF47() \ - PUT_BITS(code, size) \ -} - -#define EMIT_CODE(code, size) { \ - temp2 &= (((JLONG)1) << nbits) - 1; \ - CHECKBUF31() \ - PUT_BITS(code, size) \ - PUT_BITS(temp2, nbits) \ +/* Fill the bit buffer to capacity with the leading bits from code, then output + * the bit buffer and put the remaining bits from code into the bit buffer. + */ +#define PUT_AND_FLUSH(code, size) { \ + put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \ + FLUSH() \ + free_bits += BIT_BUF_SIZE; \ + put_buffer = code; \ } -#else - -#define EMIT_BITS(code, size) { \ - PUT_BITS(code, size) \ - CHECKBUF15() \ +/* Insert code into the bit buffer and output the bit buffer if needed. + * NOTE: We can't flush with free_bits == 0, since the left shift in + * PUT_AND_FLUSH() would have undefined behavior. + */ +#define PUT_BITS(code, size) { \ + free_bits -= size; \ + if (free_bits < 0) \ + PUT_AND_FLUSH(code, size) \ + else \ + put_buffer = (put_buffer << size) | code; \ } -#define EMIT_CODE(code, size) { \ - temp2 &= (((JLONG)1) << nbits) - 1; \ - PUT_BITS(code, size) \ - CHECKBUF15() \ - PUT_BITS(temp2, nbits) \ - CHECKBUF15() \ +#define PUT_CODE(code, size) { \ + temp &= (((JLONG)1) << nbits) - 1; \ + temp |= code << nbits; \ + nbits += size; \ + PUT_BITS(temp, nbits) \ } -#endif - /* Although it is exceedingly rare, it is possible for a Huffman-encoded * coefficient block to be larger than the 128-byte unencoded block. 
For each @@ -444,6 +473,7 @@ dump_buffer(working_state *state) #define STORE_BUFFER() { \ if (localbuf) { \ + size_t bytes, bytestocopy; \ bytes = buffer - _buffer; \ buffer = _buffer; \ while (bytes > 0) { \ @@ -466,20 +496,46 @@ dump_buffer(working_state *state) LOCAL(boolean) flush_bits(working_state *state) { - JOCTET _buffer[BUFSIZE], *buffer; - size_t put_buffer; int put_bits; - size_t bytes, bytestocopy; int localbuf = 0; + JOCTET _buffer[BUFSIZE], *buffer, temp; + simd_bit_buf_type put_buffer; int put_bits; + int localbuf = 0; + + if (state->simd) { +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + put_bits = state->cur.free_bits; +#else + put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits; +#endif + put_buffer = state->cur.put_buffer.simd; + } else { + put_bits = BIT_BUF_SIZE - state->cur.free_bits; + put_buffer = state->cur.put_buffer.c; + } - put_buffer = state->cur.put_buffer; - put_bits = state->cur.put_bits; LOAD_BUFFER() - /* fill any partial byte with ones */ - PUT_BITS(0x7F, 7) - while (put_bits >= 8) EMIT_BYTE() + while (put_bits >= 8) { + put_bits -= 8; + temp = (JOCTET)(put_buffer >> put_bits); + EMIT_BYTE(temp) + } + if (put_bits) { + /* fill partial byte with ones */ + temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits)); + EMIT_BYTE(temp) + } - state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ - state->cur.put_bits = 0; + if (state->simd) { /* and reset bit buffer to empty */ + state->cur.put_buffer.simd = 0; +#if defined(__aarch64__) && !defined(NEON_INTRINSICS) + state->cur.free_bits = 0; +#else + state->cur.free_bits = SIMD_BIT_BUF_SIZE; +#endif + } else { + state->cur.put_buffer.c = 0; + state->cur.free_bits = BIT_BUF_SIZE; + } STORE_BUFFER() return TRUE; @@ -493,7 +549,7 @@ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { JOCTET _buffer[BUFSIZE], *buffer; - size_t bytes, bytestocopy; int localbuf = 0; + int localbuf = 0; 
LOAD_BUFFER() @@ -509,53 +565,41 @@ LOCAL(boolean) encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, c_derived_tbl *dctbl, c_derived_tbl *actbl) { - int temp, temp2, temp3; - int nbits; - int r, code, size; + int temp, nbits, free_bits; + bit_buf_type put_buffer; JOCTET _buffer[BUFSIZE], *buffer; - size_t put_buffer; int put_bits; - int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0]; - size_t bytes, bytestocopy; int localbuf = 0; + int localbuf = 0; - put_buffer = state->cur.put_buffer; - put_bits = state->cur.put_bits; + free_bits = state->cur.free_bits; + put_buffer = state->cur.put_buffer.c; LOAD_BUFFER() /* Encode the DC coefficient difference per section F.1.2.1 */ - temp = temp2 = block[0] - last_dc_val; + temp = block[0] - last_dc_val; /* This is a well-known technique for obtaining the absolute value without a * branch. It is derived from an assembly language technique presented in * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by - * Agner Fog. + * Agner Fog. This code assumes we are on a two's complement machine. */ - temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); - temp ^= temp3; - temp -= temp3; - - /* For a negative input, want temp2 = bitwise complement of abs(input) */ - /* This code assumes we are on a two's complement machine */ - temp2 += temp3; + nbits = temp >> (CHAR_BIT * sizeof(int) - 1); + temp += nbits; + nbits ^= temp; /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = JPEG_NBITS(temp); - - /* Emit the Huffman-coded symbol for the number of bits */ - code = dctbl->ehufco[nbits]; - size = dctbl->ehufsi[nbits]; - EMIT_BITS(code, size) + nbits = JPEG_NBITS(nbits); - /* Mask off any extra bits in code */ - temp2 &= (((JLONG)1) << nbits) - 1; - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - EMIT_BITS(temp2, nbits) + /* Emit the Huffman-coded symbol for the number of bits. 
+ * Emit that number of bits of the value, if positive, + * or the complement of its magnitude, if negative. + */ + PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits]) /* Encode the AC coefficients per section F.1.2.2 */ - r = 0; /* r = run length of zeros */ + { + int r = 0; /* r = run length of zeros */ /* Manually unroll the k loop to eliminate the counter variable. This * improves performance greatly on systems with a limited number of @@ -563,51 +607,46 @@ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val, */ #define kloop(jpeg_natural_order_of_k) { \ if ((temp = block[jpeg_natural_order_of_k]) == 0) { \ - r++; \ + r += 16; \ } else { \ - temp2 = temp; \ /* Branch-less absolute value, bitwise complement, etc., same as above */ \ - temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \ - temp ^= temp3; \ - temp -= temp3; \ - temp2 += temp3; \ - nbits = JPEG_NBITS_NONZERO(temp); \ + nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp += nbits; \ + nbits ^= temp; \ + nbits = JPEG_NBITS_NONZERO(nbits); \ /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ - while (r > 15) { \ - EMIT_BITS(code_0xf0, size_0xf0) \ - r -= 16; \ + while (r >= 16 * 16) { \ + r -= 16 * 16; \ + PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \ } \ /* Emit Huffman symbol for run length / number of bits */ \ - temp3 = (r << 4) + nbits; \ - code = actbl->ehufco[temp3]; \ - size = actbl->ehufsi[temp3]; \ - EMIT_CODE(code, size) \ + r += nbits; \ + PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \ r = 0; \ } \ } - /* One iteration for each value in jpeg_natural_order[] */ - kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); - kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); - kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); - kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); - kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); - kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); 
kloop(57); - kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); - kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); - kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); - kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); - kloop(55); kloop(62); kloop(63); - - /* If the last coef(s) were zero, emit an end-of-block code */ - if (r > 0) { - code = actbl->ehufco[0]; - size = actbl->ehufsi[0]; - EMIT_BITS(code, size) + /* One iteration for each value in jpeg_natural_order[] */ + kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); + kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); + kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); + kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); + kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); + kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); + kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); + kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); + kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); + kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); + kloop(55); kloop(62); kloop(63); + + /* If the last coef(s) were zero, emit an end-of-block code */ + if (r > 0) { + PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0]) + } } - state->cur.put_buffer = put_buffer; - state->cur.put_bits = put_bits; + state->cur.put_buffer.c = put_buffer; + state->cur.free_bits = free_bits; STORE_BUFFER() return TRUE; @@ -654,8 +693,9 @@ encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ state.next_output_byte = cinfo->dest->next_output_byte; state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); + state.cur = entropy->saved; state.cinfo = cinfo; + state.simd = entropy->simd; /* Emit restart marker if needed */ if (cinfo->restart_interval) { @@ -694,7 +734,7 @@ 
encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ cinfo->dest->next_output_byte = state.next_output_byte; cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); + entropy->saved = state.cur; /* Update restart-interval state too */ if (cinfo->restart_interval) { @@ -723,8 +763,9 @@ finish_pass_huff(j_compress_ptr cinfo) /* Load up working state ... flush_bits needs it */ state.next_output_byte = cinfo->dest->next_output_byte; state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); + state.cur = entropy->saved; state.cinfo = cinfo; + state.simd = entropy->simd; /* Flush out the last data */ if (!flush_bits(&state)) @@ -733,7 +774,7 @@ finish_pass_huff(j_compress_ptr cinfo) /* Update state */ cinfo->dest->next_output_byte = state.next_output_byte; cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); + entropy->saved = state.cur; } diff --git a/src/3rdparty/libjpeg/src/jconfig.h b/src/3rdparty/libjpeg/src/jconfig.h index 74a74f754c..fa82022edc 100644 --- a/src/3rdparty/libjpeg/src/jconfig.h +++ b/src/3rdparty/libjpeg/src/jconfig.h @@ -2,9 +2,9 @@ #define JPEG_LIB_VERSION 80 -#define LIBJPEG_TURBO_VERSION 2.0.6 +#define LIBJPEG_TURBO_VERSION 2.1.0 -#define LIBJPEG_TURBO_VERSION_NUMBER 2000006 +#define LIBJPEG_TURBO_VERSION_NUMBER 2001000 #define C_ARITH_CODING_SUPPORTED 1 diff --git a/src/3rdparty/libjpeg/src/jconfig.h.in b/src/3rdparty/libjpeg/src/jconfig.h.in index 18a69a4814..d4284d97b8 100644 --- a/src/3rdparty/libjpeg/src/jconfig.h.in +++ b/src/3rdparty/libjpeg/src/jconfig.h.in @@ -61,11 +61,6 @@ unsigned. */ #cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1 -/* Define to 1 if type `char' is unsigned and you are not using gcc. */ -#ifndef __CHAR_UNSIGNED__ - #cmakedefine __CHAR_UNSIGNED__ 1 -#endif - /* Define to empty if `const' does not conform to ANSI C. 
*/ /* #undef const */ diff --git a/src/3rdparty/libjpeg/jconfigint.h b/src/3rdparty/libjpeg/src/jconfigint.h index 40d7748e10..cfcac904c5 100644 --- a/src/3rdparty/libjpeg/jconfigint.h +++ b/src/3rdparty/libjpeg/src/jconfigint.h @@ -8,7 +8,7 @@ #define PACKAGE_NAME "libjpeg-turbo" -#define VERSION "2.0.6" +#define VERSION "2.1.0" #if SIZE_MAX == 0xffffffff #define SIZEOF_SIZE_T 4 diff --git a/src/3rdparty/libjpeg/src/jcphuff.c b/src/3rdparty/libjpeg/src/jcphuff.c index a8b94bed84..9bf96124b4 100644 --- a/src/3rdparty/libjpeg/src/jcphuff.c +++ b/src/3rdparty/libjpeg/src/jcphuff.c @@ -4,8 +4,9 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2011, 2015, 2018, D. R. Commander. + * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander. * Copyright (C) 2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -52,14 +53,19 @@ */ /* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ + defined(_M_ARM) || defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? 
JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" @@ -169,24 +175,26 @@ INLINE METHODDEF(int) count_zeroes(size_t *x) { - int result; #if defined(HAVE_BUILTIN_CTZL) + int result; result = __builtin_ctzl(*x); *x >>= result; #elif defined(HAVE_BITSCANFORWARD64) + unsigned long result; _BitScanForward64(&result, *x); *x >>= result; #elif defined(HAVE_BITSCANFORWARD) + unsigned long result; _BitScanForward(&result, *x); *x >>= result; #else - result = 0; + int result = 0; while ((*x & 1) == 0) { ++result; *x >>= 1; } #endif - return result; + return (int)result; } @@ -860,7 +868,7 @@ encode_mcu_AC_refine_prepare(const JCOEF *block, #define ENCODE_COEFS_AC_REFINE(label) { \ while (zerobits) { \ - int idx = count_zeroes(&zerobits); \ + idx = count_zeroes(&zerobits); \ r += idx; \ cabsvalue += idx; \ signbits >>= idx; \ @@ -917,7 +925,7 @@ METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) { phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; - register int temp, r; + register int temp, r, idx; char *BR_buffer; unsigned int BR; int Sl = cinfo->Se - cinfo->Ss + 1; @@ -968,7 +976,7 @@ encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) if (zerobits) { int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); - int idx = count_zeroes(&zerobits); + idx = count_zeroes(&zerobits); signbits >>= idx; idx += diff; r += idx; diff --git a/src/3rdparty/libjpeg/src/jcsample.c b/src/3rdparty/libjpeg/src/jcsample.c index bd27b84e06..e8515ebf0f 100644 --- a/src/3rdparty/libjpeg/src/jcsample.c +++ b/src/3rdparty/libjpeg/src/jcsample.c @@ -6,7 +6,7 @@ * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright (C) 2014, MIPS Technologies, Inc., California. - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015, 2019, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -103,7 +103,7 @@ expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols, if (numcols > 0) { for (row = 0; row < num_rows; row++) { ptr = image_data[row] + input_cols; - pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ + pixval = ptr[-1]; for (count = numcols; count > 0; count--) *ptr++ = pixval; } @@ -174,7 +174,7 @@ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, for (v = 0; v < v_expand; v++) { inptr = input_data[inrow + v] + outcol_h; for (h = 0; h < h_expand; h++) { - outvalue += (JLONG)GETJSAMPLE(*inptr++); + outvalue += (JLONG)(*inptr++); } } *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix); @@ -237,8 +237,7 @@ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, inptr = input_data[outrow]; bias = 0; /* bias = 0,1,0,1,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { - *outptr++ = - (JSAMPLE)((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) + bias) >> 1); + *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1); bias ^= 1; /* 0=>1, 1=>0 */ inptr += 2; } @@ -277,8 +276,7 @@ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, bias = 1; /* bias = 1,2,1,2,... 
for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = - (JSAMPLE)((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) + bias) >> 2); + (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2); bias ^= 3; /* 1=>2, 2=>1 */ inptr0 += 2; inptr1 += 2; } @@ -337,33 +335,25 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, below_ptr = input_data[inrow + 2]; /* Special case for first column: pretend column -1 is same as column 0 */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; + neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2]; neighsum += neighsum; - neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); + neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2]; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; for (colctr = output_cols - 2; colctr > 0; colctr--) { /* sum of pixels directly mapped to this output element */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; /* sum of edge-neighbor pixels */ - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); + neighsum = above_ptr[0] + 
above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2]; /* The edge-neighbors count twice as much as corner-neighbors */ neighsum += neighsum; /* Add in the corner-neighbors */ - neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); + neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2]; /* form final output scaled up by 2^16 */ membersum = membersum * memberscale + neighsum * neighscale; /* round, descale and output it */ @@ -372,15 +362,11 @@ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, } /* Special case for last column */ - membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); - neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + - GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + - GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); + membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1]; + neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] + + inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1]; neighsum += neighsum; - neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + - GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); + neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1]; membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE)((membersum + 32768) >> 16); @@ -429,21 +415,18 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, below_ptr = input_data[outrow + 1]; /* Special case for first column */ - colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + - GETJSAMPLE(*inptr); - membersum = GETJSAMPLE(*inptr++); - nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + colsum = (*above_ptr++) + (*below_ptr++) + inptr[0]; + membersum = *inptr++; + 
nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0]; neighsum = colsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); lastcolsum = colsum; colsum = nextcolsum; for (colctr = output_cols - 2; colctr > 0; colctr--) { - membersum = GETJSAMPLE(*inptr++); + membersum = *inptr++; above_ptr++; below_ptr++; - nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + - GETJSAMPLE(*inptr); + nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0]; neighsum = lastcolsum + (colsum - membersum) + nextcolsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr++ = (JSAMPLE)((membersum + 32768) >> 16); @@ -451,7 +434,7 @@ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, } /* Special case for last column */ - membersum = GETJSAMPLE(*inptr); + membersum = *inptr; neighsum = lastcolsum + (colsum - membersum) + colsum; membersum = membersum * memberscale + neighsum * neighscale; *outptr = (JSAMPLE)((membersum + 32768) >> 16); diff --git a/src/3rdparty/libjpeg/src/jdapistd.c b/src/3rdparty/libjpeg/src/jdapistd.c index 38bd1110d9..695a620099 100644 --- a/src/3rdparty/libjpeg/src/jdapistd.c +++ b/src/3rdparty/libjpeg/src/jdapistd.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1994-1996, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2015-2018, 2020, D. R. Commander. + * Copyright (C) 2010, 2015-2020, D. R. Commander. * Copyright (C) 2015, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. 
@@ -319,6 +319,8 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { JDIMENSION n; my_master_ptr master = (my_master_ptr)cinfo->master; + JSAMPLE dummy_sample[1] = { 0 }; + JSAMPROW dummy_row = dummy_sample; JSAMPARRAY scanlines = NULL; void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, @@ -329,6 +331,10 @@ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) if (cinfo->cconvert && cinfo->cconvert->color_convert) { color_convert = cinfo->cconvert->color_convert; cinfo->cconvert->color_convert = noop_convert; + /* This just prevents UBSan from complaining about adding 0 to a NULL + * pointer. The pointer isn't actually used. + */ + scanlines = &dummy_row; } if (cinfo->cquantize && cinfo->cquantize->color_quantize) { @@ -532,6 +538,8 @@ jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) * decoded coefficients. This is ~5% faster for large subsets, but * it's tough to tell a difference for smaller images. */ + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; (*cinfo->entropy->decode_mcu) (cinfo, NULL); } } diff --git a/src/3rdparty/libjpeg/src/jdarith.c b/src/3rdparty/libjpeg/src/jdarith.c index 6002481e24..7f0d3a785c 100644 --- a/src/3rdparty/libjpeg/src/jdarith.c +++ b/src/3rdparty/libjpeg/src/jdarith.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2015 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2018, D. R. Commander. + * Copyright (C) 2015-2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -80,7 +80,7 @@ get_byte(j_decompress_ptr cinfo) if (!(*src->fill_input_buffer) (cinfo)) ERREXIT(cinfo, JERR_CANT_SUSPEND); src->bytes_in_buffer--; - return GETJOCTET(*src->next_input_byte++); + return *src->next_input_byte++; } @@ -665,8 +665,16 @@ bad: for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; int *coef_bit_ptr = &cinfo->coef_bits[cindex][0]; + int *prev_coef_bit_ptr = + &cinfo->coef_bits[cindex + cinfo->num_components][0]; if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi]; + else + prev_coef_bit_ptr[coefi] = 0; + } for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) @@ -727,6 +735,7 @@ bad: entropy->c = 0; entropy->a = 0; entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + entropy->pub.insufficient_data = FALSE; /* Initialize restart counter */ entropy->restarts_to_go = cinfo->restart_interval; @@ -763,7 +772,7 @@ jinit_arith_decoder(j_decompress_ptr cinfo) int *coef_bit_ptr, ci; cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * DCTSIZE2 * + cinfo->num_components * 2 * DCTSIZE2 * sizeof(int)); coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) diff --git a/src/3rdparty/libjpeg/src/jdcoefct.c b/src/3rdparty/libjpeg/src/jdcoefct.c index 2ba6aa11e4..15e6cded62 100644 --- a/src/3rdparty/libjpeg/src/jdcoefct.c +++ b/src/3rdparty/libjpeg/src/jdcoefct.c @@ -5,7 +5,7 @@ * Copyright (C) 1994-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB - * Copyright (C) 2010, 2015-2016, D. R. 
Commander. + * Copyright (C) 2010, 2015-2016, 2019-2020, D. R. Commander. * Copyright (C) 2015, 2020, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. @@ -102,6 +102,8 @@ decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */ jzero_far((void *)coef->MCU_buffer[0], (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK))); + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ coef->MCU_vert_offset = yoffset; @@ -227,6 +229,8 @@ consume_data(j_decompress_ptr cinfo) } } } + if (!cinfo->entropy->insufficient_data) + cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row; /* Try to fetch the MCU. */ if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) { /* Suspension forced; update state counters and exit */ @@ -326,19 +330,22 @@ decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) #ifdef BLOCK_SMOOTHING_SUPPORTED /* - * This code applies interblock smoothing as described by section K.8 - * of the JPEG standard: the first 5 AC coefficients are estimated from - * the DC values of a DCT block and its 8 neighboring blocks. + * This code applies interblock smoothing; the first 9 AC coefficients are + * estimated from the DC values of a DCT block and its 24 neighboring blocks. * We apply smoothing only for progressive JPEG decoding, and only if * the coefficients it can estimate are not yet known to full precision. 
*/ -/* Natural-order array positions of the first 5 zigzag-order coefficients */ +/* Natural-order array positions of the first 9 zigzag-order coefficients */ #define Q01_POS 1 #define Q10_POS 8 #define Q20_POS 16 #define Q11_POS 9 #define Q02_POS 2 +#define Q03_POS 3 +#define Q12_POS 10 +#define Q21_POS 17 +#define Q30_POS 24 /* * Determine whether block smoothing is applicable and safe. @@ -356,8 +363,8 @@ smoothing_ok(j_decompress_ptr cinfo) int ci, coefi; jpeg_component_info *compptr; JQUANT_TBL *qtable; - int *coef_bits; - int *coef_bits_latch; + int *coef_bits, *prev_coef_bits; + int *coef_bits_latch, *prev_coef_bits_latch; if (!cinfo->progressive_mode || cinfo->coef_bits == NULL) return FALSE; @@ -366,34 +373,47 @@ smoothing_ok(j_decompress_ptr cinfo) if (coef->coef_bits_latch == NULL) coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * + cinfo->num_components * 2 * (SAVED_COEFS * sizeof(int))); coef_bits_latch = coef->coef_bits_latch; + prev_coef_bits_latch = + &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS]; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* All components' quantization values must already be latched. */ if ((qtable = compptr->quant_table) == NULL) return FALSE; - /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */ + /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */ if (qtable->quantval[0] == 0 || qtable->quantval[Q01_POS] == 0 || qtable->quantval[Q10_POS] == 0 || qtable->quantval[Q20_POS] == 0 || qtable->quantval[Q11_POS] == 0 || - qtable->quantval[Q02_POS] == 0) + qtable->quantval[Q02_POS] == 0 || + qtable->quantval[Q03_POS] == 0 || + qtable->quantval[Q12_POS] == 0 || + qtable->quantval[Q21_POS] == 0 || + qtable->quantval[Q30_POS] == 0) return FALSE; /* DC values must be at least partly known for all components. 
*/ coef_bits = cinfo->coef_bits[ci]; + prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components]; if (coef_bits[0] < 0) return FALSE; + coef_bits_latch[0] = coef_bits[0]; /* Block smoothing is helpful if some AC coefficients remain inaccurate. */ - for (coefi = 1; coefi <= 5; coefi++) { + for (coefi = 1; coefi < SAVED_COEFS; coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bits_latch[coefi] = prev_coef_bits[coefi]; + else + prev_coef_bits_latch[coefi] = -1; coef_bits_latch[coefi] = coef_bits[coefi]; if (coef_bits[coefi] != 0) smoothing_useful = TRUE; } coef_bits_latch += SAVED_COEFS; + prev_coef_bits_latch += SAVED_COEFS; } return smoothing_useful; @@ -412,17 +432,20 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) JDIMENSION block_num, last_block_column; int ci, block_row, block_rows, access_rows; JBLOCKARRAY buffer; - JBLOCKROW buffer_ptr, prev_block_row, next_block_row; + JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row; + JBLOCKROW next_block_row, next_next_block_row; JSAMPARRAY output_ptr; JDIMENSION output_col; jpeg_component_info *compptr; inverse_DCT_method_ptr inverse_DCT; - boolean first_row, last_row; + boolean change_dc; JCOEF *workspace; int *coef_bits; JQUANT_TBL *quanttbl; - JLONG Q00, Q01, Q02, Q10, Q11, Q20, num; - int DC1, DC2, DC3, DC4, DC5, DC6, DC7, DC8, DC9; + JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num; + int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12, + DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24, + DC25; int Al, pred; /* Keep a local variable to avoid looking it up more than once */ @@ -434,10 +457,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (cinfo->input_scan_number == cinfo->output_scan_number) { /* If input is working on current scan, we ordinarily want it to * have completed the current row. 
But if input scan is DC, - * we want it to keep one row ahead so that next block row's DC + * we want it to keep two rows ahead so that next two block rows' DC * values are up to date. */ - JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0; + JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0; if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta) break; } @@ -452,34 +475,53 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) if (!compptr->component_needed) continue; /* Count non-dummy DCT block rows in this iMCU row. */ - if (cinfo->output_iMCU_row < last_iMCU_row) { + if (cinfo->output_iMCU_row < last_iMCU_row - 1) { + block_rows = compptr->v_samp_factor; + access_rows = block_rows * 3; /* this and next two iMCU rows */ + } else if (cinfo->output_iMCU_row < last_iMCU_row) { block_rows = compptr->v_samp_factor; access_rows = block_rows * 2; /* this and next iMCU row */ - last_row = FALSE; } else { /* NB: can't use last_row_height here; it is input-side-dependent! */ block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor); if (block_rows == 0) block_rows = compptr->v_samp_factor; access_rows = block_rows; /* this iMCU row only */ - last_row = TRUE; } /* Align the virtual buffer for this component. 
*/ - if (cinfo->output_iMCU_row > 0) { - access_rows += compptr->v_samp_factor; /* prior iMCU row too */ + if (cinfo->output_iMCU_row > 1) { + access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */ + buffer = (*cinfo->mem->access_virt_barray) + ((j_common_ptr)cinfo, coef->whole_image[ci], + (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor, + (JDIMENSION)access_rows, FALSE); + buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */ + } else if (cinfo->output_iMCU_row > 0) { buffer = (*cinfo->mem->access_virt_barray) ((j_common_ptr)cinfo, coef->whole_image[ci], (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor, (JDIMENSION)access_rows, FALSE); buffer += compptr->v_samp_factor; /* point to current iMCU row */ - first_row = FALSE; } else { buffer = (*cinfo->mem->access_virt_barray) ((j_common_ptr)cinfo, coef->whole_image[ci], (JDIMENSION)0, (JDIMENSION)access_rows, FALSE); - first_row = TRUE; } - /* Fetch component-dependent info */ - coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS); + /* Fetch component-dependent info. + * If the current scan is incomplete, then we use the component-dependent + * info from the previous scan. + */ + if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row) + coef_bits = + coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS); + else + coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS); + + /* We only do DC interpolation if no AC coefficient data is available. 
*/ + change_dc = + coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 && + coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 && + coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1; + quanttbl = compptr->quant_table; Q00 = quanttbl->quantval[0]; Q01 = quanttbl->quantval[Q01_POS]; @@ -487,27 +529,51 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) Q20 = quanttbl->quantval[Q20_POS]; Q11 = quanttbl->quantval[Q11_POS]; Q02 = quanttbl->quantval[Q02_POS]; + if (change_dc) { + Q03 = quanttbl->quantval[Q03_POS]; + Q12 = quanttbl->quantval[Q12_POS]; + Q21 = quanttbl->quantval[Q21_POS]; + Q30 = quanttbl->quantval[Q30_POS]; + } inverse_DCT = cinfo->idct->inverse_DCT[ci]; output_ptr = output_buf[ci]; /* Loop over all DCT blocks to be processed. */ for (block_row = 0; block_row < block_rows; block_row++) { buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci]; - if (first_row && block_row == 0) + + if (block_row > 0 || cinfo->output_iMCU_row > 0) + prev_block_row = + buffer[block_row - 1] + cinfo->master->first_MCU_col[ci]; + else prev_block_row = buffer_ptr; + + if (block_row > 1 || cinfo->output_iMCU_row > 1) + prev_prev_block_row = + buffer[block_row - 2] + cinfo->master->first_MCU_col[ci]; + else + prev_prev_block_row = prev_block_row; + + if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row) + next_block_row = + buffer[block_row + 1] + cinfo->master->first_MCU_col[ci]; else - prev_block_row = buffer[block_row - 1] + - cinfo->master->first_MCU_col[ci]; - if (last_row && block_row == block_rows - 1) next_block_row = buffer_ptr; + + if (block_row < block_rows - 2 || + cinfo->output_iMCU_row < last_iMCU_row - 1) + next_next_block_row = + buffer[block_row + 2] + cinfo->master->first_MCU_col[ci]; else - next_block_row = buffer[block_row + 1] + - cinfo->master->first_MCU_col[ci]; + next_next_block_row = next_block_row; + /* We fetch the surrounding DC values using a sliding-register 
approach. - * Initialize all nine here so as to do the right thing on narrow pics. + * Initialize all 25 here so as to do the right thing on narrow pics. */ - DC1 = DC2 = DC3 = (int)prev_block_row[0][0]; - DC4 = DC5 = DC6 = (int)buffer_ptr[0][0]; - DC7 = DC8 = DC9 = (int)next_block_row[0][0]; + DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0]; + DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0]; + DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0]; + DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0]; + DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0]; output_col = 0; last_block_column = compptr->width_in_blocks - 1; for (block_num = cinfo->master->first_MCU_col[ci]; @@ -515,18 +581,39 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) /* Fetch current DCT block into workspace so we can modify it. */ jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1); /* Update DC values */ - if (block_num < last_block_column) { - DC3 = (int)prev_block_row[1][0]; - DC6 = (int)buffer_ptr[1][0]; - DC9 = (int)next_block_row[1][0]; + if (block_num == cinfo->master->first_MCU_col[ci] && + block_num < last_block_column) { + DC04 = (int)prev_prev_block_row[1][0]; + DC09 = (int)prev_block_row[1][0]; + DC14 = (int)buffer_ptr[1][0]; + DC19 = (int)next_block_row[1][0]; + DC24 = (int)next_next_block_row[1][0]; } - /* Compute coefficient estimates per K.8. - * An estimate is applied only if coefficient is still zero, - * and is not known to be fully accurate. + if (block_num + 1 < last_block_column) { + DC05 = (int)prev_prev_block_row[2][0]; + DC10 = (int)prev_block_row[2][0]; + DC15 = (int)buffer_ptr[2][0]; + DC20 = (int)next_block_row[2][0]; + DC25 = (int)next_next_block_row[2][0]; + } + /* If DC interpolation is enabled, compute coefficient estimates using + * a Gaussian-like kernel, keeping the averages of the DC values. 
+ * + * If DC interpolation is disabled, compute coefficient estimates using + * an algorithm similar to the one described in Section K.8 of the JPEG + * standard, except applied to a 5x5 window rather than a 3x3 window. + * + * An estimate is applied only if the coefficient is still zero and is + * not known to be fully accurate. */ /* AC01 */ if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) { - num = 36 * Q00 * (DC4 - DC6); + num = Q00 * (change_dc ? + (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 - + 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 + + 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 - + DC21 - DC22 + DC24 + DC25) : + (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15)); if (num >= 0) { pred = (int)(((Q01 << 7) + num) / (Q01 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -541,7 +628,12 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC10 */ if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) { - num = 36 * Q00 * (DC2 - DC8); + num = Q00 * (change_dc ? + (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 + + 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 - + 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 + + 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) : + (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23)); if (num >= 0) { pred = (int)(((Q10 << 7) + num) / (Q10 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -556,7 +648,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC20 */ if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) { - num = 9 * Q00 * (DC2 + DC8 - 2 * DC5); + num = Q00 * (change_dc ? 
+ (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 - + 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) : + (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23)); if (num >= 0) { pred = (int)(((Q20 << 7) + num) / (Q20 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -571,7 +666,11 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC11 */ if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) { - num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9); + num = Q00 * (change_dc ? + (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 + + 9 * DC19 + DC21 - DC25) : + (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 - + DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09)); if (num >= 0) { pred = (int)(((Q11 << 7) + num) / (Q11 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -586,7 +685,10 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } /* AC02 */ if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) { - num = 9 * Q00 * (DC4 + DC6 - 2 * DC5); + num = Q00 * (change_dc ? 
+ (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 + + 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) : + (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15)); if (num >= 0) { pred = (int)(((Q02 << 7) + num) / (Q02 << 8)); if (Al > 0 && pred >= (1 << Al)) @@ -599,14 +701,96 @@ decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf) } workspace[2] = (JCOEF)pred; } + if (change_dc) { + /* AC03 */ + if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) { + num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19); + if (num >= 0) { + pred = (int)(((Q03 << 7) + num) / (Q03 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q03 << 7) - num) / (Q03 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[3] = (JCOEF)pred; + } + /* AC12 */ + if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) { + num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19); + if (num >= 0) { + pred = (int)(((Q12 << 7) + num) / (Q12 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q12 << 7) - num) / (Q12 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[10] = (JCOEF)pred; + } + /* AC21 */ + if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) { + num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19); + if (num >= 0) { + pred = (int)(((Q21 << 7) + num) / (Q21 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = (int)(((Q21 << 7) - num) / (Q21 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[17] = (JCOEF)pred; + } + /* AC30 */ + if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) { + num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19); + if (num >= 0) { + pred = (int)(((Q30 << 7) + num) / (Q30 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + } else { + pred = 
(int)(((Q30 << 7) - num) / (Q30 << 8)); + if (Al > 0 && pred >= (1 << Al)) + pred = (1 << Al) - 1; + pred = -pred; + } + workspace[24] = (JCOEF)pred; + } + /* coef_bits[0] is non-negative. Otherwise this function would not + * be called. + */ + num = Q00 * + (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 - + 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 - + 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 - + 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 - + 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25); + if (num >= 0) { + pred = (int)(((Q00 << 7) + num) / (Q00 << 8)); + } else { + pred = (int)(((Q00 << 7) - num) / (Q00 << 8)); + pred = -pred; + } + workspace[0] = (JCOEF)pred; + } /* change_dc */ + /* OK, do the IDCT */ (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr, output_col); /* Advance for next column */ - DC1 = DC2; DC2 = DC3; - DC4 = DC5; DC5 = DC6; - DC7 = DC8; DC8 = DC9; - buffer_ptr++, prev_block_row++, next_block_row++; + DC01 = DC02; DC02 = DC03; DC03 = DC04; DC04 = DC05; + DC06 = DC07; DC07 = DC08; DC08 = DC09; DC09 = DC10; + DC11 = DC12; DC12 = DC13; DC13 = DC14; DC14 = DC15; + DC16 = DC17; DC17 = DC18; DC18 = DC19; DC19 = DC20; + DC21 = DC22; DC22 = DC23; DC23 = DC24; DC24 = DC25; + buffer_ptr++, prev_block_row++, next_block_row++, + prev_prev_block_row++, next_next_block_row++; output_col += compptr->_DCT_scaled_size; } output_ptr += compptr->_DCT_scaled_size; @@ -655,7 +839,7 @@ jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer) #ifdef BLOCK_SMOOTHING_SUPPORTED /* If block smoothing could be used, need a bigger window */ if (cinfo->progressive_mode) - access_rows *= 3; + access_rows *= 5; #endif coef->whole_image[ci] = (*cinfo->mem->request_virt_barray) ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE, diff --git a/src/3rdparty/libjpeg/src/jdcoefct.h b/src/3rdparty/libjpeg/src/jdcoefct.h index c4d1943dd4..9a0e780663 100644 --- a/src/3rdparty/libjpeg/src/jdcoefct.h +++ 
b/src/3rdparty/libjpeg/src/jdcoefct.h @@ -5,6 +5,7 @@ * Copyright (C) 1994-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB + * Copyright (C) 2020, Google, Inc. * For conditions of distribution and use, see the accompanying README.ijg * file. */ @@ -51,7 +52,7 @@ typedef struct { #ifdef BLOCK_SMOOTHING_SUPPORTED /* When doing block smoothing, we latch coefficient Al values here */ int *coef_bits_latch; -#define SAVED_COEFS 6 /* we save coef_bits[0..5] */ +#define SAVED_COEFS 10 /* we save coef_bits[0..9] */ #endif } my_coef_controller; diff --git a/src/3rdparty/libjpeg/src/jdcol565.c b/src/3rdparty/libjpeg/src/jdcol565.c index 40068ef84f..53c7bd9187 100644 --- a/src/3rdparty/libjpeg/src/jdcol565.c +++ b/src/3rdparty/libjpeg/src/jdcol565.c @@ -45,9 +45,9 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -58,18 +58,18 @@ ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; b = range_limit[y + Cbbtab[cb]]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -80,9 +80,9 @@ 
ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - y = GETJSAMPLE(*inptr0); - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + y = *inptr0; + cb = *inptr1; + cr = *inptr2; r = range_limit[y + Crrtab[cr]]; g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS))]; @@ -125,9 +125,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -139,9 +139,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -150,9 +150,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + y = *inptr0++; + cb = *inptr1++; + cr = *inptr2++; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -165,9 +165,9 @@ ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - y = GETJSAMPLE(*inptr0); - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + y = *inptr0; + cb = *inptr1; + cr = *inptr2; r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)]; g = range_limit[DITHER_565_G(y + 
((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], @@ -202,32 +202,32 @@ rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_SHORT_565(r, g, b); - r = GETJSAMPLE(*inptr0++); - g = GETJSAMPLE(*inptr1++); - b = GETJSAMPLE(*inptr2++); + r = *inptr0++; + g = *inptr1++; + b = *inptr2++; rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); WRITE_TWO_ALIGNED_PIXELS(outptr, rgb); outptr += 4; } if (num_cols & 1) { - r = GETJSAMPLE(*inptr0); - g = GETJSAMPLE(*inptr1); - b = GETJSAMPLE(*inptr2); + r = *inptr0; + g = *inptr1; + b = *inptr2; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; } @@ -259,24 +259,24 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; if (PACK_NEED_ALIGNMENT(outptr)) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; outptr += 2; num_cols--; } for (col = 0; col < (num_cols >> 1); col++) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = 
range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)]; + r = range_limit[DITHER_565_R(*inptr0++, d0)]; + g = range_limit[DITHER_565_G(*inptr1++, d0)]; + b = range_limit[DITHER_565_B(*inptr2++, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b)); @@ -284,9 +284,9 @@ rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr += 4; } if (num_cols & 1) { - r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)]; - g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)]; - b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)]; + r = range_limit[DITHER_565_R(*inptr0, d0)]; + g = range_limit[DITHER_565_G(*inptr1, d0)]; + b = range_limit[DITHER_565_B(*inptr2, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr = (INT16)rgb; } diff --git a/src/3rdparty/libjpeg/src/jdcolext.c b/src/3rdparty/libjpeg/src/jdcolext.c index 72a5301070..863c7a2fbc 100644 --- a/src/3rdparty/libjpeg/src/jdcolext.c +++ b/src/3rdparty/libjpeg/src/jdcolext.c @@ -53,9 +53,9 @@ ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); + y = inptr0[col]; + cb = inptr1[col]; + cr = inptr2[col]; /* Range-limiting is essential due to noise introduced by DCT losses. 
*/ outptr[RGB_RED] = range_limit[y + Crrtab[cr]]; outptr[RGB_GREEN] = range_limit[y + @@ -93,7 +93,6 @@ gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, inptr = input_buf[0][input_row++]; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col]; /* Set unused byte to 0xFF so it can be interpreted as an opaque */ /* alpha channel value */ @@ -128,7 +127,6 @@ rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - /* We can dispense with GETJSAMPLE() here */ outptr[RGB_RED] = inptr0[col]; outptr[RGB_GREEN] = inptr1[col]; outptr[RGB_BLUE] = inptr2[col]; diff --git a/src/3rdparty/libjpeg/src/jdcolor.c b/src/3rdparty/libjpeg/src/jdcolor.c index d3ae40c7da..8da2b4eaf2 100644 --- a/src/3rdparty/libjpeg/src/jdcolor.c +++ b/src/3rdparty/libjpeg/src/jdcolor.c @@ -341,9 +341,9 @@ rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - r = GETJSAMPLE(inptr0[col]); - g = GETJSAMPLE(inptr1[col]); - b = GETJSAMPLE(inptr2[col]); + r = inptr0[col]; + g = inptr1[col]; + b = inptr2[col]; /* Y */ outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] + ctab[b + B_Y_OFF]) >> SCALEBITS); @@ -550,9 +550,9 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, input_row++; outptr = *output_buf++; for (col = 0; col < num_cols; col++) { - y = GETJSAMPLE(inptr0[col]); - cb = GETJSAMPLE(inptr1[col]); - cr = GETJSAMPLE(inptr2[col]); + y = inptr0[col]; + cb = inptr1[col]; + cr = inptr2[col]; /* Range-limiting is essential due to noise introduced by DCT losses. 
*/ outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */ outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */ @@ -560,7 +560,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, SCALEBITS)))]; outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */ /* K passes through unchanged */ - outptr[3] = inptr3[col]; /* don't need GETJSAMPLE here */ + outptr[3] = inptr3[col]; outptr += 4; } } diff --git a/src/3rdparty/libjpeg/src/jdhuff.c b/src/3rdparty/libjpeg/src/jdhuff.c index a1128178b0..f786c10547 100644 --- a/src/3rdparty/libjpeg/src/jdhuff.c +++ b/src/3rdparty/libjpeg/src/jdhuff.c @@ -5,6 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander. + * Copyright (C) 2018, Matthias Räncker. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -39,24 +40,6 @@ typedef struct { int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. 
- */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ @@ -325,7 +308,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state, bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); + c = *next_input_byte++; /* If it's 0xFF, check and discard stuffed zero byte */ if (c == 0xFF) { @@ -342,7 +325,7 @@ jpeg_fill_bit_buffer(bitread_working_state *state, bytes_in_buffer = cinfo->src->bytes_in_buffer; } bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); + c = *next_input_byte++; } while (c == 0xFF); if (c == 0) { @@ -405,8 +388,8 @@ no_more_bytes: #define GET_BYTE { \ register int c0, c1; \ - c0 = GETJOCTET(*buffer++); \ - c1 = GETJOCTET(*buffer); \ + c0 = *buffer++; \ + c1 = *buffer; \ /* Pre-execute most common case */ \ get_buffer = (get_buffer << 8) | c0; \ bits_left += 8; \ @@ -423,7 +406,7 @@ no_more_bytes: } \ } -#if SIZEOF_SIZE_T == 8 || defined(_WIN64) +#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__)) /* Pre-fetch 48 bytes, because the holding register is 64-bit */ #define FILL_BIT_BUFFER_FAST \ @@ -557,6 +540,12 @@ process_restart(j_decompress_ptr cinfo) } +#if defined(__has_feature) +#if __has_feature(undefined_behavior_sanitizer) +__attribute__((no_sanitize("signed-integer-overflow"), + no_sanitize("unsigned-integer-overflow"))) +#endif +#endif LOCAL(boolean) decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) { @@ -568,7 +557,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); - 
ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL; @@ -589,11 +578,15 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->dc_needed[blkn]) { /* Convert DC difference to actual value, update last_dc_val */ int ci = cinfo->MCU_membership[blkn]; - /* This is really just - * s += state.last_dc_val[ci]; - * It is written this way in order to shut up UBSan. + /* Certain malformed JPEG images produce repeated DC coefficient + * differences of 2047 or -2047, which causes state.last_dc_val[ci] to + * grow until it overflows or underflows a 32-bit signed integer. This + * behavior is, to the best of our understanding, innocuous, and it is + * unclear how to work around it without potentially affecting + * performance. Thus, we (hopefully temporarily) suppress UBSan integer + * overflow errors for this function. */ - s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]); + s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; if (block) { /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ @@ -653,7 +646,7 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; return TRUE; } @@ -671,7 +664,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); buffer = (JOCTET *)br_state.next_input_byte; - ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { JBLOCKROW block = MCU_data ? 
MCU_data[blkn] : NULL; @@ -688,7 +681,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) if (entropy->dc_needed[blkn]) { int ci = cinfo->MCU_membership[blkn]; - s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]); + s += state.last_dc_val[ci]; state.last_dc_val[ci] = s; if (block) (*block)[0] = (JCOEF)s; @@ -740,7 +733,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte); br_state.next_input_byte = buffer; BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; return TRUE; } @@ -795,7 +788,8 @@ use_slow: } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } diff --git a/src/3rdparty/libjpeg/src/jdhuff.h b/src/3rdparty/libjpeg/src/jdhuff.h index 6a8d90f402..cfa0b7f558 100644 --- a/src/3rdparty/libjpeg/src/jdhuff.h +++ b/src/3rdparty/libjpeg/src/jdhuff.h @@ -4,7 +4,8 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2010-2011, 2015-2016, D. R. Commander. + * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander. + * Copyright (C) 2018, Matthias Räncker. * For conditions of distribution and use, see the accompanying README.ijg * file. 
* @@ -78,6 +79,11 @@ EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, typedef size_t bit_buf_type; /* type of bit-extraction buffer */ #define BIT_BUF_SIZE 64 /* size of buffer in bits */ +#elif defined(__x86_64__) && defined(__ILP32__) + +typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */ +#define BIT_BUF_SIZE 64 /* size of buffer in bits */ + #else typedef unsigned long bit_buf_type; /* type of bit-extraction buffer */ @@ -228,7 +234,10 @@ slowlabel: \ s |= GET_BITS(1); \ nb++; \ } \ - s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \ + if (nb > 16) \ + s = 0; \ + else \ + s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \ } /* Out-of-line case for Huffman code fetching */ diff --git a/src/3rdparty/libjpeg/src/jdmarker.c b/src/3rdparty/libjpeg/src/jdmarker.c index c9c7ef6399..b964c3a1a6 100644 --- a/src/3rdparty/libjpeg/src/jdmarker.c +++ b/src/3rdparty/libjpeg/src/jdmarker.c @@ -151,7 +151,7 @@ typedef my_marker_reader *my_marker_ptr; #define INPUT_BYTE(cinfo, V, action) \ MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V = GETJOCTET(*next_input_byte++); ) + V = *next_input_byte++; ) /* As above, but read two bytes interpreted as an unsigned 16-bit integer. * V should be declared unsigned int or perhaps JLONG. 
@@ -159,10 +159,10 @@ typedef my_marker_reader *my_marker_ptr; #define INPUT_2BYTES(cinfo, V, action) \ MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V = ((unsigned int)GETJOCTET(*next_input_byte++)) << 8; \ + V = ((unsigned int)(*next_input_byte++)) << 8; \ MAKE_BYTE_AVAIL(cinfo, action); \ bytes_in_buffer--; \ - V += GETJOCTET(*next_input_byte++); ) + V += *next_input_byte++; ) /* @@ -608,18 +608,18 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, JLONG totallen = (JLONG)datalen + remaining; if (datalen >= APP0_DATA_LEN && - GETJOCTET(data[0]) == 0x4A && - GETJOCTET(data[1]) == 0x46 && - GETJOCTET(data[2]) == 0x49 && - GETJOCTET(data[3]) == 0x46 && - GETJOCTET(data[4]) == 0) { + data[0] == 0x4A && + data[1] == 0x46 && + data[2] == 0x49 && + data[3] == 0x46 && + data[4] == 0) { /* Found JFIF APP0 marker: save info */ cinfo->saw_JFIF_marker = TRUE; - cinfo->JFIF_major_version = GETJOCTET(data[5]); - cinfo->JFIF_minor_version = GETJOCTET(data[6]); - cinfo->density_unit = GETJOCTET(data[7]); - cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]); - cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]); + cinfo->JFIF_major_version = data[5]; + cinfo->JFIF_minor_version = data[6]; + cinfo->density_unit = data[7]; + cinfo->X_density = (data[8] << 8) + data[9]; + cinfo->Y_density = (data[10] << 8) + data[11]; /* Check version. * Major version must be 1, anything else signals an incompatible change. 
* (We used to treat this as an error, but now it's a nonfatal warning, @@ -634,24 +634,22 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, cinfo->JFIF_major_version, cinfo->JFIF_minor_version, cinfo->X_density, cinfo->Y_density, cinfo->density_unit); /* Validate thumbnail dimensions and issue appropriate messages */ - if (GETJOCTET(data[12]) | GETJOCTET(data[13])) - TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, - GETJOCTET(data[12]), GETJOCTET(data[13])); + if (data[12] | data[13]) + TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]); totallen -= APP0_DATA_LEN; - if (totallen != - ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG)3)) + if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3)) TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen); } else if (datalen >= 6 && - GETJOCTET(data[0]) == 0x4A && - GETJOCTET(data[1]) == 0x46 && - GETJOCTET(data[2]) == 0x58 && - GETJOCTET(data[3]) == 0x58 && - GETJOCTET(data[4]) == 0) { + data[0] == 0x4A && + data[1] == 0x46 && + data[2] == 0x58 && + data[3] == 0x58 && + data[4] == 0) { /* Found JFIF "JFXX" extension APP0 marker */ /* The library doesn't actually do anything with these, * but we try to produce a helpful trace message. 
*/ - switch (GETJOCTET(data[5])) { + switch (data[5]) { case 0x10: TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen); break; @@ -662,8 +660,7 @@ examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen); break; default: - TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, - GETJOCTET(data[5]), (int)totallen); + TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen); break; } } else { @@ -684,16 +681,16 @@ examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen, unsigned int version, flags0, flags1, transform; if (datalen >= APP14_DATA_LEN && - GETJOCTET(data[0]) == 0x41 && - GETJOCTET(data[1]) == 0x64 && - GETJOCTET(data[2]) == 0x6F && - GETJOCTET(data[3]) == 0x62 && - GETJOCTET(data[4]) == 0x65) { + data[0] == 0x41 && + data[1] == 0x64 && + data[2] == 0x6F && + data[3] == 0x62 && + data[4] == 0x65) { /* Found Adobe APP14 marker */ - version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]); - flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]); - flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]); - transform = GETJOCTET(data[11]); + version = (data[5] << 8) + data[6]; + flags0 = (data[7] << 8) + data[8]; + flags1 = (data[9] << 8) + data[10]; + transform = data[11]; TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform); cinfo->saw_Adobe_marker = TRUE; cinfo->Adobe_transform = (UINT8)transform; diff --git a/src/3rdparty/libjpeg/src/jdmaster.c b/src/3rdparty/libjpeg/src/jdmaster.c index b20906438e..cbc8774b1f 100644 --- a/src/3rdparty/libjpeg/src/jdmaster.c +++ b/src/3rdparty/libjpeg/src/jdmaster.c @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 2002-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2009-2011, 2016, D. R. Commander. + * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander. * Copyright (C) 2013, Linaro Limited. * Copyright (C) 2015, Google, Inc. 
* For conditions of distribution and use, see the accompanying README.ijg @@ -22,7 +22,6 @@ #include "jpeglib.h" #include "jpegcomp.h" #include "jdmaster.h" -#include "jsimd.h" /* @@ -70,17 +69,6 @@ use_merged_upsample(j_decompress_ptr cinfo) cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; -#ifdef WITH_SIMD - /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling - isn't, then disabling merged upsampling is likely to be faster when - decompressing YCbCr JPEG images. */ - if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() && - jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr && - (cinfo->out_color_space == JCS_RGB || - (cinfo->out_color_space >= JCS_EXT_RGB && - cinfo->out_color_space <= JCS_EXT_ARGB))) - return FALSE; -#endif /* ??? also need to test for upsample-time rescaling, when & if supported */ return TRUE; /* by golly, it'll work... 
*/ #else @@ -580,6 +568,7 @@ master_selection(j_decompress_ptr cinfo) */ cinfo->master->first_iMCU_col = 0; cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1; + cinfo->master->last_good_iMCU_row = 0; #ifdef D_MULTISCAN_FILES_SUPPORTED /* If jpeg_start_decompress will read the whole file, initialize diff --git a/src/3rdparty/libjpeg/src/jdmrg565.c b/src/3rdparty/libjpeg/src/jdmrg565.c index 53f1e16700..980a4e216e 100644 --- a/src/3rdparty/libjpeg/src/jdmrg565.c +++ b/src/3rdparty/libjpeg/src/jdmrg565.c @@ -43,20 +43,20 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -68,12 +68,12 @@ h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -115,21 +115,21 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of 
the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -142,12 +142,12 @@ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -189,20 +189,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + 
cblue]; @@ -211,13 +211,13 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, WRITE_TWO_PIXELS(outptr0, rgb); outptr0 += 4; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -229,20 +229,20 @@ h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr0 = (INT16)rgb; - y = GETJSAMPLE(*inptr01); + y = *inptr01; r = range_limit[y + cred]; g = range_limit[y + cgreen]; b = range_limit[y + cblue]; @@ -287,21 +287,21 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; d0 = DITHER_ROTATE(d0); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, 
d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; @@ -311,14 +311,14 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, WRITE_TWO_PIXELS(outptr0, rgb); outptr0 += 4; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; d1 = DITHER_ROTATE(d1); rgb = PACK_SHORT_565(r, g, b); - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; @@ -331,20 +331,20 @@ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo, /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; r = range_limit[DITHER_565_R(y + cred, d0)]; g = range_limit[DITHER_565_G(y + cgreen, d0)]; b = range_limit[DITHER_565_B(y + cblue, d0)]; rgb = PACK_SHORT_565(r, g, b); *(INT16 *)outptr0 = (INT16)rgb; - y = GETJSAMPLE(*inptr01); + y = *inptr01; r = range_limit[DITHER_565_R(y + cred, d1)]; g = range_limit[DITHER_565_G(y + cgreen, d1)]; b = range_limit[DITHER_565_B(y + cblue, d1)]; diff --git a/src/3rdparty/libjpeg/src/jdmrgext.c b/src/3rdparty/libjpeg/src/jdmrgext.c index c9a44d8219..9bf4f1a307 100644 --- a/src/3rdparty/libjpeg/src/jdmrgext.c +++ b/src/3rdparty/libjpeg/src/jdmrgext.c @@ -46,13 +46,13 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each pair of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + 
Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 2 Y values and emit 2 pixels */ - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -60,7 +60,7 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr[RGB_ALPHA] = 0xFF; #endif outptr += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr0++); + y = *inptr0++; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -71,12 +71,12 @@ h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, } /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr0); + y = *inptr0; outptr[RGB_RED] = range_limit[y + cred]; outptr[RGB_GREEN] = range_limit[y + cgreen]; outptr[RGB_BLUE] = range_limit[y + cblue]; @@ -120,13 +120,13 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ - cb = GETJSAMPLE(*inptr1++); - cr = GETJSAMPLE(*inptr2++); + cb = *inptr1++; + cr = *inptr2++; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ - y = GETJSAMPLE(*inptr00++); + y = *inptr00++; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; @@ -134,7 +134,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr0[RGB_ALPHA] = 0xFF; #endif outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr00++); + y = 
*inptr00++; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; @@ -142,7 +142,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr0[RGB_ALPHA] = 0xFF; #endif outptr0 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; @@ -150,7 +150,7 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, outptr1[RGB_ALPHA] = 0xFF; #endif outptr1 += RGB_PIXELSIZE; - y = GETJSAMPLE(*inptr01++); + y = *inptr01++; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; @@ -161,19 +161,19 @@ h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, } /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { - cb = GETJSAMPLE(*inptr1); - cr = GETJSAMPLE(*inptr2); + cb = *inptr1; + cr = *inptr2; cred = Crrtab[cr]; cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; - y = GETJSAMPLE(*inptr00); + y = *inptr00; outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; #ifdef RGB_ALPHA outptr0[RGB_ALPHA] = 0xFF; #endif - y = GETJSAMPLE(*inptr01); + y = *inptr01; outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; diff --git a/src/3rdparty/libjpeg/src/jdphuff.c b/src/3rdparty/libjpeg/src/jdphuff.c index 9e82636bbd..c6d82ca14b 100644 --- a/src/3rdparty/libjpeg/src/jdphuff.c +++ b/src/3rdparty/libjpeg/src/jdphuff.c @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, 2018, D. 
R. Commander. + * Copyright (C) 2015-2016, 2018-2021, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -41,25 +41,6 @@ typedef struct { int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ } savable_state; -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest, src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest, src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - typedef struct { struct jpeg_entropy_decoder pub; /* public fields */ @@ -102,7 +83,7 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo) boolean is_DC_band, bad; int ci, coefi, tbl; d_derived_tbl **pdtbl; - int *coef_bit_ptr; + int *coef_bit_ptr, *prev_coef_bit_ptr; jpeg_component_info *compptr; is_DC_band = (cinfo->Ss == 0); @@ -143,8 +124,15 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo) for (ci = 0; ci < cinfo->comps_in_scan; ci++) { int cindex = cinfo->cur_comp_info[ci]->component_index; coef_bit_ptr = &cinfo->coef_bits[cindex][0]; + prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0]; if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) { + if (cinfo->input_scan_number > 1) + prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi]; + else + prev_coef_bit_ptr[coefi] = 0; + } for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { int expected = (coef_bit_ptr[coefi] < 0) ? 
0 : coef_bit_ptr[coefi]; if (cinfo->Ah != expected) @@ -323,7 +311,7 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Load up working state */ BITREAD_LOAD_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(state, entropy->saved); + state = entropy->saved; /* Outer loop handles each block in the MCU */ @@ -356,11 +344,12 @@ decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) /* Completed MCU, so update state */ BITREAD_SAVE_STATE(cinfo, entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); + entropy->saved = state; } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -444,7 +433,8 @@ decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -495,7 +485,8 @@ decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) BITREAD_SAVE_STATE(cinfo, entropy->bitstate); /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; } @@ -638,7 +629,8 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data) } /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; + if (cinfo->restart_interval) + entropy->restarts_to_go--; return TRUE; @@ -676,7 +668,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo) /* Create progression status table */ cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, - cinfo->num_components * DCTSIZE2 * + cinfo->num_components * 2 * DCTSIZE2 * sizeof(int)); coef_bit_ptr = &cinfo->coef_bits[0][0]; for (ci = 0; ci < cinfo->num_components; ci++) diff --git a/src/3rdparty/libjpeg/src/jdsample.c 
b/src/3rdparty/libjpeg/src/jdsample.c index 50a68b3013..eaad72a030 100644 --- a/src/3rdparty/libjpeg/src/jdsample.c +++ b/src/3rdparty/libjpeg/src/jdsample.c @@ -8,7 +8,7 @@ * Copyright (C) 2010, 2015-2016, D. R. Commander. * Copyright (C) 2014, MIPS Technologies, Inc., California. * Copyright (C) 2015, Google, Inc. - * Copyright (C) 2019, Arm Limited. + * Copyright (C) 2019-2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -177,7 +177,7 @@ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; for (h = h_expand; h > 0; h--) { *outptr++ = invalue; } @@ -213,7 +213,7 @@ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[inrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; *outptr++ = invalue; *outptr++ = invalue; } @@ -242,7 +242,7 @@ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { - invalue = *inptr++; /* don't need GETJSAMPLE() here */ + invalue = *inptr++; *outptr++ = invalue; *outptr++ = invalue; } @@ -283,20 +283,20 @@ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, inptr = input_data[inrow]; outptr = output_data[inrow]; /* Special case for first column */ - invalue = GETJSAMPLE(*inptr++); + invalue = *inptr++; *outptr++ = (JSAMPLE)invalue; - *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2); + *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2); for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel */ - invalue = GETJSAMPLE(*inptr++) * 3; - *outptr++ = 
(JSAMPLE)((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); - *outptr++ = (JSAMPLE)((invalue + GETJSAMPLE(*inptr) + 2) >> 2); + invalue = (*inptr++) * 3; + *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2); + *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2); } /* Special case for last column */ - invalue = GETJSAMPLE(*inptr); - *outptr++ = (JSAMPLE)((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2); + invalue = *inptr; + *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2); *outptr++ = (JSAMPLE)invalue; } } @@ -338,7 +338,7 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow++]; for (colctr = 0; colctr < compptr->downsampled_width; colctr++) { - thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + thiscolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2); } } @@ -381,8 +381,8 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, outptr = output_data[outrow++]; /* Special case for first column */ - thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + thiscolsum = (*inptr0++) * 3 + (*inptr1++); + nextcolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4); *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); lastcolsum = thiscolsum; thiscolsum = nextcolsum; @@ -390,7 +390,7 @@ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + nextcolsum = (*inptr0++) * 3 + (*inptr1++); *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4); *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4); lastcolsum = thiscolsum; thiscolsum = nextcolsum; 
@@ -477,7 +477,13 @@ jinit_upsampler(j_decompress_ptr cinfo) } else if (h_in_group == h_out_group && v_in_group * 2 == v_out_group && do_fancy) { /* Non-fancy upsampling is handled by the generic method */ - upsample->methods[ci] = h1v2_fancy_upsample; +#if defined(__arm__) || defined(__aarch64__) || \ + defined(_M_ARM) || defined(_M_ARM64) + if (jsimd_can_h1v2_fancy_upsample()) + upsample->methods[ci] = jsimd_h1v2_fancy_upsample; + else +#endif + upsample->methods[ci] = h1v2_fancy_upsample; upsample->pub.need_context_rows = TRUE; } else if (h_in_group * 2 == h_out_group && v_in_group * 2 == v_out_group) { diff --git a/src/3rdparty/libjpeg/src/jerror.h b/src/3rdparty/libjpeg/src/jerror.h index 933a3690fd..4476df2c93 100644 --- a/src/3rdparty/libjpeg/src/jerror.h +++ b/src/3rdparty/libjpeg/src/jerror.h @@ -207,6 +207,10 @@ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") #endif #endif JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker") +#if JPEG_LIB_VERSION < 70 +JMESSAGE(JERR_BAD_DROP_SAMPLING, + "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") +#endif #ifdef JMAKE_ENUM_LIST @@ -252,6 +256,15 @@ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker") (cinfo)->err->msg_parm.i[2] = (p3), \ (cinfo)->err->msg_parm.i[3] = (p4), \ (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) +#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \ + ((cinfo)->err->msg_code = (code), \ + (cinfo)->err->msg_parm.i[0] = (p1), \ + (cinfo)->err->msg_parm.i[1] = (p2), \ + (cinfo)->err->msg_parm.i[2] = (p3), \ + (cinfo)->err->msg_parm.i[3] = (p4), \ + (cinfo)->err->msg_parm.i[4] = (p5), \ + (cinfo)->err->msg_parm.i[5] = (p6), \ + (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo))) #define ERREXITS(cinfo, code, str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ diff --git a/src/3rdparty/libjpeg/src/jidctint.c b/src/3rdparty/libjpeg/src/jidctint.c index 50f385da33..bb08748019 
100644 --- a/src/3rdparty/libjpeg/src/jidctint.c +++ b/src/3rdparty/libjpeg/src/jidctint.c @@ -3,7 +3,7 @@ * * This file was part of the Independent JPEG Group's software: * Copyright (C) 1991-1998, Thomas G. Lane. - * Modification developed 2002-2009 by Guido Vollbeding. + * Modification developed 2002-2018 by Guido Vollbeding. * libjpeg-turbo Modifications: * Copyright (C) 2015, 2020, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg @@ -417,7 +417,7 @@ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* * Perform dequantization and inverse DCT on one block of coefficients, - * producing a 7x7 output block. + * producing a reduced-size 7x7 output block. * * Optimized algorithm with 12 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/14). @@ -1258,7 +1258,7 @@ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr, /* * Perform dequantization and inverse DCT on one block of coefficients, - * producing a 11x11 output block. + * producing an 11x11 output block. * * Optimized algorithm with 24 multiplications in the 1-D kernel. * cK represents sqrt(2) * cos(K*pi/22). @@ -2398,7 +2398,7 @@ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr, tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]); tmp0 = LEFT_SHIFT(tmp0, CONST_BITS); /* Add fudge factor here for final descale. */ - tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1); + tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1); z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]); tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ diff --git a/src/3rdparty/libjpeg/src/jmorecfg.h b/src/3rdparty/libjpeg/src/jmorecfg.h index aa29f0f9f1..fb3a9cf411 100644 --- a/src/3rdparty/libjpeg/src/jmorecfg.h +++ b/src/3rdparty/libjpeg/src/jmorecfg.h @@ -43,25 +43,11 @@ #if BITS_IN_JSAMPLE == 8 /* JSAMPLE should be the smallest type that will hold the values 0..255. 
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF. */ -#ifdef HAVE_UNSIGNED_CHAR - typedef unsigned char JSAMPLE; #define GETJSAMPLE(value) ((int)(value)) -#else /* not HAVE_UNSIGNED_CHAR */ - -typedef char JSAMPLE; -#ifdef __CHAR_UNSIGNED__ -#define GETJSAMPLE(value) ((int)(value)) -#else -#define GETJSAMPLE(value) ((int)(value) & 0xFF) -#endif /* __CHAR_UNSIGNED__ */ - -#endif /* HAVE_UNSIGNED_CHAR */ - #define MAXJSAMPLE 255 #define CENTERJSAMPLE 128 @@ -97,22 +83,9 @@ typedef short JCOEF; * managers, this is also the data type passed to fread/fwrite. */ -#ifdef HAVE_UNSIGNED_CHAR - typedef unsigned char JOCTET; #define GETJOCTET(value) (value) -#else /* not HAVE_UNSIGNED_CHAR */ - -typedef char JOCTET; -#ifdef __CHAR_UNSIGNED__ -#define GETJOCTET(value) (value) -#else -#define GETJOCTET(value) ((value) & 0xFF) -#endif /* __CHAR_UNSIGNED__ */ - -#endif /* HAVE_UNSIGNED_CHAR */ - /* These typedefs are used for various table entries and so forth. * They must be at least as wide as specified; but making them too big @@ -123,15 +96,7 @@ typedef char JOCTET; /* UINT8 must hold at least the values 0..255. */ -#ifdef HAVE_UNSIGNED_CHAR typedef unsigned char UINT8; -#else /* not HAVE_UNSIGNED_CHAR */ -#ifdef __CHAR_UNSIGNED__ -typedef char UINT8; -#else /* not __CHAR_UNSIGNED__ */ -typedef short UINT8; -#endif /* __CHAR_UNSIGNED__ */ -#endif /* HAVE_UNSIGNED_CHAR */ /* UINT16 must hold at least the values 0..65535. */ diff --git a/src/3rdparty/libjpeg/src/jpegint.h b/src/3rdparty/libjpeg/src/jpegint.h index ad36ca8b56..195fbcb9b6 100644 --- a/src/3rdparty/libjpeg/src/jpegint.h +++ b/src/3rdparty/libjpeg/src/jpegint.h @@ -5,7 +5,7 @@ * Copyright (C) 1991-1997, Thomas G. Lane. * Modified 1997-2009 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015-2016, D. R. Commander. + * Copyright (C) 2015-2016, 2019, D. R. Commander. * Copyright (C) 2015, Google, Inc. 
* For conditions of distribution and use, see the accompanying README.ijg * file. @@ -158,6 +158,9 @@ struct jpeg_decomp_master { JDIMENSION first_MCU_col[MAX_COMPONENTS]; JDIMENSION last_MCU_col[MAX_COMPONENTS]; boolean jinit_upsampler_no_alloc; + + /* Last iMCU row that was successfully decoded */ + JDIMENSION last_good_iMCU_row; }; /* Input control module */ diff --git a/src/3rdparty/libjpeg/src/jquant1.c b/src/3rdparty/libjpeg/src/jquant1.c index 40bbb28cc7..73b83e16e5 100644 --- a/src/3rdparty/libjpeg/src/jquant1.c +++ b/src/3rdparty/libjpeg/src/jquant1.c @@ -479,7 +479,7 @@ color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, for (col = width; col > 0; col--) { pixcode = 0; for (ci = 0; ci < nc; ci++) { - pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]); + pixcode += colorindex[ci][*ptrin++]; } *ptrout++ = (JSAMPLE)pixcode; } @@ -506,9 +506,9 @@ color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf, ptrin = input_buf[row]; ptrout = output_buf[row]; for (col = width; col > 0; col--) { - pixcode = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]); - pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]); - pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]); + pixcode = colorindex0[*ptrin++]; + pixcode += colorindex1[*ptrin++]; + pixcode += colorindex2[*ptrin++]; *ptrout++ = (JSAMPLE)pixcode; } } @@ -552,7 +552,7 @@ quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * required amount of padding. 
*/ *output_ptr += - colorindex_ci[GETJSAMPLE(*input_ptr) + dither[col_index]]; + colorindex_ci[*input_ptr + dither[col_index]]; input_ptr += nc; output_ptr++; col_index = (col_index + 1) & ODITHER_MASK; @@ -595,12 +595,9 @@ quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, col_index = 0; for (col = width; col > 0; col--) { - pixcode = - GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) + dither0[col_index]]); - pixcode += - GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) + dither1[col_index]]); - pixcode += - GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) + dither2[col_index]]); + pixcode = colorindex0[(*input_ptr++) + dither0[col_index]]; + pixcode += colorindex1[(*input_ptr++) + dither1[col_index]]; + pixcode += colorindex2[(*input_ptr++) + dither2[col_index]]; *output_ptr++ = (JSAMPLE)pixcode; col_index = (col_index + 1) & ODITHER_MASK; } @@ -677,15 +674,15 @@ quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * The maximum error is +- MAXJSAMPLE; this sets the required size * of the range_limit array. */ - cur += GETJSAMPLE(*input_ptr); - cur = GETJSAMPLE(range_limit[cur]); + cur += *input_ptr; + cur = range_limit[cur]; /* Select output value, accumulate into output code for this pixel */ - pixcode = GETJSAMPLE(colorindex_ci[cur]); + pixcode = colorindex_ci[cur]; *output_ptr += (JSAMPLE)pixcode; /* Compute actual representation error at this pixel */ /* Note: we can do this even though we don't have the final */ /* pixel code, because the colormap is orthogonal. */ - cur -= GETJSAMPLE(colormap_ci[pixcode]); + cur -= colormap_ci[pixcode]; /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the * next-line error sums left by 1 column. 
diff --git a/src/3rdparty/libjpeg/src/jquant2.c b/src/3rdparty/libjpeg/src/jquant2.c index 6570613bb9..44efb18cad 100644 --- a/src/3rdparty/libjpeg/src/jquant2.c +++ b/src/3rdparty/libjpeg/src/jquant2.c @@ -215,9 +215,9 @@ prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf, ptr = input_buf[row]; for (col = width; col > 0; col--) { /* get pixel value and index into the histogram */ - histp = &histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT] - [GETJSAMPLE(ptr[1]) >> C1_SHIFT] - [GETJSAMPLE(ptr[2]) >> C2_SHIFT]; + histp = &histogram[ptr[0] >> C0_SHIFT] + [ptr[1] >> C1_SHIFT] + [ptr[2] >> C2_SHIFT]; /* increment, check for overflow and undo increment if so. */ if (++(*histp) <= 0) (*histp)--; @@ -665,7 +665,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, for (i = 0; i < numcolors; i++) { /* We compute the squared-c0-distance term, then add in the other two. */ - x = GETJSAMPLE(cinfo->colormap[0][i]); + x = cinfo->colormap[0][i]; if (x < minc0) { tdist = (x - minc0) * C0_SCALE; min_dist = tdist * tdist; @@ -688,7 +688,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, } } - x = GETJSAMPLE(cinfo->colormap[1][i]); + x = cinfo->colormap[1][i]; if (x < minc1) { tdist = (x - minc1) * C1_SCALE; min_dist += tdist * tdist; @@ -710,7 +710,7 @@ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, } } - x = GETJSAMPLE(cinfo->colormap[2][i]); + x = cinfo->colormap[2][i]; if (x < minc2) { tdist = (x - minc2) * C2_SCALE; min_dist += tdist * tdist; @@ -788,13 +788,13 @@ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2, #define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE) for (i = 0; i < numcolors; i++) { - icolor = GETJSAMPLE(colorlist[i]); + icolor = colorlist[i]; /* Compute (square of) distance from minc0/c1/c2 to this color */ - inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE; + inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE; dist0 = inc0 * inc0; - 
inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE; + inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE; dist0 += inc1 * inc1; - inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE; + inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE; dist0 += inc2 * inc2; /* Form the initial difference increments */ inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0; @@ -879,7 +879,7 @@ fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2) for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) { cachep = &histogram[c0 + ic0][c1 + ic1][c2]; for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) { - *cachep++ = (histcell)(GETJSAMPLE(*cptr++) + 1); + *cachep++ = (histcell)((*cptr++) + 1); } } } @@ -909,9 +909,9 @@ pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, outptr = output_buf[row]; for (col = width; col > 0; col--) { /* get pixel value and index into the cache */ - c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT; - c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT; - c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT; + c0 = (*inptr++) >> C0_SHIFT; + c1 = (*inptr++) >> C1_SHIFT; + c2 = (*inptr++) >> C2_SHIFT; cachep = &histogram[c0][c1][c2]; /* If we have not seen this color before, find nearest colormap entry */ /* and update the cache */ @@ -996,12 +996,12 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, * The maximum error is +- MAXJSAMPLE (or less with error limiting); * this sets the required size of the range_limit array. 
*/ - cur0 += GETJSAMPLE(inptr[0]); - cur1 += GETJSAMPLE(inptr[1]); - cur2 += GETJSAMPLE(inptr[2]); - cur0 = GETJSAMPLE(range_limit[cur0]); - cur1 = GETJSAMPLE(range_limit[cur1]); - cur2 = GETJSAMPLE(range_limit[cur2]); + cur0 += inptr[0]; + cur1 += inptr[1]; + cur2 += inptr[2]; + cur0 = range_limit[cur0]; + cur1 = range_limit[cur1]; + cur2 = range_limit[cur2]; /* Index into the cache with adjusted pixel value */ cachep = &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT]; @@ -1015,9 +1015,9 @@ pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf, register int pixcode = *cachep - 1; *outptr = (JSAMPLE)pixcode; /* Compute representation error for this pixel */ - cur0 -= GETJSAMPLE(colormap0[pixcode]); - cur1 -= GETJSAMPLE(colormap1[pixcode]); - cur2 -= GETJSAMPLE(colormap2[pixcode]); + cur0 -= colormap0[pixcode]; + cur1 -= colormap1[pixcode]; + cur2 -= colormap2[pixcode]; } /* Compute error fractions to be propagated to adjacent pixels. * Add these into the running sums, and simultaneously shift the diff --git a/src/3rdparty/libjpeg/src/jsimd.h b/src/3rdparty/libjpeg/src/jsimd.h index 51e2b8c89d..6c203655ef 100644 --- a/src/3rdparty/libjpeg/src/jsimd.h +++ b/src/3rdparty/libjpeg/src/jsimd.h @@ -4,6 +4,7 @@ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright (C) 2011, 2014, D. R. Commander. * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -75,6 +76,7 @@ EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo, EXTERN(int) jsimd_can_h2v2_fancy_upsample(void); EXTERN(int) jsimd_can_h2v1_fancy_upsample(void); +EXTERN(int) jsimd_can_h1v2_fancy_upsample(void); EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, @@ -84,6 +86,10 @@ EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr); +EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, + jpeg_component_info *compptr, + JSAMPARRAY input_data, + JSAMPARRAY *output_data_ptr); EXTERN(int) jsimd_can_h2v2_merged_upsample(void); EXTERN(int) jsimd_can_h2v1_merged_upsample(void); diff --git a/src/3rdparty/libjpeg/src/jsimd_none.c b/src/3rdparty/libjpeg/src/jsimd_none.c index 3cb6c80f8a..5b38a9fb5c 100644 --- a/src/3rdparty/libjpeg/src/jsimd_none.c +++ b/src/3rdparty/libjpeg/src/jsimd_none.c @@ -4,6 +4,7 @@ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB * Copyright (C) 2009-2011, 2014, D. R. Commander. * Copyright (C) 2015-2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * * Based on the x86 SIMD extension for IJG JPEG library, * Copyright (C) 1999-2006, MIYASAKA Masaru. 
@@ -169,6 +170,12 @@ jsimd_can_h2v1_fancy_upsample(void) return 0; } +GLOBAL(int) +jsimd_can_h1v2_fancy_upsample(void) +{ + return 0; +} + GLOBAL(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) @@ -181,6 +188,12 @@ jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, { } +GLOBAL(void) +jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ +} + GLOBAL(int) jsimd_can_h2v2_merged_upsample(void) { diff --git a/src/3rdparty/libjpeg/src/jversion.h b/src/3rdparty/libjpeg/src/jversion.h index 4462b94104..2ab534af41 100644 --- a/src/3rdparty/libjpeg/src/jversion.h +++ b/src/3rdparty/libjpeg/src/jversion.h @@ -2,9 +2,9 @@ * jversion.h * * This file was part of the Independent JPEG Group's software: - * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding. + * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2010, 2012-2020, D. R. Commander. + * Copyright (C) 2010, 2012-2021, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -37,9 +37,9 @@ */ #define JCOPYRIGHT \ - "Copyright (C) 2009-2020 D. R. Commander\n" \ + "Copyright (C) 2009-2021 D. R. Commander\n" \ "Copyright (C) 2015, 2020 Google, Inc.\n" \ - "Copyright (C) 2019 Arm Limited\n" \ + "Copyright (C) 2019-2020 Arm Limited\n" \ "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \ "Copyright (C) 2011-2016 Siarhei Siamashka\n" \ "Copyright (C) 2015 Intel Corporation\n" \ @@ -48,7 +48,7 @@ "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \ - "Copyright (C) 1991-2017 Thomas G. Lane, Guido Vollbeding" + "Copyright (C) 1991-2020 Thomas G. 
Lane, Guido Vollbeding" #define JCOPYRIGHT_SHORT \ - "Copyright (C) 1991-2020 The libjpeg-turbo Project and many others" + "Copyright (C) 1991-2021 The libjpeg-turbo Project and many others" diff --git a/src/3rdparty/pcre2/AUTHORS b/src/3rdparty/pcre2/AUTHORS index f001cb770e..c61b5f3aff 100644 --- a/src/3rdparty/pcre2/AUTHORS +++ b/src/3rdparty/pcre2/AUTHORS @@ -8,7 +8,7 @@ Email domain: gmail.com University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2020 University of Cambridge +Copyright (c) 1997-2021 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2020 Zoltan Herczeg +Copyright(c) 2010-2021 Zoltan Herczeg All rights reserved. @@ -30,7 +30,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2020 Zoltan Herczeg +Copyright(c) 2009-2021 Zoltan Herczeg All rights reserved. #### diff --git a/src/3rdparty/pcre2/LICENCE b/src/3rdparty/pcre2/LICENCE index 155d073127..18684ceaa9 100644 --- a/src/3rdparty/pcre2/LICENCE +++ b/src/3rdparty/pcre2/LICENCE @@ -26,7 +26,7 @@ Email domain: gmail.com University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2020 University of Cambridge +Copyright (c) 1997-2021 University of Cambridge All rights reserved. @@ -37,7 +37,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2010-2020 Zoltan Herczeg +Copyright(c) 2010-2021 Zoltan Herczeg All rights reserved. @@ -48,7 +48,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2009-2020 Zoltan Herczeg +Copyright(c) 2009-2021 Zoltan Herczeg All rights reserved. 
diff --git a/src/3rdparty/pcre2/qt_attribution.json b/src/3rdparty/pcre2/qt_attribution.json index e7abb56762..04bdb6cc6b 100644 --- a/src/3rdparty/pcre2/qt_attribution.json +++ b/src/3rdparty/pcre2/qt_attribution.json @@ -7,12 +7,12 @@ "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Homepage": "http://www.pcre.org/", - "Version": "10.36", - "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.36.tar.bz2", + "Version": "10.37", + "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.37.tar.bz2", "License": "BSD 3-clause \"New\" or \"Revised\" License", "LicenseId": "BSD-3-Clause", "LicenseFile": "LICENCE", - "Copyright": "Copyright (c) 1997-2020 University of Cambridge + "Copyright": "Copyright (c) 1997-2021 University of Cambridge Copyright (c) 2010-2020 Zoltan Herczeg" }, { @@ -24,11 +24,11 @@ Copyright (c) 2010-2020 Zoltan Herczeg" "Path": "src/sljit", "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Homepage": "http://www.pcre.org/", - "Version": "10.36", - "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.36.tar.bz2", + "Version": "10.37", + "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.37.tar.bz2", "License": "BSD 2-clause \"Simplified\" License", "LicenseId": "BSD-2-Clause", "LicenseFile": "LICENCE-SLJIT", - "Copyright": "Copyright (c) 2009-2020 Zoltan Herczeg" + "Copyright": "Copyright (c) 2009-2021 Zoltan Herczeg" } ] diff --git a/src/3rdparty/pcre2/src/pcre2.h b/src/3rdparty/pcre2/src/pcre2.h index f204ec8180..7ab6b39aeb 100644 --- a/src/3rdparty/pcre2/src/pcre2.h +++ b/src/3rdparty/pcre2/src/pcre2.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. 
*/ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 36 +#define PCRE2_MINOR 37 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2020-12-04 +#define PCRE2_DATE 2021-05-26 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c index c64cf856d1..e5e0895682 100644 --- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c +++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -490,6 +490,7 @@ switch(c) list[2] = (uint32_t)(end - code); return end; } + return NULL; /* Opcode not accepted */ } @@ -1186,12 +1187,16 @@ for (;;) c = *repeat_opcode; if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) { - /* end must not be NULL. */ - end = get_chr_property_list(code, utf, ucp, cb->fcc, list); + /* The return from get_chr_property_list() will never be NULL when + *code (aka c) is one of the three class opcodes. However, gcc with + -fanalyzer notes that a NULL return is possible, and grumbles. Hence we + put in a check. 
*/ + end = get_chr_property_list(code, utf, ucp, cb->fcc, list); list[1] = (c & 1) == 0; - if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) + if (end != NULL && + compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) { switch (c) { diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c index e811f12f02..da449ae9ed 100644 --- a/src/3rdparty/pcre2/src/pcre2_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_compile.c @@ -1398,32 +1398,47 @@ static BOOL read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, uint32_t *maxp, int *errorcodeptr) { -PCRE2_SPTR p = *ptrptr; +PCRE2_SPTR p; BOOL yield = FALSE; +BOOL had_comma = FALSE; int32_t min = 0; int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ -/* NB read_number() initializes the error code to zero. The only error is for a -number that is too big. */ +/* Check the syntax */ +*errorcodeptr = 0; +for (p = *ptrptr;; p++) + { + uint32_t c; + if (p >= ptrend) return FALSE; + c = *p; + if (IS_DIGIT(c)) continue; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + if (c == CHAR_COMMA) + { + if (had_comma) return FALSE; + had_comma = TRUE; + } + else return FALSE; + } + +/* The only error from read_number() is for a number that is too big. 
*/ + +p = *ptrptr; if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) goto EXIT; -if (p >= ptrend) goto EXIT; - if (*p == CHAR_RIGHT_CURLY_BRACKET) { p++; max = min; } - else { - if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT; - if (*p != CHAR_RIGHT_CURLY_BRACKET) + if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) { if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, - errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + errorcodeptr)) goto EXIT; if (max < min) { @@ -1438,11 +1453,10 @@ yield = TRUE; if (minp != NULL) *minp = (uint32_t)min; if (maxp != NULL) *maxp = (uint32_t)max; -/* Update the pattern pointer on success, or after an error, but not when -the result is "not a repeat quantifier". */ +/* Update the pattern pointer */ EXIT: -if (yield || *errorcodeptr != 0) *ptrptr = p; +*ptrptr = p; return yield; } @@ -1776,19 +1790,23 @@ else { oldptr = ptr; ptr--; /* Back to the digit */ - if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s, - errorcodeptr)) - break; - /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + /* As we know we are at a digit, the only possible error from + read_number() is a number that is too large to be a group number. In this + case we fall through handle this as not a group reference. If we have + read a small enough number, check for a back reference. + + \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. 
*/ - if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount) + if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && + (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) { if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; else escape = -s; /* Indicates a back reference */ break; } + ptr = oldptr; /* Put the pointer back and fall through */ } diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c index 1977d28aa5..f3a26aeee0 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c @@ -1226,7 +1226,7 @@ while (cc < ccend) return TRUE; } -#define EARLY_FAIL_ENHANCE_MAX (1 + 1) +#define EARLY_FAIL_ENHANCE_MAX (1 + 3) /* start: @@ -1238,6 +1238,7 @@ return: current number of iterators enhanced with fast fail */ static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) { +PCRE2_SPTR begin = cc; PCRE2_SPTR next_alt; PCRE2_SPTR end; PCRE2_SPTR accelerated_start; @@ -1475,31 +1476,19 @@ do case OP_CBRA: end = cc + GET(cc, 1); - if (*end == OP_KET && PRIVATE_DATA(end) == 0) - { - if (*cc == OP_CBRA) - { - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - break; - cc += IMM2_SIZE; - } - - cc += 1 + LINK_SIZE; - continue; - } - fast_forward_allowed = FALSE; if (depth >= 4) break; end = bracketend(cc) - (1 + LINK_SIZE); - if (*end != OP_KET || PRIVATE_DATA(end) != 0) - break; - - if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) break; count = detect_early_fail(common, cc, private_data_start, depth + 1, count); + + if (PRIVATE_DATA(cc) != 0) + common->private_data_ptrs[begin - common->start] = 1; + if (count < EARLY_FAIL_ENHANCE_MAX) { cc = end + (1 + LINK_SIZE); @@ -1555,6 +1544,8 @@ do return EARLY_FAIL_ENHANCE_MAX; } + /* Cannot be part 
of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; count++; if (count < EARLY_FAIL_ENHANCE_MAX) @@ -1620,11 +1611,12 @@ sljit_sw length = end - begin; sljit_s32 min, max, i; /* Detect fixed iterations first. */ -if (end[-(1 + LINK_SIZE)] != OP_KET) +if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) return FALSE; -/* Already detected repeat. */ -if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) +/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ + * Skip the check of the second part. */ +if (PRIVATE_DATA(end - LINK_SIZE) == 0) return TRUE; next = end; @@ -1763,6 +1755,7 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; + /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) { if (detect_repeat(common, cc)) @@ -1813,6 +1806,7 @@ while (cc < ccend) case OP_COND: /* Might be a hidden SCOND. 
*/ + common->private_data_ptrs[cc - common->start] = 0; alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { @@ -13661,10 +13655,12 @@ if (!common->private_data_ptrs) memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); -set_private_data_ptrs(common, &private_data_size, ccend); + if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) detect_early_fail(common, common->start, &private_data_size, 0, 0); +set_private_data_ptrs(common, &private_data_size, ccend); + SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); if (private_data_size > SLJIT_MAX_LOCAL_SIZE) diff --git a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h index 5673d338c0..5fd97b15bd 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h +++ b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h @@ -39,7 +39,29 @@ POSSIBILITY OF SUCH DAMAGE. 
----------------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#if !(defined SUPPORT_VALGRIND) + +#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)) + +typedef enum { + vector_compare_match1, + vector_compare_match1i, + vector_compare_match2, +} vector_compare_type; + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) @@ -56,6 +78,10 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); } #endif +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + static sljit_s32 character_to_int32(PCRE2_UCHAR chr) { sljit_u32 value = chr; @@ -97,13 +123,7 @@ instruction[4] = (sljit_u8)offset; sljit_emit_op_custom(compiler, instruction, 5); } -typedef enum { - sse2_compare_match1, - sse2_compare_match1i, - sse2_compare_match2, -} sse2_compare_type; - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) { sljit_u8 instruction[4]; @@ -112,11 +132,11 @@ instruction[1] = 0x0f; SLJIT_ASSERT(step >= 0 && step <= 3); -if (compare_type != sse2_compare_match2) +if (compare_type != vector_compare_match2) { if (step == 0) { - if (compare_type == sse2_compare_match1i) + if (compare_type == vector_compare_match1i) { /* POR xmm1, xmm2/m128 */ /* 
instruction[0] = 0x66; */ @@ -185,14 +205,14 @@ switch (step) static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; #endif struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -207,12 +227,12 @@ SLJIT_UNUSED_ARG(offset); if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -349,11 +369,11 @@ if (common->utf && offset > 0) static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; struct sljit_jump *quit; jump_list *not_found = NULL; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -366,12 +386,12 @@ int i; if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -476,27 +496,15 @@ return not_found; #ifndef _WIN64 -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif 
PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) { DEFINE_COMPILER; -sse2_compare_type compare1_type = sse2_compare_match1; -sse2_compare_type compare2_type = sse2_compare_match1; +sljit_u8 instruction[8]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); @@ -516,7 +524,6 @@ struct sljit_label *start; struct sljit_label *restart; #endif struct sljit_jump *jump[2]; -sljit_u8 instruction[8]; int i; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); @@ -549,13 +556,13 @@ else bit1 = char1a ^ char1b; if (is_powerof2(bit1)) { - compare1_type = sse2_compare_match1i; + compare1_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); } else { - compare1_type = sse2_compare_match2; + compare1_type = vector_compare_match2; bit1 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); @@ -578,13 +585,13 @@ else bit2 = char2a ^ char2b; if (is_powerof2(bit2)) { - compare2_type = sse2_compare_match1i; + compare2_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); } else { - compare2_type = sse2_compare_match2; + compare2_type = vector_compare_match2; bit2 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 
character_to_int32(char2b)); @@ -731,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { @@ -760,7 +764,7 @@ if (common->match_end_ptr != 0) #undef SSE2_COMPARE_TYPE_INDEX -#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */ +#endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) @@ -1121,3 +1125,743 @@ JUMPHERE(partial_quit); } #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define VECTOR_ELEMENT_SIZE 0 +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define VECTOR_ELEMENT_SIZE 1 +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define VECTOR_ELEMENT_SIZE 2 +#else +#error "Unsupported unit width" +#endif + +static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, + sljit_s32 base_reg, sljit_s32 index_reg) +{ +sljit_u16 instruction[3]; + +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); +instruction[1] = (sljit_u16)(base_reg << 12); +instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 
0x07 : 0x06)); + +sljit_emit_op_custom(compiler, instruction, 6); +} + +#if PCRE2_CODE_UNIT_WIDTH == 32 + +static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, + PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 1); + +if (chr < 0x7fff) + { + if (step == 1) + return; + + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); + instruction[1] = (sljit_u16)chr; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (step == 0) + { + OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); + + /* VLVG */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +/* VREP */ +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); +instruction[1] = 0; +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); +sljit_emit_op_custom(compiler, instruction, 6); +} + +#endif + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 2); + +if (step == 1) + { + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp1_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (compare_type != vector_compare_match2) + { + if (step == 0 && compare_type == vector_compare_match1i) + { + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + 
instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + } + return; + } + +switch (step) + { + case 0: + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + + case 2: + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(tmp_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = 
(sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = 
(sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[1]); + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); 
+sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 16, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +JUMPHERE(quit); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +/* First part (unaligned start) */ + 
+OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + 
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); +quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, 16, start); + 
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); + +JUMPHERE(quit); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +return not_found; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *jump[2]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_s32 diff = IN_UCHARS(offs2 - offs1); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +sljit_s32 zero_ind = 8; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); + +if (char1a != char1b) + { + bit1 = char1a ^ char1b; + compare1_type = vector_compare_match1i; + + if (!is_powerof2(bit1)) + { + bit1 = 0; + compare1_type = vector_compare_match2; + } + } + +if (char2a != char2b) + { + bit2 = char2a ^ char2b; + compare2_type = vector_compare_match1i; + + if (!is_powerof2(bit2)) + { + bit2 = 0; + compare2_type = vector_compare_match2; + } + } + +/* Initialize. 
*/ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + CMOV(SLJIT_LESS, STR_END, TMP1, 0); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); +instruction[1] = (sljit_u16)(char1a | bit1); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1a != char1b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); + instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); +instruction[1] = (sljit_u16)(char2a | bit2); +/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ +sljit_emit_op_custom(compiler, instruction, 6); + +if (char2a != char2b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); + instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); + + if (char1a != char1b) + replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? 
bit1 : char1b, TMP1); + + replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); + + if (char2a != char2b) + replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); + } + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); +instruction[1] = 0; +instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); +load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); +jump[1] = JUMP(SLJIT_JUMP); +JUMPHERE(jump[0]); +load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); +JUMPHERE(jump[1]); + +load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit 
= CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); + +/* Main loop. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); +load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 16, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +JUMPHERE(quit); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, 
SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + /* TMP1 contains diff. */ + OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* SLJIT_CONFIG_S390X */ + +#endif /* !SUPPORT_VALGRIND */ diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c index e3f78c2ca3..ed60517131 100644 --- a/src/3rdparty/pcre2/src/pcre2_match.c +++ b/src/3rdparty/pcre2/src/pcre2_match.c @@ -818,10 +818,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* N is now the frame of the recursion; the previous frame is at the OP_RECURSE position. Go back there, copying the current subject position - and mark, and move on past the OP_RECURSE. */ + and mark, and the start_match position (\K might have changed it), and + then move on past the OP_RECURSE. 
*/ P->eptr = Feptr; P->mark = Fmark; + P->start_match = Fstart_match; F = P; Fecode += 1 + LINK_SIZE; continue; diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h index eb1132db30..ff36e5b7c6 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h +++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -158,6 +158,8 @@ extern "C" { #define SLJIT_CONFIG_MIPS_64 1 #elif defined(__sparc__) || defined(__sparc) #define SLJIT_CONFIG_SPARC_32 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 #else /* Unsupported architecture */ #define SLJIT_CONFIG_UNSUPPORTED 1 diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c index 61a32f23e9..6e5bf78e45 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -79,6 +79,7 @@ */ #ifdef _WIN32 +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { @@ -91,96 +92,108 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) VirtualFree(chunk, 0, MEM_RELEASE); } -#else - -#ifdef __APPLE__ -#ifdef MAP_ANON -/* Configures TARGET_OS_OSX when appropriate */ -#include <TargetConditionals.h> - -#if TARGET_OS_OSX && defined(MAP_JIT) -#include <sys/utsname.h> -#endif /* TARGET_OS_OSX && MAP_JIT */ - -#ifdef MAP_JIT +#else /* POSIX */ +#if defined(__APPLE__) && defined(MAP_JIT) /* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a - version where it's OK to have more than one JIT block. + version where it's OK to have more than one JIT block or where MAP_JIT is + required. On non-macOS systems, returns MAP_JIT if it is defined. 
*/ +#include <TargetConditionals.h> +#if TARGET_OS_OSX +#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 +#ifdef MAP_ANON +#include <sys/utsname.h> +#include <stdlib.h> + +#define SLJIT_MAP_JIT (get_map_jit_flag()) + static SLJIT_INLINE int get_map_jit_flag() { -#if TARGET_OS_OSX - sljit_sw page_size = get_page_alignment() + 1; + sljit_sw page_size; void *ptr; + struct utsname name; static int map_jit_flag = -1; - /* - The following code is thread safe because multiple initialization - sets map_jit_flag to the same value and the code has no side-effects. - Changing the kernel version witout system restart is (very) unlikely. - */ - if (map_jit_flag == -1) { - struct utsname name; - + if (map_jit_flag < 0) { map_jit_flag = 0; uname(&name); - /* Kernel version for 10.14.0 (Mojave) */ + /* Kernel version for 10.14.0 (Mojave) or later */ if (atoi(name.release) >= 18) { + page_size = get_page_alignment() + 1; /* Only use MAP_JIT if a hardened runtime is used */ + ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); - ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); - - if (ptr == MAP_FAILED) { - map_jit_flag = MAP_JIT; - } else { + if (ptr != MAP_FAILED) munmap(ptr, page_size); - } + else + map_jit_flag = MAP_JIT; } } - return map_jit_flag; -#else /* !TARGET_OS_OSX */ - return MAP_JIT; -#endif /* TARGET_OS_OSX */ } - -#endif /* MAP_JIT */ #endif /* MAP_ANON */ -#endif /* __APPLE__ */ +#else /* !SLJIT_CONFIG_X86 */ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#error Unsupported architecture +#endif /* SLJIT_CONFIG_ARM */ +#include <pthread.h> + +#define SLJIT_MAP_JIT (MAP_JIT) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + apple_update_wx_flags(enable_exec) + +static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) +{ + pthread_jit_write_protect_np(enable_exec); +} +#endif /* SLJIT_CONFIG_X86 */ +#else /* !TARGET_OS_OSX */ +#define SLJIT_MAP_JIT (MAP_JIT) +#endif /* 
TARGET_OS_OSX */ +#endif /* __APPLE__ && MAP_JIT */ +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* !SLJIT_UPDATE_WX_FLAGS */ +#ifndef SLJIT_MAP_JIT +#define SLJIT_MAP_JIT (0) +#endif /* !SLJIT_MAP_JIT */ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { void *retval; - const int prot = PROT_READ | PROT_WRITE | PROT_EXEC; - -#ifdef MAP_ANON + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; - int flags = MAP_PRIVATE | MAP_ANON; - -#ifdef MAP_JIT - flags |= get_map_jit_flag(); +#ifdef PROT_MAX + prot |= PROT_MAX(prot); #endif - retval = mmap(NULL, size, prot, flags, -1, 0); +#ifdef MAP_ANON + flags |= MAP_ANON | SLJIT_MAP_JIT; #else /* !MAP_ANON */ if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) return NULL; - retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0); + fd = dev_zero; #endif /* MAP_ANON */ + retval = mmap(NULL, size, prot, flags, fd, 0); if (retval == MAP_FAILED) - retval = NULL; - else { - if (mprotect(retval, size, prot) < 0) { - munmap(retval, size); - retval = NULL; - } + return NULL; + + if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + munmap(retval, size); + return NULL; } + SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); + return retval; } @@ -189,7 +202,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) munmap(chunk, size); } -#endif +#endif /* windows */ /* --------------------------------------------------------------------- */ /* Common functions */ @@ -261,6 +274,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) while (free_block) { if (free_block->size >= size) { chunk_size = free_block->size; + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); if (chunk_size > size + 64) { /* We just cut a block from the end of the free block. 
*/ chunk_size -= size; @@ -326,6 +340,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) allocated_size -= header->size; /* Connecting free blocks together if possible. */ + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); /* If header->prev_size == 0, free_block will equal to header. In this case, free_block->header.size will be > 0. */ @@ -358,6 +373,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) } } + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } @@ -367,6 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) struct free_block* next_free_block; SLJIT_ALLOCATOR_LOCK(); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); free_block = free_blocks; while (free_block) { @@ -381,5 +398,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) } SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c index a8b65112d4..3d007fe8a1 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -42,7 +42,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). 
*/ -const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 @@ -66,22 +66,22 @@ typedef sljit_uw sljit_gpr; * will be retired ASAP (TODO: carenas) */ -const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ -const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ -const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ -const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ -const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ -const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ -const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ -const sljit_gpr r7 = 7; /* reg_map[6] */ -const sljit_gpr r8 = 8; /* reg_map[7] */ -const sljit_gpr r9 = 9; /* reg_map[8] */ -const sljit_gpr r10 = 10; /* reg_map[9] */ -const sljit_gpr r11 = 11; /* reg_map[10] */ -const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ -const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ -const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ -const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ 
+static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ /* TODO(carenas): r12 might conflict in PIC code, reserve? */ @@ -100,8 +100,8 @@ const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack point /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ -const sljit_gpr link_r = 14; /* r14 */ -const sljit_gpr fast_link_r = 0; /* r0 */ +static const sljit_gpr link_r = 14; /* r14 */ +static const sljit_gpr fast_link_r = 0; /* r0 */ /* Flag register layout: @@ -110,7 +110,7 @@ const sljit_gpr fast_link_r = 0; /* r0 */ | ZERO | 0 | 0 | C C |///////| +---------------+---+---+-------+-------+ */ -const sljit_gpr flag_r = 14; /* r14 */ +static const sljit_gpr flag_r = 14; /* r14 */ struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -1465,7 +1465,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile op = GET_OPCODE(op) | (op & SLJIT_I32_OP); switch (op) { case SLJIT_BREAKPOINT: - /* TODO(mundaym): insert real breakpoint? */ + /* The following invalid instruction is emitted by gdb. 
*/ + return push_inst(compiler, 0x0001 /* 2-byte trap */); case SLJIT_NOP: return push_inst(compiler, 0x0700 /* 2-byte nop */); case SLJIT_LMUL_UW: diff --git a/src/3rdparty/pcre2/src/sljit/sljitUtils.c b/src/3rdparty/pcre2/src/sljit/sljitUtils.c index 08ca35cf37..9bce714735 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitUtils.c +++ b/src/3rdparty/pcre2/src/sljit/sljitUtils.c @@ -48,7 +48,7 @@ static HANDLE allocator_lock; static SLJIT_INLINE void allocator_grab_lock(void) { HANDLE lock; - if (SLJIT_UNLIKELY(!allocator_lock)) { + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { lock = CreateMutex(NULL, FALSE, NULL); if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) CloseHandle(lock); @@ -146,9 +146,13 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) { #include <unistd.h> static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align; - if (!sljit_page_align) { + static sljit_sw sljit_page_align = -1; + if (sljit_page_align < 0) { +#ifdef _SC_PAGESIZE sljit_page_align = sysconf(_SC_PAGESIZE); +#else + sljit_page_align = getpagesize(); +#endif /* Should never happen. 
*/ if (sljit_page_align < 0) sljit_page_align = 4096; diff --git a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c index 6ef71f7d83..72d5b8dd2b 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c @@ -121,14 +121,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; #endif static int se_protected = !SLJIT_PROT_WX; + int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX; sljit_uw* ptr; if (SLJIT_UNLIKELY(se_protected < 0)) return NULL; +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + size += sizeof(sljit_uw); - ptr = (sljit_uw*)mmap(NULL, size, PROT_READ | PROT_WRITE | SLJIT_PROT_WX, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); if (ptr == MAP_FAILED) return NULL; diff --git a/src/3rdparty/sqlite/qt_attribution.json b/src/3rdparty/sqlite/qt_attribution.json index 1c1d9b7746..1d8f96ff03 100644 --- a/src/3rdparty/sqlite/qt_attribution.json +++ b/src/3rdparty/sqlite/qt_attribution.json @@ -6,8 +6,8 @@ "Description": "SQLite is a small C library that implements a self-contained, embeddable, zero-configuration SQL database engine.", "Homepage": "https://www.sqlite.org/", - "Version": "3.35.2", - "DownloadLocation": "https://www.sqlite.org/2020/sqlite-amalgamation-3350200.zip", + "Version": "3.35.5", + "DownloadLocation": "https://www.sqlite.org/2021/sqlite-amalgamation-3350500.zip", "License": "Public Domain", "Copyright": "The authors disclaim copyright to the source code. However, a license can be obtained if needed." 
} diff --git a/src/3rdparty/sqlite/sqlite3.c b/src/3rdparty/sqlite/sqlite3.c index 54f8277a71..df53e437ba 100644 --- a/src/3rdparty/sqlite/sqlite3.c +++ b/src/3rdparty/sqlite/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.35.2. By combining all the individual C code files into this +** version 3.35.5. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -1186,9 +1186,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.35.2" -#define SQLITE_VERSION_NUMBER 3035002 -#define SQLITE_SOURCE_ID "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827" +#define SQLITE_VERSION "3.35.5" +#define SQLITE_VERSION_NUMBER 3035005 +#define SQLITE_SOURCE_ID "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -19764,6 +19764,7 @@ SQLITE_PRIVATE Expr *sqlite3ExprFunction(Parse*,ExprList*, Token*, int); SQLITE_PRIVATE void sqlite3ExprFunctionUsable(Parse*,Expr*,FuncDef*); SQLITE_PRIVATE void sqlite3ExprAssignVarNumber(Parse*, Expr*, u32); SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3*, Expr*); +SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse*, Expr*); SQLITE_PRIVATE void sqlite3ExprUnmapAndDelete(Parse*, Expr*); SQLITE_PRIVATE ExprList *sqlite3ExprListAppend(Parse*,ExprList*,Expr*); SQLITE_PRIVATE ExprList *sqlite3ExprListAppendVector(Parse*,ExprList*,IdList*,Expr*); @@ -21071,6 +21072,7 @@ struct VdbeCursor { Bool isEphemeral:1; /* True for an ephemeral table */ Bool useRandomRowid:1; /* 
Generate new record numbers semi-randomly */ Bool isOrdered:1; /* True if the table is not BTREE_UNORDERED */ + Bool hasBeenDuped:1; /* This cursor was source or target of OP_OpenDup */ u16 seekHit; /* See the OP_SeekHit and OP_IfNoHope opcodes */ Btree *pBtx; /* Separate file holding temporary table */ i64 seqCount; /* Sequence counter */ @@ -40220,7 +40222,8 @@ static int unixBackupDir(const char *z, int *pJ){ int j = *pJ; int i; if( j<=0 ) return 0; - for(i=j-1; ALWAYS(i>0) && z[i-1]!='/'; i--){} + for(i=j-1; i>0 && z[i-1]!='/'; i--){} + if( i==0 ) return 0; if( z[i]=='.' && i==j-2 && z[i+1]=='.' ) return 0; *pJ = i-1; return 1; @@ -64277,7 +64280,7 @@ struct Btree { u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ int nBackup; /* Number of backup operations reading this btree */ - u32 iDataVersion; /* Combines with pBt->pPager->iDataVersion */ + u32 iBDataVersion; /* Combines with pBt->pPager->iDataVersion */ Btree *pNext; /* List of other sharable Btrees from the same db */ Btree *pPrev; /* Back pointer of the same list */ #ifdef SQLITE_DEBUG @@ -67690,19 +67693,23 @@ static void freeTempSpace(BtShared *pBt){ */ SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){ BtShared *pBt = p->pBt; - BtCursor *pCur; /* Close all cursors opened via this handle. */ assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); - pCur = pBt->pCursor; - while( pCur ){ - BtCursor *pTmp = pCur; - pCur = pCur->pNext; - if( pTmp->pBtree==p ){ - sqlite3BtreeCloseCursor(pTmp); + + /* Verify that no other cursors have this Btree open */ +#ifdef SQLITE_DEBUG + { + BtCursor *pCur = pBt->pCursor; + while( pCur ){ + BtCursor *pTmp = pCur; + pCur = pCur->pNext; + assert( pTmp->pBtree!=p ); + } } +#endif /* Rollback any active transaction and free the handle structure. 
** The call to sqlite3BtreeRollback() drops any table-locks held by @@ -69084,7 +69091,7 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ sqlite3BtreeLeave(p); return rc; } - p->iDataVersion--; /* Compensate for pPager->iDataVersion++; */ + p->iBDataVersion--; /* Compensate for pPager->iDataVersion++; */ pBt->inTransaction = TRANS_READ; btreeClearHasContent(pBt); } @@ -69494,7 +69501,14 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ unlockBtreeIfUnused(pBt); sqlite3_free(pCur->aOverflow); sqlite3_free(pCur->pKey); - sqlite3BtreeLeave(pBtree); + if( (pBt->openFlags & BTREE_SINGLE) && pBt->pCursor==0 ){ + /* Since the BtShared is not sharable, there is no need to + ** worry about the missing sqlite3BtreeLeave() call here. */ + assert( pBtree->sharable==0 ); + sqlite3BtreeClose(pBtree); + }else{ + sqlite3BtreeLeave(pBtree); + } pCur->pBtree = 0; } return SQLITE_OK; @@ -74607,7 +74621,7 @@ SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ assert( idx>=0 && idx<=15 ); if( idx==BTREE_DATA_VERSION ){ - *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion; + *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iBDataVersion; }else{ *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); } @@ -80835,20 +80849,15 @@ SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ return; } assert( pCx->pBtx==0 || pCx->eCurType==CURTYPE_BTREE ); + assert( pCx->pBtx==0 || pCx->isEphemeral ); switch( pCx->eCurType ){ case CURTYPE_SORTER: { sqlite3VdbeSorterClose(p->db, pCx); break; } case CURTYPE_BTREE: { - if( pCx->isEphemeral ){ - if( pCx->pBtx ) sqlite3BtreeClose(pCx->pBtx); - /* The pCx->pCursor will be close automatically, if it exists, by - ** the call above. 
*/ - }else{ - assert( pCx->uc.pCursor!=0 ); - sqlite3BtreeCloseCursor(pCx->uc.pCursor); - } + assert( pCx->uc.pCursor!=0 ); + sqlite3BtreeCloseCursor(pCx->uc.pCursor); break; } #ifndef SQLITE_OMIT_VIRTUALTABLE @@ -81932,6 +81941,7 @@ SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor **pp, u32 *piCol){ assert( p->eCurType==CURTYPE_BTREE || p->eCurType==CURTYPE_PSEUDO ); if( p->deferredMoveto ){ u32 iMap; + assert( !p->isEphemeral ); if( p->aAltMap && (iMap = p->aAltMap[1+*piCol])>0 && !p->nullRow ){ *pp = p->pAltCursor; *piCol = iMap - 1; @@ -86140,11 +86150,6 @@ static VdbeCursor *allocateCursor( assert( iCur>=0 && iCur<p->nCursor ); if( p->apCsr[iCur] ){ /*OPTIMIZATION-IF-FALSE*/ - /* Before calling sqlite3VdbeFreeCursor(), ensure the isEphemeral flag - ** is clear. Otherwise, if this is an ephemeral cursor created by - ** OP_OpenDup, the cursor will not be closed and will still be part - ** of a BtShared.pCursor list. */ - if( p->apCsr[iCur]->pBtx==0 ) p->apCsr[iCur]->isEphemeral = 0; sqlite3VdbeFreeCursor(p, p->apCsr[iCur]); p->apCsr[iCur] = 0; } @@ -89830,7 +89835,7 @@ case OP_OpenDup: { pOrig = p->apCsr[pOp->p2]; assert( pOrig ); - assert( pOrig->pBtx!=0 ); /* Only ephemeral cursors can be duplicated */ + assert( pOrig->isEphemeral ); /* Only ephemeral cursors can be duplicated */ pCx = allocateCursor(p, pOp->p1, pOrig->nField, -1, CURTYPE_BTREE); if( pCx==0 ) goto no_mem; @@ -89840,7 +89845,10 @@ case OP_OpenDup: { pCx->isTable = pOrig->isTable; pCx->pgnoRoot = pOrig->pgnoRoot; pCx->isOrdered = pOrig->isOrdered; - rc = sqlite3BtreeCursor(pOrig->pBtx, pCx->pgnoRoot, BTREE_WRCSR, + pCx->pBtx = pOrig->pBtx; + pCx->hasBeenDuped = 1; + pOrig->hasBeenDuped = 1; + rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR, pCx->pKeyInfo, pCx->uc.pCursor); /* The sqlite3BtreeCursor() routine can only fail for the first cursor ** opened for a database. 
Since there is already an open cursor when this @@ -89906,9 +89914,10 @@ case OP_OpenEphemeral: { aMem[pOp->p3].z = ""; } pCx = p->apCsr[pOp->p1]; - if( pCx && ALWAYS(pCx->pBtx) ){ - /* If the ephermeral table is already open, erase all existing content - ** so that the table is empty again, rather than creating a new table. */ + if( pCx && !pCx->hasBeenDuped ){ + /* If the ephermeral table is already open and has no duplicates from + ** OP_OpenDup, then erase all existing content so that the table is + ** empty again, rather than creating a new table. */ assert( pCx->isEphemeral ); pCx->seqCount = 0; pCx->cacheStatus = CACHE_STALE; @@ -89922,33 +89931,36 @@ case OP_OpenEphemeral: { vfsFlags); if( rc==SQLITE_OK ){ rc = sqlite3BtreeBeginTrans(pCx->pBtx, 1, 0); - } - if( rc==SQLITE_OK ){ - /* If a transient index is required, create it by calling - ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before - ** opening it. If a transient table is required, just use the - ** automatically created table with root-page 1 (an BLOB_INTKEY table). - */ - if( (pCx->pKeyInfo = pKeyInfo = pOp->p4.pKeyInfo)!=0 ){ - assert( pOp->p4type==P4_KEYINFO ); - rc = sqlite3BtreeCreateTable(pCx->pBtx, &pCx->pgnoRoot, - BTREE_BLOBKEY | pOp->p5); - if( rc==SQLITE_OK ){ - assert( pCx->pgnoRoot==SCHEMA_ROOT+1 ); - assert( pKeyInfo->db==db ); - assert( pKeyInfo->enc==ENC(db) ); - rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR, - pKeyInfo, pCx->uc.pCursor); + if( rc==SQLITE_OK ){ + /* If a transient index is required, create it by calling + ** sqlite3BtreeCreateTable() with the BTREE_BLOBKEY flag before + ** opening it. If a transient table is required, just use the + ** automatically created table with root-page 1 (an BLOB_INTKEY table). 
+ */ + if( (pCx->pKeyInfo = pKeyInfo = pOp->p4.pKeyInfo)!=0 ){ + assert( pOp->p4type==P4_KEYINFO ); + rc = sqlite3BtreeCreateTable(pCx->pBtx, &pCx->pgnoRoot, + BTREE_BLOBKEY | pOp->p5); + if( rc==SQLITE_OK ){ + assert( pCx->pgnoRoot==SCHEMA_ROOT+1 ); + assert( pKeyInfo->db==db ); + assert( pKeyInfo->enc==ENC(db) ); + rc = sqlite3BtreeCursor(pCx->pBtx, pCx->pgnoRoot, BTREE_WRCSR, + pKeyInfo, pCx->uc.pCursor); + } + pCx->isTable = 0; + }else{ + pCx->pgnoRoot = SCHEMA_ROOT; + rc = sqlite3BtreeCursor(pCx->pBtx, SCHEMA_ROOT, BTREE_WRCSR, + 0, pCx->uc.pCursor); + pCx->isTable = 1; } - pCx->isTable = 0; - }else{ - pCx->pgnoRoot = SCHEMA_ROOT; - rc = sqlite3BtreeCursor(pCx->pBtx, SCHEMA_ROOT, BTREE_WRCSR, - 0, pCx->uc.pCursor); - pCx->isTable = 1; + } + pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); + if( rc ){ + sqlite3BtreeClose(pCx->pBtx); } } - pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); } if( rc ) goto abort_due_to_error; pCx->nullRow = 1; @@ -90382,13 +90394,13 @@ seek_not_found: ** ** There are three possible outcomes from this opcode:<ol> ** -** <li> If after This.P1 steps, the cursor is still point to a place that -** is earlier in the btree than the target row, -** then fall through into the subsquence OP_SeekGE opcode. +** <li> If after This.P1 steps, the cursor is still pointing to a place that +** is earlier in the btree than the target row, then fall through +** into the subsquence OP_SeekGE opcode. ** ** <li> If the cursor is successfully moved to the target row by 0 or more ** sqlite3BtreeNext() calls, then jump to This.P2, which will land just -** past the OP_IdxGT opcode that follows the OP_SeekGE. +** past the OP_IdxGT or OP_IdxGE opcode that follows the OP_SeekGE. ** ** <li> If the cursor ends up past the target row (indicating the the target ** row does not exist in the btree) then jump to SeekOP.P2. @@ -90405,7 +90417,8 @@ case OP_SeekScan: { /* pOp->p2 points to the first instruction past the OP_IdxGT that ** follows the OP_SeekGE. 
*/ assert( pOp->p2>=(int)(pOp-aOp)+2 ); - assert( aOp[pOp->p2-1].opcode==OP_IdxGT ); + assert( aOp[pOp->p2-1].opcode==OP_IdxGT || aOp[pOp->p2-1].opcode==OP_IdxGE ); + testcase( aOp[pOp->p2-1].opcode==OP_IdxGE ); assert( pOp[1].p1==aOp[pOp->p2-1].p1 ); assert( pOp[1].p2==aOp[pOp->p2-1].p2 ); assert( pOp[1].p3==aOp[pOp->p2-1].p3 ); @@ -91940,6 +91953,8 @@ case OP_IdxRowid: { /* out2 */ pTabCur->deferredMoveto = 1; assert( pOp->p4type==P4_INTARRAY || pOp->p4.ai==0 ); pTabCur->aAltMap = pOp->p4.ai; + assert( !pC->isEphemeral ); + assert( !pTabCur->isEphemeral ); pTabCur->pAltCursor = pC; }else{ pOut = out2Prerelease(p, pOp); @@ -98988,15 +99003,19 @@ static int lookupName( if( pParse->pTriggerTab!=0 ){ int op = pParse->eTriggerOp; assert( op==TK_DELETE || op==TK_UPDATE || op==TK_INSERT ); - if( op!=TK_DELETE && zTab && sqlite3StrICmp("new",zTab) == 0 ){ + if( pParse->bReturning ){ + if( (pNC->ncFlags & NC_UBaseReg)!=0 + && (zTab==0 || sqlite3StrICmp(zTab,pParse->pTriggerTab->zName)==0) + ){ + pExpr->iTable = op!=TK_DELETE; + pTab = pParse->pTriggerTab; + } + }else if( op!=TK_DELETE && zTab && sqlite3StrICmp("new",zTab) == 0 ){ pExpr->iTable = 1; pTab = pParse->pTriggerTab; }else if( op!=TK_INSERT && zTab && sqlite3StrICmp("old",zTab)==0 ){ pExpr->iTable = 0; pTab = pParse->pTriggerTab; - }else if( pParse->bReturning && (pNC->ncFlags & NC_UBaseReg)!=0 ){ - pExpr->iTable = op!=TK_DELETE; - pTab = pParse->pTriggerTab; } } #endif /* SQLITE_OMIT_TRIGGER */ @@ -101591,8 +101610,8 @@ SQLITE_PRIVATE Expr *sqlite3ExprAnd(Parse *pParse, Expr *pLeft, Expr *pRight){ }else if( (ExprAlwaysFalse(pLeft) || ExprAlwaysFalse(pRight)) && !IN_RENAME_OBJECT ){ - sqlite3ExprDelete(db, pLeft); - sqlite3ExprDelete(db, pRight); + sqlite3ExprDeferredDelete(pParse, pLeft); + sqlite3ExprDeferredDelete(pParse, pRight); return sqlite3Expr(db, TK_INTEGER, "0"); }else{ return sqlite3PExpr(pParse, TK_AND, pLeft, pRight); @@ -101789,6 +101808,22 @@ SQLITE_PRIVATE void sqlite3ExprDelete(sqlite3 *db, Expr 
*p){ if( p ) sqlite3ExprDeleteNN(db, p); } + +/* +** Arrange to cause pExpr to be deleted when the pParse is deleted. +** This is similar to sqlite3ExprDelete() except that the delete is +** deferred untilthe pParse is deleted. +** +** The pExpr might be deleted immediately on an OOM error. +** +** The deferred delete is (currently) implemented by adding the +** pExpr to the pParse->pConstExpr list with a register number of 0. +*/ +SQLITE_PRIVATE void sqlite3ExprDeferredDelete(Parse *pParse, Expr *pExpr){ + pParse->pConstExpr = + sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr); +} + /* Invoke sqlite3RenameExprUnmap() and sqlite3ExprDelete() on the ** expression. */ @@ -106414,8 +106449,7 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ pExpr = sqlite3ExprDup(db, pExpr, 0); if( pExpr ){ pAggInfo->aCol[iAgg].pCExpr = pExpr; - pParse->pConstExpr = - sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr); + sqlite3ExprDeferredDelete(pParse, pExpr); } } }else{ @@ -106424,8 +106458,7 @@ static int agginfoPersistExprCb(Walker *pWalker, Expr *pExpr){ pExpr = sqlite3ExprDup(db, pExpr, 0); if( pExpr ){ pAggInfo->aFunc[iAgg].pFExpr = pExpr; - pParse->pConstExpr = - sqlite3ExprListAppend(pParse, pParse->pConstExpr, pExpr); + sqlite3ExprDeferredDelete(pParse, pExpr); } } } @@ -108668,33 +108701,44 @@ SQLITE_PRIVATE void sqlite3AlterDropColumn(Parse *pParse, SrcList *pSrc, Token * sqlite3OpenTable(pParse, iCur, iDb, pTab, OP_OpenWrite); addr = sqlite3VdbeAddOp1(v, OP_Rewind, iCur); VdbeCoverage(v); reg = ++pParse->nMem; - pParse->nMem += pTab->nCol; if( HasRowid(pTab) ){ sqlite3VdbeAddOp2(v, OP_Rowid, iCur, reg); + pParse->nMem += pTab->nCol; }else{ pPk = sqlite3PrimaryKeyIndex(pTab); + pParse->nMem += pPk->nColumn; + for(i=0; i<pPk->nKeyCol; i++){ + sqlite3VdbeAddOp3(v, OP_Column, iCur, i, reg+i+1); + } + nField = pPk->nKeyCol; } + regRec = ++pParse->nMem; for(i=0; i<pTab->nCol; i++){ if( i!=iCol && (pTab->aCol[i].colFlags & COLFLAG_VIRTUAL)==0 ){ 
int regOut; if( pPk ){ int iPos = sqlite3TableColumnToIndex(pPk, i); int iColPos = sqlite3TableColumnToIndex(pPk, iCol); + if( iPos<pPk->nKeyCol ) continue; regOut = reg+1+iPos-(iPos>iColPos); }else{ regOut = reg+1+nField; } - sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, i, regOut); + if( i==pTab->iPKey ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regOut); + }else{ + sqlite3ExprCodeGetColumnOfTable(v, pTab, iCur, i, regOut); + } nField++; } } - regRec = reg + pTab->nCol; sqlite3VdbeAddOp3(v, OP_MakeRecord, reg+1, nField, regRec); if( pPk ){ sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iCur, regRec, reg+1, pPk->nKeyCol); }else{ sqlite3VdbeAddOp3(v, OP_Insert, iCur, regRec, reg); } + sqlite3VdbeChangeP5(v, OPFLAG_SAVEPOSITION); sqlite3VdbeAddOp2(v, OP_Next, iCur, addr+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, addr); @@ -115684,7 +115728,11 @@ SQLITE_PRIVATE void sqlite3CreateIndex( /* Clean up before exiting */ exit_create_index: if( pIndex ) sqlite3FreeIndex(db, pIndex); - if( pTab ){ /* Ensure all REPLACE indexes are at the end of the list */ + if( pTab ){ + /* Ensure all REPLACE indexes on pTab are at the end of the pIndex list. + ** The list was already ordered when this routine was entered, so at this + ** point at most a single index (the newly added index) will be out of + ** order. So we have to reorder at most one index. */ Index **ppFrom = &pTab->pIndex; Index *pThis; for(ppFrom=&pTab->pIndex; (pThis = *ppFrom)!=0; ppFrom=&pThis->pNext){ @@ -115698,6 +115746,16 @@ exit_create_index: } break; } +#ifdef SQLITE_DEBUG + /* Verify that all REPLACE indexes really are now at the end + ** of the index list. In other words, no other index type ever + ** comes after a REPLACE index on the list. 
*/ + for(pThis = pTab->pIndex; pThis; pThis=pThis->pNext){ + assert( pThis->onError!=OE_Replace + || pThis->pNext==0 + || pThis->pNext->onError==OE_Replace ); + } +#endif } sqlite3ExprDelete(db, pPIWhere); sqlite3ExprListDelete(db, pList); @@ -123114,7 +123172,9 @@ SQLITE_PRIVATE void sqlite3Insert( pNx->iDataCur = iDataCur; pNx->iIdxCur = iIdxCur; if( pNx->pUpsertTarget ){ - sqlite3UpsertAnalyzeTarget(pParse, pTabList, pNx); + if( sqlite3UpsertAnalyzeTarget(pParse, pTabList, pNx) ){ + goto insert_cleanup; + } } pNx = pNx->pNextUpsert; }while( pNx!=0 ); @@ -124536,7 +124596,7 @@ static void codeWithoutRowidPreupdate( Vdbe *v = pParse->pVdbe; int r = sqlite3GetTempReg(pParse); assert( !HasRowid(pTab) ); - assert( 0==(pParse->db->mDbFlags & DBFLAG_Vacuum) ); + assert( 0==(pParse->db->mDbFlags & DBFLAG_Vacuum) || CORRUPT_DB ); sqlite3VdbeAddOp2(v, OP_Integer, 0, r); sqlite3VdbeAddOp4(v, OP_Insert, iCur, regData, r, (char*)pTab, P4_TABLE); sqlite3VdbeChangeP5(v, OPFLAG_ISNOOP); @@ -133084,7 +133144,7 @@ SQLITE_PRIVATE int sqlite3ColumnsFromExprList( nCol = pEList->nExpr; aCol = sqlite3DbMallocZero(db, sizeof(aCol[0])*nCol); testcase( aCol==0 ); - if( nCol>32767 ) nCol = 32767; + if( NEVER(nCol>32767) ) nCol = 32767; }else{ nCol = 0; aCol = 0; @@ -136158,16 +136218,24 @@ static int resolveFromTermToCte( pSavedWith = pParse->pWith; pParse->pWith = pWith; if( pSel->selFlags & SF_Recursive ){ + int rc; assert( pRecTerm!=0 ); assert( (pRecTerm->selFlags & SF_Recursive)==0 ); assert( pRecTerm->pNext!=0 ); assert( (pRecTerm->pNext->selFlags & SF_Recursive)!=0 ); assert( pRecTerm->pWith==0 ); pRecTerm->pWith = pSel->pWith; - sqlite3WalkSelect(pWalker, pRecTerm); + rc = sqlite3WalkSelect(pWalker, pRecTerm); pRecTerm->pWith = 0; + if( rc ){ + pParse->pWith = pSavedWith; + return 2; + } }else{ - sqlite3WalkSelect(pWalker, pSel); + if( sqlite3WalkSelect(pWalker, pSel) ){ + pParse->pWith = pSavedWith; + return 2; + } } pParse->pWith = pWith; @@ -137476,7 +137544,9 @@ SQLITE_PRIVATE 
int sqlite3Select( sqlite3VdbeAddOp2(v, OP_OpenDup, pItem->iCursor, pPrior->iCursor); pSub->nSelectRow = pPrior->pSelect->nSelectRow; }else{ - /* Generate a subroutine that will materialize the view. */ + /* Materalize the view. If the view is not correlated, generate a + ** subroutine to do the materialization so that subsequent uses of + ** the same view can reuse the materialization. */ int topAddr; int onceAddr = 0; int retAddr; @@ -137503,7 +137573,7 @@ SQLITE_PRIVATE int sqlite3Select( VdbeComment((v, "end %s", pItem->pTab->zName)); sqlite3VdbeChangeP1(v, topAddr, retAddr); sqlite3ClearTempRegCache(pParse); - if( pItem->fg.isCte ){ + if( pItem->fg.isCte && pItem->fg.isCorrelated==0 ){ CteUse *pCteUse = pItem->u2.pCteUse; pCteUse->addrM9e = pItem->addrFillSub; pCteUse->regRtn = pItem->regReturn; @@ -139275,6 +139345,25 @@ SQLITE_PRIVATE SrcList *sqlite3TriggerStepSrc( return pSrc; } +/* +** Return true if the pExpr term from the RETURNING clause argument +** list is of the form "*". Raise an error if the terms if of the +** form "table.*". +*/ +static int isAsteriskTerm( + Parse *pParse, /* Parsing context */ + Expr *pTerm /* A term in the RETURNING clause */ +){ + assert( pTerm!=0 ); + if( pTerm->op==TK_ASTERISK ) return 1; + if( pTerm->op!=TK_DOT ) return 0; + assert( pTerm->pRight!=0 ); + assert( pTerm->pLeft!=0 ); + if( pTerm->pRight->op!=TK_ASTERISK ) return 0; + sqlite3ErrorMsg(pParse, "RETURNING may not use \"TABLE.*\" wildcards"); + return 1; +} + /* The input list pList is the list of result set terms from a RETURNING ** clause. The table that we are returning from is pTab. 
** @@ -139292,7 +139381,8 @@ static ExprList *sqlite3ExpandReturning( for(i=0; i<pList->nExpr; i++){ Expr *pOldExpr = pList->a[i].pExpr; - if( ALWAYS(pOldExpr!=0) && pOldExpr->op==TK_ASTERISK ){ + if( NEVER(pOldExpr==0) ) continue; + if( isAsteriskTerm(pParse, pOldExpr) ){ int jj; for(jj=0; jj<pTab->nCol; jj++){ Expr *pNewExpr; @@ -146825,6 +146915,7 @@ static void whereCombineDisjuncts( int op; /* Operator for the combined expression */ int idxNew; /* Index in pWC of the next virtual term */ + if( (pOne->wtFlags | pTwo->wtFlags) & TERM_VNULL ) return; if( (pOne->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; if( (pTwo->eOperator & (WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE))==0 ) return; if( (eOp & (WO_EQ|WO_LT|WO_LE))!=eOp @@ -147530,6 +147621,7 @@ static void exprAnalyzeExists( #endif if( pSel->pPrior ) return; if( pSel->pWhere==0 ) return; + if( pSel->pLimit ) return; if( 0==exprAnalyzeExistsFindEq(pSel, 0, 0) ) return; pDup = sqlite3ExprDup(db, pExpr, 0); @@ -155314,6 +155406,7 @@ static void windowCheckValue(Parse *pParse, int reg, int eCond){ VdbeCoverageIf(v, eCond==2); } sqlite3VdbeAddOp3(v, aOp[eCond], regZero, sqlite3VdbeCurrentAddr(v)+2, reg); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC); VdbeCoverageNeverNullIf(v, eCond==0); /* NULL case captured by */ VdbeCoverageNeverNullIf(v, eCond==1); /* the OP_MustBeInt */ VdbeCoverageNeverNullIf(v, eCond==2); @@ -162308,7 +162401,7 @@ static const unsigned char aiClass[] = { #ifdef SQLITE_EBCDIC /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ /* 0x */ 29, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 7, 7, 28, 28, -/* 1x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +/* 1x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, /* 2x */ 28, 28, 28, 28, 28, 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, /* 3x */ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, /* 4x */ 7, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 26, 12, 17, 20, 10, @@ -229213,7 +229306,7 @@ static void 
fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886", -1, SQLITE_TRANSIENT); } /* @@ -234139,9 +234232,9 @@ SQLITE_API int sqlite3_stmt_init( #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_STMTVTAB) */ /************** End of stmt.c ************************************************/ -#if __LINE__!=234142 +#if __LINE__!=234235 #undef SQLITE_SOURCE_ID -#define SQLITE_SOURCE_ID "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50calt2" +#define SQLITE_SOURCE_ID "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98faalt2" #endif /* Return the source-id for this library */ SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } diff --git a/src/3rdparty/sqlite/sqlite3.h b/src/3rdparty/sqlite/sqlite3.h index f636b294d2..19ee767fe8 100644 --- a/src/3rdparty/sqlite/sqlite3.h +++ b/src/3rdparty/sqlite/sqlite3.h @@ -123,9 +123,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.35.2" -#define SQLITE_VERSION_NUMBER 3035002 -#define SQLITE_SOURCE_ID "2021-03-17 19:07:21 ea80f3002f4120f5dcee76e8779dfdc88e1e096c5cdd06904c20fd26d50c3827" +#define SQLITE_VERSION "3.35.5" +#define SQLITE_VERSION_NUMBER 3035005 +#define SQLITE_SOURCE_ID "2021-04-19 18:32:05 1b256d97b553a9611efca188a3d995a2fff712759044ba480f9a0c9e98fae886" /* ** CAPI3REF: Run-Time Library Version Numbers |