diff options
Diffstat (limited to 'src/corelib/tools/qsimd_p.h')
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 87 |
1 files changed, 69 insertions, 18 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index d689654b29..7002d34654 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -1,31 +1,38 @@ /**************************************************************************** ** -** Copyright (C) 2015 The Qt Company Ltd. -** Contact: http://www.qt.io/licensing/ +** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2016 Intel Corporation. +** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. ** -** $QT_BEGIN_LICENSE:LGPL21$ +** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms -** and conditions see http://www.qt.io/terms-conditions. For further -** information use the contact form at http://www.qt.io/contact-us. +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 2.1 or version 3 as published by the Free -** Software Foundation and appearing in the file LICENSE.LGPLv21 and -** LICENSE.LGPLv3 included in the packaging of this file. Please review the -** following information to ensure the GNU Lesser General Public License -** requirements will be met: https://www.gnu.org/licenses/lgpl.html and -** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. ** -** As a special exception, The Qt Company gives you certain additional -** rights. These rights are described in The Qt Company LGPL Exception -** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
** ** $QT_END_LICENSE$ ** @@ -73,6 +80,7 @@ * SSE4_2 | x86 | I & C | I & C | I only | * AVX | x86 | I & C | I & C | I & C | * AVX2 | x86 | I & C | I & C | I only | + * AVX512xx | x86 | I & C | I & C | I only | * I = intrinsics; C = code generation * * Code can use the following constructs to determine compiler support & status: @@ -137,7 +145,18 @@ #define QT_COMPILER_SUPPORTS(x) (QT_COMPILER_SUPPORTS_ ## x - 0) -#if (defined(Q_CC_INTEL) || defined(Q_CC_MSVC) \ +#if defined(Q_PROCESSOR_ARM) +# define QT_COMPILER_SUPPORTS_HERE(x) (__ARM_FEATURE_ ## x) +# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && Q_CC_GNU >= 600 + /* GCC requires attributes for a function */ +# define QT_FUNCTION_TARGET(x) __attribute__((__target__(QT_FUNCTION_TARGET_STRING_ ## x))) +# else +# define QT_FUNCTION_TARGET(x) +# endif +# if !defined(__ARM_FEATURE_NEON) && defined(__ARM_NEON__) +# define __ARM_FEATURE_NEON // also support QT_COMPILER_SUPPORTS_HERE(NEON) +# endif +#elif (defined(Q_CC_INTEL) || defined(Q_CC_MSVC) \ || (defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && (__GNUC__-0) * 100 + (__GNUC_MINOR__-0) >= 409)) \ && !defined(QT_BOOTSTRAPPED) # define QT_COMPILER_SUPPORTS_SIMD_ALWAYS @@ -156,7 +175,7 @@ // SSE intrinsics #define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" #if defined(__SSE2__) || (defined(QT_COMPILER_SUPPORTS_SSE2) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#if defined(QT_LINUXBASE) || defined(Q_OS_ANDROID_NO_SDK) +#if defined(QT_LINUXBASE) /// this is an evil hack - the posix_memalign declaration in LSB /// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431 # define posix_memalign _lsb_hack_posix_memalign @@ -248,16 +267,28 @@ # endif #endif +// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html +// This should be tweaked with an "upper version" of clang once we know which release fixes the +// issue. At that point we can rely on __ARM_FEATURE_CRC32 again. 
+#if defined(Q_CC_CLANG) && defined(Q_OS_DARWIN) && defined (__ARM_FEATURE_CRC32) +# undef __ARM_FEATURE_CRC32 +#endif + // NEON intrinsics // note: as of GCC 4.9, does not support function targets for ARM #if defined(__ARM_NEON) || defined(__ARM_NEON__) #include <arm_neon.h> -#define QT_FUNCTION_TARGET_STRING_ARM_NEON "neon" +#define QT_FUNCTION_TARGET_STRING_NEON "+neon" // unused: gcc doesn't support function targets on non-aarch64, and on Aarch64 NEON is always available. #ifndef __ARM_NEON__ // __ARM_NEON__ is not defined on AArch64, but we need it in our NEON detection. #define __ARM_NEON__ #endif #endif +// AArch64/ARM64 +#if defined(Q_PROCESSOR_ARM_V8) && defined(__ARM_FEATURE_CRC32) +#define QT_FUNCTION_TARGET_STRING_CRC32 "+crc" +# include <arm_acle.h> +#endif #undef QT_COMPILER_SUPPORTS_SIMD_ALWAYS @@ -268,6 +299,7 @@ enum CPUFeatures { #if defined(Q_PROCESSOR_ARM) CpuFeatureNEON = 0, CpuFeatureARM_NEON = CpuFeatureNEON, + CpuFeatureCRC32 = 1, #elif defined(Q_PROCESSOR_MIPS) CpuFeatureDSP = 0, CpuFeatureDSPR2 = 1, @@ -395,6 +427,9 @@ static const quint64 qCompilerCpuFeatures = 0 #if defined __ARM_NEON__ | (Q_UINT64_C(1) << CpuFeatureNEON) #endif +#if defined __ARM_FEATURE_CRC32 + | (Q_UINT64_C(1) << CpuFeatureCRC32) +#endif #if defined __mips_dsp | (Q_UINT64_C(1) << CpuFeatureDSP) #endif @@ -430,7 +465,23 @@ static inline quint64 qCpuFeatures() #define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \ || (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature))) -#ifdef Q_PROCESSOR_X86 +#if QT_HAS_BUILTIN(__builtin_clz) && QT_HAS_BUILTIN(__builtin_ctz) && defined(Q_CC_CLANG) && !defined(Q_CC_INTEL) +static Q_ALWAYS_INLINE unsigned _bit_scan_reverse(unsigned val) +{ + Q_ASSERT(val != 0); // if val==0, the result is undefined. 
+ unsigned result = static_cast<unsigned>(__builtin_clz(val)); // Count Leading Zeros + // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31 + // and the lsb index is 0. The result for _bit_scan_reverse is expected to be the index when + // counting up: msb index is 0 (because it starts there), and the lsb index is 31. + result ^= sizeof(unsigned) * 8 - 1; + return result; +} +static Q_ALWAYS_INLINE unsigned _bit_scan_forward(unsigned val) +{ + Q_ASSERT(val != 0); // if val==0, the result is undefined. + return static_cast<unsigned>(__builtin_ctz(val)); // Count Trailing Zeros +} +#elif defined(Q_PROCESSOR_X86) // Bit scan functions for x86 # if defined(Q_CC_MSVC) # if defined _WIN32_WCE && _WIN32_WCE < 0x800 |