From 9061b7bde0b2741492cc2e603c3086c5bd99980a Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 17 Nov 2017 15:06:03 -0800 Subject: qsimd_p.h: Reorganize and simplify the x86 intrinsics #includes All of our compilers support #include <immintrin.h>, so we don't need the legacy code that includes the earlier versions. Change-Id: I938b024e38bf4aac9154fffd14f80214d1d744c8 Reviewed-by: Allan Sandfeld Jensen --- src/corelib/tools/qsimd_p.h | 96 +++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 68 deletions(-) diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index fafc3e37b0..eb56b31348 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -177,55 +177,37 @@ # define QT_FUNCTION_TARGET(x) #endif -#if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__)) -// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros -// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx -// SSE2 is handled by _M_IX86_FP below -# define __SSE3__ 1 -# define __SSSE3__ 1 -// no Intel CPU supports SSE4a, so don't define it -# define __SSE4_1__ 1 -# define __SSE4_2__ 1 -# ifndef __AVX__ -# define __AVX__ 1 -# endif -#endif - -// SSE intrinsics -#if defined(__SSE2__) || (defined(QT_COMPILER_SUPPORTS_SSE2) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#if defined(QT_LINUXBASE) -/// this is an evil hack - the posix_memalign declaration in LSB -/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431 -# define posix_memalign _lsb_hack_posix_memalign -# include <emmintrin.h> -# undef posix_memalign -#else -# include <emmintrin.h> -#endif -#if defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP >= 2) -# define __SSE__ 1 -# define __SSE2__ 1 -#endif -#endif +#ifdef Q_PROCESSOR_X86 +/* -- x86 intrinsic support -- */ -// SSE3 intrinsics -#if defined(__SSE3__) || (defined(QT_COMPILER_SUPPORTS_SSE3) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#include <pmmintrin.h> -#endif +# if defined(Q_CC_MSVC) && (defined(_M_X64) || 
_M_IX86_FP >= 2) +// MSVC doesn't define __SSE2__, so do it ourselves +# define __SSE__ 1 +# define __SSE2__ 1 +# endif -// SSSE3 intrinsics -#if defined(__SSSE3__) || (defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#include <tmmintrin.h> -#endif +# ifdef __SSE2__ +// #include the intrinsics +# include <immintrin.h> +# endif -// SSE4.1 intrinsics -#if defined(__SSE4_1__) || (defined(QT_COMPILER_SUPPORTS_SSE4_1) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#include <smmintrin.h> -#endif +# if defined(Q_CC_GNU) && !defined(Q_CC_INTEL) +// GCC 4.4 and Clang 2.8 added a few more intrinsics there +# include <x86intrin.h> +# endif -// SSE4.2 intrinsics -#if defined(__SSE4_2__) || (defined(QT_COMPILER_SUPPORTS_SSE4_2) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -#include <nmmintrin.h> +# if defined(Q_CC_MSVC) && (defined(_M_AVX) || defined(__AVX__)) +// Visual Studio defines __AVX__ when /arch:AVX is passed, but not the earlier macros +// See: https://msdn.microsoft.com/en-us/library/b0084kay.aspx +# define __SSE3__ 1 +# define __SSSE3__ 1 +// no Intel CPU supports SSE4a, so don't define it +# define __SSE4_1__ 1 +# define __SSE4_2__ 1 +# ifndef __AVX__ +# define __AVX__ 1 +# endif +# endif # if defined(__SSE4_2__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) // POPCNT instructions: @@ -233,13 +215,8 @@ // (but neither MSVC nor the Intel compiler define this macro) # define __POPCNT__ 1 # endif -#endif // AVX intrinsics -#if defined(__AVX__) || (defined(QT_COMPILER_SUPPORTS_AVX) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -// immintrin.h is the ultimate header, we don't need anything else after this -#include <immintrin.h> - # if defined(__AVX__) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS) && (defined(Q_CC_INTEL) || defined(Q_CC_MSVC)) // AES, PCLMULQDQ instructions: // All processors that support AVX support AES, PCLMULQDQ @@ -255,11 +232,6 @@ # define __F16C__ 1 # define __RDRND__ 1 # endif -#endif - -#if defined(__AES__) || defined(__PCLMUL__) || 
(defined(QT_COMPILER_SUPPORTS_AES) && defined(QT_COMPILER_SUPPORTS_SIMD_ALWAYS)) -# include <wmmintrin.h> -#endif #define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" #define QT_FUNCTION_TARGET_STRING_SSE3 "sse3" @@ -288,19 +260,7 @@ #define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed" #define QT_FUNCTION_TARGET_STRING_SHA "sha" -// other x86 intrinsics -#if defined(Q_PROCESSOR_X86) && ((defined(Q_CC_GNU) && (Q_CC_GNU >= 404)) \ - || (defined(Q_CC_CLANG) && (Q_CC_CLANG >= 208)) \ - || defined(Q_CC_INTEL)) -# define QT_COMPILER_SUPPORTS_X86INTRIN -# ifdef Q_CC_INTEL -// The Intel compiler has no <x86intrin.h> -- all intrinsics are in <immintrin.h>; -# include <immintrin.h> -# else -// GCC 4.4 and Clang 2.8 added a few more intrinsics there -# include <x86intrin.h> -# endif -#endif +#endif /* Q_PROCESSOR_X86 */ // Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html // This should be tweaked with an "upper version" of clang once we know which release fixes the -- cgit v1.2.3