diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2018-05-23 00:54:19 -0300 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2018-07-09 00:18:24 +0000 |
commit | c3a4ec5d0bbd5f2710f4fd1d3bd4a2d7f0f507ad (patch) | |
tree | f18c0e54743eb213df0e5374bb5fdf4281aa5c4e | |
parent | 746f15d0c213fef0e46207682815bd839a36ecc6 (diff) |
SIMD: Add a haswell sub-architecture selection to our support
As the comment says, Haswell is a nice divider and is a good
optimization target.
I'm using -march=core-avx2 instead of -march=haswell because the latter
spelling was only added in GCC 4.9, while we still support GCC 4.7, which
already has support for AVX2.
This commit changes the AVX2-optimized code in QtGui to Haswell-
optimized instead. That means, for example, that qdrawhelper_avx2.cpp
can now use the FMA instructions.
Change-Id: If025d476890745368955fffd153129c1716ba006
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- | mkspecs/common/gcc-base.conf | 3 | ||||
-rw-r--r-- | mkspecs/features/simd.prf | 24 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 43 | ||||
-rw-r--r-- | src/gui/painting/painting.pri | 2 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 2 |
5 files changed, 70 insertions, 4 deletions
diff --git a/mkspecs/common/gcc-base.conf b/mkspecs/common/gcc-base.conf
index e51b173276..c2669e4833 100644
--- a/mkspecs/common/gcc-base.conf
+++ b/mkspecs/common/gcc-base.conf
@@ -108,6 +108,9 @@ QMAKE_CFLAGS_NEON += -mfpu=neon
 QMAKE_CFLAGS_MIPS_DSP += -mdsp
 QMAKE_CFLAGS_MIPS_DSPR2 += -mdspr2
 
+# -march=haswell is supported as of GCC 4.9 and Clang 3.6
+QMAKE_CFLAGS_ARCH_HASWELL = -march=core-avx2
+
 # Wrapper tools that understand .o/.a files with GIMPLE instead of machine code
 QMAKE_AR_LTCG = gcc-ar cqs
 QMAKE_NM_LTCG = gcc-nm -P
diff --git a/mkspecs/features/simd.prf b/mkspecs/features/simd.prf
index 65ba4b0d08..a0b40fcf11 100644
--- a/mkspecs/features/simd.prf
+++ b/mkspecs/features/simd.prf
@@ -141,6 +141,28 @@ addSimdCompiler(neon)
 addSimdCompiler(mips_dsp)
 addSimdCompiler(mips_dspr2)
 
+# Haswell sub-architecture
+defineTest(addSimdArch) {
+    name = arch_$$1
+    dependencies = $$2
+    upname = $$upper($$name)
+
+    cpu_features_missing =
+    for(part, dependencies) {
+        !contains(QT_CPU_FEATURES, $$part): cpu_features_missing = 1
+    }
+
+    CONFIG += $$name
+    isEmpty(cpu_features_missing): QT_CPU_FEATURES += $$name
+
+    export(QT_CPU_FEATURES)
+    export(CONFIG)
+    addSimdCompiler($$name)
+}
+
+isEmpty(QMAKE_CFLAGS_ARCH_HASWELL): QMAKE_CFLAGS_ARCH_HASWELL = $$QMAKE_CFLAGS_AVX2
+avx2: addSimdArch(haswell, avx2 bmi bmi2 f16c fma lzcnt popcnt)
+
 # Follow the Intel compiler's lead and define profiles of AVX512 instructions
 defineTest(addAvx512Profile) {
     name = $$1
@@ -149,7 +171,7 @@ defineTest(addAvx512Profile) {
     varname = QMAKE_CFLAGS_$$upname
 
     cpu_features_missing =
-    cflags = $$QMAKE_CFLAGS_AVX512F
+    cflags = $$QMAKE_CFLAGS_ARCH_HASWELL $$QMAKE_CFLAGS_AVX512F
     for(part, dependencies) {
         !CONFIG($$part): return() # Profile isn't supported by the compiler
 
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index af262ec88f..9f1321df94 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -1,7 +1,7 @@
 /****************************************************************************
 **
 ** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
 ** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -232,8 +232,49 @@
 # define __RDRND__ 1
 # endif
 
+# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
+// BMI2 instructions:
+// All processors that support BMI support BMI2 (and AVX2)
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __BMI2__ 1
+# endif
+
 # include "qsimd_x86_p.h"
 
+// Haswell sub-architecture
+//
+// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
+// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
+// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
+// same features.
+//
+// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
+// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
+# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
+# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
+    defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
+# define __haswell__ 1
+# endif
+
+// This constant does not include all CPU features found in a Haswell, only
+// those that we'd have optimized code for.
+// Note: must use Q_CONSTEXPR here, as this file may be compiled in C mode.
+QT_BEGIN_NAMESPACE
+static const quint64 CpuFeatureArchHaswell = 0
+        | CpuFeatureSSE2
+        | CpuFeatureSSE3
+        | CpuFeatureSSSE3
+        | CpuFeatureSSE4_1
+        | CpuFeatureSSE4_2
+        | CpuFeatureFMA
+        | CpuFeaturePOPCNT
+        | CpuFeatureAVX
+        | CpuFeatureF16C
+        | CpuFeatureAVX2
+        | CpuFeatureBMI
+        | CpuFeatureBMI2;
+QT_END_NAMESPACE
+
 #endif /* Q_PROCESSOR_X86 */
 
 // Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index 749cc221ff..c3585a4647 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -127,7 +127,7 @@ SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
 SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp
 SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp \
     painting/qimagescale_sse4.cpp
-AVX2_SOURCES += painting/qdrawhelper_avx2.cpp
+ARCH_HASWELL_SOURCES += painting/qdrawhelper_avx2.cpp
 NEON_SOURCES += painting/qdrawhelper_neon.cpp painting/qimagescale_neon.cpp
 NEON_HEADERS += painting/qdrawhelper_neon_p.h
 
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 5892d59a64..235bba7206 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -5772,7 +5772,7 @@ static void qInitDrawhelperFunctions()
 #endif
 
 #if defined(QT_COMPILER_SUPPORTS_AVX2)
-    if (qCpuHasFeature(AVX2)) {
+    if (qCpuHasFeature(ArchHaswell)) {
         extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
                                                  const uchar *srcPixels, int sbpl,
                                                  int w, int h, int const_alpha);