summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2018-05-23 00:54:19 -0300
committer Thiago Macieira <thiago.macieira@intel.com> 2018-07-09 00:18:24 +0000
commit c3a4ec5d0bbd5f2710f4fd1d3bd4a2d7f0f507ad (patch)
tree f18c0e54743eb213df0e5374bb5fdf4281aa5c4e
parent 746f15d0c213fef0e46207682815bd839a36ecc6 (diff)
SIMD: Add a haswell sub-architecture selection to our support
As the comment says, Haswell is a nice divider and is a good optimization target. I'm using -march=core-avx2 instead of -march=haswell because the latter form was only added in GCC 4.9, but we still support GCC 4.7, which already has AVX2 support. This commit changes the AVX2-optimized code in QtGui to be Haswell-optimized instead. That means, for example, that qdrawhelper_avx2.cpp can now use the FMA instructions. Change-Id: If025d476890745368955fffd153129c1716ba006 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- mkspecs/common/gcc-base.conf 3
-rw-r--r-- mkspecs/features/simd.prf 24
-rw-r--r-- src/corelib/tools/qsimd_p.h 43
-rw-r--r-- src/gui/painting/painting.pri 2
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 2
5 files changed, 70 insertions, 4 deletions
diff --git a/mkspecs/common/gcc-base.conf b/mkspecs/common/gcc-base.conf
index e51b173276..c2669e4833 100644
--- a/mkspecs/common/gcc-base.conf
+++ b/mkspecs/common/gcc-base.conf
@@ -108,6 +108,9 @@ QMAKE_CFLAGS_NEON += -mfpu=neon
QMAKE_CFLAGS_MIPS_DSP += -mdsp
QMAKE_CFLAGS_MIPS_DSPR2 += -mdspr2
+# -march=haswell is only supported as of GCC 4.9 and Clang 3.6, so use the older -march=core-avx2 spelling
+QMAKE_CFLAGS_ARCH_HASWELL = -march=core-avx2
+
# Wrapper tools that understand .o/.a files with GIMPLE instead of machine code
QMAKE_AR_LTCG = gcc-ar cqs
QMAKE_NM_LTCG = gcc-nm -P
diff --git a/mkspecs/features/simd.prf b/mkspecs/features/simd.prf
index 65ba4b0d08..a0b40fcf11 100644
--- a/mkspecs/features/simd.prf
+++ b/mkspecs/features/simd.prf
@@ -141,6 +141,28 @@ addSimdCompiler(neon)
addSimdCompiler(mips_dsp)
addSimdCompiler(mips_dspr2)
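+# Haswell sub-architecture: addSimdArch(name, deps) adds arch_<name> to CONFIG, and to QT_CPU_FEATURES only if every dep is already listed there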
+defineTest(addSimdArch) {
+ name = arch_$$1
+ dependencies = $$2
+ upname = $$upper($$name)
+
+ cpu_features_missing =
+ for(part, dependencies) {
+ !contains(QT_CPU_FEATURES, $$part): cpu_features_missing = 1
+ }
+
+ CONFIG += $$name
+ isEmpty(cpu_features_missing): QT_CPU_FEATURES += $$name
+
+ export(QT_CPU_FEATURES)
+ export(CONFIG)
+ addSimdCompiler($$name)
+}
+
+isEmpty(QMAKE_CFLAGS_ARCH_HASWELL): QMAKE_CFLAGS_ARCH_HASWELL = $$QMAKE_CFLAGS_AVX2
+avx2: addSimdArch(haswell, avx2 bmi bmi2 f16c fma lzcnt popcnt)
+
# Follow the Intel compiler's lead and define profiles of AVX512 instructions
defineTest(addAvx512Profile) {
name = $$1
@@ -149,7 +171,7 @@ defineTest(addAvx512Profile) {
varname = QMAKE_CFLAGS_$$upname
cpu_features_missing =
- cflags = $$QMAKE_CFLAGS_AVX512F
+ cflags = $$QMAKE_CFLAGS_ARCH_HASWELL $$QMAKE_CFLAGS_AVX512F
for(part, dependencies) {
!CONFIG($$part): return() # Profile isn't supported by the compiler
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index af262ec88f..9f1321df94 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -232,8 +232,49 @@
# define __RDRND__ 1
# endif
+# if defined(__BMI__) && !defined(__BMI2__) && defined(Q_CC_INTEL)
+// BMI2 instructions:
+// All processors that support BMI support BMI2 (and AVX2)
+// (but neither MSVC nor the Intel compiler define this macro)
+# define __BMI2__ 1
+# endif
+
# include "qsimd_x86_p.h"
+// Haswell sub-architecture
+//
+// The Intel Core 4th generation was codenamed "Haswell" and introduced AVX2,
+// BMI1, BMI2, FMA, LZCNT, MOVBE, which makes it a good divider for a
+// sub-target for us. The first AMD processor with AVX2 support (Zen) has the
+// same features.
+//
+// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
+// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
+# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
+# if defined(__AVX2__) && defined(__BMI__) && defined(__BMI2__) && defined(__F16C__) && \
+ defined(__FMA__) && defined(__LZCNT__) && defined(__RDRND__)
+# define __haswell__ 1
+# endif
+
+// This constant does not include all CPU features found in a Haswell, only
+// those that we'd have optimized code for.
+// Note: declared "static const" rather than Q_CONSTEXPR, as this file may be compiled in C mode.
+QT_BEGIN_NAMESPACE
+static const quint64 CpuFeatureArchHaswell = 0
+ | CpuFeatureSSE2
+ | CpuFeatureSSE3
+ | CpuFeatureSSSE3
+ | CpuFeatureSSE4_1
+ | CpuFeatureSSE4_2
+ | CpuFeatureFMA
+ | CpuFeaturePOPCNT
+ | CpuFeatureAVX
+ | CpuFeatureF16C
+ | CpuFeatureAVX2
+ | CpuFeatureBMI
+ | CpuFeatureBMI2;
+QT_END_NAMESPACE
+
#endif /* Q_PROCESSOR_X86 */
// Clang compiler fix, see http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20160222/151168.html
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index 749cc221ff..c3585a4647 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -127,7 +127,7 @@ SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp
SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp \
painting/qimagescale_sse4.cpp
-AVX2_SOURCES += painting/qdrawhelper_avx2.cpp
+ARCH_HASWELL_SOURCES += painting/qdrawhelper_avx2.cpp
NEON_SOURCES += painting/qdrawhelper_neon.cpp painting/qimagescale_neon.cpp
NEON_HEADERS += painting/qdrawhelper_neon_p.h
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 5892d59a64..235bba7206 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -5772,7 +5772,7 @@ static void qInitDrawhelperFunctions()
#endif
#if defined(QT_COMPILER_SUPPORTS_AVX2)
- if (qCpuHasFeature(AVX2)) {
+ if (qCpuHasFeature(ArchHaswell)) {
extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h, int const_alpha);