From a587ad1e507aa367d55f1954000417271383c685 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 17 Jul 2015 14:46:05 -0700 Subject: configure: Add support for detecting AVX512 instructions Tested on Linux with Clang 3.7, GCC 4.9, 5.1 and 6.0, ICC 16 beta; on OS X with Clang-XCode 6.4, ICC 16 beta; on Windows with MSVC 2013 and ICC 15. MinGW is not tested. GCC 4.9: AVX512F AVX512ER AVX512CD AVX512PF GCC 5 & 6: AVX512F AVX512ER AVX512CD AVX512PF AVX512DQ AVX512BW AVX512VL AVX512IFMA AVX512VBMI Clang 3.7: AVX512F AVX512ER AVX512CD Clang-XCode: ICC 15 & 16: AVX512F AVX512ER AVX512CD AVX512PF AVX512DQ AVX512BW AVX512VL MSVC 2013: Change-Id: Ib306f8f647014b399b87ffff13f1da1b161c31d7 Reviewed-by: Oswald Buddenhagen Reviewed-by: Thiago Macieira --- config.tests/common/avx512/avx512.cpp | 97 +++++++++++++++++++++++++++++++++++ config.tests/common/avx512/avx512.pro | 12 +++++ configure | 41 +++++++++++++++ mkspecs/common/gcc-base.conf | 9 ++++ mkspecs/features/simd.prf | 41 +++++++++++++++ mkspecs/linux-icc/qmake.conf | 7 +++ mkspecs/macx-icc/qmake.conf | 7 +++ mkspecs/win32-icc/qmake.conf | 7 +++ src/corelib/tools/qsimd_p.h | 1 + tools/configure/configureapp.cpp | 34 ++++++++++++ tools/configure/configureapp.h | 1 + 11 files changed, 257 insertions(+) create mode 100644 config.tests/common/avx512/avx512.cpp create mode 100644 config.tests/common/avx512/avx512.pro diff --git a/config.tests/common/avx512/avx512.cpp b/config.tests/common/avx512/avx512.cpp new file mode 100644 index 0000000000..d0c55a09b5 --- /dev/null +++ b/config.tests/common/avx512/avx512.cpp @@ -0,0 +1,97 @@ +/**************************************************************************** +** +** Copyright (C) 2012 Intel Corporation. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the configuration of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL21$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 or version 3 as published by the Free +** Software Foundation and appearing in the file LICENSE.LGPLv21 and +** LICENSE.LGPLv3 included in the packaging of this file. Please review the +** following information to ensure the GNU Lesser General Public License +** requirements will be met: https://www.gnu.org/licenses/lgpl.html and +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include + +#ifndef AVX512WANT +# error ".pro file must define AVX512WANT macro to the AVX-512 feature to be tested" +#endif + +// The following checks if __AVXx__ is defined, where x is the value in +// AVX512WANT +#define HAS2(x) __AVX512 ## x ## __ +#define HAS(x) HAS2(x) +#if !HAS(AVX512WANT) +# error "Feature not supported" +#endif + +int main(int, char**argv) +{ + /* AVX512 Foundation */ + __m512i i; + __m512d d; + __m512 f; + __mmask16 m = ~1; + i = _mm512_maskz_loadu_epi32(0, argv); + d = _mm512_loadu_pd((double *)argv + 64); + f = _mm512_loadu_ps((float *)argv + 128); + +#ifdef __AVX512ER__ + /* AVX512 Exponential and Reciprocal */ + f = _mm512_exp2a23_round_ps(f, 8); +#endif +#ifdef __AVX512CD__ + /* AVX512 Conflict Detection */ + i = _mm512_maskz_conflict_epi32(m, i); +#endif +#ifdef __AVX512PF__ + /* AVX512 Prefetch */ + _mm512_mask_prefetch_i64scatter_pd(argv, 0xf, i, 2, 2); +#endif +#ifdef __AVX512DQ__ + /* AVX512 Doubleword and Quadword support */ + m = _mm512_movepi32_mask(i); +#endif +#ifdef __AVX512BW__ + /* AVX512 Byte and Word support */ + i = _mm512_mask_loadu_epi8(i, m, argv - 8); +#endif +#ifdef __AVX512VL__ + /* AVX512 Vector Length */ + __m256i i2 = _mm256_maskz_loadu_epi32(0, argv); + _mm256_mask_storeu_epi32(argv + 1, m, i2); +#endif +#ifdef __AVX512IFMA__ + /* AVX512 Integer Fused Multiply-Add */ + i = _mm512_madd52lo_epu64(i, i, i); +#endif +#ifdef __AVX512VBMI__ + /* AVX512 Vector Byte Manipulation Instructions */ + i = _mm512_permutexvar_epi8(i, i); +#endif + + _mm512_mask_storeu_epi64(argv, m, i); + _mm512_mask_storeu_ps(argv + 64, m, f); + _mm512_mask_storeu_pd(argv + 128, m, d); + return 0; +} diff --git a/config.tests/common/avx512/avx512.pro b/config.tests/common/avx512/avx512.pro new file mode 100644 index 0000000000..37cfb2ea54 --- /dev/null +++ b/config.tests/common/avx512/avx512.pro @@ -0,0 +1,12 @@ +SOURCES = avx512.cpp +CONFIG -= qt dylib release debug_and_release +CONFIG += debug console + +isEmpty(AVX512): error("You must set the AVX512 variable!") + +varname = QMAKE_CFLAGS_AVX512$$AVX512 +value = $$eval($$varname) +isEmpty($$varname): error("This compiler does not support AVX512") + +QMAKE_CXXFLAGS += $$value +DEFINES += AVX512WANT=$$AVX512 diff --git a/configure b/configure index 43360022de..1b333d757f 100755 --- a/configure +++ b/configure @@ -716,6 +716,7 @@ CFG_SSE4_1=auto CFG_SSE4_2=auto CFG_AVX=auto CFG_AVX2=auto +CFG_AVX512=auto CFG_REDUCE_RELOCATIONS=auto CFG_ACCESSIBILITY=auto CFG_ACCESSIBILITY_ATSPI_BRIDGE=no # will be enabled depending on dbus and accessibility being enabled @@ -1640,6 +1641,13 @@ while [ "$#" -gt 0 ]; do UNKNOWN_OPT=yes fi ;; + avx512) + if [ "$VAL" = "no" ]; then + CFG_AVX512="" + else + UNKNOWN_OPT=yes + fi + ;; mips_dsp) if [ "$VAL" = "no" ]; then CFG_MIPS_DSP="$VAL" @@ -2463,6 +2471,7 @@ Configure options: -no-sse4.2 ......... Do not compile with use of SSE4.2 instructions. -no-avx ............ Do not compile with use of AVX instructions. -no-avx2 ........... Do not compile with use of AVX2 instructions. + -no-avx512 ......... Do not compile with use of AVX512 instructions. -no-mips_dsp ....... Do not compile with use of MIPS DSP instructions. -no-mips_dspr2 ..... Do not compile with use of MIPS DSP rev2 instructions. @@ -4498,6 +4507,31 @@ if [ "${CFG_AVX2}" = "auto" ]; then fi fi +# detect avx512 support +if [ "${CFG_AVX512}" == "auto" ]; then + # First, test for AVX-512 Foundation + if compileTest common/avx512 "avx512f" AVX512=F; then + # Test for the sub-features + CFG_AVX512=f + CFG_AVX512_UPPER=AVX512F + for feature in er cd pf dq bw vl ifma vbmi; do + if [ -n "BASH_VERSION" ] && [ "${BASH_VERSION%%.*}" -gt 3 ]; then + upper=${feature^^*} + elif [ -n "$ZSH_VERSION" ]; then + upper=${(U)feature} + else + upper=`echo $feature | tr a-z A-Z` + fi + if compileTest common/avx512 "avx512$feature" AVX512=$upper; then + CFG_AVX512="$CFG_AVX512 $feature" + CFG_AVX512_UPPER="$CFG_AVX512_UPPER AVX512$upper" + fi + done + else + CFG_AVX512= + fi +fi + # check Neon support if [ "$CFG_NEON" = "auto" ]; then # no compile test, just check what the compiler has @@ -6246,6 +6280,9 @@ fi [ "$CFG_SSE4_2" = "yes" ] && QMAKE_CONFIG="$QMAKE_CONFIG sse4_2" [ "$CFG_AVX" = "yes" ] && QMAKE_CONFIG="$QMAKE_CONFIG avx" [ "$CFG_AVX2" = "yes" ] && QMAKE_CONFIG="$QMAKE_CONFIG avx2" +for feature in $CFG_AVX512; do + QMAKE_CONFIG="$QMAKE_CONFIG avx512$feature" +done [ "$CFG_NEON" = "yes" ] && QMAKE_CONFIG="$QMAKE_CONFIG neon" if [ "$CFG_ARCH" = "mips" ]; then [ "$CFG_MIPS_DSP" = "yes" ] && QMAKE_CONFIG="$QMAKE_CONFIG mips_dsp" @@ -6715,6 +6752,9 @@ for SUBARCH in SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX AVX2 \ ;; esac done +for feature in $CFG_AVX512_UPPER; do + echo "#define QT_COMPILER_SUPPORTS_$feature 1" >>"$outpath/src/corelib/global/qconfig.h.new" +done echo "" >>"$outpath/src/corelib/global/qconfig.h.new" @@ -7167,6 +7207,7 @@ if [ "$CFG_ARCH" = "i386" -o "$CFG_ARCH" = "x86_64" ]; then echo " SSE2/SSE3/SSSE3 ...... ${CFG_SSE2}/${CFG_SSE3}/${CFG_SSSE3}" echo " SSE4.1/SSE4.2 ........ ${CFG_SSE4_1}/${CFG_SSE4_2}" echo " AVX/AVX2 ............. ${CFG_AVX}/${CFG_AVX2}" + echo " AVX512 ............... ${CFG_AVX512_UPPER:-}" elif [ "$CFG_ARCH" = "arm" ]; then echo " Neon ................. ${CFG_NEON}" elif [ "$CFG_ARCH" = "mips" ]; then diff --git a/mkspecs/common/gcc-base.conf b/mkspecs/common/gcc-base.conf index 27e812b3a1..6e043f558f 100644 --- a/mkspecs/common/gcc-base.conf +++ b/mkspecs/common/gcc-base.conf @@ -86,6 +86,15 @@ QMAKE_CFLAGS_SSE4_1 += -msse4.1 QMAKE_CFLAGS_SSE4_2 += -msse4.2 QMAKE_CFLAGS_AVX += -mavx QMAKE_CFLAGS_AVX2 += -mavx2 +QMAKE_CFLAGS_AVX512F += -mavx512f +QMAKE_CFLAGS_AVX512ER += -mavx512er +QMAKE_CFLAGS_AVX512CD += -mavx512cd +QMAKE_CFLAGS_AVX512PF += -mavx512pf +QMAKE_CFLAGS_AVX512DQ += -mavx512dq +QMAKE_CFLAGS_AVX512BW += -mavx512bw +QMAKE_CFLAGS_AVX512VL += -mavx512vl +QMAKE_CFLAGS_AVX512IFMA += -mavx512ifma +QMAKE_CFLAGS_AVX512VBMI += -mavx512vbmi QMAKE_CFLAGS_NEON += -mfpu=neon # Wrapper tools that understand .o/.a files with GIMPLE instead of machine code diff --git a/mkspecs/features/simd.prf b/mkspecs/features/simd.prf index 700d79a1b5..4aafdbe5ed 100644 --- a/mkspecs/features/simd.prf +++ b/mkspecs/features/simd.prf @@ -95,6 +95,47 @@ addSimdCompiler(sse4_1) addSimdCompiler(sse4_2) addSimdCompiler(avx) addSimdCompiler(avx2) +addSimdCompiler(avx512f) +addSimdCompiler(avx512cd) +addSimdCompiler(avx512er) +addSimdCompiler(avx512pf) +addSimdCompiler(avx512dq) +addSimdCompiler(avx512bw) +addSimdCompiler(avx512vl) +addSimdCompiler(avx512ifma) +addSimdCompiler(avx512vbmi) addSimdCompiler(neon) addSimdCompiler(mips_dsp) addSimdCompiler(mips_dspr2) + +# Follow the Intel compiler's lead and define profiles of AVX512 instructions +defineTest(addAvx512Profile) { + name = $$1 + dependencies = $$2 + upname = $$upper($$name) + varname = QMAKE_CFLAGS_$$upname + + cpu_features_missing = + cflags = $$QMAKE_CFLAGS_AVX512F + for(part, dependencies) { + !CONFIG($$part): return() # Profile isn't supported by the compiler + + uppart = $$upper($$part) + cflags *= $$eval(QMAKE_CFLAGS_$${uppart}) + !contains(QT_CPU_FEATURES, $$uppart): cpu_features_missing += $$uppart + } + + CONFIG += $$name + isEmpty(cpu_features_missing): QT_CPU_FEATURES += $$name + $$varname = $$cflags + + export(QT_CPU_FEATURES) + export(CONFIG) + export($$varname) + addSimdCompiler($$name) +} +addAvx512Profile(avx512common, avx512cd) +addAvx512Profile(avx512mic, avx512cd avx512er avx512pf) +addAvx512Profile(avx512core, avx512cd avx512bw avx512dq avx512vl) +addAvx512Profile(avx512ifmavl, avx512ifma avx512vl) +addAvx512Profile(avx512vbmivl, avx512vbmi avx512vl) diff --git a/mkspecs/linux-icc/qmake.conf b/mkspecs/linux-icc/qmake.conf index 57f6c45ac1..c71c223768 100644 --- a/mkspecs/linux-icc/qmake.conf +++ b/mkspecs/linux-icc/qmake.conf @@ -35,6 +35,13 @@ QMAKE_CFLAGS_SSE4_1 += -xSSE4.1 QMAKE_CFLAGS_SSE4_2 += -xSSE4.2 QMAKE_CFLAGS_AVX += -xAVX QMAKE_CFLAGS_AVX2 += -xCORE-AVX2 +QMAKE_CFLAGS_AVX512F += -xCOMMON-AVX512 +QMAKE_CFLAGS_AVX512CD += -xCOMMON-AVX512 +QMAKE_CFLAGS_AVX512ER += -xMIC-AVX512 +QMAKE_CFLAGS_AVX512PF += -xMIC-AVX512 +QMAKE_CFLAGS_AVX512DQ += -xCORE-AVX512 +QMAKE_CFLAGS_AVX512BW += -xCORE-AVX512 +QMAKE_CFLAGS_AVX512VL += -xCORE-AVX512 QMAKE_CXX = icpc QMAKE_CXXFLAGS = $$QMAKE_CFLAGS diff --git a/mkspecs/macx-icc/qmake.conf b/mkspecs/macx-icc/qmake.conf index efce568ea9..4d2dd736b1 100644 --- a/mkspecs/macx-icc/qmake.conf +++ b/mkspecs/macx-icc/qmake.conf @@ -33,6 +33,13 @@ QMAKE_CFLAGS_SSE4_1 += -msse4.1 QMAKE_CFLAGS_SSE4_2 += -msse4.2 QMAKE_CFLAGS_AVX += -mavx QMAKE_CFLAGS_AVX2 += -march=core-avx2 +QMAKE_CFLAGS_AVX512F += -xCOMMON-AVX512 +QMAKE_CFLAGS_AVX512CD += -xCOMMON-AVX512 +QMAKE_CFLAGS_AVX512ER += -xMIC-AVX512 +QMAKE_CFLAGS_AVX512PF += -xMIC-AVX512 +QMAKE_CFLAGS_AVX512DQ += -xCORE-AVX512 +QMAKE_CFLAGS_AVX512BW += -xCORE-AVX512 +QMAKE_CFLAGS_AVX512VL += -xCORE-AVX512 QMAKE_OBJECTIVE_CC = clang QMAKE_OBJECTIVE_CFLAGS = -pipe diff --git a/mkspecs/win32-icc/qmake.conf b/mkspecs/win32-icc/qmake.conf index 65b533b3dd..0e6a2cbba4 100644 --- a/mkspecs/win32-icc/qmake.conf +++ b/mkspecs/win32-icc/qmake.conf @@ -32,6 +32,13 @@ QMAKE_CFLAGS_SSE4_1 = -QxSSE4.1 QMAKE_CFLAGS_SSE4_2 = -QxSSE4.2 QMAKE_CFLAGS_AVX = -QxAVX QMAKE_CFLAGS_AVX2 = -QxCORE-AVX2 +QMAKE_CFLAGS_AVX512F += -QxCOMMON-AVX512 +QMAKE_CFLAGS_AVX512CD += -QxCOMMON-AVX512 +QMAKE_CFLAGS_AVX512ER += -QxMIC-AVX512 +QMAKE_CFLAGS_AVX512PF += -QxMIC-AVX512 +QMAKE_CFLAGS_AVX512DQ += -QxCORE-AVX512 +QMAKE_CFLAGS_AVX512BW += -QxCORE-AVX512 +QMAKE_CFLAGS_AVX512VL += -QxCORE-AVX512 QMAKE_CXX = $$QMAKE_CC QMAKE_CXXFLAGS = $$QMAKE_CFLAGS /Zc:forScope diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index be003f6c6d..c4deee36e4 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -73,6 +73,7 @@ * SSE4_2 | x86 | I & C | I & C | I only | * AVX | x86 | I & C | I & C | I & C | * AVX2 | x86 | I & C | I & C | I only | + * AVX512xx | x86 | I & C | I & C | I only | * I = intrinsics; C = code generation * * Code can use the following constructs to determine compiler support & status: diff --git a/tools/configure/configureapp.cpp b/tools/configure/configureapp.cpp index 972134fcf0..d98c905c64 100644 --- a/tools/configure/configureapp.cpp +++ b/tools/configure/configureapp.cpp @@ -160,6 +160,7 @@ Configure::Configure(int& argc, char** argv) dictionary[ "SSE4_2" ] = "auto"; dictionary[ "AVX" ] = "auto"; dictionary[ "AVX2" ] = "auto"; + dictionary[ "AVX512" ] = "auto"; dictionary[ "SYNCQT" ] = "auto"; dictionary[ "CE_CRT" ] = "no"; dictionary[ "CETEST" ] = "auto"; @@ -885,6 +886,10 @@ void Configure::parseCmdLine() dictionary[ "AVX2" ] = "no"; else if (configCmdLine.at(i) == "-avx2") dictionary[ "AVX2" ] = "yes"; + else if (configCmdLine.at(i) == "-no-avx512") + dictionary[ "AVX512" ] = ""; + else if (configCmdLine.at(i) == "-avx512") + dictionary[ "AVX512" ] = "auto"; else if (configCmdLine.at(i) == "-no-ssl") { dictionary[ "SSL"] = "no"; @@ -1699,6 +1704,7 @@ void Configure::applySpecSpecifics() dictionary[ "SSE4_2" ] = "no"; dictionary[ "AVX" ] = "no"; dictionary[ "AVX2" ] = "no"; + dictionary[ "AVX512" ] = "no"; dictionary[ "CE_CRT" ] = "yes"; dictionary[ "LARGE_FILE" ] = "no"; dictionary[ "ANGLE" ] = "no"; @@ -2014,6 +2020,8 @@ bool Configure::displayHelp() desc("AVX", "yes", "-avx", "Compile with use of AVX instructions."); desc("AVX2", "no", "-no-avx2", "Do not compile with use of AVX2 instructions."); desc("AVX2", "yes", "-avx2", "Compile with use of AVX2 instructions.\n"); + desc("AVX512", "no", "-no-avx512", "Do not compile with use of AVX512 instructions."); + desc("AVX512", "yes", "-avx512", "Compile with use of AVX512 instructions.\n"); desc("SSL", "no", "-no-ssl", "Do not compile support for SSL."); desc("SSL", "yes", "-ssl", "Enable run-time SSL support."); desc("OPENSSL", "no", "-no-openssl", "Do not compile support for OpenSSL."); @@ -2201,6 +2209,24 @@ bool Configure::checkAngleAvailability(QString *errorMessage /* = 0 */) const return true; } +QString Configure::checkAvx512Availability() +{ + static const char avx512features[][5] = { "cd", "er", "pf", "bw", "dq", "vl", "ifma", "vbmi" }; + + // try AVX512 Foundation. No Foundation, nothing else works. + if (!tryCompileProject("common/avx512", "AVX512=F")) + return QString(); + + QString available = "avx512f"; + for (int i = 0; i < sizeof(avx512features)/sizeof(avx512features[0]); ++i) { + if (tryCompileProject("common/avx512", QStringLiteral("AVX512=%0").arg(avx512features[i]).toUpper())) { + available += " avx512"; + available += avx512features[i]; + } + } + return available; +} + /*! Checks the system for the availability of a feature. Returns true if the feature is available, else false. @@ -2476,6 +2502,8 @@ void Configure::autoDetection() dictionary["AVX"] = checkAvailability("AVX") ? "yes" : "no"; if (dictionary["AVX2"] == "auto") dictionary["AVX2"] = checkAvailability("AVX2") ? "yes" : "no"; + if (dictionary["AVX512"] == "auto") + dictionary["AVX512"] = checkAvx512Availability(); if (dictionary["NEON"] == "auto") dictionary["NEON"] = checkAvailability("NEON") ? "yes" : "no"; if (dictionary["SSL"] == "auto") { @@ -3194,6 +3222,8 @@ void Configure::generateCachefile() moduleStream << " avx"; if (dictionary[ "AVX2" ] == "yes") moduleStream << " avx2"; + if (!dictionary[ "AVX512" ].isEmpty()) + moduleStream << ' ' << dictionary[ "AVX512" ]; if (dictionary[ "NEON" ] == "yes") moduleStream << " neon"; if (dictionary[ "LARGE_FILE" ] == "yes") @@ -3659,6 +3689,8 @@ void Configure::generateConfigfiles() tmpStream << "#define QT_COMPILER_SUPPORTS_AVX 1" << endl; if (dictionary[ "AVX2" ] == "yes") tmpStream << "#define QT_COMPILER_SUPPORTS_AVX2 1" << endl; + foreach (const QString &avx512feature, dictionary[ "AVX512" ].split(' ', QString::SkipEmptyParts)) + tmpStream << "#define QT_COMPILER_SUPPRTS_" << avx512feature.toUpper() << " 1" << endl; if (dictionary["QREAL"] != "double") { tmpStream << "#define QT_COORD_TYPE " << dictionary["QREAL"] << endl; @@ -3831,6 +3863,8 @@ void Configure::displayConfig() sout << "SSE4.2 support.............." << dictionary[ "SSE4_2" ] << endl; sout << "AVX support................." << dictionary[ "AVX" ] << endl; sout << "AVX2 support................" << dictionary[ "AVX2" ] << endl; + sout << "AVX512 support.............." + << (dictionary[ "AVX512" ].isEmpty() ? QString("") : dictionary[ "AVX512" ].toUpper()) << endl; sout << "NEON support................" << dictionary[ "NEON" ] << endl; sout << "OpenGL support.............." << dictionary[ "OPENGL" ] << endl; sout << "Large File support.........." << dictionary[ "LARGE_FILE" ] << endl; diff --git a/tools/configure/configureapp.h b/tools/configure/configureapp.h index de8d1a2469..c3685d9ff2 100644 --- a/tools/configure/configureapp.h +++ b/tools/configure/configureapp.h @@ -92,6 +92,7 @@ public: private: bool checkAngleAvailability(QString *errorMessage = 0) const; + QString checkAvx512Availability(); // Our variable dictionaries QMap dictionary; -- cgit v1.2.3