From 280e321e52fd4e86545f3f0d4bd4e047786a897e Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Mon, 3 Apr 2017 16:05:18 -0700 Subject: Fix detection of F16C for the Intel compiler (again) and Visual Studio Neither the Intel compiler nor Visual C++ has a dedicated switch to enable F16C support, as GCC and Clang do. So we used the AVX switch for that in commit 8241d51f7049d48912ce25fbd49ef4d22c58e340, as it was the closest match and the lowest common denominator. That was incorrect and insufficient. The Intel compiler silently miscompiles the intrinsics with -xAVX, making calls to out-of-line functions like _mm_cvtps_ph, which don't exist. So we actually have to use AVX2 support to generate correct code. That might be a problem later, since Ivy Bridge supports F16C but not AVX2. Visual C++ is able to generate F16C code with just -arch:AVX. Either way, since there's no dedicated command-line switch, there's also no dedicated preprocessor macro. We're using __AVX2__ for both compilers, as that's a sufficient condition to indicate a processor that supports F16C. 
Change-Id: I27b55fdf514247549455fffd14b205b8d8b86da7 Reviewed-by: Allan Sandfeld Jensen --- mkspecs/common/msvc-version.conf | 2 ++ mkspecs/linux-icc/qmake.conf | 2 +- mkspecs/macx-icc/qmake.conf | 2 +- mkspecs/win32-icc/qmake.conf | 2 +- src/corelib/global/qfloat16.h | 4 ++-- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mkspecs/common/msvc-version.conf b/mkspecs/common/msvc-version.conf index a69ea98a2d..945767ce8c 100644 --- a/mkspecs/common/msvc-version.conf +++ b/mkspecs/common/msvc-version.conf @@ -49,6 +49,8 @@ greaterThan(QMAKE_MSC_VER, 1799) { QMAKE_CFLAGS += -FS QMAKE_CXXFLAGS += -FS + QMAKE_CFLAGS_F16C = -arch:AVX + equals(QMAKE_MSC_VER, 1800) { QMAKE_CFLAGS_RELEASE += -Zc:strictStrings QMAKE_CFLAGS_RELEASE_WITH_DEBUGINFO += -Zc:strictStrings diff --git a/mkspecs/linux-icc/qmake.conf b/mkspecs/linux-icc/qmake.conf index a90536470b..582420ffee 100644 --- a/mkspecs/linux-icc/qmake.conf +++ b/mkspecs/linux-icc/qmake.conf @@ -42,7 +42,7 @@ QMAKE_CFLAGS_AVX512PF += -xMIC-AVX512 QMAKE_CFLAGS_AVX512DQ += -xCORE-AVX512 QMAKE_CFLAGS_AVX512BW += -xCORE-AVX512 QMAKE_CFLAGS_AVX512VL += -xCORE-AVX512 -QMAKE_CFLAGS_F16C += -xAVX +QMAKE_CFLAGS_F16C += $$QMAKE_CFLAGS_AVX2 QMAKE_CXX = icpc QMAKE_CXXFLAGS = $$QMAKE_CFLAGS diff --git a/mkspecs/macx-icc/qmake.conf b/mkspecs/macx-icc/qmake.conf index bbf9b4ba7a..6e43430342 100644 --- a/mkspecs/macx-icc/qmake.conf +++ b/mkspecs/macx-icc/qmake.conf @@ -40,7 +40,7 @@ QMAKE_CFLAGS_AVX512PF += -xMIC-AVX512 QMAKE_CFLAGS_AVX512DQ += -xCORE-AVX512 QMAKE_CFLAGS_AVX512BW += -xCORE-AVX512 QMAKE_CFLAGS_AVX512VL += -xCORE-AVX512 -QMAKE_CFLAGS_F16C += -xAVX +QMAKE_CFLAGS_F16C += $$QMAKE_CFLAGS_AVX2 QMAKE_CXX = icpc QMAKE_CXXFLAGS = $$QMAKE_CFLAGS diff --git a/mkspecs/win32-icc/qmake.conf b/mkspecs/win32-icc/qmake.conf index d73f7a03ef..ab0be95543 100644 --- a/mkspecs/win32-icc/qmake.conf +++ b/mkspecs/win32-icc/qmake.conf @@ -33,7 +33,7 @@ QMAKE_CFLAGS_AVX512PF += -QxMIC-AVX512 QMAKE_CFLAGS_AVX512DQ += -QxCORE-AVX512 
QMAKE_CFLAGS_AVX512BW += -QxCORE-AVX512 QMAKE_CFLAGS_AVX512VL += -QxCORE-AVX512 -QMAKE_CFLAGS_F16C = -QxAVX +QMAKE_CFLAGS_F16C = $$QMAKE_CFLAGS_AVX2 QMAKE_CXX = $$QMAKE_CC QMAKE_CXXFLAGS = $$QMAKE_CFLAGS /Zc:forScope diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h index 654183acd6..05b88e0e92 100644 --- a/src/corelib/global/qfloat16.h +++ b/src/corelib/global/qfloat16.h @@ -117,7 +117,7 @@ QT_WARNING_PUSH QT_WARNING_DISABLE_CLANG("-Wc99-extensions") inline qfloat16::qfloat16(float f) Q_DECL_NOTHROW { -#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) +#if defined(QT_COMPILER_SUPPORTS_F16C) && (defined(__F16C__) || defined(__AVX2__)) __m128 packsingle = _mm_set_ss(f); __m128i packhalf = _mm_cvtps_ph(packsingle, 0); b16 = _mm_extract_epi16(packhalf, 0); @@ -135,7 +135,7 @@ QT_WARNING_POP inline qfloat16::operator float() const Q_DECL_NOTHROW { -#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) +#if defined(QT_COMPILER_SUPPORTS_F16C) && (defined(__F16C__) || defined(__AVX2__)) __m128i packhalf = _mm_cvtsi32_si128(b16); __m128 packsingle = _mm_cvtph_ps(packhalf); return _mm_cvtss_f32(packsingle); -- cgit v1.2.3