f16c: Use the packed intrinsics instead of scalar ones

MSVC, Apple's Clang, and Clang versions prior to 3.9 do not recognize _cvtss_sh and _cvtsh_ss, so expand the operation to use the packed intrinsics directly. Change-Id: I27b55fdf514247549455fffd14b2046fd638593d Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
author: Thiago Macieira <thiago.macieira@intel.com> 2017-04-03 15:41:44 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2017-04-06 04:15:04 +0000
commit: c817b33b45f2886c89d8a768c61ffffaa46f67a4 (patch)
tree: 98e39e893502c4c03b54ba8d00276aed701910e1
parent: 264d814773a15806df497e872e4b19c613c94725 (diff)
2 files changed, 10 insertions, 9 deletions
diff --git a/config.tests/common/f16c/f16c.cpp b/config.tests/common/f16c/f16c.cpp
index fc73e4fc55..aeeb35eac3 100644
--- a/config.tests/common/f16c/f16c.cpp
+++ b/config.tests/common/f16c/f16c.cpp
@@ -41,14 +41,11 @@
 
 int main(int, char**)
 {
-    float f = 1.f;
-    unsigned short s = _cvtss_sh(f, 0);
-    float g = _cvtsh_ss(s);
-    bool result = f == g;
-    (void)result;
     __m128i a = _mm_setzero_si128();
-    __m256 b = _mm256_cvtph_ps(a);
-    __m128i c = _mm256_cvtps_ph(b, 0);
+    __m128 b = _mm_cvtph_ps(a);
+    __m256 b256 = _mm256_cvtph_ps(a);
+    __m128i c = _mm_cvtps_ph(b, 0);
+    c = _mm256_cvtps_ph(b256, 0);
     (void)c;
     return 0;
 }
diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h
index 0fa83db93f..654183acd6 100644
--- a/src/corelib/global/qfloat16.h
+++ b/src/corelib/global/qfloat16.h
@@ -118,7 +118,9 @@ QT_WARNING_DISABLE_CLANG("-Wc99-extensions")
 inline qfloat16::qfloat16(float f) Q_DECL_NOTHROW
 {
 #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)
-    b16 = _cvtss_sh(f, 0);
+    __m128 packsingle = _mm_set_ss(f);
+    __m128i packhalf = _mm_cvtps_ph(packsingle, 0);
+    b16 = _mm_extract_epi16(packhalf, 0);
 #elif defined (__ARM_FP16_FORMAT_IEEE)
     __fp16 f16 = f;
     memcpy(&b16, &f16, sizeof(quint16));
@@ -134,7 +136,9 @@ QT_WARNING_POP
 inline qfloat16::operator float() const Q_DECL_NOTHROW
 {
 #if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)
-    return _cvtsh_ss(b16);
+    __m128i packhalf = _mm_cvtsi32_si128(b16);
+    __m128 packsingle = _mm_cvtph_ps(packhalf);
+    return _mm_cvtss_f32(packsingle);
 #elif defined (__ARM_FP16_FORMAT_IEEE)
     __fp16 f16;
     memcpy(&f16, &b16, sizeof(quint16));
author	Thiago Macieira <thiago.macieira@intel.com>	2017-04-03 15:41:44 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2017-04-06 04:15:04 +0000
commit	c817b33b45f2886c89d8a768c61ffffaa46f67a4 (patch)
tree	98e39e893502c4c03b54ba8d00276aed701910e1
parent	264d814773a15806df497e872e4b19c613c94725 (diff)