qfloat16: add a couple of faster implementations of qSqrt

Instead of going through float.

Change-Id: Ie1b556d9ebca4ccaadd2fffd170895088a5d2dec
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
author: Thiago Macieira <thiago.macieira@intel.com> 2022-08-05 15:58:46 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2022-11-28 10:59:21 -0800
commit: beab4d30e24442fa7c0c3af5056b0e064b3acc95 (patch)
tree: f4903074283d84282d12873db28fb6f65ae2bf2f /src/corelib/global
parent: 99c7f0419e66692260be56c0385badeacb3f6760 (diff)
1 files changed, 23 insertions, 1 deletions
diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h
index 10dcfe0f8d..02ad5f303f 100644
--- a/src/corelib/global/qfloat16.h
+++ b/src/corelib/global/qfloat16.h
@@ -243,13 +243,35 @@ Q_CORE_EXPORT void qFloatFromFloat16(float *, const qfloat16 *, qsizetype length
     // https://wg21.link/p1467 - disabled until tested
     using namespace std;
     return sqrt(f);
-#endif
+#elif QFLOAT16_IS_NATIVE && defined(__HAVE_FLOAT16) && __HAVE_FLOAT16
+    // This C library (glibc) has sqrtf16().
+    return sqrtf16(f);
+#else
+    bool mathUpdatesErrno = true;
+#  if defined(__NO_MATH_ERRNO__) || defined(_M_FP_FAST)
+    mathUpdatesErrno = false;
+#  elif defined(math_errhandling)
+    mathUpdatesErrno = (math_errhandling & MATH_ERRNO);
+#  endif
+
+    // We don't need to set errno to EDOM if (f >= 0 && f != -0 && !isnan(f))
+    // (or if we don't care about errno in the first place). We can merge the
+    // NaN check with the sign check by negating and inverting: !(0 > f), and
+    // leaving zero to sqrtf().
+    if (!mathUpdatesErrno || !(0 > f)) {
+#  if defined(__AVX512FP16__)
+        __m128h v = _mm_set_sh(f);
+        v = _mm_sqrt_sh(v, v);
+        return _mm_cvtsh_h(v);
+#  endif
+    }
 
     // WG14's N2601 does not provide a way to tell which types an
     // implementation supports, so we assume it doesn't and fall back to FP32
     float f32 = float(f);
     f32 = sqrtf(f32);
     return qfloat16::NearestFloat(f32);
+#endif
 }
 
 // The remainder of these utility functions complement qglobal.h
author	Thiago Macieira <thiago.macieira@intel.com>	2022-08-05 15:58:46 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2022-11-28 10:59:21 -0800
commit	beab4d30e24442fa7c0c3af5056b0e064b3acc95 (patch)
tree	f4903074283d84282d12873db28fb6f65ae2bf2f /src/corelib/global
parent	99c7f0419e66692260be56c0385badeacb3f6760 (diff)