summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qfloat16.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qfloat16.cpp')
-rw-r--r--src/corelib/global/qfloat16.cpp123
1 files changed, 104 insertions, 19 deletions
diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp
index 1de1ae65fb..fd608efe55 100644
--- a/src/corelib/global/qfloat16.cpp
+++ b/src/corelib/global/qfloat16.cpp
@@ -38,10 +38,15 @@
****************************************************************************/
#include "qfloat16_p.h"
+#include "private/qsimd_p.h"
QT_BEGIN_NAMESPACE
-/*! \headerfile <QFloat16>
+/*!
+ \headerfile <QFloat16>
+ \title 16-bit Floating Point Support
+ \ingroup funclists
+ \brief The <QFloat16> header file provides 16-bit floating point support.
This header file provides support for half-precision (16-bit) floating
point data with the class \c qfloat16. It is fully compliant with IEEE
@@ -59,24 +64,6 @@ QT_BEGIN_NAMESPACE
\since 5.9
*/
-Q_STATIC_ASSERT_X(sizeof(float) == sizeof(quint32),
- "qfloat16 assumes that floats are 32 bits wide");
-
-// There are a few corner cases regarding denormals where GHS compiler is relying
-// hardware behavior that is not IEC 559 compliant. Therefore the compiler
-// reports std::numeric_limits<float>::is_iec559 as false. This is all right
-// according to our needs.
-
-#if !defined(Q_CC_GHS)
-Q_STATIC_ASSERT_X(std::numeric_limits<float>::is_iec559,
- "Only works with IEEE 754 floating point");
-#endif
-
-Q_STATIC_ASSERT_X(std::numeric_limits<float>::has_infinity &&
- std::numeric_limits<float>::has_quiet_NaN &&
- std::numeric_limits<float>::has_signaling_NaN,
- "Only works with IEEE 754 floating point");
-
/*!
Returns true if the \c qfloat16 \a {f} is equivalent to infinity.
\relates <QFloat16>
@@ -127,4 +114,102 @@ Q_REQUIRED_RESULT bool qIsFinite(qfloat16 f) Q_DECL_NOTHROW { return qt_is_finit
exactness is stronger the smaller the numbers are.
*/
+#if QT_COMPILER_SUPPORTS(F16C)
+static inline bool hasFastF16()
+{
+ // All processors with F16C also support AVX, but YMM registers
+ // might not be supported by the OS, or they might be disabled.
+ return qCpuHasFeature(F16C) && qCpuHasFeature(AVX);
+}
+
+extern "C" {
+#ifdef QFLOAT16_INCLUDE_FAST
+# define f16cextern static
+#else
+# define f16cextern extern
+#endif
+
+f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW;
+f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOTHROW;
+
+#undef f16cextern
+}
+
+#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__)
+static inline bool hasFastF16()
+{
+ return true;
+}
+
+static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW
+{
+ __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out);
+ qsizetype i = 0;
+ for (; i < len - 3; i += 4)
+ vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i)));
+ SIMD_EPILOGUE(i, len, 3)
+ out_f16[i] = __fp16(in[i]);
+}
+
+static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOTHROW
+{
+ const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in);
+ qsizetype i = 0;
+ for (; i < len - 3; i += 4)
+ vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i)));
+ SIMD_EPILOGUE(i, len, 3)
+ out[i] = float(in_f16[i]);
+}
+#else
+static inline bool hasFastF16()
+{
+ return false;
+}
+
+static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) Q_DECL_NOTHROW
+{
+ Q_UNREACHABLE();
+}
+
+static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) Q_DECL_NOTHROW
+{
+ Q_UNREACHABLE();
+}
+#endif
+/*!
+ \since 5.11
+ \relates <QFloat16>
+
+ Converts \a len floats from \a in to qfloat16 and stores them in \a out.
+ Both \a in and \a out must have \a len allocated entries.
+*/
+Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW
+{
+ if (hasFastF16())
+ return qFloatToFloat16_fast(reinterpret_cast<quint16 *>(out), in, len);
+
+ for (qsizetype i = 0; i < len; ++i)
+ out[i] = qfloat16(in[i]);
+}
+
+/*!
+ \since 5.11
+ \relates <QFloat16>
+
+ Converts \a len qfloat16 from \a in to floats and stores them in \a out.
+ Both \a in and \a out must have \a len allocated entries.
+*/
+Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) Q_DECL_NOTHROW
+{
+ if (hasFastF16())
+ return qFloatFromFloat16_fast(out, reinterpret_cast<const quint16 *>(in), len);
+
+ for (qsizetype i = 0; i < len; ++i)
+ out[i] = float(in[i]);
+}
+
QT_END_NAMESPACE
+
+#ifdef QFLOAT16_INCLUDE_FAST
+# include "qfloat16_f16c.c"
+#endif