diff options
Diffstat (limited to 'src/corelib/global/qfloat16.cpp')
-rw-r--r-- | src/corelib/global/qfloat16.cpp | 123 |
1 files changed, 104 insertions, 19 deletions
diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp index 1de1ae65fb..fd608efe55 100644 --- a/src/corelib/global/qfloat16.cpp +++ b/src/corelib/global/qfloat16.cpp @@ -38,10 +38,15 @@ ****************************************************************************/ #include "qfloat16_p.h" +#include "private/qsimd_p.h" QT_BEGIN_NAMESPACE -/*! \headerfile <QFloat16> +/*! + \headerfile <QFloat16> + \title 16-bit Floating Point Support + \ingroup funclists + \brief The <QFloat16> header file provides 16-bit floating point support. This header file provides support for half-precision (16-bit) floating point data with the class \c qfloat16. It is fully compliant with IEEE @@ -59,24 +64,6 @@ QT_BEGIN_NAMESPACE \since 5.9 */ -Q_STATIC_ASSERT_X(sizeof(float) == sizeof(quint32), - "qfloat16 assumes that floats are 32 bits wide"); - -// There are a few corner cases regarding denormals where GHS compiler is relying -// hardware behavior that is not IEC 559 compliant. Therefore the compiler -// reports std::numeric_limits<float>::is_iec559 as false. This is all right -// according to our needs. - -#if !defined(Q_CC_GHS) -Q_STATIC_ASSERT_X(std::numeric_limits<float>::is_iec559, - "Only works with IEEE 754 floating point"); -#endif - -Q_STATIC_ASSERT_X(std::numeric_limits<float>::has_infinity && - std::numeric_limits<float>::has_quiet_NaN && - std::numeric_limits<float>::has_signaling_NaN, - "Only works with IEEE 754 floating point"); - /*! Returns true if the \c qfloat16 \a {f} is equivalent to infinity. \relates <QFloat16> @@ -127,4 +114,102 @@ Q_REQUIRED_RESULT bool qIsFinite(qfloat16 f) Q_DECL_NOTHROW { return qt_is_finit exactness is stronger the smaller the numbers are. */ +#if QT_COMPILER_SUPPORTS(F16C) +static inline bool hasFastF16() +{ + // All processors with F16C also support AVX, but YMM registers + // might not be supported by the OS, or they might be disabled. + return qCpuHasFeature(F16C) && qCpuHasFeature(AVX); +} + +extern "C" { +#ifdef QFLOAT16_INCLUDE_FAST +# define f16cextern static +#else +# define f16cextern extern +#endif + +f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW; +f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOTHROW; + +#undef f16cextern +} + +#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) +static inline bool hasFastF16() +{ + return true; +} + +static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW +{ + __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out); + qsizetype i = 0; + for (; i < len - 3; i += 4) + vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i))); + SIMD_EPILOGUE(i, len, 3) + out_f16[i] = __fp16(in[i]); +} + +static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOTHROW +{ + const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in); + qsizetype i = 0; + for (; i < len - 3; i += 4) + vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i))); + SIMD_EPILOGUE(i, len, 3) + out[i] = float(in_f16[i]); +} +#else +static inline bool hasFastF16() +{ + return false; +} + +static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) Q_DECL_NOTHROW +{ + Q_UNREACHABLE(); +} + +static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) Q_DECL_NOTHROW +{ + Q_UNREACHABLE(); +} +#endif +/*! + \since 5.11 + \relates <QFloat16> + + Converts \a len floats from \a in to qfloat16 and stores them in \a out. + Both \a in and \a out must have \a len allocated entries. +*/ +Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) Q_DECL_NOTHROW +{ + if (hasFastF16()) + return qFloatToFloat16_fast(reinterpret_cast<quint16 *>(out), in, len); + + for (qsizetype i = 0; i < len; ++i) + out[i] = qfloat16(in[i]); +} + +/*! + \since 5.11 + \relates <QFloat16> + + Converts \a len qfloat16 from \a in to floats and stores them in \a out. + Both \a in and \a out must have \a len allocated entries. +*/ +Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) Q_DECL_NOTHROW +{ + if (hasFastF16()) + return qFloatFromFloat16_fast(out, reinterpret_cast<const quint16 *>(in), len); + + for (qsizetype i = 0; i < len; ++i) + out[i] = float(in[i]); +} + QT_END_NAMESPACE + +#ifdef QFLOAT16_INCLUDE_FAST +# include "qfloat16_f16c.c" +#endif |