diff options
-rw-r--r-- | src/corelib/global/global.pri | 21 | ||||
-rw-r--r-- | src/corelib/global/qfloat16.cpp | 16 | ||||
-rw-r--r-- | src/corelib/global/qfloat16_f16c.c | 4 |
3 files changed, 37 insertions, 4 deletions
diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri index f4f4a75536..20d010a041 100644 --- a/src/corelib/global/global.pri +++ b/src/corelib/global/global.pri @@ -39,7 +39,26 @@ SOURCES += \ global/qrandom.cpp \ global/qhooks.cpp -F16C_SOURCES += global/qfloat16_f16c.c +# Only add global/qfloat16_f16c.c if qfloat16.cpp can't #include it. +# Any compiler: if it is already generating F16C code, let qfloat16.cpp do it +# Clang: ICE if not generating F16C code, so use qfloat16_f16c.c +# ICC: miscompiles if not generating F16C code, so use qfloat16_f16c.c +# GCC: if it can use F16C intrinsics, let qfloat16.cpp do it +# MSVC: if it is already generating AVX code, let qfloat16.cpp do it +# MSVC: otherwise, it generates poorly-performing code, so use qfloat16_f16c.c +contains(QT_CPU_FEATURES.$$QT_ARCH, f16c): \ + f16c_cxx = true +else: clang|intel_icl: \ + f16c_cxx = false +else: gcc:f16c:x86SimdAlways: \ + f16c_cxx = true +else: msvc:contains(QT_CPU_FEATURES.$$QT_ARCH, avx): \ + f16c_cxx = true +else: \ + f16c_cxx = false +$$f16c_cxx: DEFINES += QFLOAT16_INCLUDE_FAST +else: F16C_SOURCES += global/qfloat16_f16c.c +unset(f16c_cxx) VERSIONTAGGING_SOURCES = global/qversiontagging.cpp diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp index 4c9b831469..129ab4ded2 100644 --- a/src/corelib/global/qfloat16.cpp +++ b/src/corelib/global/qfloat16.cpp @@ -123,8 +123,16 @@ static inline bool hasFastF16() } extern "C" { -extern void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW; -extern void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW; +#ifdef QFLOAT16_INCLUDE_FAST +# define f16cextern static +#else +# define f16cextern extern +#endif + +f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW; +f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW; + +#undef f16cextern } #elif 
defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) @@ -199,3 +207,7 @@ Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qssize_t le } QT_END_NAMESPACE + +#ifdef QFLOAT16_INCLUDE_FAST +# include "qfloat16_f16c.c" +#endif diff --git a/src/corelib/global/qfloat16_f16c.c b/src/corelib/global/qfloat16_f16c.c index ffd35f8ebc..c88dbb6944 100644 --- a/src/corelib/global/qfloat16_f16c.c +++ b/src/corelib/global/qfloat16_f16c.c @@ -44,7 +44,7 @@ // have been compiled if the support was missing in the first place, and not // all compilers define it. Technically, we didn't need to check for __AVX__ // either. -#if !defined(__AVX__) +#if !QT_COMPILER_SUPPORTS_HERE(AVX) # error "AVX support required" #endif @@ -53,6 +53,7 @@ QT_BEGIN_NAMESPACE extern "C" { #endif +QT_FUNCTION_TARGET(F16C) void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW { qssize_t i = 0; @@ -67,6 +68,7 @@ void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NO out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0); } +QT_FUNCTION_TARGET(F16C) void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW { qssize_t i = 0; |