summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/corelib/global/global.pri21
-rw-r--r--src/corelib/global/qfloat16.cpp16
-rw-r--r--src/corelib/global/qfloat16_f16c.c4
3 files changed, 37 insertions, 4 deletions
diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri
index f4f4a75536..20d010a041 100644
--- a/src/corelib/global/global.pri
+++ b/src/corelib/global/global.pri
@@ -39,7 +39,26 @@ SOURCES += \
global/qrandom.cpp \
global/qhooks.cpp
-F16C_SOURCES += global/qfloat16_f16c.c
+# Only add global/qfloat16_f16c.c if qfloat16.cpp can't #include it.
+# Any compiler: if it is already generating F16C code, let qfloat16.cpp do it
+# Clang: ICE if not generating F16C code, so use qfloat16_f16c.c
+# ICC: miscompiles if not generating F16C code, so use qfloat16_f16c.c
+# GCC: if it can use F16C intrinsics, let qfloat16.cpp do it
+# MSVC: if it is already generating AVX code, let qfloat16.cpp do it
+# MSVC: otherwise, it generates poorly-performing code, so use qfloat16_f16c.c
+contains(QT_CPU_FEATURES.$$QT_ARCH, f16c): \
+ f16c_cxx = true
+else: clang|intel_icl: \
+ f16c_cxx = false
+else: gcc:f16c:x86SimdAlways: \
+ f16c_cxx = true
+else: msvc:contains(QT_CPU_FEATURES.$$QT_ARCH, avx): \
+ f16c_cxx = true
+else: \
+ f16c_cxx = false
+$$f16c_cxx: DEFINES += QFLOAT16_INCLUDE_FAST
+else: F16C_SOURCES += global/qfloat16_f16c.c
+unset(f16c_cxx)
VERSIONTAGGING_SOURCES = global/qversiontagging.cpp
diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp
index 4c9b831469..129ab4ded2 100644
--- a/src/corelib/global/qfloat16.cpp
+++ b/src/corelib/global/qfloat16.cpp
@@ -123,8 +123,16 @@ static inline bool hasFastF16()
}
extern "C" {
-extern void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW;
-extern void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW;
+#ifdef QFLOAT16_INCLUDE_FAST
+# define f16cextern static
+#else
+# define f16cextern extern
+#endif
+
+f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW;
+f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW;
+
+#undef f16cextern
}
#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__)
@@ -199,3 +207,7 @@ Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qssize_t le
}
QT_END_NAMESPACE
+
+#ifdef QFLOAT16_INCLUDE_FAST
+# include "qfloat16_f16c.c"
+#endif
diff --git a/src/corelib/global/qfloat16_f16c.c b/src/corelib/global/qfloat16_f16c.c
index ffd35f8ebc..c88dbb6944 100644
--- a/src/corelib/global/qfloat16_f16c.c
+++ b/src/corelib/global/qfloat16_f16c.c
@@ -44,7 +44,7 @@
// have been compiled if the support was missing in the first place, and not
// all compilers define it. Technically, we didn't need to check for __AVX__
// either.
-#if !defined(__AVX__)
+#if !QT_COMPILER_SUPPORTS_HERE(AVX)
# error "AVX support required"
#endif
@@ -53,6 +53,7 @@ QT_BEGIN_NAMESPACE
extern "C" {
#endif
+QT_FUNCTION_TARGET(F16C)
void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NOTHROW
{
qssize_t i = 0;
@@ -67,6 +68,7 @@ void qFloatToFloat16_fast(quint16 *out, const float *in, qssize_t len) Q_DECL_NO
out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
}
+QT_FUNCTION_TARGET(F16C)
void qFloatFromFloat16_fast(float *out, const quint16 *in, qssize_t len) Q_DECL_NOTHROW
{
qssize_t i = 0;