diff options
Diffstat (limited to 'src/corelib/global/qnumeric_p.h')
-rw-r--r-- | src/corelib/global/qnumeric_p.h | 166 |
1 files changed, 159 insertions, 7 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h index 7c65b67b7a..d40e6b964b 100644 --- a/src/corelib/global/qnumeric_p.h +++ b/src/corelib/global/qnumeric_p.h @@ -23,6 +23,10 @@ #include <limits> #include <type_traits> +#ifndef __has_extension +# define __has_extension(X) 0 +#endif + #if !defined(Q_CC_MSVC) && defined(Q_OS_QNX) # include <math.h> # ifdef isnan @@ -51,6 +55,8 @@ QT_END_NAMESPACE QT_BEGIN_NAMESPACE +class qfloat16; + namespace qnumeric_std_wrapper { #if defined(QT_MATH_H_DEFINES_MACROS) # undef QT_MATH_H_DEFINES_MACROS @@ -138,22 +144,23 @@ Q_DECL_CONST_FUNCTION static inline int qt_fpclassify(float f) return qnumeric_std_wrapper::fpclassify(f); } -#ifndef Q_CLANG_QDOC +#ifndef Q_QDOC namespace { /*! Returns true if the double \a v can be converted to type \c T, false if it's out of range. If the conversion is successful, the converted value is stored in \a value; if it was not successful, \a value will contain the minimum or maximum of T, depending on the sign of \a d. If \c T is - unsigned, then \a value contains the absolute value of \a v. + unsigned, then \a value contains the absolute value of \a v. If \c T is \c + float, an underflow is also signalled by returning false and setting \a + value to zero. This function works for v containing infinities, but not NaN. It's the caller's responsibility to exclude that possibility before calling it. 
*/ -template<typename T> -static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) +template <typename T> static inline std::enable_if_t<std::is_integral_v<T>, bool> +convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) { - static_assert(std::numeric_limits<T>::is_integer); static_assert(std::is_integral_v<T>); constexpr bool TypeIsLarger = std::numeric_limits<T>::digits > std::numeric_limits<double>::digits; @@ -180,7 +187,7 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr // correct, but Clang, ICC and MSVC don't realize that it's a constant and // the math call stays in the compiled code. -#ifdef Q_PROCESSOR_X86_64 +#if defined(Q_PROCESSOR_X86_64) && defined(__SSE2__) // Of course, UB doesn't apply if we use intrinsics, in which case we are // allowed to dpeend on exactly the processor's behavior. This // implementation uses the truncating conversions from Scalar Double to @@ -278,6 +285,116 @@ QT_WARNING_DISABLE_FLOAT_COMPARE QT_WARNING_POP } +template <typename T> static +std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool> +convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) +{ + Q_UNUSED(allow_precision_upgrade); + constexpr T Huge = std::numeric_limits<T>::infinity(); + + if constexpr (std::numeric_limits<double>::max_exponent <= + std::numeric_limits<T>::max_exponent) { + // no UB can happen + *value = T(v); + return true; + } + +#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm)) + // The x86 CVTSD2SS instruction from SSE2 does what we want: + // - converts out-of-range doubles to ±infinity and sets #O + // - converts underflows to zero and sets #U + // We need to clear any previously-stored exceptions from it before the + // operation (3-cycle cost) and obtain the new state afterwards (1 cycle).
+ + unsigned csr = _MM_MASK_MASK; // clear stored exception indicators + auto sse_check_result = [&](auto result) { + if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0) + return true; + if (csr & _MM_EXCEPT_OVERFLOW) + return false; + + // According to IEEE 754[1], #U is also set when the result is tiny and + // inexact, but still non-zero, so detect that (this won't generate + // good code for types without hardware support). + // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling + return result != 0; + }; + + // Written directly in assembly because both Clang and GCC have been + // observed to reorder the STMXCSR instruction above the conversion + // operation. MSVC generates horrid code when using the intrinsics anyway, + // so it's not a loss. + // See https://github.com/llvm/llvm-project/issues/83661. + if constexpr (std::is_same_v<T, float>) { +# ifdef __AVX__ + asm ("vldmxcsr %[csr]\n\t" + "vcvtsd2ss %[in], %[in], %[out]\n\t" + "vstmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# else + asm ("ldmxcsr %[csr]\n\t" + "cvtsd2ss %[in], %[out]\n\t" + "stmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# endif + return sse_check_result(*value); + } + +# if defined(__F16C__) || defined(__AVX512FP16__) + if constexpr (sizeof(T) == 2 && std::numeric_limits<T>::max_exponent == 16) { + // qfloat16 or std::float16_t, but not std::bfloat16_t or std::bfloat8_t + auto doConvert = [&](auto *out) { + asm ("vldmxcsr %[csr]\n\t" +# ifdef __AVX512FP16__ + // AVX512FP16 & AVX10 have an instruction for this + "vcvtsd2sh %[in], %[in], %[out]\n\t" +# else + "vcvtsd2ss %[in], %[in], %[out]\n\t" // sets DEST[MAXVL-1:128] := 0 + "vcvtps2ph %[rc], %[out], %[out]\n\t" +# endif + "vstmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*out) + : [in] "v" (v), [rc] "i" (_MM_FROUND_CUR_DIRECTION) + ); + return sse_check_result(*out); + }; + + if constexpr (std::is_same_v<T, qfloat16> && 
+ !std::is_void_v<typename T::NativeType>) { + typename T::NativeType tmp; + bool b = doConvert(&tmp); + *value = tmp; + return b; + } else { +# ifndef Q_CC_CLANG + // Clang can only implement this if it has a native FP16 type + return doConvert(value); +# endif + } + } +# endif +#endif // __SSE2__ && inline assembly + + if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) { + // infinity (or NaN) + *value = T(v); + return true; + } + + // Check for in-range value to ensure the conversion is not UB (see the + // comment above for Standard language). + if (std::fabs(v) > (std::numeric_limits<T>::max)()) { + *value = v < 0 ? -Huge : Huge; + return false; + } + + *value = T(v); + if (v != 0 && *value == 0) { + // Underflow through loss of precision + return false; + } + return true; +} + template <typename T> inline bool add_overflow(T v1, T v2, T *r) { return qAddOverflow(v1, v2, r); } template <typename T> inline bool sub_overflow(T v1, T v2, T *r) { return qSubOverflow(v1, v2, r); } template <typename T> inline bool mul_overflow(T v1, T v2, T *r) { return qMulOverflow(v1, v2, r); } @@ -312,7 +429,42 @@ template <auto V2, typename T> bool mul_overflow(T v1, T *r) return qMulOverflow<V2, T>(v1, r); } } -#endif // Q_CLANG_QDOC +#endif // Q_QDOC + +/* + Safely narrows \a x to \c{To}. Let \c L be + \c{std::numeric_limits<To>::min()} and \c H be \c{std::numeric_limits<To>::max()}. + + If \a x is less than L, returns L. If \a x is greater than H, + returns H. Otherwise, returns \c{To(x)}. +*/ +template <typename To, typename From> +static constexpr auto qt_saturate(From x) +{ + static_assert(std::is_integral_v<To>); + static_assert(std::is_integral_v<From>); + + [[maybe_unused]] + constexpr auto Lo = (std::numeric_limits<To>::min)(); + constexpr auto Hi = (std::numeric_limits<To>::max)(); + + if constexpr (std::is_signed_v<From> == std::is_signed_v<To>) { + // same signedness, we can accept regular integer conversion rules + return x < Lo ? Lo : + x > Hi ? 
Hi : + /*else*/ To(x); + } else { + if constexpr (std::is_signed_v<From>) { // ie. !is_signed_v<To> + if (x < From{0}) + return To{0}; + } + + // from here on, x >= 0 + using FromU = std::make_unsigned_t<From>; + using ToU = std::make_unsigned_t<To>; + return FromU(x) > ToU(Hi) ? Hi : To(x); // assumes Hi >= 0 + } +} QT_END_NAMESPACE |