summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qnumeric_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qnumeric_p.h')
-rw-r--r--src/corelib/global/qnumeric_p.h309
1 files changed, 254 insertions, 55 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h
index 823a1812de..d40e6b964b 100644
--- a/src/corelib/global/qnumeric_p.h
+++ b/src/corelib/global/qnumeric_p.h
@@ -1,42 +1,6 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 The Qt Company Ltd.
-** Copyright (C) 2020 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 The Qt Company Ltd.
+// Copyright (C) 2021 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#ifndef QNUMERIC_P_H
#define QNUMERIC_P_H
@@ -54,11 +18,16 @@
#include "QtCore/private/qglobal_p.h"
#include "QtCore/qnumeric.h"
+#include "QtCore/qsimd.h"
#include <cmath>
#include <limits>
#include <type_traits>
-#if !defined(Q_CC_MSVC) && (defined(Q_OS_QNX) || defined(Q_CC_INTEL))
+#ifndef __has_extension
+# define __has_extension(X) 0
+#endif
+
+#if !defined(Q_CC_MSVC) && defined(Q_OS_QNX)
# include <math.h>
# ifdef isnan
# define QT_MATH_H_DEFINES_MACROS
@@ -86,6 +55,8 @@ QT_END_NAMESPACE
QT_BEGIN_NAMESPACE
+class qfloat16;
+
namespace qnumeric_std_wrapper {
#if defined(QT_MATH_H_DEFINES_MACROS)
# undef QT_MATH_H_DEFINES_MACROS
@@ -173,22 +144,38 @@ Q_DECL_CONST_FUNCTION static inline int qt_fpclassify(float f)
return qnumeric_std_wrapper::fpclassify(f);
}
-#ifndef Q_CLANG_QDOC
+#ifndef Q_QDOC
namespace {
/*!
Returns true if the double \a v can be converted to type \c T, false if
it's out of range. If the conversion is successful, the converted value is
stored in \a value; if it was not successful, \a value will contain the
minimum or maximum of T, depending on the sign of \a d. If \c T is
- unsigned, then \a value contains the absolute value of \a v.
+ unsigned, then \a value contains the absolute value of \a v. If \c T is \c
+ float, an underflow is also signalled by returning false and setting \a
+ value to zero.
This function works for v containing infinities, but not NaN. It's the
caller's responsibility to exclude that possibility before calling it.
*/
-template<typename T>
-static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
+template <typename T> static inline std::enable_if_t<std::is_integral_v<T>, bool>
+convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
{
- static_assert(std::numeric_limits<T>::is_integer);
+ static_assert(std::is_integral_v<T>);
+ constexpr bool TypeIsLarger = std::numeric_limits<T>::digits > std::numeric_limits<double>::digits;
+
+ if constexpr (TypeIsLarger) {
+ using S = std::make_signed_t<T>;
+ constexpr S max_mantissa = S(1) << std::numeric_limits<double>::digits;
+ // T has more bits than double's mantissa, so don't allow "upgrading"
+ // to T (makes it look like the number had more precision than really
+ // was transmitted)
+ if (!allow_precision_upgrade && !(v <= double(max_mantissa) && v >= double(-max_mantissa - 1)))
+ return false;
+ }
+
+ constexpr T Tmin = (std::numeric_limits<T>::min)();
+ constexpr T Tmax = (std::numeric_limits<T>::max)();
// The [conv.fpint] (7.10 Floating-integral conversions) section of the C++
// standard says only exact conversions are guaranteed. Converting
@@ -200,23 +187,90 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr
// correct, but Clang, ICC and MSVC don't realize that it's a constant and
// the math call stays in the compiled code.
+#if defined(Q_PROCESSOR_X86_64) && defined(__SSE2__)
+ // Of course, UB doesn't apply if we use intrinsics, in which case we are
+ // allowed to depend on exactly the processor's behavior. This
+ // implementation uses the truncating conversions from Scalar Double to
+ // integral types (CVTTSD2SI and VCVTTSD2USI), which is documented to
+ // return the "indefinite integer value" if the range of the target type is
+ // exceeded. (only implemented for x86-64 to avoid having to deal with the
+ // non-existence of the 64-bit intrinsics on i386)
+
+ if (std::numeric_limits<T>::is_signed) {
+ __m128d mv = _mm_set_sd(v);
+# ifdef __AVX512F__
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_i64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_i32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+# else
+ *value = sizeof(T) > 4 ? T(_mm_cvttsd_si64(mv)) : _mm_cvttsd_si32(mv);
+# endif
+
+ // if *value is the "indefinite integer value", check if the original
+ // variable \a v is the same value (Tmin is an exact representation)
+ if (*value == Tmin && !_mm_ucomieq_sd(mv, _mm_set_sd(Tmin))) {
+ // v != Tmin, so it was out of range
+ if (v > 0)
+ *value = Tmax;
+ return false;
+ }
+
+ // convert the integer back to double and compare for equality with v,
+ // to determine if we've lost any precision
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtsi64_sd(mv, *value) : _mm_cvtsi32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+
+# ifdef __AVX512F__
+ if (!std::numeric_limits<T>::is_signed) {
+ // Same thing as above, but this function operates on absolute values
+ // and the "indefinite integer value" for the 64-bit unsigned
+ // conversion (Tmax) is not representable in double, so it can never be
+ // the result of an in-range conversion. This is implemented for AVX512
+ // and later because of the unsigned conversion instruction. Converting
+ // to unsigned without losing an extra bit of precision prior to AVX512
+ // is left to the compiler below.
+
+ v = fabs(v);
+ __m128d mv = _mm_set_sd(v);
+
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_u64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_u32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+
+ if (*value == Tmax) {
+ // no double can have an exact value of quint64(-1), but they can
+ // quint32(-1), so we need to compare for that
+ if (TypeIsLarger || _mm_ucomieq_sd(mv, _mm_set_sd(Tmax)))
+ return false;
+ }
+
+ // return true if it was an exact conversion
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtu64_sd(mv, *value) : _mm_cvtu32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+# endif
+#endif
+
double supremum;
if (std::numeric_limits<T>::is_signed) {
- supremum = -1.0 * std::numeric_limits<T>::min(); // -1 * (-2^63) = 2^63, exact (for T = qint64)
- *value = std::numeric_limits<T>::min();
- if (v < std::numeric_limits<T>::min())
+ supremum = -1.0 * Tmin; // -1 * (-2^63) = 2^63, exact (for T = qint64)
+ *value = Tmin;
+ if (v < Tmin)
return false;
} else {
using ST = typename std::make_signed<T>::type;
- supremum = -2.0 * std::numeric_limits<ST>::min(); // -2 * (-2^63) = 2^64, exact (for T = quint64)
+ supremum = -2.0 * (std::numeric_limits<ST>::min)(); // -2 * (-2^63) = 2^64, exact (for T = quint64)
v = fabs(v);
}
- if (std::is_integral<T>::value && sizeof(T) > 4 && !allow_precision_upgrade) {
- if (v > double(Q_INT64_C(1)<<53) || v < double(-((Q_INT64_C(1)<<53) + 1)))
- return false;
- }
- *value = std::numeric_limits<T>::max();
+ *value = Tmax;
if (v >= supremum)
return false;
@@ -231,6 +285,116 @@ QT_WARNING_DISABLE_FLOAT_COMPARE
QT_WARNING_POP
}
+/*
+    Floating-point overload of convertDoubleTo(): narrows the double \a v to
+    \c T (float, qfloat16 or another floating-point type) and stores the
+    result in \a value.
+
+    Returns true unconditionally if \c T's exponent range is at least that of
+    double. In the portable path, returns false on overflow (with \a value set
+    to ±infinity) and when a non-zero \a v becomes zero, and returns true for
+    non-finite \a v when \c T has an infinity. The x86 inline-assembly paths
+    take the same decision from the MXCSR overflow and underflow flags.
+
+    \a allow_precision_upgrade is unused by this overload.
+*/
+template <typename T> static
+std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool>
+convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
+{
+    Q_UNUSED(allow_precision_upgrade);
+    constexpr T Huge = std::numeric_limits<T>::infinity();
+
+    if constexpr (std::numeric_limits<double>::max_exponent <=
+                  std::numeric_limits<T>::max_exponent) {
+        // no UB can happen
+        *value = T(v);
+        return true;
+    }
+
+#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm))
+    // The x86 CVTSD2SS instruction from SSE2 does what we want:
+    // - converts out-of-range doubles to ±infinity and sets #O
+    // - converts underflows to zero and sets #U
+    // We need to clear any previously-stored exceptions from it before the
+    // operation (3-cycle cost) and obtain the new state afterwards (1 cycle).
+
+    unsigned csr = _MM_MASK_MASK;       // clear stored exception indicators
+    // Interprets the MXCSR state stored back into csr by the asm blocks below;
+    // \a result must be the converted value itself (not a pointer to it).
+    auto sse_check_result = [&](auto result) {
+        if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0)
+            return true;
+        if (csr & _MM_EXCEPT_OVERFLOW)
+            return false;
+
+        // According to IEEE 754[1], #U is also set when the result is tiny and
+        // inexact, but still non-zero, so detect that (this won't generate
+        // good code for types without hardware support).
+        // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling
+        return result != 0;
+    };
+
+    // Written directly in assembly because both Clang and GCC have been
+    // observed to reorder the STMXCSR instruction above the conversion
+    // operation. MSVC generates horrid code when using the intrinsics anyway,
+    // so it's not a loss.
+    // See https://github.com/llvm/llvm-project/issues/83661.
+    if constexpr (std::is_same_v<T, float>) {
+#  ifdef __AVX__
+        asm ("vldmxcsr %[csr]\n\t"
+             "vcvtsd2ss %[in], %[in], %[out]\n\t"
+             "vstmxcsr %[csr]"
+            : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+#  else
+        asm ("ldmxcsr %[csr]\n\t"
+             "cvtsd2ss %[in], %[out]\n\t"
+             "stmxcsr %[csr]"
+            : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+#  endif
+        return sse_check_result(*value);
+    }
+
+#  if defined(__F16C__) || defined(__AVX512FP16__)
+    if constexpr (sizeof(T) == 2 && std::numeric_limits<T>::max_exponent == 16) {
+        // qfloat16 or std::float16_t, but not std::bfloat16_t or std::bfloat8_t
+        auto doConvert = [&](auto *out) {
+            asm ("vldmxcsr %[csr]\n\t"
+#    ifdef __AVX512FP16__
+                 // AVX512FP16 & AVX10 have an instruction for this
+                 "vcvtsd2sh %[in], %[in], %[out]\n\t"
+#    else
+                 "vcvtsd2ss %[in], %[in], %[out]\n\t"  // sets DEST[MAXVL-1:128] := 0
+                 "vcvtps2ph %[rc], %[out], %[out]\n\t"
+#    endif
+                 "vstmxcsr %[csr]"
+                 : [csr] "+m" (csr), [out] "=v" (*out)
+                 : [in] "v" (v), [rc] "i" (_MM_FROUND_CUR_DIRECTION)
+            );
+            // Pass the converted value, not the pointer: the underflow test
+            // compares its argument against zero, and a non-null pointer
+            // would make every underflow to zero look like a success.
+            return sse_check_result(*out);
+        };
+
+        if constexpr (std::is_same_v<T, qfloat16> && !std::is_void_v<typename T::NativeType>) {
+            // convert through the native half-precision type, then wrap it
+            typename T::NativeType tmp;
+            bool b = doConvert(&tmp);
+            *value = tmp;
+            return b;
+        } else {
+#    ifndef Q_CC_CLANG
+            // Clang can only implement this if it has a native FP16 type
+            return doConvert(value);
+#    endif
+            // with Clang, fall through to the portable implementation below
+        }
+    }
+#  endif
+#endif // __SSE2__ && inline assembly
+
+    if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) {
+        // infinity (or NaN)
+        *value = T(v);
+        return true;
+    }
+
+    // Check for in-range value to ensure the conversion is not UB (see the
+    // comment above for Standard language).
+    if (std::fabs(v) > (std::numeric_limits<T>::max)()) {
+        *value = v < 0 ? -Huge : Huge;
+        return false;
+    }
+
+    *value = T(v);
+    if (v != 0 && *value == 0) {
+        // Underflow through loss of precision
+        return false;
+    }
+    return true;
+}
+
+
template <typename T> inline bool add_overflow(T v1, T v2, T *r) { return qAddOverflow(v1, v2, r); }
template <typename T> inline bool sub_overflow(T v1, T v2, T *r) { return qSubOverflow(v1, v2, r); }
template <typename T> inline bool mul_overflow(T v1, T v2, T *r) { return qMulOverflow(v1, v2, r); }
@@ -265,7 +429,42 @@ template <auto V2, typename T> bool mul_overflow(T v1, T *r)
return qMulOverflow<V2, T>(v1, r);
}
}
-#endif // Q_CLANG_QDOC
+#endif // Q_QDOC
+
+/*
+    Converts the integer \a x to the integer type \c{To}, clamping instead of
+    wrapping. With \c L being \c{std::numeric_limits<To>::min()} and \c H being
+    \c{std::numeric_limits<To>::max()}:
+
+    \list
+    \li if \a x is less than L, the result is L;
+    \li if \a x is greater than H, the result is H;
+    \li otherwise the result is \c{To(x)}.
+    \endlist
+*/
+template <typename To, typename From>
+static constexpr auto qt_saturate(From x)
+{
+    static_assert(std::is_integral_v<To>);
+    static_assert(std::is_integral_v<From>);
+
+    using Limits = std::numeric_limits<To>;
+    [[maybe_unused]] constexpr To Floor = (Limits::min)();
+    constexpr To Ceiling = (Limits::max)();
+
+    if constexpr (std::is_signed_v<From> != std::is_signed_v<To>) {
+        // Mixed signedness: deal with negative input first (only possible
+        // when From is the signed type, in which case To cannot hold it).
+        if constexpr (std::is_signed_v<From>) {
+            if (x < From{0})
+                return To{0};
+        }
+
+        // x is non-negative here and so is Ceiling, so both can be compared
+        // as unsigned values without changing their magnitude.
+        if (std::make_unsigned_t<From>(x) > std::make_unsigned_t<To>(Ceiling))
+            return Ceiling;
+        return To(x);
+    } else {
+        // Same signedness: the regular integer conversion rules keep the
+        // comparisons below exact.
+        if (x < Floor)
+            return Floor;
+        if (x > Ceiling)
+            return Ceiling;
+        return To(x);
+    }
+}
QT_END_NAMESPACE