summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qnumeric_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qnumeric_p.h')
-rw-r--r--src/corelib/global/qnumeric_p.h526
1 files changed, 250 insertions, 276 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h
index a11057dfff..d40e6b964b 100644
--- a/src/corelib/global/qnumeric_p.h
+++ b/src/corelib/global/qnumeric_p.h
@@ -1,42 +1,6 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 The Qt Company Ltd.
-** Copyright (C) 2020 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 The Qt Company Ltd.
+// Copyright (C) 2021 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#ifndef QNUMERIC_P_H
#define QNUMERIC_P_H
@@ -53,30 +17,17 @@
//
#include "QtCore/private/qglobal_p.h"
+#include "QtCore/qnumeric.h"
+#include "QtCore/qsimd.h"
#include <cmath>
#include <limits>
#include <type_traits>
-#if defined(Q_CC_MSVC)
-# include <intrin.h>
-# include <float.h>
-# if defined(Q_PROCESSOR_X86_64) || defined(Q_PROCESSOR_ARM_64)
-# define Q_INTRINSIC_MUL_OVERFLOW64
-# define Q_UMULH(v1, v2) __umulh(v1, v2);
-# define Q_SMULH(v1, v2) __mulh(v1, v2);
-# pragma intrinsic(__umulh)
-# pragma intrinsic(__mulh)
-# endif
-#endif
-
-# if defined(Q_OS_INTEGRITY) && defined(Q_PROCESSOR_ARM_64)
-#include <arm64_ghs.h>
-# define Q_INTRINSIC_MUL_OVERFLOW64
-# define Q_UMULH(v1, v2) __MULUH64(v1, v2);
-# define Q_SMULH(v1, v2) __MULSH64(v1, v2);
+#ifndef __has_extension
+# define __has_extension(X) 0
#endif
-#if !defined(Q_CC_MSVC) && (defined(Q_OS_QNX) || defined(Q_CC_INTEL))
+#if !defined(Q_CC_MSVC) && defined(Q_OS_QNX)
# include <math.h>
# ifdef isnan
# define QT_MATH_H_DEFINES_MACROS
@@ -104,6 +55,8 @@ QT_END_NAMESPACE
QT_BEGIN_NAMESPACE
+class qfloat16;
+
namespace qnumeric_std_wrapper {
#if defined(QT_MATH_H_DEFINES_MACROS)
# undef QT_MATH_H_DEFINES_MACROS
@@ -191,22 +144,38 @@ Q_DECL_CONST_FUNCTION static inline int qt_fpclassify(float f)
return qnumeric_std_wrapper::fpclassify(f);
}
-#ifndef Q_CLANG_QDOC
+#ifndef Q_QDOC
namespace {
/*!
Returns true if the double \a v can be converted to type \c T, false if
it's out of range. If the conversion is successful, the converted value is
stored in \a value; if it was not successful, \a value will contain the
minimum or maximum of T, depending on the sign of \a d. If \c T is
- unsigned, then \a value contains the absolute value of \a v.
+ unsigned, then \a value contains the absolute value of \a v. If \c T is \c
+ float, an underflow is also signalled by returning false and setting \a
+ value to zero.
This function works for v containing infinities, but not NaN. It's the
caller's responsibility to exclude that possibility before calling it.
*/
-template<typename T>
-static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
+template <typename T> static inline std::enable_if_t<std::is_integral_v<T>, bool>
+convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
{
- static_assert(std::numeric_limits<T>::is_integer);
+ static_assert(std::is_integral_v<T>);
+ constexpr bool TypeIsLarger = std::numeric_limits<T>::digits > std::numeric_limits<double>::digits;
+
+ if constexpr (TypeIsLarger) {
+ using S = std::make_signed_t<T>;
+ constexpr S max_mantissa = S(1) << std::numeric_limits<double>::digits;
+ // T has more bits than double's mantissa, so don't allow "upgrading"
+ // to T (makes it look like the number had more precision than really
+ // was transmitted)
+ if (!allow_precision_upgrade && !(v <= double(max_mantissa) && v >= double(-max_mantissa - 1)))
+ return false;
+ }
+
+ constexpr T Tmin = (std::numeric_limits<T>::min)();
+ constexpr T Tmax = (std::numeric_limits<T>::max)();
// The [conv.fpint] (7.10 Floating-integral conversions) section of the C++
// standard says only exact conversions are guaranteed. Converting
@@ -218,23 +187,90 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr
// correct, but Clang, ICC and MSVC don't realize that it's a constant and
// the math call stays in the compiled code.
+#if defined(Q_PROCESSOR_X86_64) && defined(__SSE2__)
+ // Of course, UB doesn't apply if we use intrinsics, in which case we are
+ // allowed to depend on exactly the processor's behavior. This
+ // implementation uses the truncating conversions from Scalar Double to
+ // integral types (CVTTSD2SI and VCVTTSD2USI), which is documented to
+ // return the "indefinite integer value" if the range of the target type is
+ // exceeded. (only implemented for x86-64 to avoid having to deal with the
+ // non-existence of the 64-bit intrinsics on i386)
+
+ if (std::numeric_limits<T>::is_signed) {
+ __m128d mv = _mm_set_sd(v);
+# ifdef __AVX512F__
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_i64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_i32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+# else
+ *value = sizeof(T) > 4 ? T(_mm_cvttsd_si64(mv)) : _mm_cvttsd_si32(mv);
+# endif
+
+ // if *value is the "indefinite integer value", check if the original
+ // variable \a v is the same value (Tmin is an exact representation)
+ if (*value == Tmin && !_mm_ucomieq_sd(mv, _mm_set_sd(Tmin))) {
+ // v != Tmin, so it was out of range
+ if (v > 0)
+ *value = Tmax;
+ return false;
+ }
+
+ // convert the integer back to double and compare for equality with v,
+ // to determine if we've lost any precision
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtsi64_sd(mv, *value) : _mm_cvtsi32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+
+# ifdef __AVX512F__
+ if (!std::numeric_limits<T>::is_signed) {
+ // Same thing as above, but this function operates on absolute values
+ // and the "indefinite integer value" for the 64-bit unsigned
+ // conversion (Tmax) is not representable in double, so it can never be
+ // the result of an in-range conversion. This is implemented for AVX512
+ // and later because of the unsigned conversion instruction. Converting
+ // to unsigned without losing an extra bit of precision prior to AVX512
+ // is left to the compiler below.
+
+ v = fabs(v);
+ __m128d mv = _mm_set_sd(v);
+
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_u64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_u32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+
+ if (*value == Tmax) {
+ // no double can have an exact value of quint64(-1), but they can
+ // have the value quint32(-1), so we need to compare for that
+ if (TypeIsLarger || _mm_ucomieq_sd(mv, _mm_set_sd(Tmax)))
+ return false;
+ }
+
+ // return true if it was an exact conversion
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtu64_sd(mv, *value) : _mm_cvtu32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+# endif
+#endif
+
double supremum;
if (std::numeric_limits<T>::is_signed) {
- supremum = -1.0 * std::numeric_limits<T>::min(); // -1 * (-2^63) = 2^63, exact (for T = qint64)
- *value = std::numeric_limits<T>::min();
- if (v < std::numeric_limits<T>::min())
+ supremum = -1.0 * Tmin; // -1 * (-2^63) = 2^63, exact (for T = qint64)
+ *value = Tmin;
+ if (v < Tmin)
return false;
} else {
using ST = typename std::make_signed<T>::type;
- supremum = -2.0 * std::numeric_limits<ST>::min(); // -2 * (-2^63) = 2^64, exact (for T = quint64)
+ supremum = -2.0 * (std::numeric_limits<ST>::min)(); // -2 * (-2^63) = 2^64, exact (for T = quint64)
v = fabs(v);
}
- if (std::is_integral<T>::value && sizeof(T) > 4 && !allow_precision_upgrade) {
- if (v > double(Q_INT64_C(1)<<53) || v < double(-((Q_INT64_C(1)<<53) + 1)))
- return false;
- }
- *value = std::numeric_limits<T>::max();
+ *value = Tmax;
if (v >= supremum)
return false;
@@ -249,249 +285,187 @@ QT_WARNING_DISABLE_FLOAT_COMPARE
QT_WARNING_POP
}
-// Overflow math.
-// This provides efficient implementations for int, unsigned, qsizetype and
-// size_t. Implementations for 8- and 16-bit types will work but may not be as
-// efficient. Implementations for 64-bit may be missing on 32-bit platforms.
-
-#if ((defined(Q_CC_INTEL) ? (Q_CC_INTEL >= 1800 && !defined(Q_OS_WIN)) : defined(Q_CC_GNU)) \
- && Q_CC_GNU >= 500) || __has_builtin(__builtin_add_overflow)
-// GCC 5, ICC 18, and Clang 3.8 have builtins to detect overflows
-#define Q_INTRINSIC_MUL_OVERFLOW64
-
-template <typename T> inline
-typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type
-add_overflow(T v1, T v2, T *r)
-{ return __builtin_add_overflow(v1, v2, r); }
-
-template <typename T> inline
-typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type
-sub_overflow(T v1, T v2, T *r)
-{ return __builtin_sub_overflow(v1, v2, r); }
-
-template <typename T> inline
-typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type
-mul_overflow(T v1, T v2, T *r)
-{ return __builtin_mul_overflow(v1, v2, r); }
-
-#else
-// Generic implementations
-
-template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type
-add_overflow(T v1, T v2, T *r)
+template <typename T> static
+std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool>
+convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true)
{
- // unsigned additions are well-defined
- *r = v1 + v2;
- return v1 > T(v1 + v2);
-}
-
-template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type
-add_overflow(T v1, T v2, T *r)
-{
- // Here's how we calculate the overflow:
- // 1) unsigned addition is well-defined, so we can always execute it
- // 2) conversion from unsigned back to signed is implementation-
- // defined and in the implementations we use, it's a no-op.
- // 3) signed integer overflow happens if the sign of the two input operands
- // is the same but the sign of the result is different. In other words,
- // the sign of the result must be the same as the sign of either
- // operand.
-
- using U = typename std::make_unsigned<T>::type;
- *r = T(U(v1) + U(v2));
-
- // If int is two's complement, assume all integer types are too.
- if (std::is_same<int32_t, int>::value) {
- // Two's complement equivalent (generates slightly shorter code):
- // x ^ y is negative if x and y have different signs
- // x & y is negative if x and y are negative
- // (x ^ z) & (y ^ z) is negative if x and z have different signs
- // AND y and z have different signs
- return ((v1 ^ *r) & (v2 ^ *r)) < 0;
+ Q_UNUSED(allow_precision_upgrade);
+ constexpr T Huge = std::numeric_limits<T>::infinity();
+
+ if constexpr (std::numeric_limits<double>::max_exponent <=
+ std::numeric_limits<T>::max_exponent) {
+ // no UB can happen
+ *value = T(v);
+ return true;
}
- bool s1 = (v1 < 0);
- bool s2 = (v2 < 0);
- bool sr = (*r < 0);
- return s1 != sr && s2 != sr;
- // also: return s1 == s2 && s1 != sr;
-}
+#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm))
+ // The x86 CVTSD2SH instruction from SSE2 does what we want:
+ // - converts out-of-range doubles to ±infinity and sets #O
+ // - converts underflows to zero and sets #U
+ // We need to clear any previously-stored exceptions from it before the
+ // operation (3-cycle cost) and obtain the new state afterwards (1 cycle).
-template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type
-sub_overflow(T v1, T v2, T *r)
-{
- // unsigned subtractions are well-defined
- *r = v1 - v2;
- return v1 < v2;
-}
+ unsigned csr = _MM_MASK_MASK; // clear stored exception indicators
+ auto sse_check_result = [&](auto result) {
+ if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0)
+ return true;
+ if (csr & _MM_EXCEPT_OVERFLOW)
+ return false;
-template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type
-sub_overflow(T v1, T v2, T *r)
-{
- // See above for explanation. This is the same with some signs reversed.
- // We can't use add_overflow(v1, -v2, r) because it would be UB if
- // v2 == std::numeric_limits<T>::min().
+ // According to IEEE 754[1], #U is also set when the result is tiny and
+ // inexact, but still non-zero, so detect that (this won't generate
+ // good code for types without hardware support).
+ // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling
+ return result != 0;
+ };
+
+ // Written directly in assembly because both Clang and GCC have been
+ // observed to reorder the STMXCSR instruction above the conversion
+ // operation. MSVC generates horrid code when using the intrinsics anyway,
+ // so it's not a loss.
+ // See https://github.com/llvm/llvm-project/issues/83661.
+ if constexpr (std::is_same_v<T, float>) {
+# ifdef __AVX__
+ asm ("vldmxcsr %[csr]\n\t"
+ "vcvtsd2ss %[in], %[in], %[out]\n\t"
+ "vstmxcsr %[csr]"
+ : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+# else
+ asm ("ldmxcsr %[csr]\n\t"
+ "cvtsd2ss %[in], %[out]\n\t"
+ "stmxcsr %[csr]"
+ : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v));
+# endif
+ return sse_check_result(*value);
+ }
+
+# if defined(__F16C__) || defined(__AVX512FP16__)
+ if constexpr (sizeof(T) == 2 && std::numeric_limits<T>::max_exponent == 16) {
+ // qfloat16 or std::float16_t, but not std::bfloat16_t or std::bfloat8_t
+ auto doConvert = [&](auto *out) {
+ asm ("vldmxcsr %[csr]\n\t"
+# ifdef __AVX512FP16__
+ // AVX512FP16 & AVX10 have an instruction for this
+ "vcvtsd2sh %[in], %[in], %[out]\n\t"
+# else
+ "vcvtsd2ss %[in], %[in], %[out]\n\t" // sets DEST[MAXVL-1:128] := 0
+ "vcvtps2ph %[rc], %[out], %[out]\n\t"
+# endif
+ "vstmxcsr %[csr]"
+ : [csr] "+m" (csr), [out] "=v" (*out)
+ : [in] "v" (v), [rc] "i" (_MM_FROUND_CUR_DIRECTION)
+ );
+ return sse_check_result(out);
+ };
+
+ if constexpr (std::is_same_v<T, qfloat16> && !std::is_void_v<typename T::NativeType>) {
+ typename T::NativeType tmp;
+ bool b = doConvert(&tmp);
+ *value = tmp;
+ return b;
+ } else {
+# ifndef Q_CC_CLANG
+ // Clang can only implement this if it has a native FP16 type
+ return doConvert(value);
+# endif
+ }
+ }
+# endif
+#endif // __SSE2__ && inline assembly
- using U = typename std::make_unsigned<T>::type;
- *r = T(U(v1) - U(v2));
+ if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) {
+ // infinity (or NaN)
+ *value = T(v);
+ return true;
+ }
- if (std::is_same<int32_t, int>::value)
- return ((v1 ^ *r) & (~v2 ^ *r)) < 0;
+ // Check for in-range value to ensure the conversion is not UB (see the
+ // comment above for Standard language).
+ if (std::fabs(v) > (std::numeric_limits<T>::max)()) {
+ *value = v < 0 ? -Huge : Huge;
+ return false;
+ }
- bool s1 = (v1 < 0);
- bool s2 = !(v2 < 0);
- bool sr = (*r < 0);
- return s1 != sr && s2 != sr;
- // also: return s1 == s2 && s1 != sr;
+ *value = T(v);
+ if (v != 0 && *value == 0) {
+ // Underflow through loss of precision
+ return false;
+ }
+ return true;
}
-template <typename T> inline
-typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type
-mul_overflow(T v1, T v2, T *r)
-{
- // use the next biggest type
- // Note: for 64-bit systems where __int128 isn't supported, this will cause an error.
- using LargerInt = QIntegerForSize<sizeof(T) * 2>;
- using Larger = typename std::conditional<std::is_signed<T>::value,
- typename LargerInt::Signed, typename LargerInt::Unsigned>::type;
- Larger lr = Larger(v1) * Larger(v2);
- *r = T(lr);
- return lr > std::numeric_limits<T>::max() || lr < std::numeric_limits<T>::min();
-}
+template <typename T> inline bool add_overflow(T v1, T v2, T *r) { return qAddOverflow(v1, v2, r); }
+template <typename T> inline bool sub_overflow(T v1, T v2, T *r) { return qSubOverflow(v1, v2, r); }
+template <typename T> inline bool mul_overflow(T v1, T v2, T *r) { return qMulOverflow(v1, v2, r); }
-# if defined(Q_INTRINSIC_MUL_OVERFLOW64)
-template <> inline bool mul_overflow(quint64 v1, quint64 v2, quint64 *r)
-{
- *r = v1 * v2;
- return Q_UMULH(v1, v2);
-}
-template <> inline bool mul_overflow(qint64 v1, qint64 v2, qint64 *r)
+template <typename T, T V2> bool add_overflow(T v1, std::integral_constant<T, V2>, T *r)
{
- // This is slightly more complex than the unsigned case above: the sign bit
- // of 'low' must be replicated as the entire 'high', so the only valid
- // values for 'high' are 0 and -1. Use unsigned multiply since it's the same
- // as signed for the low bits and use a signed right shift to verify that
- // 'high' is nothing but sign bits that match the sign of 'low'.
-
- qint64 high = Q_SMULH(v1, v2);
- *r = qint64(quint64(v1) * quint64(v2));
- return (*r >> 63) != high;
+ return qAddOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r);
}
-# if defined(Q_OS_INTEGRITY) && defined(Q_PROCESSOR_ARM_64)
-template <> inline bool mul_overflow(uint64_t v1, uint64_t v2, uint64_t *r)
+template <auto V2, typename T> bool add_overflow(T v1, T *r)
{
- return mul_overflow<quint64>(v1,v2,reinterpret_cast<quint64*>(r));
+ return qAddOverflow<V2, T>(v1, r);
}
-template <> inline bool mul_overflow(int64_t v1, int64_t v2, int64_t *r)
+template <typename T, T V2> bool sub_overflow(T v1, std::integral_constant<T, V2>, T *r)
{
- return mul_overflow<qint64>(v1,v2,reinterpret_cast<qint64*>(r));
+ return qSubOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r);
}
-# endif // OS_INTEGRITY ARM64
-# endif // Q_INTRINSIC_MUL_OVERFLOW64
-
-# if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86)
-// We can use intrinsics for the unsigned operations with MSVC
-template <> inline bool add_overflow(unsigned v1, unsigned v2, unsigned *r)
-{ return _addcarry_u32(0, v1, v2, r); }
-// 32-bit mul_overflow is fine with the generic code above
-
-template <> inline bool add_overflow(quint64 v1, quint64 v2, quint64 *r)
+template <auto V2, typename T> bool sub_overflow(T v1, T *r)
{
-# if defined(Q_PROCESSOR_X86_64)
- return _addcarry_u64(0, v1, v2, reinterpret_cast<unsigned __int64 *>(r));
-# else
- uint low, high;
- uchar carry = _addcarry_u32(0, unsigned(v1), unsigned(v2), &low);
- carry = _addcarry_u32(carry, v1 >> 32, v2 >> 32, &high);
- *r = (quint64(high) << 32) | low;
- return carry;
-# endif // !x86-64
+ return qSubOverflow<V2, T>(v1, r);
}
-# endif // MSVC X86
-#endif // !GCC
-// Implementations for addition, subtraction or multiplication by a
-// compile-time constant. For addition and subtraction, we simply call the code
-// that detects overflow at runtime. For multiplication, we compare to the
-// maximum possible values before multiplying to ensure no overflow happens.
-
-template <typename T, T V2> bool add_overflow(T v1, std::integral_constant<T, V2>, T *r)
+template <typename T, T V2> bool mul_overflow(T v1, std::integral_constant<T, V2>, T *r)
{
- return add_overflow(v1, V2, r);
+ return qMulOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r);
}
-template <auto V2, typename T> bool add_overflow(T v1, T *r)
+template <auto V2, typename T> bool mul_overflow(T v1, T *r)
{
- return add_overflow(v1, std::integral_constant<T, V2>{}, r);
+ return qMulOverflow<V2, T>(v1, r);
}
-
-template <typename T, T V2> bool sub_overflow(T v1, std::integral_constant<T, V2>, T *r)
-{
- return sub_overflow(v1, V2, r);
}
+#endif // Q_QDOC
-template <auto V2, typename T> bool sub_overflow(T v1, T *r)
-{
- return sub_overflow(v1, std::integral_constant<T, V2>{}, r);
-}
+/*
+ Safely narrows \a x to \c{To}. Let \c L be
\c{std::numeric_limits<To>::min()} and \c H be \c{std::numeric_limits<To>::max()}.
-template <typename T, T V2> bool mul_overflow(T v1, std::integral_constant<T, V2>, T *r)
+ If \a x is less than L, returns L. If \a x is greater than H,
+ returns H. Otherwise, returns \c{To(x)}.
+*/
+template <typename To, typename From>
+static constexpr auto qt_saturate(From x)
{
- // Runtime detection for anything smaller than or equal to a register
- // width, as most architectures' multiplication instructions actually
- // produce a result twice as wide as the input registers, allowing us to
- // efficiently detect the overflow.
- if constexpr (sizeof(T) <= sizeof(qregisteruint)) {
- return mul_overflow(v1, V2, r);
-
-#ifdef Q_INTRINSIC_MUL_OVERFLOW64
- } else if constexpr (sizeof(T) <= sizeof(quint64)) {
- // If we have intrinsics detecting overflow of 64-bit multiplications,
- // then detect overflows through them up to 64 bits.
- return mul_overflow(v1, V2, r);
-#endif
-
- } else if constexpr (V2 == 0 || V2 == 1) {
- // trivial cases (and simplify logic below due to division by zero)
- *r = v1 * V2;
- return false;
- } else if constexpr (V2 == -1) {
- // multiplication by -1 is valid *except* for signed minimum values
- // (necessary to avoid diving min() by -1, which is an overflow)
- if (v1 < 0 && v1 == std::numeric_limits<T>::min())
- return true;
- *r = -v1;
- return false;
+ static_assert(std::is_integral_v<To>);
+ static_assert(std::is_integral_v<From>);
+
+ [[maybe_unused]]
+ constexpr auto Lo = (std::numeric_limits<To>::min)();
+ constexpr auto Hi = (std::numeric_limits<To>::max)();
+
+ if constexpr (std::is_signed_v<From> == std::is_signed_v<To>) {
+ // same signedness, we can accept regular integer conversion rules
+ return x < Lo ? Lo :
+ x > Hi ? Hi :
+ /*else*/ To(x);
} else {
- // For 64-bit multiplications on 32-bit platforms, let's instead compare v1
- // against the bounds that would overflow.
- constexpr T Highest = std::numeric_limits<T>::max() / V2;
- constexpr T Lowest = std::numeric_limits<T>::min() / V2;
- if constexpr (Highest > Lowest) {
- if (v1 > Highest || v1 < Lowest)
- return true;
- } else {
- // this can only happen if V2 < 0
- static_assert(V2 < 0);
- if (v1 > Lowest || v1 < Highest)
- return true;
+ if constexpr (std::is_signed_v<From>) { // ie. !is_signed_v<To>
+ if (x < From{0})
+ return To{0};
}
- *r = v1 * V2;
- return false;
+ // from here on, x >= 0
+ using FromU = std::make_unsigned_t<From>;
+ using ToU = std::make_unsigned_t<To>;
+ return FromU(x) > ToU(Hi) ? Hi : To(x); // assumes Hi >= 0
}
}
-template <auto V2, typename T> bool mul_overflow(T v1, T *r)
-{
- return mul_overflow(v1, std::integral_constant<T, V2>{}, r);
-}
-}
-#endif // Q_CLANG_QDOC
-
QT_END_NAMESPACE
#endif // QNUMERIC_P_H