diff options
Diffstat (limited to 'src/corelib/global/qnumeric_p.h')
-rw-r--r-- | src/corelib/global/qnumeric_p.h | 526 |
1 files changed, 250 insertions, 276 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h index a11057dfff..d40e6b964b 100644 --- a/src/corelib/global/qnumeric_p.h +++ b/src/corelib/global/qnumeric_p.h @@ -1,42 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2020 The Qt Company Ltd. -** Copyright (C) 2020 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 The Qt Company Ltd. +// Copyright (C) 2021 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #ifndef QNUMERIC_P_H #define QNUMERIC_P_H @@ -53,30 +17,17 @@ // #include "QtCore/private/qglobal_p.h" +#include "QtCore/qnumeric.h" +#include "QtCore/qsimd.h" #include <cmath> #include <limits> #include <type_traits> -#if defined(Q_CC_MSVC) -# include <intrin.h> -# include <float.h> -# if defined(Q_PROCESSOR_X86_64) || defined(Q_PROCESSOR_ARM_64) -# define Q_INTRINSIC_MUL_OVERFLOW64 -# define Q_UMULH(v1, v2) __umulh(v1, v2); -# define Q_SMULH(v1, v2) __mulh(v1, v2); -# pragma intrinsic(__umulh) -# pragma intrinsic(__mulh) -# endif -#endif - -# if defined(Q_OS_INTEGRITY) && defined(Q_PROCESSOR_ARM_64) -#include <arm64_ghs.h> -# define Q_INTRINSIC_MUL_OVERFLOW64 -# define Q_UMULH(v1, v2) __MULUH64(v1, v2); -# define Q_SMULH(v1, v2) __MULSH64(v1, v2); +#ifndef __has_extension +# define __has_extension(X) 0 #endif -#if !defined(Q_CC_MSVC) && (defined(Q_OS_QNX) || defined(Q_CC_INTEL)) +#if !defined(Q_CC_MSVC) && defined(Q_OS_QNX) # include <math.h> # ifdef isnan # define QT_MATH_H_DEFINES_MACROS @@ -104,6 +55,8 @@ QT_END_NAMESPACE QT_BEGIN_NAMESPACE +class qfloat16; + namespace qnumeric_std_wrapper { #if defined(QT_MATH_H_DEFINES_MACROS) # undef QT_MATH_H_DEFINES_MACROS @@ -191,22 +144,38 @@ Q_DECL_CONST_FUNCTION static inline int qt_fpclassify(float f) return qnumeric_std_wrapper::fpclassify(f); } -#ifndef Q_CLANG_QDOC +#ifndef Q_QDOC namespace { /*! Returns true if the double \a v can be converted to type \c T, false if it's out of range. 
If the conversion is successful, the converted value is stored in \a value; if it was not successful, \a value will contain the minimum or maximum of T, depending on the sign of \a v. If \c T is - unsigned, then \a value contains the absolute value of \a v. + unsigned, then \a value contains the absolute value of \a v. If \c T is \c + float, an underflow is also signalled by returning false and setting \a + value to zero. This function works for v containing infinities, but not NaN. It's the caller's responsibility to exclude that possibility before calling it. */ -template<typename T> -static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) +template <typename T> static inline std::enable_if_t<std::is_integral_v<T>, bool> +convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) { - static_assert(std::numeric_limits<T>::is_integer); + static_assert(std::is_integral_v<T>); + constexpr bool TypeIsLarger = std::numeric_limits<T>::digits > std::numeric_limits<double>::digits; + + if constexpr (TypeIsLarger) { + using S = std::make_signed_t<T>; + constexpr S max_mantissa = S(1) << std::numeric_limits<double>::digits; + // T has more bits than double's mantissa, so don't allow "upgrading" + // to T (makes it look like the number had more precision than really + // was transmitted) + if (!allow_precision_upgrade && !(v <= double(max_mantissa) && v >= double(-max_mantissa - 1))) + return false; + } + + constexpr T Tmin = (std::numeric_limits<T>::min)(); + constexpr T Tmax = (std::numeric_limits<T>::max)(); // The [conv.fpint] (7.10 Floating-integral conversions) section of the C++ // standard says only exact conversions are guaranteed. Converting @@ -218,23 +187,90 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr // correct, but Clang, ICC and MSVC don't realize that it's a constant and // the math call stays in the compiled code. 
+#if defined(Q_PROCESSOR_X86_64) && defined(__SSE2__) + // Of course, UB doesn't apply if we use intrinsics, in which case we are + // allowed to depend on exactly the processor's behavior. This + // implementation uses the truncating conversions from Scalar Double to + // integral types (CVTTSD2SI and VCVTTSD2USI), which is documented to + // return the "indefinite integer value" if the range of the target type is + // exceeded. (only implemented for x86-64 to avoid having to deal with the + // non-existence of the 64-bit intrinsics on i386) + + if (std::numeric_limits<T>::is_signed) { + __m128d mv = _mm_set_sd(v); +# ifdef __AVX512F__ + // use explicit round control and suppress exceptions + if (sizeof(T) > 4) + *value = T(_mm_cvtt_roundsd_i64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); + else + *value = _mm_cvtt_roundsd_i32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +# else + *value = sizeof(T) > 4 ? T(_mm_cvttsd_si64(mv)) : _mm_cvttsd_si32(mv); +# endif + + // if *value is the "indefinite integer value", check if the original + // variable \a v is the same value (Tmin is an exact representation) + if (*value == Tmin && !_mm_ucomieq_sd(mv, _mm_set_sd(Tmin))) { + // v != Tmin, so it was out of range + if (v > 0) + *value = Tmax; + return false; + } + + // convert the integer back to double and compare for equality with v, + // to determine if we've lost any precision + __m128d mi = _mm_setzero_pd(); + mi = sizeof(T) > 4 ? _mm_cvtsi64_sd(mv, *value) : _mm_cvtsi32_sd(mv, *value); + return _mm_ucomieq_sd(mv, mi); + } + +# ifdef __AVX512F__ + if (!std::numeric_limits<T>::is_signed) { + // Same thing as above, but this function operates on absolute values + // and the "indefinite integer value" for the 64-bit unsigned + // conversion (Tmax) is not representable in double, so it can never be + // the result of an in-range conversion. This is implemented for AVX512 + // and later because of the unsigned conversion instruction. 
Converting + // to unsigned without losing an extra bit of precision prior to AVX512 + // is left to the compiler below. + + v = fabs(v); + __m128d mv = _mm_set_sd(v); + + // use explicit round control and suppress exceptions + if (sizeof(T) > 4) + *value = T(_mm_cvtt_roundsd_u64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); + else + *value = _mm_cvtt_roundsd_u32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + + if (*value == Tmax) { + // no double can have an exact value of quint64(-1), but they can + // quint32(-1), so we need to compare for that + if (TypeIsLarger || _mm_ucomieq_sd(mv, _mm_set_sd(Tmax))) + return false; + } + + // return true if it was an exact conversion + __m128d mi = _mm_setzero_pd(); + mi = sizeof(T) > 4 ? _mm_cvtu64_sd(mv, *value) : _mm_cvtu32_sd(mv, *value); + return _mm_ucomieq_sd(mv, mi); + } +# endif +#endif + double supremum; if (std::numeric_limits<T>::is_signed) { - supremum = -1.0 * std::numeric_limits<T>::min(); // -1 * (-2^63) = 2^63, exact (for T = qint64) - *value = std::numeric_limits<T>::min(); - if (v < std::numeric_limits<T>::min()) + supremum = -1.0 * Tmin; // -1 * (-2^63) = 2^63, exact (for T = qint64) + *value = Tmin; + if (v < Tmin) return false; } else { using ST = typename std::make_signed<T>::type; - supremum = -2.0 * std::numeric_limits<ST>::min(); // -2 * (-2^63) = 2^64, exact (for T = quint64) + supremum = -2.0 * (std::numeric_limits<ST>::min)(); // -2 * (-2^63) = 2^64, exact (for T = quint64) v = fabs(v); } - if (std::is_integral<T>::value && sizeof(T) > 4 && !allow_precision_upgrade) { - if (v > double(Q_INT64_C(1)<<53) || v < double(-((Q_INT64_C(1)<<53) + 1))) - return false; - } - *value = std::numeric_limits<T>::max(); + *value = Tmax; if (v >= supremum) return false; @@ -249,249 +285,187 @@ QT_WARNING_DISABLE_FLOAT_COMPARE QT_WARNING_POP } -// Overflow math. -// This provides efficient implementations for int, unsigned, qsizetype and -// size_t. 
Implementations for 8- and 16-bit types will work but may not be as -// efficient. Implementations for 64-bit may be missing on 32-bit platforms. - -#if ((defined(Q_CC_INTEL) ? (Q_CC_INTEL >= 1800 && !defined(Q_OS_WIN)) : defined(Q_CC_GNU)) \ - && Q_CC_GNU >= 500) || __has_builtin(__builtin_add_overflow) -// GCC 5, ICC 18, and Clang 3.8 have builtins to detect overflows -#define Q_INTRINSIC_MUL_OVERFLOW64 - -template <typename T> inline -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type -add_overflow(T v1, T v2, T *r) -{ return __builtin_add_overflow(v1, v2, r); } - -template <typename T> inline -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type -sub_overflow(T v1, T v2, T *r) -{ return __builtin_sub_overflow(v1, v2, r); } - -template <typename T> inline -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type -mul_overflow(T v1, T v2, T *r) -{ return __builtin_mul_overflow(v1, v2, r); } - -#else -// Generic implementations - -template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type -add_overflow(T v1, T v2, T *r) +template <typename T> static +std::enable_if_t<std::is_floating_point_v<T> || std::is_same_v<T, qfloat16>, bool> +convertDoubleTo(double v, T *value, bool allow_precision_upgrade = true) { - // unsigned additions are well-defined - *r = v1 + v2; - return v1 > T(v1 + v2); -} - -template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type -add_overflow(T v1, T v2, T *r) -{ - // Here's how we calculate the overflow: - // 1) unsigned addition is well-defined, so we can always execute it - // 2) conversion from unsigned back to signed is implementation- - // defined and in the implementations we use, it's a no-op. - // 3) signed integer overflow happens if the sign of the two input operands - // is the same but the sign of the result is different. 
In other words, - // the sign of the result must be the same as the sign of either - // operand. - - using U = typename std::make_unsigned<T>::type; - *r = T(U(v1) + U(v2)); - - // If int is two's complement, assume all integer types are too. - if (std::is_same<int32_t, int>::value) { - // Two's complement equivalent (generates slightly shorter code): - // x ^ y is negative if x and y have different signs - // x & y is negative if x and y are negative - // (x ^ z) & (y ^ z) is negative if x and z have different signs - // AND y and z have different signs - return ((v1 ^ *r) & (v2 ^ *r)) < 0; + Q_UNUSED(allow_precision_upgrade); + constexpr T Huge = std::numeric_limits<T>::infinity(); + + if constexpr (std::numeric_limits<double>::max_exponent <= + std::numeric_limits<T>::max_exponent) { + // no UB can happen + *value = T(v); + return true; } - bool s1 = (v1 < 0); - bool s2 = (v2 < 0); - bool sr = (*r < 0); - return s1 != sr && s2 != sr; - // also: return s1 == s2 && s1 != sr; -} +#if defined(__SSE2__) && (defined(Q_CC_GNU) || __has_extension(gnu_asm)) + // The x86 CVTSD2SS instruction from SSE2 does what we want: + // - converts out-of-range doubles to ±infinity and sets #O + // - converts underflows to zero and sets #U + // We need to clear any previously-stored exceptions from it before the + // operation (3-cycle cost) and obtain the new state afterwards (1 cycle). 
-template <typename T> inline typename std::enable_if<std::is_unsigned<T>::value, bool>::type -sub_overflow(T v1, T v2, T *r) -{ - // unsigned subtractions are well-defined - *r = v1 - v2; - return v1 < v2; -} + unsigned csr = _MM_MASK_MASK; // clear stored exception indicators + auto sse_check_result = [&](auto result) { + if ((csr & (_MM_EXCEPT_UNDERFLOW | _MM_EXCEPT_OVERFLOW)) == 0) + return true; + if (csr & _MM_EXCEPT_OVERFLOW) + return false; -template <typename T> inline typename std::enable_if<std::is_signed<T>::value, bool>::type -sub_overflow(T v1, T v2, T *r) -{ - // See above for explanation. This is the same with some signs reversed. - // We can't use add_overflow(v1, -v2, r) because it would be UB if - // v2 == std::numeric_limits<T>::min(). + // According to IEEE 754[1], #U is also set when the result is tiny and + // inexact, but still non-zero, so detect that (this won't generate + // good code for types without hardware support). + // [1] https://en.wikipedia.org/wiki/Floating-point_arithmetic#Exception_handling + return result != 0; + }; + + // Written directly in assembly because both Clang and GCC have been + // observed to reorder the STMXCSR instruction above the conversion + // operation. MSVC generates horrid code when using the intrinsics anyway, + // so it's not a loss. + // See https://github.com/llvm/llvm-project/issues/83661. 
+ if constexpr (std::is_same_v<T, float>) { +# ifdef __AVX__ + asm ("vldmxcsr %[csr]\n\t" + "vcvtsd2ss %[in], %[in], %[out]\n\t" + "vstmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# else + asm ("ldmxcsr %[csr]\n\t" + "cvtsd2ss %[in], %[out]\n\t" + "stmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*value) : [in] "v" (v)); +# endif + return sse_check_result(*value); + } + +# if defined(__F16C__) || defined(__AVX512FP16__) + if constexpr (sizeof(T) == 2 && std::numeric_limits<T>::max_exponent == 16) { + // qfloat16 or std::float16_t, but not std::bfloat16_t or std::bfloat8_t + auto doConvert = [&](auto *out) { + asm ("vldmxcsr %[csr]\n\t" +# ifdef __AVX512FP16__ + // AVX512FP16 & AVX10 have an instruction for this + "vcvtsd2sh %[in], %[in], %[out]\n\t" +# else + "vcvtsd2ss %[in], %[in], %[out]\n\t" // sets DEST[MAXVL-1:128] := 0 + "vcvtps2ph %[rc], %[out], %[out]\n\t" +# endif + "vstmxcsr %[csr]" + : [csr] "+m" (csr), [out] "=v" (*out) + : [in] "v" (v), [rc] "i" (_MM_FROUND_CUR_DIRECTION) + ); + return sse_check_result(out); + }; + + if constexpr (std::is_same_v<T, qfloat16> && !std::is_void_v<typename T::NativeType>) { + typename T::NativeType tmp; + bool b = doConvert(&tmp); + *value = tmp; + return b; + } else { +# ifndef Q_CC_CLANG + // Clang can only implement this if it has a native FP16 type + return doConvert(value); +# endif + } + } +# endif +#endif // __SSE2__ && inline assembly - using U = typename std::make_unsigned<T>::type; - *r = T(U(v1) - U(v2)); + if (!qt_is_finite(v) && std::numeric_limits<T>::has_infinity) { + // infinity (or NaN) + *value = T(v); + return true; + } - if (std::is_same<int32_t, int>::value) - return ((v1 ^ *r) & (~v2 ^ *r)) < 0; + // Check for in-range value to ensure the conversion is not UB (see the + // comment above for Standard language). + if (std::fabs(v) > (std::numeric_limits<T>::max)()) { + *value = v < 0 ? 
-Huge : Huge; + return false; + } - bool s1 = (v1 < 0); - bool s2 = !(v2 < 0); - bool sr = (*r < 0); - return s1 != sr && s2 != sr; - // also: return s1 == s2 && s1 != sr; + *value = T(v); + if (v != 0 && *value == 0) { + // Underflow through loss of precision + return false; + } + return true; } -template <typename T> inline -typename std::enable_if<std::is_unsigned<T>::value || std::is_signed<T>::value, bool>::type -mul_overflow(T v1, T v2, T *r) -{ - // use the next biggest type - // Note: for 64-bit systems where __int128 isn't supported, this will cause an error. - using LargerInt = QIntegerForSize<sizeof(T) * 2>; - using Larger = typename std::conditional<std::is_signed<T>::value, - typename LargerInt::Signed, typename LargerInt::Unsigned>::type; - Larger lr = Larger(v1) * Larger(v2); - *r = T(lr); - return lr > std::numeric_limits<T>::max() || lr < std::numeric_limits<T>::min(); -} +template <typename T> inline bool add_overflow(T v1, T v2, T *r) { return qAddOverflow(v1, v2, r); } +template <typename T> inline bool sub_overflow(T v1, T v2, T *r) { return qSubOverflow(v1, v2, r); } +template <typename T> inline bool mul_overflow(T v1, T v2, T *r) { return qMulOverflow(v1, v2, r); } -# if defined(Q_INTRINSIC_MUL_OVERFLOW64) -template <> inline bool mul_overflow(quint64 v1, quint64 v2, quint64 *r) -{ - *r = v1 * v2; - return Q_UMULH(v1, v2); -} -template <> inline bool mul_overflow(qint64 v1, qint64 v2, qint64 *r) +template <typename T, T V2> bool add_overflow(T v1, std::integral_constant<T, V2>, T *r) { - // This is slightly more complex than the unsigned case above: the sign bit - // of 'low' must be replicated as the entire 'high', so the only valid - // values for 'high' are 0 and -1. Use unsigned multiply since it's the same - // as signed for the low bits and use a signed right shift to verify that - // 'high' is nothing but sign bits that match the sign of 'low'. 
- - qint64 high = Q_SMULH(v1, v2); - *r = qint64(quint64(v1) * quint64(v2)); - return (*r >> 63) != high; + return qAddOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r); } -# if defined(Q_OS_INTEGRITY) && defined(Q_PROCESSOR_ARM_64) -template <> inline bool mul_overflow(uint64_t v1, uint64_t v2, uint64_t *r) +template <auto V2, typename T> bool add_overflow(T v1, T *r) { - return mul_overflow<quint64>(v1,v2,reinterpret_cast<quint64*>(r)); + return qAddOverflow<V2, T>(v1, r); } -template <> inline bool mul_overflow(int64_t v1, int64_t v2, int64_t *r) +template <typename T, T V2> bool sub_overflow(T v1, std::integral_constant<T, V2>, T *r) { - return mul_overflow<qint64>(v1,v2,reinterpret_cast<qint64*>(r)); + return qSubOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r); } -# endif // OS_INTEGRITY ARM64 -# endif // Q_INTRINSIC_MUL_OVERFLOW64 - -# if defined(Q_CC_MSVC) && defined(Q_PROCESSOR_X86) -// We can use intrinsics for the unsigned operations with MSVC -template <> inline bool add_overflow(unsigned v1, unsigned v2, unsigned *r) -{ return _addcarry_u32(0, v1, v2, r); } -// 32-bit mul_overflow is fine with the generic code above - -template <> inline bool add_overflow(quint64 v1, quint64 v2, quint64 *r) +template <auto V2, typename T> bool sub_overflow(T v1, T *r) { -# if defined(Q_PROCESSOR_X86_64) - return _addcarry_u64(0, v1, v2, reinterpret_cast<unsigned __int64 *>(r)); -# else - uint low, high; - uchar carry = _addcarry_u32(0, unsigned(v1), unsigned(v2), &low); - carry = _addcarry_u32(carry, v1 >> 32, v2 >> 32, &high); - *r = (quint64(high) << 32) | low; - return carry; -# endif // !x86-64 + return qSubOverflow<V2, T>(v1, r); } -# endif // MSVC X86 -#endif // !GCC -// Implementations for addition, subtraction or multiplication by a -// compile-time constant. For addition and subtraction, we simply call the code -// that detects overflow at runtime. 
For multiplication, we compare to the -// maximum possible values before multiplying to ensure no overflow happens. - -template <typename T, T V2> bool add_overflow(T v1, std::integral_constant<T, V2>, T *r) +template <typename T, T V2> bool mul_overflow(T v1, std::integral_constant<T, V2>, T *r) { - return add_overflow(v1, V2, r); + return qMulOverflow<T, V2>(v1, std::integral_constant<T, V2>{}, r); } -template <auto V2, typename T> bool add_overflow(T v1, T *r) +template <auto V2, typename T> bool mul_overflow(T v1, T *r) { - return add_overflow(v1, std::integral_constant<T, V2>{}, r); + return qMulOverflow<V2, T>(v1, r); } - -template <typename T, T V2> bool sub_overflow(T v1, std::integral_constant<T, V2>, T *r) -{ - return sub_overflow(v1, V2, r); } +#endif // Q_QDOC -template <auto V2, typename T> bool sub_overflow(T v1, T *r) -{ - return sub_overflow(v1, std::integral_constant<T, V2>{}, r); -} +/* + Safely narrows \a x to \c{To}. Let \c L be + \c{std::numeric_limits<To>::min()} and \c H be \c{std::numeric_limits<To>::max()}. -template <typename T, T V2> bool mul_overflow(T v1, std::integral_constant<T, V2>, T *r) + If \a x is less than L, returns L. If \a x is greater than H, + returns H. Otherwise, returns \c{To(x)}. +*/ +template <typename To, typename From> +static constexpr auto qt_saturate(From x) { - // Runtime detection for anything smaller than or equal to a register - // width, as most architectures' multiplication instructions actually - // produce a result twice as wide as the input registers, allowing us to - // efficiently detect the overflow. - if constexpr (sizeof(T) <= sizeof(qregisteruint)) { - return mul_overflow(v1, V2, r); - -#ifdef Q_INTRINSIC_MUL_OVERFLOW64 - } else if constexpr (sizeof(T) <= sizeof(quint64)) { - // If we have intrinsics detecting overflow of 64-bit multiplications, - // then detect overflows through them up to 64 bits. 
- return mul_overflow(v1, V2, r); -#endif - - } else if constexpr (V2 == 0 || V2 == 1) { - // trivial cases (and simplify logic below due to division by zero) - *r = v1 * V2; - return false; - } else if constexpr (V2 == -1) { - // multiplication by -1 is valid *except* for signed minimum values - // (necessary to avoid diving min() by -1, which is an overflow) - if (v1 < 0 && v1 == std::numeric_limits<T>::min()) - return true; - *r = -v1; - return false; + static_assert(std::is_integral_v<To>); + static_assert(std::is_integral_v<From>); + + [[maybe_unused]] + constexpr auto Lo = (std::numeric_limits<To>::min)(); + constexpr auto Hi = (std::numeric_limits<To>::max)(); + + if constexpr (std::is_signed_v<From> == std::is_signed_v<To>) { + // same signedness, we can accept regular integer conversion rules + return x < Lo ? Lo : + x > Hi ? Hi : + /*else*/ To(x); } else { - // For 64-bit multiplications on 32-bit platforms, let's instead compare v1 - // against the bounds that would overflow. - constexpr T Highest = std::numeric_limits<T>::max() / V2; - constexpr T Lowest = std::numeric_limits<T>::min() / V2; - if constexpr (Highest > Lowest) { - if (v1 > Highest || v1 < Lowest) - return true; - } else { - // this can only happen if V2 < 0 - static_assert(V2 < 0); - if (v1 > Lowest || v1 < Highest) - return true; + if constexpr (std::is_signed_v<From>) { // ie. !is_signed_v<To> + if (x < From{0}) + return To{0}; } - *r = v1 * V2; - return false; + // from here on, x >= 0 + using FromU = std::make_unsigned_t<From>; + using ToU = std::make_unsigned_t<To>; + return FromU(x) > ToU(Hi) ? Hi : To(x); // assumes Hi >= 0 } } -template <auto V2, typename T> bool mul_overflow(T v1, T *r) -{ - return mul_overflow(v1, std::integral_constant<T, V2>{}, r); -} -} -#endif // Q_CLANG_QDOC - QT_END_NAMESPACE #endif // QNUMERIC_P_H |