summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2021-11-27 21:35:31 -0800
committerThiago Macieira <thiago.macieira@intel.com>2022-01-19 20:57:51 -0800
commitdf8456061ef0d57ea6be37746951c50f38a65101 (patch)
tree4cdc004c06266e58a88a1419db7bf247662dbcde /src/corelib
parent69731bec5796beb53b5ab00388c7c21c6a01d822 (diff)
convertDoubleTo: add an x86-64 intrinsics version
The UB that the C and C++ standards talk about does not apply if we use intrinsics. We can rely on the processors' architectural behavior instead. There are two ways to detect a conversion that cannot be represented in the result. One would be to check if the #IE bit got set in the MXCSR, but in order to do that we'd need to issue an STMXCSR+LDMXCSR pair to clear the bit first and then another STMXCSR at the end to see if it got set. Those instructions are 4 uops long and necessarily target memory, so that's a bit slow. This commit implements the second way, which is to check if the result of the conversion is the "undefined" value. Unfortunately, that value is a valid, precise value that double can hold for all data types except unsigned 64-bit, so we need to recheck if that was the actual value stored in the original double. This implementation targets 64-bit exclusively because that avoids having to deal with the 64-bit intrinsics not even being defined in 32-bit code (converting a double to 64-bit integer in 32-bit is messy). The unsigned implementation is only implemented with AVX512F because of the unsigned conversion instructions that were introduced then. Change-Id: I89446ea06b5742efb194fffd16bb9f04b2014bab Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/global/qnumeric_p.h84
1 files changed, 79 insertions, 5 deletions
diff --git a/src/corelib/global/qnumeric_p.h b/src/corelib/global/qnumeric_p.h
index 4fa817077e..b7af847673 100644
--- a/src/corelib/global/qnumeric_p.h
+++ b/src/corelib/global/qnumeric_p.h
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2020 The Qt Company Ltd.
-** Copyright (C) 2020 Intel Corporation.
+** Copyright (C) 2021 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -54,6 +54,7 @@
#include "QtCore/private/qglobal_p.h"
#include "QtCore/qnumeric.h"
+#include "QtCore/qsimd.h"
#include <cmath>
#include <limits>
#include <type_traits>
@@ -202,6 +203,8 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr
return false;
}
+ constexpr T Tmin = std::numeric_limits<T>::min();
+ constexpr T Tmax = std::numeric_limits<T>::max();
// The [conv.fpint] (7.10 Floating-integral conversions) section of the C++
// standard says only exact conversions are guaranteed. Converting
@@ -213,11 +216,82 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr
// correct, but Clang, ICC and MSVC don't realize that it's a constant and
// the math call stays in the compiled code.
+#ifdef Q_PROCESSOR_X86_64
+ // Of course, UB doesn't apply if we use intrinsics, in which case we are
+ // allowed to depend on exactly the processor's behavior. This
+ // implementation uses the truncating conversions from Scalar Double to
+ // integral types (CVTTSD2SI and VCVTTSD2USI), which is documented to
+ // return the "indefinite integer value" if the range of the target type is
+ // exceeded. (only implemented for x86-64 to avoid having to deal with the
+ // non-existence of the 64-bit intrinsics on i386)
+
+ if (std::numeric_limits<T>::is_signed) {
+ __m128d mv = _mm_set_sd(v);
+# ifdef __AVX512F__
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_i64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_i32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+# else
+ *value = sizeof(T) > 4 ? T(_mm_cvttsd_si64(mv)) : _mm_cvttsd_si32(mv);
+# endif
+
+ // if *value is the "indefinite integer value", check if the original
+ // variable \a v is the same value (Tmin is an exact representation)
+ if (*value == Tmin && !_mm_ucomieq_sd(mv, _mm_set_sd(Tmin))) {
+ // v != Tmin, so it was out of range
+ if (v > 0)
+ *value = Tmax;
+ return false;
+ }
+
+ // convert the integer back to double and compare for equality with v,
+ // to determine if we've lost any precision
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtsi64_sd(mv, *value) : _mm_cvtsi32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+
+# ifdef __AVX512F__
+ if (!std::numeric_limits<T>::is_signed) {
+ // Same thing as above, but this function operates on absolute values
+ // and the "indefinite integer value" for the 64-bit unsigned
+ // conversion (Tmax) is not representable in double, so it can never be
+ // the result of an in-range conversion. This is implemented for AVX512
+ // and later because of the unsigned conversion instruction. Converting
+ // to unsigned without losing an extra bit of precision prior to AVX512
+ // is left to the compiler below.
+
+ v = fabs(v);
+ __m128d mv = _mm_set_sd(v);
+
+ // use explicit round control and suppress exceptions
+ if (sizeof(T) > 4)
+ *value = T(_mm_cvtt_roundsd_u64(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ else
+ *value = _mm_cvtt_roundsd_u32(mv, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+
+ if (*value == Tmax) {
+ // no double can have an exact value of quint64(-1), but they can
+ // quint32(-1), so we need to compare for that
+ if (TypeIsLarger || _mm_ucomieq_sd(mv, _mm_set_sd(Tmax)))
+ return false;
+ }
+
+ // return true if it was an exact conversion
+ __m128d mi = _mm_setzero_pd();
+ mi = sizeof(T) > 4 ? _mm_cvtu64_sd(mv, *value) : _mm_cvtu32_sd(mv, *value);
+ return _mm_ucomieq_sd(mv, mi);
+ }
+# endif
+#endif
+
double supremum;
if (std::numeric_limits<T>::is_signed) {
- supremum = -1.0 * std::numeric_limits<T>::min(); // -1 * (-2^63) = 2^63, exact (for T = qint64)
- *value = std::numeric_limits<T>::min();
- if (v < std::numeric_limits<T>::min())
+ supremum = -1.0 * Tmin; // -1 * (-2^63) = 2^63, exact (for T = qint64)
+ *value = Tmin;
+ if (v < Tmin)
return false;
} else {
using ST = typename std::make_signed<T>::type;
@@ -225,7 +299,7 @@ static inline bool convertDoubleTo(double v, T *value, bool allow_precision_upgr
v = fabs(v);
}
- *value = std::numeric_limits<T>::max();
+ *value = Tmax;
if (v >= supremum)
return false;