summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/global/qfloat16.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/global/qfloat16.cpp')
-rw-r--r-- src/corelib/global/qfloat16.cpp 294
1 files changed, 213 insertions, 81 deletions
diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp
index c97331748b..f6f782e364 100644
--- a/src/corelib/global/qfloat16.cpp
+++ b/src/corelib/global/qfloat16.cpp
@@ -1,49 +1,20 @@
-/****************************************************************************
-**
-** Copyright (C) 2019 The Qt Company Ltd.
-** Copyright (C) 2016 by Southwest Research Institute (R)
-** Contact: http://www.qt-project.org/legal
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 The Qt Company Ltd.
+// Copyright (C) 2016 by Southwest Research Institute (R)
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qfloat16.h"
#include "private/qsimd_p.h"
#include <cmath> // for fpclassify()'s return values
+#include <QtCore/qdatastream.h>
+#include <QtCore/qmetatype.h>
+#include <QtCore/qtextstream.h>
+
+QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT)
QT_BEGIN_NAMESPACE
+QT_IMPL_METATYPE_EXTERN(qfloat16)
+
/*!
\class qfloat16
\keyword 16-bit Floating Point Support
@@ -52,6 +23,15 @@ QT_BEGIN_NAMESPACE
\inheaderfile QFloat16
\brief Provides 16-bit floating point support.
+ \compares partial
+ \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \
+ qint32 quint32 long {unsigned long} qint64 quint64
+ \endcompareswith
+ \compareswith partial qint128 quint128
+ Comparison with 128-bit integral types is only supported if Qt provides
+ these types.
+ \endcompareswith
+
The \c qfloat16 class provides support for half-precision (16-bit) floating
point data. It is fully compliant with IEEE 754 as a storage type. This
implies that any arithmetic operation on a \c qfloat16 instance results in
@@ -73,43 +53,34 @@ QT_BEGIN_NAMESPACE
*/
/*!
- \macro QT_NO_FLOAT16_OPERATORS
- \relates qfloat16
- \since 5.12.4
-
- Defining this macro disables the arithmetic operators for qfloat16.
+ \fn qfloat16::qfloat16(Qt::Initialization)
+ \since 6.1
- This is only necessary on Visual Studio 2017 (and earlier) when including
- \c {<QFloat16>} and \c{<bitset>} in the same translation unit, which would
- otherwise cause a compilation error due to a toolchain bug (see
- [QTBUG-72073]).
+ Constructs a qfloat16 without initializing the value.
*/
/*!
\fn bool qIsInf(qfloat16 f)
\relates qfloat16
+ \overload qIsInf(float)
Returns true if the \c qfloat16 \a {f} is equivalent to infinity.
-
- \sa qIsInf
*/
/*!
\fn bool qIsNaN(qfloat16 f)
\relates qfloat16
+ \overload qIsNaN(float)
Returns true if the \c qfloat16 \a {f} is not a number (NaN).
-
- \sa qIsNaN
*/
/*!
\fn bool qIsFinite(qfloat16 f)
\relates qfloat16
+ \overload qIsFinite(float)
Returns true if the \c qfloat16 \a {f} is a finite number.
-
- \sa qIsFinite
*/
/*!
@@ -118,8 +89,6 @@ QT_BEGIN_NAMESPACE
\fn bool qfloat16::isInf() const noexcept
Tests whether this \c qfloat16 value is an infinity.
-
- \sa qIsInf()
*/
/*!
@@ -128,8 +97,6 @@ QT_BEGIN_NAMESPACE
\fn bool qfloat16::isNaN() const noexcept
Tests whether this \c qfloat16 value is "not a number".
-
- \sa qIsNaN()
*/
/*!
@@ -147,8 +114,6 @@ QT_BEGIN_NAMESPACE
\fn bool qfloat16::isFinite() const noexcept
Tests whether this \c qfloat16 value is finite.
-
- \sa qIsFinite()
*/
/*!
@@ -160,11 +125,18 @@ QT_BEGIN_NAMESPACE
*/
/*!
+ \fn int qFpClassify(qfloat16 val)
+ \relates qfloat16
+ \since 5.14
+ \overload qFpClassify(float)
+
+ Returns the floating-point class of \a val.
+*/
+
+/*!
\internal
\since 5.14
Implements qFpClassify() for qfloat16.
-
- \sa qFpClassify()
*/
int qfloat16::fpClassify() const noexcept
{
@@ -174,22 +146,21 @@ int qfloat16::fpClassify() const noexcept
/*! \fn int qRound(qfloat16 value)
\relates qfloat16
+ \overload qRound(float)
Rounds \a value to the nearest integer.
-
- \sa qRound
*/
/*! \fn qint64 qRound64(qfloat16 value)
\relates qfloat16
+ \overload qRound64(float)
Rounds \a value to the nearest 64-bit integer.
-
- \sa qRound64
*/
/*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2)
\relates qfloat16
+ \overload qFuzzyCompare(float, float)
Compares the floating point values \a p1 and \a p2 and
returns \c true if they are considered equal, otherwise \c false.
@@ -198,25 +169,128 @@ int qfloat16::fpClassify() const noexcept
exactness is stronger the smaller the numbers are.
*/
-#if QT_COMPILER_SUPPORTS(F16C)
+#if QT_COMPILER_SUPPORTS_HERE(F16C)
static inline bool hasFastF16() // true when qCpuHasFeature() reports F16C as usable
{
-    // All processors with F16C also support AVX, but YMM registers
-    // might not be supported by the OS, or they might be disabled.
-    return qCpuHasFeature(F16C) && qCpuHasFeature(AVX);
+    // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX
+    // state-saving is not enabled by the OS, so no separate AVX check is needed here
+    return qCpuHasFeature(F16C);
}
-extern "C" {
-#ifdef QFLOAT16_INCLUDE_FAST
-# define f16cextern static
-#else
-# define f16cextern extern
+#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
+static bool hasFastF16Avx256() // gate for the masked 256-bit AVX512 tail conversions
+{
+    // 256-bit AVX512 doesn't have a performance penalty (see qstring.cpp for more info)
+    return qCpuHasFeature(ArchSkylakeAvx512);
+}
+
+static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) // masked tail: float -> half for the first len elements (callers in this file pass len < 8)
+void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept
+{
+    __mmask16 mask = _bzhi_u32(-1, len); // keep only the low len bits: one mask bit per element
+    __m256 f32 = _mm256_maskz_loadu_ps(mask, in); // lanes outside the mask are zeroed
+    __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT);
+    _mm_mask_storeu_epi16(out, mask, f16); // stores only the 16-bit lanes selected by the mask
+}
+
+static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) // masked tail: half -> float for the first len elements (callers in this file pass len < 8)
+void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept
+{
+    __mmask16 mask = _bzhi_u32(-1, len); // keep only the low len bits: one mask bit per element
+    __m128i f16 = _mm_maskz_loadu_epi16(mask, in); // lanes outside the mask are zeroed
+    __m256 f32 = _mm256_cvtph_ps(f16);
+    _mm256_mask_storeu_ps(out, mask, f32); // stores only the float lanes selected by the mask
+}
#endif
-f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept;
-f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept;
+QT_FUNCTION_TARGET(F16C) // F16C path: converts len floats at in to 16-bit half-precision values at out
+static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept
+{
+    constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); // floats per 256-bit vector
+    constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); // floats per 128-bit vector
+    qsizetype i = 0;
+
+    if (len >= Step) {
+        auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
+            __m256 f32 = _mm256_loadu_ps(in + offset);
+            __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(out + offset), f16);
+        };
+
+        // main loop: convert Step (8) floats per iteration; the strict '<'
+        // always leaves the final chunk to the epilogue below
+        for ( ; i + Step < len; i += Step)
+            convertOneChunk(i);
+
+        // epilogue: convert the last chunk, possibly overlapping with the last
+        // iteration of the loop
+        return convertOneChunk(len - Step);
+    }
+
+#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
+    if (hasFastF16Avx256()) // len < Step here: a single masked conversion handles every element
+        return qFloatToFloat16_tail_avx256(out, in, len);
+#endif
-#undef f16cextern
+    if (len >= HalfStep) {
+        auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
+            __m128 f32 = _mm_loadu_ps(in + offset);
+            __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
+            _mm_storel_epi64(reinterpret_cast<__m128i *>(out + offset), f16);
+        };
+
+        // HalfStep <= len < Step: two conversions, possibly overlapping, cover the whole range
+        convertOneChunk(0);
+        return convertOneChunk(len - HalfStep);
+    }
+
+    // Fewer than HalfStep elements remain; inlining "qfloat16::qfloat16(float f)":
+    for ( ; i < len; ++i)
+        out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
+}
+
+QT_FUNCTION_TARGET(F16C) // F16C path: converts len 16-bit half-precision values at in to floats at out
+static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept
+{
+    constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); // floats per 256-bit vector
+    constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); // floats per 128-bit vector
+    qsizetype i = 0;
+
+    if (len >= Step) {
+        auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
+            __m128i f16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + offset));
+            __m256 f32 = _mm256_cvtph_ps(f16);
+            _mm256_storeu_ps(out + offset, f32);
+        };
+
+        // main loop: convert Step (8) floats per iteration; the strict '<'
+        // always leaves the final chunk to the epilogue below
+        for ( ; i + Step < len; i += Step)
+            convertOneChunk(i);
+
+        // epilogue: convert the last chunk, possibly overlapping with the last
+        // iteration of the loop
+        return convertOneChunk(len - Step);
+    }
+
+#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
+    if (hasFastF16Avx256()) // len < Step here: a single masked conversion handles every element
+        return qFloatFromFloat16_tail_avx256(out, in, len);
+#endif
+
+    if (len >= HalfStep) {
+        auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
+            __m128i f16 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(in + offset));
+            __m128 f32 = _mm_cvtph_ps(f16);
+            _mm_storeu_ps(out + offset, f32);
+        };
+
+        // HalfStep <= len < Step: two conversions, possibly overlapping, cover the whole range
+        convertOneChunk(0);
+        return convertOneChunk(len - HalfStep);
+    }
+
+    // Fewer than HalfStep elements remain; inlining "qfloat16::operator float()":
+    for ( ; i < len; ++i)
+        out[i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(in[i])));
}
#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
@@ -298,9 +372,67 @@ Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype l
out[i] = float(in[i]);
}
+/*!
+ \fn size_t qfloat16::qHash(qfloat16 key, size_t seed)
+ \since 6.5.3
+
+ Returns the hash value for the \a key, using \a seed to seed the
+ calculation.
+
+ \note In Qt versions before 6.5, this operation was provided by the
+ qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality
+ was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this
+ overload restores the Qt 6.4 behavior.
+*/
+
+#ifndef QT_NO_DATASTREAM
+/*!
+ \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f)
+ \relates QDataStream
+ \since 5.9
+
+ Writes a floating point number, \a f, to the stream \a ds using
+ the standard IEEE 754 format. Returns a reference to the stream.
+
+ \note In Qt versions prior to 6.3, this was a member function on
+ QDataStream.
+*/
+QDataStream &operator<<(QDataStream &ds, qfloat16 f)
+{
+    return ds << f.b16; // writes the b16 member of f and returns the result of that insertion
+}
+
+/*!
+ \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f)
+ \relates QDataStream
+ \since 5.9
+
+ Reads a floating point number from the stream \a ds into \a f,
+ using the standard IEEE 754 format. Returns a reference to the
+ stream.
+
+ \note In Qt versions prior to 6.3, this was a member function on
+ QDataStream.
+*/
+QDataStream &operator>>(QDataStream &ds, qfloat16 &f)
+{
+    return ds >> f.b16; // reads directly into the b16 member of f and returns the result of that extraction
+}
+#endif
+
+QTextStream &operator>>(QTextStream &ts, qfloat16 &f16) // reads a float from ts and stores it in f16 as a qfloat16
+{
+    float f;
+    ts >> f; // parse via the stream's float extraction
+    f16 = qfloat16(f); // convert the parsed float to qfloat16
+    return ts;
+}
+
+QTextStream &operator<<(QTextStream &ts, qfloat16 f) // writes f to ts after converting it to float
+{
+    return ts << float(f);
+}
+
QT_END_NAMESPACE
#include "qfloat16tables.cpp"
-#ifdef QFLOAT16_INCLUDE_FAST
-# include "qfloat16_f16c.c"
-#endif