diff options
Diffstat (limited to 'src/corelib/global/qfloat16.cpp')
-rw-r--r-- | src/corelib/global/qfloat16.cpp | 294 |
1 file changed, 213 insertions, 81 deletions
diff --git a/src/corelib/global/qfloat16.cpp b/src/corelib/global/qfloat16.cpp index c97331748b..f6f782e364 100644 --- a/src/corelib/global/qfloat16.cpp +++ b/src/corelib/global/qfloat16.cpp @@ -1,49 +1,20 @@ -/**************************************************************************** -** -** Copyright (C) 2019 The Qt Company Ltd. -** Copyright (C) 2016 by Southwest Research Institute (R) -** Contact: http://www.qt-project.org/legal -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 The Qt Company Ltd. +// Copyright (C) 2016 by Southwest Research Institute (R) +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qfloat16.h" #include "private/qsimd_p.h" #include <cmath> // for fpclassify()'s return values +#include <QtCore/qdatastream.h> +#include <QtCore/qmetatype.h> +#include <QtCore/qtextstream.h> + +QT_DECL_METATYPE_EXTERN(qfloat16, Q_CORE_EXPORT) QT_BEGIN_NAMESPACE +QT_IMPL_METATYPE_EXTERN(qfloat16) + /*! \class qfloat16 \keyword 16-bit Floating Point Support @@ -52,6 +23,15 @@ QT_BEGIN_NAMESPACE \inheaderfile QFloat16 \brief Provides 16-bit floating point support. + \compares partial + \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \ + qint32 quint32 long {unsigned long} qint64 quint64 + \endcompareswith + \compareswith partial qint128 quint128 + Comparison with 128-bit integral types is only supported if Qt provides + these types. + \endcompareswith + The \c qfloat16 class provides support for half-precision (16-bit) floating point data. It is fully compliant with IEEE 754 as a storage type. This implies that any arithmetic operation on a \c qfloat16 instance results in @@ -73,43 +53,34 @@ QT_BEGIN_NAMESPACE */ /*! - \macro QT_NO_FLOAT16_OPERATORS - \relates qfloat16 - \since 5.12.4 - - Defining this macro disables the arithmetic operators for qfloat16. 
+ \fn qfloat16::qfloat16(Qt::Initialization) + \since 6.1 - This is only necessary on Visual Studio 2017 (and earlier) when including - \c {<QFloat16>} and \c{<bitset>} in the same translation unit, which would - otherwise cause a compilation error due to a toolchain bug (see - [QTBUG-72073]). + Constructs a qfloat16 without initializing the value. */ /*! \fn bool qIsInf(qfloat16 f) \relates qfloat16 + \overload qIsInf(float) Returns true if the \c qfloat16 \a {f} is equivalent to infinity. - - \sa qIsInf */ /*! \fn bool qIsNaN(qfloat16 f) \relates qfloat16 + \overload qIsNaN(float) Returns true if the \c qfloat16 \a {f} is not a number (NaN). - - \sa qIsNaN */ /*! \fn bool qIsFinite(qfloat16 f) \relates qfloat16 + \overload qIsFinite(float) Returns true if the \c qfloat16 \a {f} is a finite number. - - \sa qIsFinite */ /*! @@ -118,8 +89,6 @@ QT_BEGIN_NAMESPACE \fn bool qfloat16::isInf() const noexcept Tests whether this \c qfloat16 value is an infinity. - - \sa qIsInf() */ /*! @@ -128,8 +97,6 @@ QT_BEGIN_NAMESPACE \fn bool qfloat16::isNaN() const noexcept Tests whether this \c qfloat16 value is "not a number". - - \sa qIsNaN() */ /*! @@ -147,8 +114,6 @@ QT_BEGIN_NAMESPACE \fn bool qfloat16::isFinite() const noexcept Tests whether this \c qfloat16 value is finite. - - \sa qIsFinite() */ /*! @@ -160,11 +125,18 @@ QT_BEGIN_NAMESPACE */ /*! + \fn int qFpClassify(qfloat16 val) + \relates qfloat16 + \since 5.14 + \overload qFpClassify(float) + + Returns the floating-point class of \a val. +*/ + +/*! \internal \since 5.14 Implements qFpClassify() for qfloat16. - - \sa qFpClassify() */ int qfloat16::fpClassify() const noexcept { @@ -174,22 +146,21 @@ int qfloat16::fpClassify() const noexcept /*! \fn int qRound(qfloat16 value) \relates qfloat16 + \overload qRound(float) Rounds \a value to the nearest integer. - - \sa qRound */ /*! \fn qint64 qRound64(qfloat16 value) \relates qfloat16 + \overload qRound64(float) Rounds \a value to the nearest 64-bit integer. 
- - \sa qRound64 */ /*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) \relates qfloat16 + \overload qFuzzyCompare(float, float) Compares the floating point value \a p1 and \a p2 and returns \c true if they are considered equal, otherwise \c false. @@ -198,25 +169,128 @@ int qfloat16::fpClassify() const noexcept exactness is stronger the smaller the numbers are. */ -#if QT_COMPILER_SUPPORTS(F16C) +#if QT_COMPILER_SUPPORTS_HERE(F16C) static inline bool hasFastF16() { - // All processors with F16C also support AVX, but YMM registers - // might not be supported by the OS, or they might be disabled. - return qCpuHasFeature(F16C) && qCpuHasFeature(AVX); + // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX + // state-saving is not enabled by the OS + return qCpuHasFeature(F16C); } -extern "C" { -#ifdef QFLOAT16_INCLUDE_FAST -# define f16cextern static -#else -# define f16cextern extern +#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) +static bool hasFastF16Avx256() +{ + // 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info) + return qCpuHasFeature(ArchSkylakeAvx512); +} + +static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) +void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept +{ + __mmask16 mask = _bzhi_u32(-1, len); + __m256 f32 = _mm256_maskz_loadu_ps(mask, in ); + __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT); + _mm_mask_storeu_epi16(out, mask, f16); +}; + +static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512) +void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept +{ + __mmask16 mask = _bzhi_u32(-1, len); + __m128i f16 = _mm_maskz_loadu_epi16(mask, in); + __m256 f32 = _mm256_cvtph_ps(f16); + _mm256_mask_storeu_ps(out, mask, f32); +}; #endif -f16cextern void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept; -f16cextern void qFloatFromFloat16_fast(float *out, const quint16 *in, 
qsizetype len) noexcept; +QT_FUNCTION_TARGET(F16C) +static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept +{ + constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); + constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); + qsizetype i = 0; + + if (len >= Step) { + auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { + __m256 f32 = _mm256_loadu_ps(in + offset); + __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); + _mm_storeu_si128(reinterpret_cast<__m128i *>(out + offset), f16); + }; + + // main loop: convert Step (8) floats per iteration + for ( ; i + Step < len; i += Step) + convertOneChunk(i); + + // epilogue: convert the last chunk, possibly overlapping with the last + // iteration of the loop + return convertOneChunk(len - Step); + } + +#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) + if (hasFastF16Avx256()) + return qFloatToFloat16_tail_avx256(out, in, len); +#endif -#undef f16cextern + if (len >= HalfStep) { + auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { + __m128 f32 = _mm_loadu_ps(in + offset); + __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT); + _mm_storel_epi64(reinterpret_cast<__m128i *>(out + offset), f16); + }; + + // two conversions, possibly overlapping + convertOneChunk(0); + return convertOneChunk(len - HalfStep); + } + + // Inlining "qfloat16::qfloat16(float f)": + for ( ; i < len; ++i) + out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0); +} + +QT_FUNCTION_TARGET(F16C) +static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept +{ + constexpr qsizetype Step = sizeof(__m256i) / sizeof(float); + constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float); + qsizetype i = 0; + + if (len >= Step) { + auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { + __m128i f16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + offset)); + 
__m256 f32 = _mm256_cvtph_ps(f16); + _mm256_storeu_ps(out + offset, f32); + }; + + // main loop: convert Step (8) floats per iteration + for ( ; i + Step < len; i += Step) + convertOneChunk(i); + + // epilogue: convert the last chunk, possibly overlapping with the last + // iteration of the loop + return convertOneChunk(len - Step); + } + +#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW) + if (hasFastF16Avx256()) + return qFloatFromFloat16_tail_avx256(out, in, len); +#endif + + if (len >= HalfStep) { + auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) { + __m128i f16 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(in + offset)); + __m128 f32 = _mm_cvtph_ps(f16); + _mm_storeu_ps(out + offset, f32); + }; + + // two conversions, possibly overlapping + convertOneChunk(0); + return convertOneChunk(len - HalfStep); + } + + // Inlining "qfloat16::operator float()": + for ( ; i < len; ++i) + out[i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(in[i]))); } #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2) @@ -298,9 +372,67 @@ Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype l out[i] = float(in[i]); } +/*! + \fn size_t qfloat16::qHash(qfloat16 key, size_t seed) + \since 6.5.3 + + Returns the hash value for the \a key, using \a seed to seed the + calculation. + + \note In Qt versions before 6.5, this operation was provided by the + qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality + was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this + overload restores the Qt 6.4 behavior. +*/ + +#ifndef QT_NO_DATASTREAM +/*! + \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f) + \relates QDataStream + \since 5.9 + + Writes a floating point number, \a f, to the stream \a ds using + the standard IEEE 754 format. Returns a reference to the stream. + + \note In Qt versions prior to 6.3, this was a member function on + QDataStream. 
+*/ +QDataStream &operator<<(QDataStream &ds, qfloat16 f) +{ + return ds << f.b16; +} + +/*! + \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f) + \relates QDataStream + \since 5.9 + + Reads a floating point number from the stream \a ds into \a f, + using the standard IEEE 754 format. Returns a reference to the + stream. + + \note In Qt versions prior to 6.3, this was a member function on + QDataStream. +*/ +QDataStream &operator>>(QDataStream &ds, qfloat16 &f) +{ + return ds >> f.b16; +} +#endif + +QTextStream &operator>>(QTextStream &ts, qfloat16 &f16) +{ + float f; + ts >> f; + f16 = qfloat16(f); + return ts; +} + +QTextStream &operator<<(QTextStream &ts, qfloat16 f) +{ + return ts << float(f); +} + QT_END_NAMESPACE #include "qfloat16tables.cpp" -#ifdef QFLOAT16_INCLUDE_FAST -# include "qfloat16_f16c.c" -#endif |