summaryrefslogtreecommitdiffstats
path: root/src/corelib/global/qfloat16.h
blob: a25fac28862e26e9d55a080421553135ff0dcc6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/****************************************************************************
**
** Copyright (C) 2020 The Qt Company Ltd.
** Copyright (C) 2016 by Southwest Research Institute (R)
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef QFLOAT16_H
#define QFLOAT16_H

#include <QtCore/qglobal.h>
#include <QtCore/qmetatype.h>
#include <QtCore/qnamespace.h>
#include <limits>
#include <string.h>

#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__AVX2__) && !defined(__F16C__)
// All processors that support AVX2 do support F16C too. That doesn't mean
// we're allowed to use the intrinsics directly, so we'll do it only for
// the Intel and Microsoft's compilers.
#  if defined(Q_CC_INTEL) || defined(Q_CC_MSVC)
#    define __F16C__        1
# endif
#endif

#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)
#include <immintrin.h>
#endif

QT_BEGIN_NAMESPACE

#if 0
#pragma qt_class(QFloat16)
#pragma qt_no_master_include
#endif

class qfloat16
{
    struct Wrap
    {
        // To let our private constructor work, without other code seeing
        // ambiguity when constructing from int, double &c.
        quint16 b16;
        constexpr inline explicit Wrap(int value) : b16(quint16(value)) {}
    };
public:
    constexpr inline qfloat16() noexcept : b16(0) {}
    explicit qfloat16(Qt::Initialization) noexcept { }
    inline qfloat16(float f) noexcept;
    inline operator float() const noexcept;

    // Support for qIs{Inf,NaN,Finite}:
    bool isInf() const noexcept { return (b16 & 0x7fff) == 0x7c00; }
    bool isNaN() const noexcept { return (b16 & 0x7fff) > 0x7c00; }
    bool isFinite() const noexcept { return (b16 & 0x7fff) < 0x7c00; }
    Q_CORE_EXPORT int fpClassify() const noexcept;
    // Can't specialize std::copysign() for qfloat16
    qfloat16 copySign(qfloat16 sign) const noexcept
    { return qfloat16(Wrap((sign.b16 & 0x8000) | (b16 & 0x7fff))); }
    // Support for std::numeric_limits<qfloat16>
    static constexpr qfloat16 _limit_epsilon()    noexcept { return qfloat16(Wrap(0x1400)); }
    static constexpr qfloat16 _limit_min()        noexcept { return qfloat16(Wrap(0x400)); }
    static constexpr qfloat16 _limit_denorm_min() noexcept { return qfloat16(Wrap(1)); }
    static constexpr qfloat16 _limit_max()        noexcept { return qfloat16(Wrap(0x7bff)); }
    static constexpr qfloat16 _limit_lowest()     noexcept { return qfloat16(Wrap(0xfbff)); }
    static constexpr qfloat16 _limit_infinity()   noexcept { return qfloat16(Wrap(0x7c00)); }
    static constexpr qfloat16 _limit_quiet_NaN()  noexcept { return qfloat16(Wrap(0x7e00)); }
#if QT_CONFIG(signaling_nan)
    static constexpr qfloat16 _limit_signaling_NaN() noexcept { return qfloat16(Wrap(0x7d00)); }
#endif
    inline constexpr bool isNormal() const noexcept
    { return (b16 & 0x7c00) && (b16 & 0x7c00) != 0x7c00; }
private:
    quint16 b16;
    constexpr inline explicit qfloat16(Wrap nibble) noexcept : b16(nibble.b16) {}

    Q_CORE_EXPORT static const quint32 mantissatable[];
    Q_CORE_EXPORT static const quint32 exponenttable[];
    Q_CORE_EXPORT static const quint32 offsettable[];
    Q_CORE_EXPORT static const quint16 basetable[];
    Q_CORE_EXPORT static const quint16 shifttable[];
    Q_CORE_EXPORT static const quint32 roundtable[];

    friend bool qIsNull(qfloat16 f) noexcept;

    friend inline qfloat16 operator-(qfloat16 a) noexcept
    {
        qfloat16 f;
        f.b16 = a.b16 ^ quint16(0x8000);
        return f;
    }

    friend inline qfloat16 operator+(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<float>(a) + static_cast<float>(b)); }
    friend inline qfloat16 operator-(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<float>(a) - static_cast<float>(b)); }
    friend inline qfloat16 operator*(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<float>(a) * static_cast<float>(b)); }
    friend inline qfloat16 operator/(qfloat16 a, qfloat16 b) noexcept { return qfloat16(static_cast<float>(a) / static_cast<float>(b)); }

#define QF16_MAKE_ARITH_OP_FP(FP, OP) \
    friend inline FP operator OP(qfloat16 lhs, FP rhs) noexcept { return static_cast<FP>(lhs) OP rhs; } \
    friend inline FP operator OP(FP lhs, qfloat16 rhs) noexcept { return lhs OP static_cast<FP>(rhs); }
#define QF16_MAKE_ARITH_OP_EQ_FP(FP, OP_EQ, OP) \
    friend inline qfloat16& operator OP_EQ(qfloat16& lhs, FP rhs) noexcept \
    { lhs = qfloat16(float(static_cast<FP>(lhs) OP rhs)); return lhs; }
#define QF16_MAKE_ARITH_OP(FP) \
    QF16_MAKE_ARITH_OP_FP(FP, +) \
    QF16_MAKE_ARITH_OP_FP(FP, -) \
    QF16_MAKE_ARITH_OP_FP(FP, *) \
    QF16_MAKE_ARITH_OP_FP(FP, /) \
    QF16_MAKE_ARITH_OP_EQ_FP(FP, +=, +) \
    QF16_MAKE_ARITH_OP_EQ_FP(FP, -=, -) \
    QF16_MAKE_ARITH_OP_EQ_FP(FP, *=, *) \
    QF16_MAKE_ARITH_OP_EQ_FP(FP, /=, /)

    QF16_MAKE_ARITH_OP(long double)
    QF16_MAKE_ARITH_OP(double)
    QF16_MAKE_ARITH_OP(float)
#undef QF16_MAKE_ARITH_OP
#undef QF16_MAKE_ARITH_OP_FP

#define QF16_MAKE_ARITH_OP_INT(OP) \
    friend inline double operator OP(qfloat16 lhs, int rhs) noexcept { return static_cast<double>(lhs) OP rhs; } \
    friend inline double operator OP(int lhs, qfloat16 rhs) noexcept { return lhs OP static_cast<double>(rhs); }

    QF16_MAKE_ARITH_OP_INT(+)
    QF16_MAKE_ARITH_OP_INT(-)
    QF16_MAKE_ARITH_OP_INT(*)
    QF16_MAKE_ARITH_OP_INT(/)
#undef QF16_MAKE_ARITH_OP_INT

QT_WARNING_PUSH
QT_WARNING_DISABLE_FLOAT_COMPARE

    friend inline bool operator>(qfloat16 a, qfloat16 b)  noexcept { return static_cast<float>(a) >  static_cast<float>(b); }
    friend inline bool operator<(qfloat16 a, qfloat16 b)  noexcept { return static_cast<float>(a) <  static_cast<float>(b); }
    friend inline bool operator>=(qfloat16 a, qfloat16 b) noexcept { return static_cast<float>(a) >= static_cast<float>(b); }
    friend inline bool operator<=(qfloat16 a, qfloat16 b) noexcept { return static_cast<float>(a) <= static_cast<float>(b); }
    friend inline bool operator==(qfloat16 a, qfloat16 b) noexcept { return static_cast<float>(a) == static_cast<float>(b); }
    friend inline bool operator!=(qfloat16 a, qfloat16 b) noexcept { return static_cast<float>(a) != static_cast<float>(b); }

#define QF16_MAKE_BOOL_OP_FP(FP, OP) \
    friend inline bool operator OP(qfloat16 lhs, FP rhs) noexcept { return static_cast<FP>(lhs) OP rhs; } \
    friend inline bool operator OP(FP lhs, qfloat16 rhs) noexcept { return lhs OP static_cast<FP>(rhs); }
#define QF16_MAKE_BOOL_OP(FP) \
    QF16_MAKE_BOOL_OP_FP(FP, <) \
    QF16_MAKE_BOOL_OP_FP(FP, >) \
    QF16_MAKE_BOOL_OP_FP(FP, >=) \
    QF16_MAKE_BOOL_OP_FP(FP, <=) \
    QF16_MAKE_BOOL_OP_FP(FP, ==) \
    QF16_MAKE_BOOL_OP_FP(FP, !=)

    QF16_MAKE_BOOL_OP(long double)
    QF16_MAKE_BOOL_OP(double)
    QF16_MAKE_BOOL_OP(float)
#undef QF16_MAKE_BOOL_OP
#undef QF16_MAKE_BOOL_OP_FP

#define QF16_MAKE_BOOL_OP_INT(OP) \
    friend inline bool operator OP(qfloat16 a, int b) noexcept { return static_cast<float>(a) OP static_cast<float>(b); } \
    friend inline bool operator OP(int a, qfloat16 b) noexcept { return static_cast<float>(a) OP static_cast<float>(b); }

    QF16_MAKE_BOOL_OP_INT(>)
    QF16_MAKE_BOOL_OP_INT(<)
    QF16_MAKE_BOOL_OP_INT(>=)
    QF16_MAKE_BOOL_OP_INT(<=)
    QF16_MAKE_BOOL_OP_INT(==)
    QF16_MAKE_BOOL_OP_INT(!=)
#undef QF16_MAKE_BOOL_OP_INT

QT_WARNING_POP
};

Q_DECLARE_TYPEINFO(qfloat16, Q_PRIMITIVE_TYPE);

Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *, const float *, qsizetype length) noexcept;
Q_CORE_EXPORT void qFloatFromFloat16(float *, const qfloat16 *, qsizetype length) noexcept;

// Complement qnumeric.h:
[[nodiscard]] inline bool qIsInf(qfloat16 f) noexcept { return f.isInf(); }
[[nodiscard]] inline bool qIsNaN(qfloat16 f) noexcept { return f.isNaN(); }
[[nodiscard]] inline bool qIsFinite(qfloat16 f) noexcept { return f.isFinite(); }
[[nodiscard]] inline int qFpClassify(qfloat16 f) noexcept { return f.fpClassify(); }
// [[nodiscard]] quint32 qFloatDistance(qfloat16 a, qfloat16 b);

// The remainder of these utility functions complement qglobal.h
[[nodiscard]] inline int qRound(qfloat16 d) noexcept
{ return qRound(static_cast<float>(d)); }

[[nodiscard]] inline qint64 qRound64(qfloat16 d) noexcept
{ return qRound64(static_cast<float>(d)); }

[[nodiscard]] inline bool qFuzzyCompare(qfloat16 p1, qfloat16 p2) noexcept
{
    float f1 = static_cast<float>(p1);
    float f2 = static_cast<float>(p2);
    // The significand precision for IEEE754 half precision is
    // 11 bits (10 explicitly stored), or approximately 3 decimal
    // digits.  In selecting the fuzzy comparison factor of 102.5f
    // (that is, (2^10+1)/10) below, we effectively select a
    // window of about 1 (least significant) decimal digit about
    // which the two operands can vary and still return true.
    return (qAbs(f1 - f2) * 102.5f <= qMin(qAbs(f1), qAbs(f2)));
}

[[nodiscard]] inline bool qIsNull(qfloat16 f) noexcept
{
    return (f.b16 & static_cast<quint16>(0x7fff)) == 0;
}

inline int qIntCast(qfloat16 f) noexcept
{ return int(static_cast<float>(f)); }

#ifndef Q_QDOC
QT_WARNING_PUSH
QT_WARNING_DISABLE_CLANG("-Wc99-extensions")
QT_WARNING_DISABLE_GCC("-Wold-style-cast")
inline qfloat16::qfloat16(float f) noexcept
{
#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)
    __m128 packsingle = _mm_set_ss(f);
    __m128i packhalf = _mm_cvtps_ph(packsingle, 0);
    b16 = _mm_extract_epi16(packhalf, 0);
#elif defined (__ARM_FP16_FORMAT_IEEE)
    __fp16 f16 = __fp16(f);
    memcpy(&b16, &f16, sizeof(quint16));
#else
    quint32 u;
    memcpy(&u, &f, sizeof(quint32));
    const quint32 signAndExp = u >> 23;
    const quint16 base = basetable[signAndExp];
    const quint16 shift = shifttable[signAndExp];
    const quint32 round = roundtable[signAndExp];
    quint32 mantissa = (u & 0x007fffff);
    if ((signAndExp & 0xff) == 0xff) {
        if (mantissa) // keep nan from truncating to inf
            mantissa = qMax(1U << shift, mantissa);
    } else {
        // round half to even
        mantissa += round;
        if (mantissa & (1 << shift))
            --mantissa;
    }

    // We use add as the mantissa may overflow causing
    // the exp part to shift exactly one value.
    b16 = quint16(base + (mantissa >> shift));
#endif
}
QT_WARNING_POP

inline qfloat16::operator float() const noexcept
{
#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__)
    __m128i packhalf = _mm_cvtsi32_si128(b16);
    __m128 packsingle = _mm_cvtph_ps(packhalf);
    return _mm_cvtss_f32(packsingle);
#elif defined (__ARM_FP16_FORMAT_IEEE)
    __fp16 f16;
    memcpy(&f16, &b16, sizeof(quint16));
    return float(f16);
#else
    quint32 u = mantissatable[offsettable[b16 >> 10] + (b16 & 0x3ff)]
                + exponenttable[b16 >> 10];
    float f;
    memcpy(&f, &u, sizeof(quint32));
    return f;
#endif
}
#endif

/*!
  \internal
*/
[[nodiscard]] inline bool qFuzzyIsNull(qfloat16 f) noexcept
{
    return qAbs(static_cast<float>(f)) <= 0.001f;
}

QT_END_NAMESPACE

Q_DECLARE_METATYPE(qfloat16)

namespace std {
template<>
class numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> : public numeric_limits<float>
{
public:
    /*
      Treat quint16 b16 as if it were:
      uint S: 1; // b16 >> 15 (sign); can be set for zero
      uint E: 5; // (b16 >> 10) & 0x1f (offset exponent)
      uint M: 10; // b16 & 0x3ff (adjusted mantissa)

      for E == 0: magnitude is M / 2.^{24}
      for 0 < E < 31: magnitude is (1. + M / 2.^{10}) * 2.^{E - 15)
      for E == 31: not finite
     */
    static constexpr int digits = 11;
    static constexpr int min_exponent = -13;
    static constexpr int max_exponent = 16;

    static constexpr int digits10 = 3;
    static constexpr int max_digits10 = 5;
    static constexpr int min_exponent10 = -4;
    static constexpr int max_exponent10 = 4;

    static constexpr QT_PREPEND_NAMESPACE(qfloat16) epsilon()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_epsilon(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) (min)()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_min(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) denorm_min()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_denorm_min(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) (max)()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_max(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) lowest()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_lowest(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) infinity()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_infinity(); }
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) quiet_NaN()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_quiet_NaN(); }
#if QT_CONFIG(signaling_nan)
    static constexpr QT_PREPEND_NAMESPACE(qfloat16) signaling_NaN()
    { return QT_PREPEND_NAMESPACE(qfloat16)::_limit_signaling_NaN(); }
#else
    static constexpr bool has_signaling_NaN = false;
#endif
};

template<> class numeric_limits<const QT_PREPEND_NAMESPACE(qfloat16)>
    : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {};
template<> class numeric_limits<volatile QT_PREPEND_NAMESPACE(qfloat16)>
    : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {};
template<> class numeric_limits<const volatile QT_PREPEND_NAMESPACE(qfloat16)>
    : public numeric_limits<QT_PREPEND_NAMESPACE(qfloat16)> {};

// Adding overloads to std isn't allowed, so we can't extend this to support
// for fpclassify(), isnormal() &c. (which, furthermore, are macros on MinGW).
} // namespace std

#endif // QFLOAT16_H