diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-07-29 12:11:35 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-09-06 12:35:12 +0200 |
commit | d3ff95dcb84861e8f42b480910d822b4ca8715b1 (patch) | |
tree | 7ade0148126e83ed589258983b632fbe3488d7fb /src/corelib/global | |
parent | a0e0b51001edfc1c7aea113c472ce995efa833fd (diff) | |
Round float->qfloat16 to even
Calibrated to match F16C and ARM-FP16 hardware conversions.
Change-Id: I3bdd4d3db3046fee4aeb24e4ce8b9bc9a06e0397
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/global')
-rw-r--r-- | src/corelib/global/qfloat16.h | 15 | ||||
-rw-r--r-- | src/corelib/global/qfloat16tables.cpp | 524 |
2 files changed, 530 insertions, 9 deletions
diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h index acf9220490..136571bbaf 100644 --- a/src/corelib/global/qfloat16.h +++ b/src/corelib/global/qfloat16.h @@ -107,8 +107,9 @@ private: Q_CORE_EXPORT static const quint32 mantissatable[]; Q_CORE_EXPORT static const quint32 exponenttable[]; Q_CORE_EXPORT static const quint32 offsettable[]; - Q_CORE_EXPORT static const quint32 basetable[]; - Q_CORE_EXPORT static const quint32 shifttable[]; + Q_CORE_EXPORT static const quint16 basetable[]; + Q_CORE_EXPORT static const quint16 shifttable[]; + Q_CORE_EXPORT static const quint32 roundtable[]; friend bool qIsNull(qfloat16 f) noexcept; #if !defined(QT_NO_FLOAT16_OPERATORS) @@ -173,14 +174,18 @@ inline qfloat16::qfloat16(float f) noexcept quint32 u; memcpy(&u, &f, sizeof(quint32)); const quint32 signAndExp = u >> 23; - const quint32 base = basetable[signAndExp]; - const quint32 shift = shifttable[signAndExp]; + const quint16 base = basetable[signAndExp]; + const quint16 shift = shifttable[signAndExp]; + const quint32 round = roundtable[signAndExp]; quint32 mantissa = (u & 0x007fffff); if ((signAndExp & 0xff) == 0xff) { if (mantissa) // keep nan from truncating to inf mantissa = qMax(1U << shift, mantissa); } else { - mantissa += (1U << (shift - 1)) - 1; // rounding + // round half to even + mantissa += round; + if (mantissa & (1 << shift)) + --mantissa; } // We use add as the mantissa may overflow causing diff --git a/src/corelib/global/qfloat16tables.cpp b/src/corelib/global/qfloat16tables.cpp index b87986d6b8..55173366c6 100644 --- a/src/corelib/global/qfloat16tables.cpp +++ b/src/corelib/global/qfloat16tables.cpp @@ -2,6 +2,7 @@ ** ** Copyright (C) 2016 by Southwest Research Institute (R) ** Copyright (C) 2019 Intel Corporation. +** Copyright (C) 2020 The Qt Company Ltd. ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. 
@@ -38,7 +39,7 @@ ** ****************************************************************************/ -/* This file was generated by gen_qfloat16_tables.cpp */ +/* This file was generated by util/qfloat16-tables/gen_qfloat16_tables.cpp */ #include <QtCore/qfloat16.h> @@ -2231,7 +2232,7 @@ const quint32 qfloat16::offsettable[64] = { 1024U, }; -const quint32 qfloat16::basetable[512] = { +const quint16 qfloat16::basetable[512] = { 0x0U, 0x0U, 0x0U, @@ -2746,7 +2747,7 @@ const quint32 qfloat16::basetable[512] = { 0xFC00U, }; -const quint32 qfloat16::shifttable[512] = { +const quint16 qfloat16::shifttable[512] = { 0x18U, 0x18U, 0x18U, @@ -3261,6 +3262,521 @@ const quint32 qfloat16::shifttable[512] = { 0xDU, }; -#endif // !__F16C__ && !__ARM_FP16_FORMAT_IEEE +const quint32 qfloat16::roundtable[512] = { +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x1000000U, +0x400001U, +0x200000U, +0x100000U, +0x80000U, +0x40000U, +0x20000U, +0x10000U, +0x8000U, +0x4000U, +0x2000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, 
+0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x1000000U, +0x400001U, +0x200000U, +0x100000U, +0x80000U, +0x40000U, +0x20000U, +0x10000U, +0x8000U, +0x4000U, +0x2000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x1000U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, 
+0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +0x0U, +}; + +#endif // !__ARM_FP16_FORMAT_IEEE QT_END_NAMESPACE |