diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-07-27 14:50:10 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-07-31 16:50:30 +0200 |
commit | 0792188440e12b211102a1e624828aec46964c9f (patch) | |
tree | 44a8e6d2bec8103f382fec6b498cfc8325792ead /src | |
parent | c95fc0f662f8df6533f6961cc003a2fe2fb739fb (diff) |
Add float->qfloat16 rounding and fix the tests
Our hardware-optimized conversion of float to qfloat16 rounds to even,
whereas our table-based conversion truncated toward zero.
The rounding in this patch is not exactly round-to-even like the
hardware implementation, but it is much closer.
Change-Id: I4c5e72c15fef9079d3660680b2727ff7ba4e768a
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/global/qfloat16.h | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h index ab480f84f5..acf9220490 100644 --- a/src/corelib/global/qfloat16.h +++ b/src/corelib/global/qfloat16.h @@ -172,8 +172,20 @@ inline qfloat16::qfloat16(float f) noexcept #else quint32 u; memcpy(&u, &f, sizeof(quint32)); - b16 = quint16(basetable[(u >> 23) & 0x1ff] - + ((u & 0x007fffff) >> shifttable[(u >> 23) & 0x1ff])); + const quint32 signAndExp = u >> 23; + const quint32 base = basetable[signAndExp]; + const quint32 shift = shifttable[signAndExp]; + quint32 mantissa = (u & 0x007fffff); + if ((signAndExp & 0xff) == 0xff) { + if (mantissa) // keep nan from truncating to inf + mantissa = qMax(1U << shift, mantissa); + } else { + mantissa += (1U << (shift - 1)) - 1; // rounding + } + + // We use add as the mantissa may overflow causing + // the exp part to shift exactly one value. + b16 = quint16(base + (mantissa >> shift)); #endif } QT_WARNING_POP |