diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-07-29 12:11:35 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-09-06 12:35:12 +0200 |
commit | d3ff95dcb84861e8f42b480910d822b4ca8715b1 (patch) | |
tree | 7ade0148126e83ed589258983b632fbe3488d7fb /util/qfloat16-tables | |
parent | a0e0b51001edfc1c7aea113c472ce995efa833fd (diff) |
Round float->qfloat16 to even
Calibrated to match F16C and ARM-FP16 hardware conversions.
Change-Id: I3bdd4d3db3046fee4aeb24e4ce8b9bc9a06e0397
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'util/qfloat16-tables')
-rw-r--r-- | util/qfloat16-tables/gen_qfloat16_tables.cpp | 34 |
1 files changed, 31 insertions, 3 deletions
diff --git a/util/qfloat16-tables/gen_qfloat16_tables.cpp b/util/qfloat16-tables/gen_qfloat16_tables.cpp
index 5d7bab01d2..af172c5513 100644
--- a/util/qfloat16-tables/gen_qfloat16_tables.cpp
+++ b/util/qfloat16-tables/gen_qfloat16_tables.cpp
@@ -2,6 +2,7 @@
 **
 ** Copyright (C) 2016 by Southwest Research Institute (R)
 ** Copyright (C) 2019 Intel Corporation.
+** Copyright (C) 2020 The Qt Company Ltd.
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtCore module of the Qt Toolkit.
@@ -71,6 +72,7 @@ uint32_t convertmantissa(int32_t i)
 // to more closely map the implementation given in the paper.
 uint32_t basetable[512];
 uint32_t shifttable[512];
+uint32_t roundtable[512];
 
 int main()
 {
@@ -113,50 +115,76 @@ int main()
     int32_t e;
     for (i = 0; i < 256; ++i) {
         e = i - 127;
-        if (e < -24) { // Very small numbers map to zero
+        if (e < -25) { // Very small numbers map to zero
             basetable[i | 0x000] = 0x0000;
             basetable[i | 0x100] = 0x8000;
             shifttable[i | 0x000] = 24;
             shifttable[i | 0x100] = 24;
+            roundtable[i | 0x000] = 0;
+            roundtable[i | 0x100] = 0;
 
         } else if (e < -14) { // Small numbers map to denorms
             basetable[i | 0x000] = (0x0400 >> (-e - 14));
             basetable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
             shifttable[i | 0x000] = -e - 1;
             shifttable[i | 0x100] = -e - 1;
+            if (e == -25) {
+                // rounds up
+                roundtable[i | 0x000] = (1 << 24);
+                roundtable[i | 0x100] = (1 << 24);
+            } else if (e == -24) {
+                // rounds half up
+                roundtable[i | 0x000] = (1 << 22) + 1;
+                roundtable[i | 0x100] = (1 << 22) + 1;
+            } else {
+                roundtable[i | 0x000] = (1 << (-e - 2));
+                roundtable[i | 0x100] = (1 << (-e - 2));
+            }
 
         } else if (e <= 15) { // Normal numbers just lose precision
             basetable[i | 0x000] = ((e + 15) << 10);
             basetable[i | 0x100] = ((e + 15) << 10) | 0x8000;
             shifttable[i | 0x000] = 13;
             shifttable[i | 0x100] = 13;
+            roundtable[i | 0x000] = (1 << 12);
+            roundtable[i | 0x100] = (1 << 12);
 
         } else if (e < 128) { // Large numbers map to Infinity
             basetable[i | 0x000] = 0x7C00;
             basetable[i | 0x100] = 0xFC00;
             shifttable[i | 0x000] = 24;
             shifttable[i | 0x100] = 24;
+            roundtable[i | 0x000] = 0;
+            roundtable[i | 0x100] = 0;
 
         } else { // Infinity and NaN's stay Infinity and NaN's
             basetable[i | 0x000] = 0x7C00;
             basetable[i | 0x100] = 0xFC00;
             shifttable[i | 0x000] = 13;
             shifttable[i | 0x100] = 13;
+            roundtable[i | 0x000] = 0;
+            roundtable[i | 0x100] = 0;
         }
     }
 
-    printf("const quint32 qfloat16::basetable[512] = {\n");
+    printf("const quint16 qfloat16::basetable[512] = {\n");
     for (i = 0; i < 512; i++)
         printf("0x%XU,\n", basetable[i]);
 
     printf("};\n\n");
 
-    printf("const quint32 qfloat16::shifttable[512] = {\n");
+    printf("const quint16 qfloat16::shifttable[512] = {\n");
     for (i = 0; i < 512; i++)
         printf("0x%XU,\n", shifttable[i]);
 
     printf("};\n\n");
 
+    printf("const quint32 qfloat16::roundtable[512] = {\n");
+    for (i = 0; i < 512; i++)
+        printf("0x%XU,\n", roundtable[i]);
+
+    printf("};\n\n");
+
     printf("#endif // !__ARM_FP16_FORMAT_IEEE\n\n");
     printf("QT_END_NAMESPACE\n");
     return 0;