// Copyright (C) 2016 by Southwest Research Institute (R)
// Copyright (C) 2019 Intel Corporation.
// Copyright (C) 2020 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

#include <stdint.h>
#include <stdio.h>

/*
 * This tool generates the tables used by qfloat16 to implement a
 * software-emulated version of IEEE 754 binary16. qfloat16 automatically uses
 * CPU instructions to convert to and from float (IEEE 754 binary32), but if
 * the CPU is not guaranteed to have those instructions available at compile
 * time, then qfloat16 needs the tables to perform the conversion with
 * reasonable performance.
 *
 * Because Qt requires float to be IEEE 754 binary32, these tables are
 * platform-independent and will never change.
 */

/*
 * Computes one mantissatable entry: the value i is placed 13 bits up, shifted
 * left until bit 23 (0x00800000) is set, and each shift is paid for by
 * decrementing the exponent field. The leading 1 bit is then dropped and the
 * exponent is rebased by (127 - 14) << 23.
 *
 * main() only calls this for i in [1, 1023]. If the shifted value has no set
 * bit at or below bit 23 (i == 0, or any i whose low 11 bits are all zero),
 * the normalisation loop could never terminate, so 0 is returned instead.
 *
 * Returns the combined mantissa and exponent bits.
 */
uint32_t convertmantissa(int32_t i)
{
    // Shift as unsigned so that no signed left shift is ever performed.
    uint32_t m = static_cast<uint32_t>(i) << 13; // Zero pad mantissa bits
    uint32_t e = 0;                              // Zero exponent

    // Shifting left can only bring a bit from positions 0..23 up to bit 23;
    // without such a bit there is nothing to normalise.
    if ((m & 0x00FFFFFF) == 0)
        return 0;

    while (!(m & 0x00800000)) {                  // While not normalized
        e -= 0x00800000;                         // Decrement exponent (1<<23), wraps on purpose
        m <<= 1;                                 // Shift mantissa
    }
    m &= ~0x00800000U;                           // Clear leading 1 bit
    e += 0x38800000;                             // Adjust bias ((127-14)<<23)
    return m | e;                                // Return combined number
}

// we first build these tables up and then print them out as a separate step in order
// to more closely map the implementation given in the paper.
// Tables filled in by main() and then printed. Each one is indexed by a value
// i in [0, 255] (main() treats i - 127 as the exponent) for the first half and
// by i | 0x100 for the second half, whose basetable values also carry 0x8000.
uint32_t basetable[512];
uint32_t shifttable[512];
uint32_t roundtable[512];

// Stores one (base, shift, round) triple at idx and at idx | 0x100; the
// basetable value of the second copy additionally has 0x8000 set.
static void setEntry(uint32_t idx, uint32_t base, uint32_t shift, uint32_t round)
{
    basetable[idx | 0x000] = base;
    basetable[idx | 0x100] = base | 0x8000;
    shifttable[idx | 0x000] = shift;
    shifttable[idx | 0x100] = shift;
    roundtable[idx | 0x000] = round;
    roundtable[idx | 0x100] = round;
}

/*
 * Prints the generated table source to stdout: mantissatable, exponenttable
 * and offsettable are printed directly, then basetable, shifttable and
 * roundtable are built in memory and printed afterwards. Always returns 0.
 */
int main()
{
    uint32_t i;

    printf("/* This file was generated by util/qfloat16-tables/gen_qfloat16_tables.cpp */\n\n");
    // The generated file defines qfloat16 members, so it has to name the header to include.
    printf("#include <QtCore/qfloat16.h>\n\n");

    printf("QT_BEGIN_NAMESPACE\n\n");
    printf("#if !defined(__ARM_FP16_FORMAT_IEEE)\n\n");

    printf("const quint32 qfloat16::mantissatable[2048] = {\n");
    printf("0,\n");
    for (i = 1; i < 1024; i++)
        printf("0x%XU,\n", convertmantissa(static_cast<int32_t>(i)));
    for (i = 1024; i < 2048; i++)
        printf("0x%XU,\n", 0x38000000U + ((i - 1024) << 13));
    printf("};\n\n");

    printf("const quint32 qfloat16::exponenttable[64] = {\n");
    printf("0,\n");
    for (i = 1; i < 31; i++)
        printf("0x%XU,\n", i << 23);
    printf("0x47800000U,\n");  // 31
    printf("0x80000000U,\n");  // 32
    for (i = 33; i < 63; i++)
        printf("0x%XU,\n", 0x80000000U + ((i - 32) << 23));
    printf("0xC7800000U,\n");  // 63
    printf("};\n\n");

    printf("const quint32 qfloat16::offsettable[64] = {\n");
    printf("0,\n");
    for (i = 1; i < 32; i++)
        printf("1024U,\n");
    printf("0,\n");
    for (i = 33; i < 64; i++)
        printf("1024U,\n");
    printf("};\n\n");

    for (i = 0; i < 256; ++i) {
        // Computed in signed arithmetic so that small i gives a negative e
        // without relying on unsigned wrap-around.
        const int32_t e = static_cast<int32_t>(i) - 127;

        if (e < -25) {
            // Very small numbers map to zero
            setEntry(i, 0x0000, 24, 0);
        } else if (e < -14) {
            // Small numbers map to denorms
            uint32_t round;
            if (e == -25) {
                // rounds up
                round = (1 << 24);
            } else if (e == -24) {
                // rounds half up
                round = (1 << 22) + 1;
            } else {
                round = (1 << (-e - 2));
            }
            setEntry(i, static_cast<uint32_t>(0x0400 >> (-e - 14)),
                     static_cast<uint32_t>(-e - 1), round);
        } else if (e <= 15) {
            // Normal numbers just lose precision
            setEntry(i, static_cast<uint32_t>((e + 15) << 10), 13, (1 << 12));
        } else if (e < 128) {
            // Large numbers map to Infinity
            setEntry(i, 0x7C00, 24, 0);
        } else {
            // Infinity and NaN's stay Infinity and NaN's
            setEntry(i, 0x7C00, 13, 0);
        }
    }

    printf("const quint16 qfloat16::basetable[512] = {\n");
    for (i = 0; i < 512; i++)
        printf("0x%XU,\n", basetable[i]);
    printf("};\n\n");

    printf("const quint16 qfloat16::shifttable[512] = {\n");
    for (i = 0; i < 512; i++)
        printf("0x%XU,\n", shifttable[i]);
    printf("};\n\n");

    printf("const quint32 qfloat16::roundtable[512] = {\n");
    for (i = 0; i < 512; i++)
        printf("0x%XU,\n", roundtable[i]);
    printf("};\n\n");

    printf("#endif // !__ARM_FP16_FORMAT_IEEE\n\n");
    printf("QT_END_NAMESPACE\n");
    return 0;
}