summaryrefslogtreecommitdiffstats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r--util/unicode/main.cpp246
1 files changed, 151 insertions, 95 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 91015411a0..fb308b7dc0 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <qbytearray.h>
#include <qchar.h>
@@ -39,8 +14,8 @@
#include <private/qunicodetables_p.h>
#endif
-#define DATA_VERSION_S "13.0"
-#define DATA_VERSION_STR "QChar::Unicode_13_0"
+#define DATA_VERSION_S "15.1"
+#define DATA_VERSION_STR "QChar::Unicode_15_1"
static QHash<QByteArray, QChar::UnicodeVersion> age_map;
@@ -74,6 +49,9 @@ static void initAgeMap()
{ QChar::Unicode_12_0, "12.0" },
{ QChar::Unicode_12_1, "12.1" }, // UCD Revision 24
{ QChar::Unicode_13_0, "13.0" }, // UCD Revision 26
+ { QChar::Unicode_14_0, "14.0" }, // UCD Revision 28
+ { QChar::Unicode_15_0, "15.0" }, // UCD Revision 30
+ { QChar::Unicode_15_1, "15.1" }, // UCD Revision 32
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@@ -83,6 +61,47 @@ static void initAgeMap()
}
}
+static const char *east_asian_width_string = // enum declaration written verbatim to the generated output, inside namespace QUnicodeTables
+R"(enum class EastAsianWidth : unsigned int {
+ A,
+ F,
+ H,
+ N,
+ Na,
+ W,
+};
+
+)"; // enumerator order must match the tool's own EastAsianWidth enum: the tables store its numeric value and the generated accessor casts it back
+
+enum class EastAsianWidth : unsigned int { // tool-side twin of the enum text in east_asian_width_string; keep both in the same order
+ A,
+ F,
+ H,
+ N, // initial value of PropertyFlags::eastAsianWidth
+ Na,
+ W,
+}; // enumerators are spelled like the value names initEastAsianWidthMap() registers for EastAsianWidth.txt
+
+static QHash<QByteArray, EastAsianWidth> eastAsianWidthMap; // width name as read from EastAsianWidth.txt -> enumerator; filled by initEastAsianWidthMap()
+
+// Registers every East_Asian_Width value name in eastAsianWidthMap so that
+// readEastAsianWidth() can translate the text found in EastAsianWidth.txt
+// into the matching EastAsianWidth enumerator.
+static void initEastAsianWidthMap()
+{
+    struct NamedWidth {
+        const char *label;
+        EastAsianWidth value;
+    };
+
+    constexpr NamedWidth table[] = {
+        { "A", EastAsianWidth::A },
+        { "F", EastAsianWidth::F },
+        { "H", EastAsianWidth::H },
+        { "N", EastAsianWidth::N },
+        { "Na", EastAsianWidth::Na },
+        { "W", EastAsianWidth::W },
+    };
+
+    for (const NamedWidth &entry : table)
+        eastAsianWidthMap.insert(entry.label, entry.value);
+}
+
static QHash<QByteArray, QChar::Category> categoryMap;
static void initCategoryMap()
@@ -370,10 +389,6 @@ static const char *word_break_class_string =
" WordBreak_MidNum,\n"
" WordBreak_Numeric,\n"
" WordBreak_ExtendNumLet,\n"
- " WordBreak_E_Base,\n"
- " WordBreak_E_Modifier,\n"
- " WordBreak_Glue_After_Zwj,\n"
- " WordBreak_E_Base_GAZ,\n"
" WordBreak_WSegSpace,\n"
"\n"
" NumWordBreakClasses\n"
@@ -398,10 +413,6 @@ enum WordBreakClass {
WordBreak_MidNum,
WordBreak_Numeric,
WordBreak_ExtendNumLet,
- WordBreak_E_Base,
- WordBreak_E_Modifier,
- WordBreak_Glue_After_Zwj,
- WordBreak_E_Base_GAZ,
WordBreak_WSegSpace,
WordBreak_Unassigned
@@ -433,10 +444,6 @@ static void initWordBreak()
{ WordBreak_MidNum, "MidNum" },
{ WordBreak_Numeric, "Numeric" },
{ WordBreak_ExtendNumLet, "ExtendNumLet" },
- { WordBreak_E_Base, "E_Base" },
- { WordBreak_E_Modifier, "E_Modifier" },
- { WordBreak_Glue_After_Zwj, "Glue_After_Zwj" },
- { WordBreak_E_Base_GAZ, "E_Base_GAZ" },
{ WordBreak_WSegSpace, "WSegSpace" },
{ WordBreak_Unassigned, 0 }
};
@@ -522,9 +529,11 @@ static void initSentenceBreak()
static const char *line_break_class_string =
"// see http://www.unicode.org/reports/tr14/tr14-30.html\n"
- "// we don't use the XX and AI classes and map them to AL instead.\n"
+ "// we don't use the XX, AK, AP, AS and AI classes and map them to AL instead.\n"
+ "// VI and VF classes are mapped to CM.\n"
"enum LineBreakClass {\n"
- " LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,\n"
+ " LineBreak_OP, LineBreak_CL, LineBreak_CP,\n"
+ " LineBreak_QU, LineBreak_QU_Pi, LineBreak_QU_Pf, LineBreak_GL,\n"
" LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,\n"
" LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,\n"
" LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,\n"
@@ -538,7 +547,8 @@ static const char *line_break_class_string =
"};\n\n";
enum LineBreakClass {
- LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
+ LineBreak_OP, LineBreak_CL, LineBreak_CP,
+ LineBreak_QU, LineBreak_QU_Pi, LineBreak_QU_Pf, LineBreak_GL,
LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
@@ -606,6 +616,11 @@ static void initLineBreak()
{ LineBreak_EB, "EB" },
{ LineBreak_EM, "EM" },
{ LineBreak_ZWJ, "ZWJ" },
+ { LineBreak_AL, "AK" },
+ { LineBreak_AL, "AP" },
+ { LineBreak_AL, "AS" },
+ { LineBreak_CM, "VI" },
+ { LineBreak_CM, "VF" },
{ LineBreak_Unassigned, 0 }
};
LineBreakList *d = breaks;
@@ -797,6 +812,17 @@ static void initScriptMap()
{ QChar::Script_KhitanSmallScript, "KhitanSmallScript" },
{ QChar::Script_Yezidi, "Yezidi" },
+ // 14.0
+ { QChar::Script_CyproMinoan, "CyproMinoan"},
+ { QChar::Script_OldUyghur, "OldUyghur"},
+ { QChar::Script_Tangsa, "Tangsa"},
+ { QChar::Script_Toto, "Toto"},
+ { QChar::Script_Vithkuqi, "Vithkuqi"},
+
+ // 15.0
+ { QChar::Script_Kawi, "Kawi"},
+ { QChar::Script_NagMundari, "NagMundari"},
+
// unhandled
{ QChar::Script_Unknown, 0 }
};
@@ -878,7 +904,8 @@ static const char *property_string =
" ushort joining : 3;\n"
" signed short digitValue : 5;\n"
" signed short mirrorDiff : 16;\n"
- " ushort unicodeVersion : 8; /* 5 used */\n"
+ " ushort unicodeVersion : 5; /* 5 used */\n"
+ " ushort eastAsianWidth : 3; /* 3 used */\n"
" ushort nfQuickCheck : 8;\n" // could be narrowed
"#ifdef Q_OS_WASM\n"
" unsigned char : 0; //wasm 64 packing trick\n"
@@ -925,6 +952,10 @@ static const char *methods =
"Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t usc4) noexcept;\n"
"inline QStringView idnaMapping(QChar ch) noexcept\n"
"{ return idnaMapping(ch.unicode()); }\n"
+ "\n"
+ "Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept;\n"
+ "inline EastAsianWidth eastAsianWidth(QChar ch) noexcept\n"
+ "{ return eastAsianWidth(ch.unicode()); }\n"
"\n";
static const int SizeOfPropertiesStruct = 20;
@@ -947,6 +978,7 @@ struct PropertyFlags {
&& direction == o.direction
&& joining == o.joining
&& age == o.age
+ && eastAsianWidth == o.eastAsianWidth
&& digitValue == o.digitValue
&& mirrorDiff == o.mirrorDiff
&& lowerCaseDiff == o.lowerCaseDiff
@@ -974,6 +1006,8 @@ struct PropertyFlags {
QChar::JoiningType joining : 3;
// from DerivedAge.txt
QChar::UnicodeVersion age : 5;
+ // From EastAsianWidth.txt
+ EastAsianWidth eastAsianWidth = EastAsianWidth::N;
int digitValue = -1;
int mirrorDiff : 16;
@@ -1512,6 +1546,52 @@ static void readDerivedAge()
}
}
+// Reads data/EastAsianWidth.txt and stores the width class of every listed
+// code point in its UnicodeData entry (ud.p.eastAsianWidth).
+//
+// After dropping '#' comments, every remaining line is expected to look like
+// "<first>[..<last>];<width>", with the code points in hexadecimal and <width>
+// being one of the names registered by initEastAsianWidthMap(), which must
+// therefore have run first. Anything else aborts the tool via qFatal();
+// unlike Q_ASSERT, these checks stay active in builds that define QT_NO_DEBUG,
+// so malformed input can neither be indexed out of range nor be written into
+// the tables silently.
+static void readEastAsianWidth()
+{
+    qDebug("Reading EastAsianWidth.txt");
+
+    QFile f("data/EastAsianWidth.txt");
+    if (!f.exists() || !f.open(QFile::ReadOnly))
+        qFatal("Couldn't find or read EastAsianWidth.txt");
+
+    while (!f.atEnd()) {
+        QByteArray line = f.readLine().trimmed();
+
+        // Cut off the trailing comment, if any, and normalize the whitespace.
+        int comment = line.indexOf('#');
+        line = (comment < 0 ? line : line.left(comment)).simplified();
+
+        if (line.isEmpty())
+            continue;
+
+        const QList<QByteArray> fields = line.split(';');
+        if (fields.size() != 2)
+            qFatal("Malformed line in EastAsianWidth.txt: %s", line.constData());
+
+        // That would be split(".."), but that API does not exist.
+        const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
+        const QList<QByteArray> cl = codePoints.split('.');
+        if (cl.size() > 2)
+            qFatal("Malformed code point range in EastAsianWidth.txt: %s", line.constData());
+
+        const QByteArray widthString = fields[1].trimmed();
+        const auto widthIt = eastAsianWidthMap.constFind(widthString);
+        if (widthIt == eastAsianWidthMap.cend()) {
+            qFatal("Unhandled EastAsianWidth property value for %s: %s",
+                   qPrintable(codePoints), qPrintable(widthString));
+        }
+        const EastAsianWidth width = *widthIt;
+
+        // A single code point is treated as the range [first, first].
+        bool ok = false;
+        const int first = cl[0].toInt(&ok, 16);
+        const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
+        if (!ok || first < 0 || last < first || last > int(QChar::LastValidCodePoint))
+            qFatal("Invalid code point range in EastAsianWidth.txt: %s", line.constData());
+
+        for (int codepoint = first; codepoint <= last; ++codepoint) {
+            UnicodeData &ud = UnicodeData::valueRef(codepoint);
+            // Ensure that ranges don't overlap. N is the initial value of the
+            // field, so this only catches a code point that was already given
+            // a width other than N.
+            Q_ASSERT(ud.p.eastAsianWidth == EastAsianWidth::N);
+            ud.p.eastAsianWidth = width;
+        }
+    }
+}
+
static void readDerivedNormalizationProps()
{
qDebug("Reading DerivedNormalizationProps.txt");
@@ -1639,7 +1719,7 @@ static QByteArray createNormalizationCorrections()
" int version;\n"
"};\n\n"
- "static const NormalizationCorrection uc_normalization_corrections[] = {\n";
+ "static constexpr NormalizationCorrection uc_normalization_corrections[] = {\n";
int maxVersion = 0;
int numCorrections = 0;
@@ -2655,7 +2735,7 @@ static QByteArray createIdnaMapping()
qsizetype memoryUsage = 0;
QByteArray out =
- "static const char16_t idnaMappingData[] = {";
+ "static constexpr char16_t idnaMappingData[] = {";
int col = 0;
for (auto c : idnaMappingData) {
@@ -2680,7 +2760,7 @@ static QByteArray createIdnaMapping()
" char16_t ucs[2]; // ucs[0] is offset if size > 2\n"
"};\n"
"static_assert(sizeof(IdnaMapEntry) == 8);\n\n"
- "static const IdnaMapEntry idnaMap[] = {\n";
+ "static constexpr IdnaMapEntry idnaMap[] = {\n";
for (auto i = idnaMappingTable.keyValueBegin(); i != idnaMappingTable.keyValueEnd(); i++) {
const QString &mapping = i->second;
@@ -2849,7 +2929,7 @@ static QByteArray createPropertyInfo()
Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870
Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
- QByteArray out = "static const unsigned short uc_property_trie[] = {\n";
+ QByteArray out = "static constexpr unsigned short uc_property_trie[] = {\n";
// First write the map from blockId to indices of unique blocks:
out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
@@ -2902,7 +2982,7 @@ static QByteArray createPropertyInfo()
out.chop(2);
out += "\n};\n\n";
- out += "static const Properties uc_properties[] = {";
+ out += "static constexpr Properties uc_properties[] = {";
// keep in sync with the property declaration
for (int i = 0; i < uniqueProperties.size(); ++i) {
const PropertyFlags &p = uniqueProperties.at(i);
@@ -2925,9 +3005,12 @@ static QByteArray createPropertyInfo()
// " signed short mirrorDiff : 16;\n"
out += QByteArray::number( p.mirrorDiff );
out += ", ";
-// " ushort unicodeVersion : 8; /* 5 used */\n"
+// " ushort unicodeVersion : 5; /* 5 used */\n"
out += QByteArray::number( p.age );
out += ", ";
+// " ushort eastAsianWidth : 3; /* 3 used */\n"
+ out += QByteArray::number( static_cast<unsigned int>(p.eastAsianWidth) );
+ out += ", ";
// " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck );
out += ", ";
@@ -3032,6 +3115,11 @@ static QByteArray createPropertyInfo()
"{\n"
" return static_cast<IdnaStatus>(qGetProp(ucs4)->idnaStatus);\n"
"}\n"
+ "\n"
+ "Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept\n"
+ "{\n"
+ " return static_cast<EastAsianWidth>(qGetProp(ucs4)->eastAsianWidth);\n"
+ "}\n"
"\n";
return out;
@@ -3042,7 +3130,7 @@ static QByteArray createSpecialCaseMap()
qDebug("createSpecialCaseMap:");
QByteArray out
- = "static const unsigned short specialCaseMap[] = {\n"
+ = "static constexpr unsigned short specialCaseMap[] = {\n"
" 0x0, // placeholder";
int i = 1;
@@ -3058,7 +3146,7 @@ static QByteArray createSpecialCaseMap()
maxN = std::max(maxN, n);
}
out.chop(1);
- out += "\n};\n\nconst unsigned int MaxSpecialCaseLength = ";
+ out += "\n};\n\nconstexpr unsigned int MaxSpecialCaseLength = ";
out += QByteArray::number(maxN);
out += ";\n\n";
@@ -3190,7 +3278,7 @@ static QByteArray createCompositionInfo()
Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
- QByteArray out = "static const unsigned short uc_decomposition_trie[] = {\n";
+ QByteArray out = "static constexpr unsigned short uc_decomposition_trie[] = {\n";
// first write the map
out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
@@ -3254,7 +3342,7 @@ static QByteArray createCompositionInfo()
+ QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
" : 0xffff)\n\n";
- out += "static const unsigned short uc_decomposition_map[] = {";
+ out += "static constexpr unsigned short uc_decomposition_map[] = {";
for (int i = 0; i < decompositions.size(); ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@@ -3390,7 +3478,7 @@ static QByteArray createLigatureInfo()
Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
- QByteArray out = "static const unsigned short uc_ligature_trie[] = {\n";
+ QByteArray out = "static constexpr unsigned short uc_ligature_trie[] = {\n";
// first write the map
out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
@@ -3454,7 +3542,7 @@ static QByteArray createLigatureInfo()
+ QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
" : 0xffff)\n\n";
- out += "static const unsigned short uc_ligature_map[] = {";
+ out += "static constexpr unsigned short uc_ligature_map[] = {";
for (int i = 0; i < ligatures.size(); ++i) {
if (!(i % 8)) {
if (out.endsWith(' '))
@@ -3487,6 +3575,7 @@ QByteArray createCasingInfo()
int main(int, char **)
{
initAgeMap();
+ initEastAsianWidthMap();
initCategoryMap();
initDecompositionMap();
initDirectionMap();
@@ -3502,6 +3591,7 @@ int main(int, char **)
readBidiMirroring();
readArabicShaping();
readDerivedAge();
+ readEastAsianWidth();
readDerivedNormalizationProps();
readSpecialCasing();
readCaseFolding();
@@ -3525,44 +3615,9 @@ int main(int, char **)
QByteArray idnaMapping = createIdnaMapping();
QByteArray header =
- "/****************************************************************************\n"
- "**\n"
- "** Copyright (C) 2020 The Qt Company Ltd.\n"
- "** Contact: https://www.qt.io/licensing/\n"
- "**\n"
- "** This file is part of the QtCore module of the Qt Toolkit.\n"
- "**\n"
- "** $QT_BEGIN_LICENSE:LGPL$\n"
- "** Commercial License Usage\n"
- "** Licensees holding valid commercial Qt licenses may use this file in\n"
- "** accordance with the commercial license agreement provided with the\n"
- "** Software or, alternatively, in accordance with the terms contained in\n"
- "** a written agreement between you and The Qt Company. For licensing terms\n"
- "** and conditions see https://www.qt.io/terms-conditions. For further\n"
- "** information use the contact form at https://www.qt.io/contact-us.\n"
- "**\n"
- "** GNU Lesser General Public License Usage\n"
- "** Alternatively, this file may be used under the terms of the GNU Lesser\n"
- "** General Public License version 3 as published by the Free Software\n"
- "** Foundation and appearing in the file LICENSE.LGPL3 included in the\n"
- "** packaging of this file. Please review the following information to\n"
- "** ensure the GNU Lesser General Public License version 3 requirements\n"
- "** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.\n"
- "**\n"
- "** GNU General Public License Usage\n"
- "** Alternatively, this file may be used under the terms of the GNU\n"
- "** General Public License version 2.0 or (at your option) the GNU General\n"
- "** Public license version 3 or any later version approved by the KDE Free\n"
- "** Qt Foundation. The licenses are as published by the Free Software\n"
- "** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3\n"
- "** included in the packaging of this file. Please review the following\n"
- "** information to ensure the GNU General Public License requirements will\n"
- "** be met: https://www.gnu.org/licenses/gpl-2.0.html and\n"
- "** https://www.gnu.org/licenses/gpl-3.0.html.\n"
- "**\n"
- "** $QT_END_LICENSE$\n"
- "**\n"
- "****************************************************************************/\n\n";
+ "// Copyright (C) 2020 The Qt Company Ltd.\n"
+ "// SPDX-License-Identifier: Unicode-3.0\n"
+ "\n";
QByteArray note =
"/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n";
@@ -3612,6 +3667,7 @@ int main(int, char **)
f.write("namespace QUnicodeTables {\n\n");
f.write(property_string);
f.write(sizeOfPropertiesStructCheck);
+ f.write(east_asian_width_string);
f.write(grapheme_break_class_string);
f.write(word_break_class_string);
f.write(sentence_break_class_string);