From c3eb521a0f10112df6b61d2592351c4eef2e1f9b Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Wed, 23 Oct 2019 17:17:49 +0200 Subject: Update UCD data to Unicode 12.1.0's Revision 24 Had to teach the update program to accept category Lm (Letter_Modifier) as valid for Joining_Transparent, for the sake of a new ArabicShaping.txt entry. Added three new Unicode versions, several new scripts and a new word-break class. Updated UCD's test data for tst_QTextBoundaryFinder. This left 57 tests failing; I have commented out the data rows for those tests, pending someone with more knowledge addressing this. Task-number: QTBUG-79631 Task-number: QTBUG-79418 Change-Id: Ic33d3b3551195d47a84d98e84020f57a68f0b201 Reviewed-by: Eskil Abrahamsen Blomfeldt --- util/unicode/main.cpp | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'util/unicode/main.cpp') diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 26cdab87d6..c3465b3045 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2019 The Qt Company Ltd. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the utils of the Qt Toolkit. 
@@ -38,8 +38,8 @@ #include #endif -#define DATA_VERSION_S "10.0" -#define DATA_VERSION_STR "QChar::Unicode_10_0" +#define DATA_VERSION_S "12.1" +#define DATA_VERSION_STR "QChar::Unicode_12_1" static QHash age_map; @@ -69,6 +69,9 @@ static void initAgeMap() { QChar::Unicode_8_0, "8.0" }, { QChar::Unicode_9_0, "9.0" }, { QChar::Unicode_10_0, "10.0" }, + { QChar::Unicode_11_0, "11.0" }, + { QChar::Unicode_12_0, "12.0" }, + { QChar::Unicode_12_1, "12.1" }, // UCD Revision 24 { QChar::Unicode_Unassigned, 0 } }; AgeMap *d = ageMap; @@ -377,6 +380,7 @@ static const char *word_break_class_string = " WordBreak_E_Modifier,\n" " WordBreak_Glue_After_Zwj,\n" " WordBreak_E_Base_GAZ,\n" + " WordBreak_WSegSpace,\n" " NumWordBreakClasses,\n" "};\n\n"; @@ -403,6 +407,7 @@ enum WordBreakClass { WordBreak_E_Modifier, WordBreak_Glue_After_Zwj, WordBreak_E_Base_GAZ, + WordBreak_WSegSpace, WordBreak_Unassigned }; @@ -437,6 +442,7 @@ static void initWordBreak() { WordBreak_E_Modifier, "E_Modifier" }, { WordBreak_Glue_After_Zwj, "Glue_After_Zwj" }, { WordBreak_E_Base_GAZ, "E_Base_GAZ" }, + { WordBreak_WSegSpace, "WSegSpace" }, { WordBreak_Unassigned, 0 } }; WordBreakList *d = breaks; @@ -776,6 +782,18 @@ static void initScriptMap() { QChar::Script_Nushu, "Nushu" }, { QChar::Script_Soyombo, "Soyombo" }, { QChar::Script_ZanabazarSquare, "ZanabazarSquare" }, + // 12.1 + { QChar::Script_Dogra, "Dogra" }, + { QChar::Script_GunjalaGondi, "GunjalaGondi" }, + { QChar::Script_HanifiRohingya, "HanifiRohingya" }, + { QChar::Script_Makasar, "Makasar" }, + { QChar::Script_Medefaidrin, "Medefaidrin" }, + { QChar::Script_OldSogdian, "OldSogdian" }, + { QChar::Script_Sogdian, "Sogdian" }, + { QChar::Script_Elymaic, "Elymaic" }, + { QChar::Script_Nandinagari, "Nandinagari" }, + { QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" }, + { QChar::Script_Wancho, "Wancho" }, // unhandled { QChar::Script_Unknown, 0 } @@ -1375,12 +1393,18 @@ static void readArabicShaping() qFatal("%x: unassigned or 
unhandled joining type: %s", codepoint, l[2].constData()); break; case Joining_Transparent: - if (d.p.category != QChar::Mark_NonSpacing && d.p.category != QChar::Mark_Enclosing && d.p.category != QChar::Other_Format) { - qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", - codepoint, l[2].constData()); + switch (d.p.category) { + case QChar::Mark_Enclosing: + case QChar::Mark_NonSpacing: + case QChar::Letter_Modifier: + case QChar::Other_Format: + break; + default: + qFatal("%x: joining type '%s' was met (category: %d); " + "the current implementation needs to be revised!", + codepoint, l[2].constData(), d.p.category); } - // fall through - + Q_FALLTHROUGH(); default: d.p.joining = QChar::JoiningType(joining); break; -- cgit v1.2.3