summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Edward Welbourne <edward.welbourne@qt.io> 2018-10-19 15:33:34 +0200
committer Liang Qi <liang.qi@qt.io> 2018-11-11 22:09:27 +0000
commit d38f635355dded964ce14de6160fb897e5d6b40d (patch)
tree ca0e79b47a3bc4e6c01158c17d7248d3efa6e6ff
parent 45764e07eb16fc75cc91de6772415c3da4c450fe (diff)
Clean up and update Unicode character data 3rd-party infrastructure
Document how to do an update, fix the bit-rot that had crept into
main.cpp since last it was compiled, correct the qt_attribution.json to
use the actual version number of UCD (its Revision number) instead of
the (admittedly correlated) Unicode release number. Updated to Release
22 (which came with Unicode 11.0.0) in the process; but this doesn't
change our actual qunicodetables.cpp (so is incidental).

Task-number: QTBUG-71281
Change-Id: Ieb7a6e1a4d49f639993f76ff82c8f12a572db3c3
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
-rw-r--r-- src/corelib/tools/qt_attribution.json | 4
-rw-r--r-- util/unicode/README | 31
-rw-r--r-- util/unicode/main.cpp | 11
3 files changed, 38 insertions, 8 deletions
diff --git a/src/corelib/tools/qt_attribution.json b/src/corelib/tools/qt_attribution.json
index 5bb95c9f5b..a842d9467b 100644
--- a/src/corelib/tools/qt_attribution.json
+++ b/src/corelib/tools/qt_attribution.json
@@ -4,12 +4,14 @@
"Name": "Unicode Character Database (UCD)",
"QDocModule": "qtcore",
"QtUsage": "Qt Core uses data obtained from UCD files for working with characters and strings.",
+ "Files": "For update, see qtbase/util/unicode/README",
"Files": "qunicodetables_p.h qunicodetables.cpp",
"Description": "The Unicode Character Database (UCD) is a set of files that
define the Unicode character properties and internal mappings.",
"Homepage": "https://www.unicode.org/ucd/",
- "Version": "10.0.0",
+ "Version": "Don't use the Unicode standard version; UCD has its own 'Revision' numbers",
+ "Version": "20",
"License": "Unicode License Agreement - Data Files and Software (2016)",
"LicenseId": "Unicode-DFS-2016",
"LicenseFile": "UNICODE_LICENSE.txt",
diff --git a/util/unicode/README b/util/unicode/README
index ca34266a36..e52f26175a 100644
--- a/util/unicode/README
+++ b/util/unicode/README
@@ -1 +1,32 @@
Unicode is used to generate the unicode data in src/corelib/tools.
+
+To update:
+* Find the data (UAX #44, UCD; not the XML version) at
+ ftp://www.unicode.org/Public/zipped/$Version/
+* Unpack the zip file; for each file in data/, replace with the new
+ version; find the *BreakProperty.txt in auxiliary/. (These last are
+ only in the zip, not in the web-space's unpacked versions.)
+* If needed, add an entry to enum QChar::UnicodeVersion for the new
+ Unicode version
+* In that case, also update main.cpp's initAgeMap and DATA_VERSION_S*
+ to match
+* Build this project. Its binary, unicode, ignores command-line
+ options and assumes it is being run from this directory. When run,
+ it produces lots of output. Hopefully that doesn't matter.
+* Assertions may trigger: if so, study code and understand what's more
+ complicated about this update; talk to folk named in the git logs,
+ maybe push a WIP to gerrit to solicit advice. Some bit-field may
+ need to be expanded, for example. In some cases QChar may need
+ additions to some of its enums.
+* Build with the modified code, fix any compilation issues.
+* That may have updated qtbase/src/corelib/tools/qunicodetables.cpp;
+ if so the update matters; be sure to commit the changes to data/ at
+ the same time and update tools/qt_attribution.json to match; use the
+ UCD Revision number, rather than the Unicode standard number, as the
+ Version, for all that qunicodetables.cpp uses the latter.
+
+The script writingSystems.sh generates a list of writing systems,
+ostensibly as the basis for updating QFontDatabase::WritingSystem
+enum; however, the Release 20 output of it contains many more writing
+systems than are present in that enum, suggesting it has not been run
+in a very long time. Further research needed.
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 0c3c0b2ee1..00c69de008 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -78,7 +78,6 @@ static void initAgeMap()
}
}
-
static QHash<QByteArray, QChar::Category> categoryMap;
static void initCategoryMap()
@@ -778,7 +777,6 @@ static void initScriptMap()
{ QChar::Script_Soyombo, "Soyombo" },
{ QChar::Script_ZanabazarSquare, "ZanabazarSquare" },
-
// unhandled
{ QChar::Script_Unknown, 0 }
};
@@ -789,7 +787,6 @@ static void initScriptMap()
}
}
-
// Keep this one in sync with the code in createPropertyInfo
static const char *property_string =
"struct Properties {\n"
@@ -2473,9 +2470,9 @@ static QByteArray createPropertyInfo()
out += ", ";
out += QByteArray::number( p.lowerCaseDiff );
out += ", ";
- out += "#ifdef Q_OS_WASM \n"
+ out += "#ifdef Q_OS_WASM \n";
// " unsigned char : 0; //wasm 64 packing trick QTBUG-65259\n"
- out += "#endif \n"
+ out += "#endif \n";
out += ", ";
// " ushort upperCaseSpecial : 1;\n"
// " signed short upperCaseDiff : 15;\n"
@@ -2501,9 +2498,9 @@ static QByteArray createPropertyInfo()
// " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck );
out += ", ";
- out += "#ifdef Q_OS_WASM \n"
+ out += "#ifdef Q_OS_WASM \n";
// " unsigned char : 0; //wasm 64 packing trick QTBUG-65259\n"
- out += "#endif \n"
+ out += "#endif \n";
out += ", ";
// " ushort graphemeBreakClass : 5; /* 5 used */\n"
// " ushort wordBreakClass : 5; /* 5 used */\n"