Allow surrogate pairs for various "single character" locale data

Extract the character in its proper Unicode form and encode it in a new single_character_data table of locale data. Record each entry as the range within that table that encodes it. Also add an assertion in the generator script to check that the digits CLDR gives us are a contiguous sequence in increasing order, as has been assumed by the C++ code for some time. Lots of number-formatting code now has to take account of how wide the digits are. This leaves nowhere for updateSystemPrivate() to record values read from sys_locale->query(), so we must always consult that function when accessing these members of the systemData() object. Various internal users of these single-character fields need the system-or-CLDR value rather than the raw CLDR value, so move QLocalePrivate's methods to supply them down to QLocaleData and ensure they check for system values first, where appropriate. This allows us to finally support the Chakma language and script, for whose number system UTF-16 needs surrogate pairs. Costs 10.8 kB in added data, much of it due to adding two new locales that need surrogates to represent digits. [ChangeLog][QtCore][QLocale] Various QLocale methods that returned single QChar values now return QString values to accommodate those locales which need a surrogate pair to represent the (single character) return value. Fixes: QTBUG-69324 Fixes: QTBUG-81053 Change-Id: I481722d6f5ee266164f09031679a851dfa6e7839 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
author: Edward Welbourne <edward.welbourne@qt.io> 2020-01-13 15:46:13 +0100
committer: Edward Welbourne <edward.welbourne@qt.io> 2020-02-17 14:55:24 +0100
commit: ed2b110b6add650954dc102a0317c14ff826c677 (patch)
tree: 53fbcb5f99b9d05667ffbadd1ed9a34cc090566a /util/locale_database/qlocalexml2cpp.py
parent: 1b4dd753eda1111767d81df3bb665f2b14a65d8e (diff)
1 files changed, 31 insertions, 44 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 0cfa0f03e4..ad02bf18af 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -472,6 +472,7 @@ def main():
     data_temp_file.write("};\n\n")
 
     list_pattern_part_data = StringData('list_pattern_part_data')
+    single_character_data = StringData('single_character_data')
     date_format_data = StringData('date_format_data')
     time_format_data = StringData('time_format_data')
     days_data = StringData('days_data')
@@ -491,19 +492,6 @@ def main():
                          + ' lang  ' # IDs
                          + 'script '
                          + '  terr '
-                         + '  dec  ' # Numeric punctuation:
-                         + ' group '
-                         + ' list  ' # List delimiter
-                         + ' prcnt ' # Arithmetic symbols:
-                         + '  zero '
-                         + ' minus '
-                         + ' plus  '
-                         + '  exp  '
-                         # Width 8 + comma - to make space for these wide labels !
-                         + ' quotOpn ' # Quotation marks
-                         + ' quotEnd '
-                         + 'altQtOpn '
-                         + 'altQtEnd '
 
                          # Range entries (all start-indices, then all sizes):
                          # Width 5 + comma:
@@ -511,6 +499,20 @@ def main():
                          + 'lpMid '
                          + 'lpEnd '
                          + 'lPair '
+                         + 'lDelm ' # List delimiter
+                         # Representing numbers:
+                         + ' dec  '
+                         + 'group '
+                         + 'prcnt '
+                         + ' zero '
+                         + 'minus '
+                         + 'plus  '
+                         + ' exp  '
+                         # Quotation marks
+                         + 'qtOpn '
+                         + 'qtEnd '
+                         + 'altQO '
+                         + 'altQE '
                          + 'lDFmt ' # Date format
                          + 'sDFmt '
                          + 'lTFmt ' # Time format
@@ -533,7 +535,7 @@ def main():
                          + 'ntLng ' # Name of language in itself, and of territory:
                          + 'ntTer '
                          # Width 3 + comma for each size; no header
-                         + '    ' * 25
+                         + '    ' * 37
 
                          # Strays (char array, bit-fields):
                          # Width 8+4 + comma
@@ -556,17 +558,10 @@ def main():
     line_format = ('    { '
                    # Locale-identifier:
                    + '%6d,' * 3
-                   # Numeric formats, list delimiter:
-                   + '%6d,' * 8
-                   # Quotation marks:
-                   + '%8d,' * 4
-
-                   # List patterns, date/time formats, month/day names, am/pm:
-                   # SI/IEC byte-unit abbreviations:
-                   # Currency and endonyms
-                   + '%5d,' * 25
+                   # Offsets for starts of ranges:
+                   + '%5d,' * 37
                    # Sizes for the same:
-                   + '%3d,' * 25
+                   + '%3d,' * 37
 
                    # Currency ISO code:
                    + ' %10s, '
@@ -578,9 +573,13 @@ def main():
     for key in locale_keys:
         l = locale_map[key]
         # Sequence of StringDataToken:
-        ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries:
+        ranges = (tuple(list_pattern_part_data.append(p) for p in # 5 entries:
                         (l.listPatternPartStart, l.listPatternPartMiddle,
-                         l.listPatternPartEnd, l.listPatternPartTwo)) +
+                         l.listPatternPartEnd, l.listPatternPartTwo, l.listDelim)) +
+                  tuple(single_character_data.append(p) for p in # 11 entries
+                        (l.decimal, l.group, l.percent, l.zero, l.minus, l.plus, l.exp,
+                         l.quotationStart, l.quotationEnd,
+                         l.alternateQuotationStart, l.alternateQuotationEnd)) +
                   tuple (date_format_data.append(f) for f in # 2 entries:
                          (l.longDateFormat, l.shortDateFormat)) +
                   tuple(time_format_data.append(f) for f in # 2 entries:
@@ -598,23 +597,11 @@ def main():
                    currency_format_data.append(l.currencyNegativeFormat),
                    endonyms_data.append(l.languageEndonym),
                    endonyms_data.append(l.countryEndonym)) # 6 entries
-                  ) # Total: 25 entries
-        assert len(ranges) == 25
+                  ) # Total: 37 entries
+        assert len(ranges) == 37
 
         data_temp_file.write(line_format
-                    % ((key[0], key[1], key[2],
-                        l.decimal,
-                        l.group,
-                        l.listDelim,
-                        l.percent,
-                        l.zero,
-                        l.minus,
-                        l.plus,
-                        l.exp,
-                        l.quotationStart,
-                        l.quotationEnd,
-                        l.alternateQuotationStart,
-                        l.alternateQuotationEnd) +
+                    % ((key[0], key[1], key[2]) +
                        tuple(r.index for r in ranges) +
                        tuple(r.length for r in ranges) +
                        (currencyIsoCodeData(l.currencyIsoCode),
@@ -625,7 +612,7 @@ def main():
                         l.weekendEnd))
                              + ", // %s/%s/%s\n" % (l.language, l.script, l.country))
     data_temp_file.write(line_format # All zeros, matching the format:
-                         % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2
+                         % ( (0,) * 3 + (0,) * 37 * 2
                              + (currencyIsoCodeData(0),)
                              + (0,) * 2
                              + (0,) * 3)
@@ -633,8 +620,8 @@ def main():
     data_temp_file.write("};\n")
 
     # StringData tables:
-    for data in (list_pattern_part_data, date_format_data,
-                 time_format_data, days_data,
+    for data in (list_pattern_part_data, single_character_data,
+                 date_format_data, time_format_data, days_data,
                  byte_unit_data, am_data, pm_data, currency_symbol_data,
                  currency_display_name_data, currency_format_data,
                  endonyms_data):
author	Edward Welbourne <edward.welbourne@qt.io>	2020-01-13 15:46:13 +0100
committer	Edward Welbourne <edward.welbourne@qt.io>	2020-02-17 14:55:24 +0100
commit	ed2b110b6add650954dc102a0317c14ff826c677 (patch)
tree	53fbcb5f99b9d05667ffbadd1ed9a34cc090566a /util/locale_database/qlocalexml2cpp.py
parent	1b4dd753eda1111767d81df3bb665f2b14a65d8e (diff)