summaryrefslogtreecommitdiffstats
path: root/util/locale_database/qlocalexml2cpp.py
diff options
context:
space:
mode:
author Edward Welbourne <edward.welbourne@qt.io> 2020-01-13 15:46:13 +0100
committer Edward Welbourne <edward.welbourne@qt.io> 2020-02-17 14:55:24 +0100
commit ed2b110b6add650954dc102a0317c14ff826c677 (patch)
tree 53fbcb5f99b9d05667ffbadd1ed9a34cc090566a /util/locale_database/qlocalexml2cpp.py
parent 1b4dd753eda1111767d81df3bb665f2b14a65d8e (diff)
Allow surrogate pairs for various "single character" locale data
Extract the character in its proper unicode form and encode it in a new single_character_data table of locale data. Record each entry as the range within that table that encodes it. Also added an assertion in the generator script to check that the digits CLDR gives us are a contiguous sequence in increasing order, as has been assumed by the C++ code for some time. Lots of number-formatting code now has to take account of how wide the digits are. This leaves nowhere for updateSystemPrivate() to record values read from sys_locale->query(), so we must always consult that function when accessing these members of the systemData() object. Various internal users of these single-character fields need the system-or-CLDR value rather than the raw CLDR value, so move QLocalePrivate's methods to supply them down to QLocaleData and ensure they check for system values, where appropriate, first. This allows us to finally support the Chakma language and script, for whose number system UTF-16 needs surrogate pairs. Costs 10.8 kB in added data, much of it due to adding two new locales that need surrogates to represent digits. [ChangeLog][QtCore][QLocale] Various QLocale methods that returned single QChar values now return QString values to accommodate those locales which need a surrogate pair to represent the (single character) return value. Fixes: QTBUG-69324 Fixes: QTBUG-81053 Change-Id: I481722d6f5ee266164f09031679a851dfa6e7839 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'util/locale_database/qlocalexml2cpp.py')
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py75
1 files changed, 31 insertions, 44 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 0cfa0f03e4..ad02bf18af 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -472,6 +472,7 @@ def main():
data_temp_file.write("};\n\n")
list_pattern_part_data = StringData('list_pattern_part_data')
+ single_character_data = StringData('single_character_data')
date_format_data = StringData('date_format_data')
time_format_data = StringData('time_format_data')
days_data = StringData('days_data')
@@ -491,19 +492,6 @@ def main():
+ ' lang ' # IDs
+ 'script '
+ ' terr '
- + ' dec ' # Numeric punctuation:
- + ' group '
- + ' list ' # List delimiter
- + ' prcnt ' # Arithmetic symbols:
- + ' zero '
- + ' minus '
- + ' plus '
- + ' exp '
- # Width 8 + comma - to make space for these wide labels !
- + ' quotOpn ' # Quotation marks
- + ' quotEnd '
- + 'altQtOpn '
- + 'altQtEnd '
# Range entries (all start-indices, then all sizes):
# Width 5 + comma:
@@ -511,6 +499,20 @@ def main():
+ 'lpMid '
+ 'lpEnd '
+ 'lPair '
+ + 'lDelm ' # List delimiter
+ # Representing numbers:
+ + ' dec '
+ + 'group '
+ + 'prcnt '
+ + ' zero '
+ + 'minus '
+ + 'plus '
+ + ' exp '
+ # Quotation marks
+ + 'qtOpn '
+ + 'qtEnd '
+ + 'altQO '
+ + 'altQE '
+ 'lDFmt ' # Date format
+ 'sDFmt '
+ 'lTFmt ' # Time format
@@ -533,7 +535,7 @@ def main():
+ 'ntLng ' # Name of language in itself, and of territory:
+ 'ntTer '
# Width 3 + comma for each size; no header
- + ' ' * 25
+ + ' ' * 37
# Strays (char array, bit-fields):
# Width 8+4 + comma
@@ -556,17 +558,10 @@ def main():
line_format = (' { '
# Locale-identifier:
+ '%6d,' * 3
- # Numeric formats, list delimiter:
- + '%6d,' * 8
- # Quotation marks:
- + '%8d,' * 4
-
- # List patterns, date/time formats, month/day names, am/pm:
- # SI/IEC byte-unit abbreviations:
- # Currency and endonyms
- + '%5d,' * 25
+ # Offsets for starts of ranges:
+ + '%5d,' * 37
# Sizes for the same:
- + '%3d,' * 25
+ + '%3d,' * 37
# Currency ISO code:
+ ' %10s, '
@@ -578,9 +573,13 @@ def main():
for key in locale_keys:
l = locale_map[key]
# Sequence of StringDataToken:
- ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries:
+ ranges = (tuple(list_pattern_part_data.append(p) for p in # 5 entries:
(l.listPatternPartStart, l.listPatternPartMiddle,
- l.listPatternPartEnd, l.listPatternPartTwo)) +
+ l.listPatternPartEnd, l.listPatternPartTwo, l.listDelim)) +
+ tuple(single_character_data.append(p) for p in # 11 entries
+ (l.decimal, l.group, l.percent, l.zero, l.minus, l.plus, l.exp,
+ l.quotationStart, l.quotationEnd,
+ l.alternateQuotationStart, l.alternateQuotationEnd)) +
tuple (date_format_data.append(f) for f in # 2 entries:
(l.longDateFormat, l.shortDateFormat)) +
tuple(time_format_data.append(f) for f in # 2 entries:
@@ -598,23 +597,11 @@ def main():
currency_format_data.append(l.currencyNegativeFormat),
endonyms_data.append(l.languageEndonym),
endonyms_data.append(l.countryEndonym)) # 6 entries
- ) # Total: 25 entries
- assert len(ranges) == 25
+ ) # Total: 37 entries
+ assert len(ranges) == 37
data_temp_file.write(line_format
- % ((key[0], key[1], key[2],
- l.decimal,
- l.group,
- l.listDelim,
- l.percent,
- l.zero,
- l.minus,
- l.plus,
- l.exp,
- l.quotationStart,
- l.quotationEnd,
- l.alternateQuotationStart,
- l.alternateQuotationEnd) +
+ % ((key[0], key[1], key[2]) +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges) +
(currencyIsoCodeData(l.currencyIsoCode),
@@ -625,7 +612,7 @@ def main():
l.weekendEnd))
+ ", // %s/%s/%s\n" % (l.language, l.script, l.country))
data_temp_file.write(line_format # All zeros, matching the format:
- % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2
+ % ( (0,) * 3 + (0,) * 37 * 2
+ (currencyIsoCodeData(0),)
+ (0,) * 2
+ (0,) * 3)
@@ -633,8 +620,8 @@ def main():
data_temp_file.write("};\n")
# StringData tables:
- for data in (list_pattern_part_data, date_format_data,
- time_format_data, days_data,
+ for data in (list_pattern_part_data, single_character_data,
+ date_format_data, time_format_data, days_data,
byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data,
endonyms_data):