diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2020-01-09 14:48:21 +0100 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2020-01-30 17:58:53 +0100 |
commit | c08a31634fd8d25d14aed4a73a80f44f254163f3 (patch) | |
tree | b19cf37f2e7405845f72613bd59911256c19a4dc /util/locale_database | |
parent | c0f041fcdf9573c0777fd19a0ce012fedf83fec4 (diff) |
Separate offsets from sizes in QLocale's data
This enables us to make the sizes quint8 and benefit from the
resulting packing, making the locale data smaller. The sizes for long
month-name lists (which concatenate twelve names with semicolon as
separator) can overflow an 8-bit member, so use quint16 where needed.
Re-ordered the data in QLocaleData and QCalendarLocale. Now all
long-short(-narrow) families arise in that order; and any standalone
is grouped with the one of the same length. (This cost 20 bytes in the
date-format table, which optimises out more duplication if short is
before long, but the saving in the (smaller) time-format table more
than makes up for it; and 20 bytes isn't worth the confusion that being
inconsistent in ordering might cause.)
At the same time, drop trailing semicolons from list entries (which
join various names with semicolon) as they're not needed: we know
where the end of the list is, because we know the size of the string
that results from concatenation. The code that parses such lists can
even correctly handle empty entries at the end.
Saves 26 kB of data in the compiled binaries.
Task-number: QTBUG-81053
Change-Id: If6ccc96a6910828817aa605d10fd814f567ae1e8
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'util/locale_database')
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 20 | ||||
-rw-r--r-- | util/locale_database/localexml.py | 10 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 210 |
3 files changed, 132 insertions, 108 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index e321cb501e..625f1c32c4 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -352,13 +352,15 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['currencySymbol'] = '' result['currencyDisplayName'] = '' if result['currencyIsoCode']: - result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) - result['currencyDisplayName'] = ';'.join( - findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode'] - + ']/displayName' + tail) - for tail in ['',] + [ - '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') - ]) + ';' + stem = "numbers/currencies/currency[%s]/" % result['currencyIsoCode'] + result['currencySymbol'] = findEntryDef(path, stem + 'symbol') + displays = tuple(findEntryDef(path, stem + 'displayName' + tail) + for tail in ('',) + tuple( + '[count=%s]' % x for x in ('zero', 'one', 'two', + 'few', 'many', 'other'))) + while displays and not displays[-1]: + displays = displays[:-1] + result['currencyDisplayName'] = ';'.join(displays) def findUnitDef(path, stem, fallback=''): # The displayName for a quantified unit in en.xml is kByte @@ -405,7 +407,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' result[key + 'Months_' + cal] = ';'.join( findEntry(path, stem + prop + "month[%d]" % i) - for i in range(1, 13)) + ';' + for i in range(1, 13)) # Day data (for Gregorian, at least): stem = 'dates/calendars/calendar[gregorian]/days/' @@ -414,7 +416,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' result[key + 'Days'] = ';'.join( findEntry(path, stem + prop + '[' + day + ']') - for day in days) + ';' + 
for day in days) return Locale(result) diff --git a/util/locale_database/localexml.py b/util/locale_database/localexml.py index 9b353f5122..9958398938 100644 --- a/util/locale_database/localexml.py +++ b/util/locale_database/localexml.py @@ -1,7 +1,7 @@ # coding=utf8 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -267,7 +267,7 @@ class Locale: except KeyError: # Need to add an entry to known, above. print 'Unsupported calendar:', cal raise - names, get = data[0] + ('',), data[1:] + names, get = data[0], data[1:] for n, size in enumerate(sizes): yield ('_'.join((camelCase((size, 'months')), cal)), ';'.join(get[n][0](i, x) for i, x in enumerate(names))) @@ -279,7 +279,7 @@ class Locale: def C(cls, calendars=('gregorian',), # Empty entry at end to ensure final separator when join()ed: days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', - 'Thursday', 'Friday', 'Saturday', ''), + 'Thursday', 'Friday', 'Saturday'), quantifiers=('k', 'M', 'G', 'T', 'P', 'E')): """Returns an object representing the C locale.""" return cls(dict(cls.__monthNames(calendars)), @@ -303,11 +303,11 @@ class Locale: longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss', longDays=';'.join(days), shortDays=';'.join(d[:3] for d in days), - narrowDays='7;1;2;3;4;5;6;', + narrowDays='7;1;2;3;4;5;6', standaloneLongDays=';'.join(days), standaloneShortDays=';'.join(d[:3] for d in days), standaloneNarrowDays=';'.join(d[:1] for d in days), currencyIsoCode='', currencySymbol='', - currencyDisplayName=';' * 7, + currencyDisplayName='', currencyDigits=2, currencyRounding=1, currencyFormat='%1%2', currencyNegativeFormat='') diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index e5e5cccbff..0cfa0f03e4 100755 --- 
a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -259,13 +259,16 @@ def unicode2hex(s): return lst class StringDataToken: - def __init__(self, index, length): - if index > 0xFFFF or length > 0xFFFF: - raise Error("Position exceeds ushort range: %d,%d " % (index, length)) + def __init__(self, index, length, bits): + if index > 0xffff: + print "\n\n\n#error Data index is too big!", index + raise ValueError("Start-index (%d) exceeds the uint16 range!" % index) + if length >= (1 << bits): + print "\n\n\n#error Range length is too big!", length + raise ValueError("Data size (%d) exceeds the %d-bit range!" % (length, bits)) + self.index = index self.length = length - def __str__(self): - return " %d,%d " % (self.index, self.length) class StringData: def __init__(self, name): @@ -274,22 +277,22 @@ class StringData: self.name = name self.text = '' # Used in quick-search for matches in data - def append(self, s): + def append(self, s, bits=8): try: token = self.hash[s] except KeyError: - token = self.__store(s) + token = self.__store(s, bits) self.hash[s] = token return token - def __store(self, s): + def __store(self, s, bits): """Add string s to known data. Seeks to avoid duplication, where possible. For example, short-forms may be prefixes of long-forms. """ if not s: - return StringDataToken(0, 0) + return StringDataToken(0, 0, bits) ucs2 = unicode2hex(s) try: index = self.text.index(s) - 1 @@ -307,12 +310,15 @@ class StringData: assert index >= 0 try: - return StringDataToken(index, len(ucs2)) + return StringDataToken(index, len(ucs2), bits) except ValueError as e: e.args += (self.name, s) raise def write(self, fd): + if len(self.data) > 0xffff: + raise ValueError("Data is too big for quint16 index to its end!" 
% len(self.data), + self.name) fd.write("\nstatic const ushort %s[] = {\n" % self.name) fd.write(wrap_list(self.data)) fd.write("\n};\n") @@ -498,39 +504,43 @@ def main(): + ' quotEnd ' + 'altQtOpn ' + 'altQtEnd ' - # Width 11 + comma: - + ' lpStart ' # List pattern - + ' lpMid ' - + ' lpEnd ' - + ' lpTwo ' - + ' sDtFmt ' # Date format - + ' lDtFmt ' - + ' sTmFmt ' # Time format - + ' lTmFmt ' - + ' ssDays ' # Days - + ' slDays ' - + ' snDays ' - + ' sDays ' - + ' lDays ' - + ' nDays ' - + ' am ' # am/pm indicators - + ' pm ' - # Width 8 + comma - + ' byte ' - + ' siQuant ' - + 'iecQuant ' + + # Range entries (all start-indices, then all sizes): + # Width 5 + comma: + + 'lStrt ' # List pattern + + 'lpMid ' + + 'lpEnd ' + + 'lPair ' + + 'lDFmt ' # Date format + + 'sDFmt ' + + 'lTFmt ' # Time format + + 'sTFmt ' + + 'slDay ' # Day names + + 'lDays ' + + 'ssDys ' + + 'sDays ' + + 'snDay ' + + 'nDays ' + + ' am ' # am/pm indicators + + ' pm ' + + ' byte ' + + 'siQnt ' + + 'iecQn ' + + 'crSym ' # Currency formatting: + + 'crDsp ' + + 'crFmt ' + + 'crFNg ' + + 'ntLng ' # Name of language in itself, and of territory: + + 'ntTer ' + # Width 3 + comma for each size; no header + + ' ' * 25 + + # Strays (char array, bit-fields): # Width 8+4 + comma + ' currISO ' - # Width 11 + comma: - + ' currSym ' # Currency formatting: - + ' currDsply ' - + ' currFmt ' - + ' currFmtNeg ' - + ' endoLang ' # Name of language in itself, and of country: - + ' endoCntry ' # Width 6 + comma: - + 'curDgt ' # Currency number representation: - + 'curRnd ' + + 'curDgt ' # Currency digits + + 'curRnd ' # Currencty rounding (unused: QTBUG-81343) + 'dow1st ' # First day of week + ' wknd+ ' # Week-end start/end days: + ' wknd-' @@ -550,14 +560,16 @@ def main(): + '%6d,' * 8 # Quotation marks: + '%8d,' * 4 + # List patterns, date/time formats, month/day names, am/pm: - + '%11s,' * 16 # SI/IEC byte-unit abbreviations: - + '%8s,' * 3 + # Currency and endonyms + + '%5d,' * 25 + # Sizes for the same: + + 
'%3d,' * 25 + # Currency ISO code: + ' %10s, ' - # Currency and endonyms - + '%11s,' * 6 # Currency formatting: + '%6d,%6d' # Day of week and week-end: @@ -565,8 +577,32 @@ def main(): + ' }') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries: + (l.listPatternPartStart, l.listPatternPartMiddle, + l.listPatternPartEnd, l.listPatternPartTwo)) + + tuple (date_format_data.append(f) for f in # 2 entries: + (l.longDateFormat, l.shortDateFormat)) + + tuple(time_format_data.append(f) for f in # 2 entries: + (l.longTimeFormat, l.shortTimeFormat)) + + tuple(days_data.append(d) for d in # 6 entries: + (l.standaloneLongDays, l.longDays, + l.standaloneShortDays, l.shortDays, + l.standaloneNarrowDays, l.narrowDays)) + + (am_data.append(l.am), pm_data.append(l.pm)) + # 2 entries: + tuple(byte_unit_data.append(b) for b in # 3 entries: + (l.byte_unit, l.byte_si_quantified, l.byte_iec_quantified)) + + (currency_symbol_data.append(l.currencySymbol), + currency_display_name_data.append(l.currencyDisplayName), + currency_format_data.append(l.currencyFormat), + currency_format_data.append(l.currencyNegativeFormat), + endonyms_data.append(l.languageEndonym), + endonyms_data.append(l.countryEndonym)) # 6 entries + ) # Total: 25 entries + assert len(ranges) == 25 + data_temp_file.write(line_format - % (key[0], key[1], key[2], + % ((key[0], key[1], key[2], l.decimal, l.group, l.listDelim, @@ -578,43 +614,21 @@ def main(): l.quotationStart, l.quotationEnd, l.alternateQuotationStart, - l.alternateQuotationEnd, - list_pattern_part_data.append(l.listPatternPartStart), - list_pattern_part_data.append(l.listPatternPartMiddle), - list_pattern_part_data.append(l.listPatternPartEnd), - list_pattern_part_data.append(l.listPatternPartTwo), - date_format_data.append(l.shortDateFormat), - date_format_data.append(l.longDateFormat), - time_format_data.append(l.shortTimeFormat), - 
time_format_data.append(l.longTimeFormat), - days_data.append(l.standaloneShortDays), - days_data.append(l.standaloneLongDays), - days_data.append(l.standaloneNarrowDays), - days_data.append(l.shortDays), - days_data.append(l.longDays), - days_data.append(l.narrowDays), - am_data.append(l.am), - pm_data.append(l.pm), - byte_unit_data.append(l.byte_unit), - byte_unit_data.append(l.byte_si_quantified), - byte_unit_data.append(l.byte_iec_quantified), - currencyIsoCodeData(l.currencyIsoCode), - currency_symbol_data.append(l.currencySymbol), - currency_display_name_data.append(l.currencyDisplayName), - currency_format_data.append(l.currencyFormat), - currency_format_data.append(l.currencyNegativeFormat), - endonyms_data.append(l.languageEndonym), - endonyms_data.append(l.countryEndonym), + l.alternateQuotationEnd) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges) + + (currencyIsoCodeData(l.currencyIsoCode), l.currencyDigits, l.currencyRounding, # unused (QTBUG-81343) l.firstDayOfWeek, l.weekendStart, - l.weekendEnd) + l.weekendEnd)) + ", // %s/%s/%s\n" % (l.language, l.script, l.country)) data_temp_file.write(line_format # All zeros, matching the format: - % ( (0,) * (3 + 8 + 4) + ("0,0",) * (16 + 3) + % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2 + (currencyIsoCodeData(0),) - + ("0,0",) * 6 + (0,) * (2 + 3)) + + (0,) * 2 + + (0,) * 3) + " // trailing zeros\n") data_temp_file.write("};\n") @@ -750,7 +764,7 @@ def main(): os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/text/qlocale_data_p.h") # Generate calendar data - calendar_format = ' {%6d,%6d,%6d,{%5s},{%5s},{%5s},{%5s},{%5s},{%5s}}, ' + calendar_format = ' {%6d,%6d,%6d' + ',%5d' * 6 + ',%3d' * 6 + ' },' for calendar, stem in calendars.items(): months_data = StringData('months_data') calendar_data_file = "q%scalendar_data_p.h" % stem @@ -770,30 +784,38 @@ def main(): + ' lang ' + ' script' + ' terr ' - # Month-name start-end pairs, width 8 (5 plus '{},'): - + ' sShort ' - + ' sLong ' - + ' 
sNarrow' - + ' short ' - + ' long ' - + ' narrow' - # No trailing space on last; be sure - # to pad before adding later entries. + # Month-name start-indices, width 6 (5 + comma): + + 'sLng ' + + 'long ' + + 'sSrt ' + + 'shrt ' + + 'sNrw ' + + 'naro ' + # No individual headers for the sizes. + + 'Sizes...' + '\n') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + try: + # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264) + ranges = (tuple(months_data.append(m[calendar], 16) for m in + (l.standaloneLongMonths, l.longMonths)) + + tuple(months_data.append(m[calendar]) for m in + (l.standaloneShortMonths, l.shortMonths, + l.standaloneNarrowMonths, l.narrowMonths))) + except ValueError as e: + e.args += (l.language, l.script, l.country, stem) + raise + calendar_temp_file.write( calendar_format - % (key[0], key[1], key[2], - months_data.append(l.standaloneShortMonths[calendar]), - months_data.append(l.standaloneLongMonths[calendar]), - months_data.append(l.standaloneNarrowMonths[calendar]), - months_data.append(l.shortMonths[calendar]), - months_data.append(l.longMonths[calendar]), - months_data.append(l.narrowMonths[calendar])) + % ((key[0], key[1], key[2]) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges)) + "// %s/%s/%s\n" % (l.language, l.script, l.country)) - calendar_temp_file.write(calendar_format % ( (0,) * 3 + ('0,0',) * 6 ) - + '// trailing zeros\n') + calendar_temp_file.write(calendar_format % ( (0,) * (3 + 6 * 2) ) + + '// trailing zeros\n') calendar_temp_file.write("};\n") months_data.write(calendar_temp_file) s = calendar_template_file.readline() |