summaryrefslogtreecommitdiffstats
path: root/util/locale_database
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2020-01-09 14:48:21 +0100
committerEdward Welbourne <edward.welbourne@qt.io>2020-01-30 17:58:53 +0100
commitc08a31634fd8d25d14aed4a73a80f44f254163f3 (patch)
treeb19cf37f2e7405845f72613bd59911256c19a4dc /util/locale_database
parentc0f041fcdf9573c0777fd19a0ce012fedf83fec4 (diff)
Separate offsets from sizes in QLocale's data
This enables us to make the sizes quint8 and benefit from the resulting packing, making the locale data smaller. The sizes for long month-name lists (which concatenate twelve names with semicolon as separator) can overflow an 8-bit member, so use quint16 where needed. Re-ordered the data in QLocaleData and QCalendarLocale. Now all long-short(-narrow) families arise in that order; and any standalone is grouped with the one of the same length. (This cost 20 bytes in the date-format table, which optimises out more duplication if short is before long, but the saving in the (smaller) time-format table more than make up for it; and 20 bytes isn't worth the confusion that being inconsistent in ordering might cause.) At the same time, drop trailing semicolons from list entries (which join various names with semicolon) as they're not needed: we know where the end of the list is, because we know the size of the string that results from concatenation. The code that parses such lists can even correctly handle empty entries at the end. Saves 26 kB of data in the compiled binaries. Task-number: QTBUG-81053 Change-Id: If6ccc96a6910828817aa605d10fd814f567ae1e8 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'util/locale_database')
-rwxr-xr-xutil/locale_database/cldr2qlocalexml.py20
-rw-r--r--util/locale_database/localexml.py10
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py210
3 files changed, 132 insertions, 108 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index e321cb501e..625f1c32c4 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -352,13 +352,15 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['currencySymbol'] = ''
result['currencyDisplayName'] = ''
if result['currencyIsoCode']:
- result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
- result['currencyDisplayName'] = ';'.join(
- findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
- + ']/displayName' + tail)
- for tail in ['',] + [
- '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
- ]) + ';'
+ stem = "numbers/currencies/currency[%s]/" % result['currencyIsoCode']
+ result['currencySymbol'] = findEntryDef(path, stem + 'symbol')
+ displays = tuple(findEntryDef(path, stem + 'displayName' + tail)
+ for tail in ('',) + tuple(
+ '[count=%s]' % x for x in ('zero', 'one', 'two',
+ 'few', 'many', 'other')))
+ while displays and not displays[-1]:
+ displays = displays[:-1]
+ result['currencyDisplayName'] = ';'.join(displays)
def findUnitDef(path, stem, fallback=''):
# The displayName for a quantified unit in en.xml is kByte
@@ -405,7 +407,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
result[key + 'Months_' + cal] = ';'.join(
findEntry(path, stem + prop + "month[%d]" % i)
- for i in range(1, 13)) + ';'
+ for i in range(1, 13))
# Day data (for Gregorian, at least):
stem = 'dates/calendars/calendar[gregorian]/days/'
@@ -414,7 +416,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
result[key + 'Days'] = ';'.join(
findEntry(path, stem + prop + '[' + day + ']')
- for day in days) + ';'
+ for day in days)
return Locale(result)
diff --git a/util/locale_database/localexml.py b/util/locale_database/localexml.py
index 9b353f5122..9958398938 100644
--- a/util/locale_database/localexml.py
+++ b/util/locale_database/localexml.py
@@ -1,7 +1,7 @@
# coding=utf8
#############################################################################
##
-## Copyright (C) 2018 The Qt Company Ltd.
+## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -267,7 +267,7 @@ class Locale:
except KeyError: # Need to add an entry to known, above.
print 'Unsupported calendar:', cal
raise
- names, get = data[0] + ('',), data[1:]
+ names, get = data[0], data[1:]
for n, size in enumerate(sizes):
yield ('_'.join((camelCase((size, 'months')), cal)),
';'.join(get[n][0](i, x) for i, x in enumerate(names)))
@@ -279,7 +279,7 @@ class Locale:
def C(cls, calendars=('gregorian',),
# Empty entry at end to ensure final separator when join()ed:
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
- 'Thursday', 'Friday', 'Saturday', ''),
+ 'Thursday', 'Friday', 'Saturday'),
quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(dict(cls.__monthNames(calendars)),
@@ -303,11 +303,11 @@ class Locale:
longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
- narrowDays='7;1;2;3;4;5;6;',
+ narrowDays='7;1;2;3;4;5;6',
standaloneLongDays=';'.join(days),
standaloneShortDays=';'.join(d[:3] for d in days),
standaloneNarrowDays=';'.join(d[:1] for d in days),
currencyIsoCode='', currencySymbol='',
- currencyDisplayName=';' * 7,
+ currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
currencyFormat='%1%2', currencyNegativeFormat='')
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index e5e5cccbff..0cfa0f03e4 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -259,13 +259,16 @@ def unicode2hex(s):
return lst
class StringDataToken:
- def __init__(self, index, length):
- if index > 0xFFFF or length > 0xFFFF:
- raise Error("Position exceeds ushort range: %d,%d " % (index, length))
+ def __init__(self, index, length, bits):
+ if index > 0xffff:
+ print "\n\n\n#error Data index is too big!", index
+ raise ValueError("Start-index (%d) exceeds the uint16 range!" % index)
+ if length >= (1 << bits):
+ print "\n\n\n#error Range length is too big!", length
+ raise ValueError("Data size (%d) exceeds the %d-bit range!" % (length, bits))
+
self.index = index
self.length = length
- def __str__(self):
- return " %d,%d " % (self.index, self.length)
class StringData:
def __init__(self, name):
@@ -274,22 +277,22 @@ class StringData:
self.name = name
self.text = '' # Used in quick-search for matches in data
- def append(self, s):
+ def append(self, s, bits=8):
try:
token = self.hash[s]
except KeyError:
- token = self.__store(s)
+ token = self.__store(s, bits)
self.hash[s] = token
return token
- def __store(self, s):
+ def __store(self, s, bits):
"""Add string s to known data.
Seeks to avoid duplication, where possible.
For example, short-forms may be prefixes of long-forms.
"""
if not s:
- return StringDataToken(0, 0)
+ return StringDataToken(0, 0, bits)
ucs2 = unicode2hex(s)
try:
index = self.text.index(s) - 1
@@ -307,12 +310,15 @@ class StringData:
assert index >= 0
try:
- return StringDataToken(index, len(ucs2))
+ return StringDataToken(index, len(ucs2), bits)
except ValueError as e:
e.args += (self.name, s)
raise
def write(self, fd):
+ if len(self.data) > 0xffff:
+ raise ValueError("Data is too big for quint16 index to its end!" % len(self.data),
+ self.name)
fd.write("\nstatic const ushort %s[] = {\n" % self.name)
fd.write(wrap_list(self.data))
fd.write("\n};\n")
@@ -498,39 +504,43 @@ def main():
+ ' quotEnd '
+ 'altQtOpn '
+ 'altQtEnd '
- # Width 11 + comma:
- + ' lpStart ' # List pattern
- + ' lpMid '
- + ' lpEnd '
- + ' lpTwo '
- + ' sDtFmt ' # Date format
- + ' lDtFmt '
- + ' sTmFmt ' # Time format
- + ' lTmFmt '
- + ' ssDays ' # Days
- + ' slDays '
- + ' snDays '
- + ' sDays '
- + ' lDays '
- + ' nDays '
- + ' am ' # am/pm indicators
- + ' pm '
- # Width 8 + comma
- + ' byte '
- + ' siQuant '
- + 'iecQuant '
+
+ # Range entries (all start-indices, then all sizes):
+ # Width 5 + comma:
+ + 'lStrt ' # List pattern
+ + 'lpMid '
+ + 'lpEnd '
+ + 'lPair '
+ + 'lDFmt ' # Date format
+ + 'sDFmt '
+ + 'lTFmt ' # Time format
+ + 'sTFmt '
+ + 'slDay ' # Day names
+ + 'lDays '
+ + 'ssDys '
+ + 'sDays '
+ + 'snDay '
+ + 'nDays '
+ + ' am ' # am/pm indicators
+ + ' pm '
+ + ' byte '
+ + 'siQnt '
+ + 'iecQn '
+ + 'crSym ' # Currency formatting:
+ + 'crDsp '
+ + 'crFmt '
+ + 'crFNg '
+ + 'ntLng ' # Name of language in itself, and of territory:
+ + 'ntTer '
+ # Width 3 + comma for each size; no header
+ + ' ' * 25
+
+ # Strays (char array, bit-fields):
# Width 8+4 + comma
+ ' currISO '
- # Width 11 + comma:
- + ' currSym ' # Currency formatting:
- + ' currDsply '
- + ' currFmt '
- + ' currFmtNeg '
- + ' endoLang ' # Name of language in itself, and of country:
- + ' endoCntry '
# Width 6 + comma:
- + 'curDgt ' # Currency number representation:
- + 'curRnd '
+ + 'curDgt ' # Currency digits
+ + 'curRnd ' # Currencty rounding (unused: QTBUG-81343)
+ 'dow1st ' # First day of week
+ ' wknd+ ' # Week-end start/end days:
+ ' wknd-'
@@ -550,14 +560,16 @@ def main():
+ '%6d,' * 8
# Quotation marks:
+ '%8d,' * 4
+
# List patterns, date/time formats, month/day names, am/pm:
- + '%11s,' * 16
# SI/IEC byte-unit abbreviations:
- + '%8s,' * 3
+ # Currency and endonyms
+ + '%5d,' * 25
+ # Sizes for the same:
+ + '%3d,' * 25
+
# Currency ISO code:
+ ' %10s, '
- # Currency and endonyms
- + '%11s,' * 6
# Currency formatting:
+ '%6d,%6d'
# Day of week and week-end:
@@ -565,8 +577,32 @@ def main():
+ ' }')
for key in locale_keys:
l = locale_map[key]
+ # Sequence of StringDataToken:
+ ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries:
+ (l.listPatternPartStart, l.listPatternPartMiddle,
+ l.listPatternPartEnd, l.listPatternPartTwo)) +
+ tuple (date_format_data.append(f) for f in # 2 entries:
+ (l.longDateFormat, l.shortDateFormat)) +
+ tuple(time_format_data.append(f) for f in # 2 entries:
+ (l.longTimeFormat, l.shortTimeFormat)) +
+ tuple(days_data.append(d) for d in # 6 entries:
+ (l.standaloneLongDays, l.longDays,
+ l.standaloneShortDays, l.shortDays,
+ l.standaloneNarrowDays, l.narrowDays)) +
+ (am_data.append(l.am), pm_data.append(l.pm)) + # 2 entries:
+ tuple(byte_unit_data.append(b) for b in # 3 entries:
+ (l.byte_unit, l.byte_si_quantified, l.byte_iec_quantified)) +
+ (currency_symbol_data.append(l.currencySymbol),
+ currency_display_name_data.append(l.currencyDisplayName),
+ currency_format_data.append(l.currencyFormat),
+ currency_format_data.append(l.currencyNegativeFormat),
+ endonyms_data.append(l.languageEndonym),
+ endonyms_data.append(l.countryEndonym)) # 6 entries
+ ) # Total: 25 entries
+ assert len(ranges) == 25
+
data_temp_file.write(line_format
- % (key[0], key[1], key[2],
+ % ((key[0], key[1], key[2],
l.decimal,
l.group,
l.listDelim,
@@ -578,43 +614,21 @@ def main():
l.quotationStart,
l.quotationEnd,
l.alternateQuotationStart,
- l.alternateQuotationEnd,
- list_pattern_part_data.append(l.listPatternPartStart),
- list_pattern_part_data.append(l.listPatternPartMiddle),
- list_pattern_part_data.append(l.listPatternPartEnd),
- list_pattern_part_data.append(l.listPatternPartTwo),
- date_format_data.append(l.shortDateFormat),
- date_format_data.append(l.longDateFormat),
- time_format_data.append(l.shortTimeFormat),
- time_format_data.append(l.longTimeFormat),
- days_data.append(l.standaloneShortDays),
- days_data.append(l.standaloneLongDays),
- days_data.append(l.standaloneNarrowDays),
- days_data.append(l.shortDays),
- days_data.append(l.longDays),
- days_data.append(l.narrowDays),
- am_data.append(l.am),
- pm_data.append(l.pm),
- byte_unit_data.append(l.byte_unit),
- byte_unit_data.append(l.byte_si_quantified),
- byte_unit_data.append(l.byte_iec_quantified),
- currencyIsoCodeData(l.currencyIsoCode),
- currency_symbol_data.append(l.currencySymbol),
- currency_display_name_data.append(l.currencyDisplayName),
- currency_format_data.append(l.currencyFormat),
- currency_format_data.append(l.currencyNegativeFormat),
- endonyms_data.append(l.languageEndonym),
- endonyms_data.append(l.countryEndonym),
+ l.alternateQuotationEnd) +
+ tuple(r.index for r in ranges) +
+ tuple(r.length for r in ranges) +
+ (currencyIsoCodeData(l.currencyIsoCode),
l.currencyDigits,
l.currencyRounding, # unused (QTBUG-81343)
l.firstDayOfWeek,
l.weekendStart,
- l.weekendEnd)
+ l.weekendEnd))
+ ", // %s/%s/%s\n" % (l.language, l.script, l.country))
data_temp_file.write(line_format # All zeros, matching the format:
- % ( (0,) * (3 + 8 + 4) + ("0,0",) * (16 + 3)
+ % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2
+ (currencyIsoCodeData(0),)
- + ("0,0",) * 6 + (0,) * (2 + 3))
+ + (0,) * 2
+ + (0,) * 3)
+ " // trailing zeros\n")
data_temp_file.write("};\n")
@@ -750,7 +764,7 @@ def main():
os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/text/qlocale_data_p.h")
# Generate calendar data
- calendar_format = ' {%6d,%6d,%6d,{%5s},{%5s},{%5s},{%5s},{%5s},{%5s}}, '
+ calendar_format = ' {%6d,%6d,%6d' + ',%5d' * 6 + ',%3d' * 6 + ' },'
for calendar, stem in calendars.items():
months_data = StringData('months_data')
calendar_data_file = "q%scalendar_data_p.h" % stem
@@ -770,30 +784,38 @@ def main():
+ ' lang '
+ ' script'
+ ' terr '
- # Month-name start-end pairs, width 8 (5 plus '{},'):
- + ' sShort '
- + ' sLong '
- + ' sNarrow'
- + ' short '
- + ' long '
- + ' narrow'
- # No trailing space on last; be sure
- # to pad before adding later entries.
+ # Month-name start-indices, width 6 (5 + comma):
+ + 'sLng '
+ + 'long '
+ + 'sSrt '
+ + 'shrt '
+ + 'sNrw '
+ + 'naro '
+ # No individual headers for the sizes.
+ + 'Sizes...'
+ '\n')
for key in locale_keys:
l = locale_map[key]
+ # Sequence of StringDataToken:
+ try:
+ # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
+ ranges = (tuple(months_data.append(m[calendar], 16) for m in
+ (l.standaloneLongMonths, l.longMonths)) +
+ tuple(months_data.append(m[calendar]) for m in
+ (l.standaloneShortMonths, l.shortMonths,
+ l.standaloneNarrowMonths, l.narrowMonths)))
+ except ValueError as e:
+ e.args += (l.language, l.script, l.country, stem)
+ raise
+
calendar_temp_file.write(
calendar_format
- % (key[0], key[1], key[2],
- months_data.append(l.standaloneShortMonths[calendar]),
- months_data.append(l.standaloneLongMonths[calendar]),
- months_data.append(l.standaloneNarrowMonths[calendar]),
- months_data.append(l.shortMonths[calendar]),
- months_data.append(l.longMonths[calendar]),
- months_data.append(l.narrowMonths[calendar]))
+ % ((key[0], key[1], key[2]) +
+ tuple(r.index for r in ranges) +
+ tuple(r.length for r in ranges))
+ "// %s/%s/%s\n" % (l.language, l.script, l.country))
- calendar_temp_file.write(calendar_format % ( (0,) * 3 + ('0,0',) * 6 )
- + '// trailing zeros\n')
+ calendar_temp_file.write(calendar_format % ( (0,) * (3 + 6 * 2) )
+ + '// trailing zeros\n')
calendar_temp_file.write("};\n")
months_data.write(calendar_temp_file)
s = calendar_template_file.readline()