diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2020-01-17 11:00:24 +0100 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2020-07-14 14:52:08 +0200 |
commit | bb6a73260ec8272647265f42180963604ad0f755 (patch) | |
tree | a85d2f1f0a2e11eaa9ed8dcb2e7d3032faa4acf2 /util/locale_database | |
parent | 19ed60d703f9071d183760a8ce1c50009c780bf8 (diff) |
Support digit-grouping correctly
Read three more values from CLDR and add a byte to the bit-fields at
the end of QLocaleData, indicating the three group sizes. This adds
three new parameters to various low-level formatting functions. At the
same time, rename ThousandsGroup to GroupDigits, more faithfully
expressing what this (internal) option means.
This replaces commit 27d139128013c969a939779536485c1a80be977e with a
fuller implementation that handles digit-grouping in any of the ways
that CLDR supports. The formerly "Indian" formatting now also applies
to at least some locales for Bangladesh, Bhutan and Sri Lanka.
Fixed Costa Rica currency formatting test that wrongly put a separator
after the first digit; the locale (in common with several Spanish
locales) requires at least two digits before the first separator.
[ChangeLog][QtCore][Important Behavior Changes] Some locales require
more than one digit before the first grouping separator; others use
group sizes other than three. The latter was partially supported (only
for India) at 5.15 but is now systematically supported; the former is
now also supported.
Task-number: QTBUG-24301
Fixes: QTBUG-81050
Change-Id: I4ea4e331f3254d1f34801cddf51f3c65d3815573
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'util/locale_database')
-rw-r--r-- | util/locale_database/ldml.py | 31 |
-rw-r--r-- | util/locale_database/qlocalexml.py | 7 |
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 15 |
3 files changed, 47 insertions, 6 deletions
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index e3e3a2e4ba..0f1cefc30c 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -294,6 +294,7 @@ class LocaleScanner (object): yield 'percent', self.find(stem + 'percentSign') yield 'list', self.find(stem + 'list') yield 'exp', self.find(stem + 'exponential') + yield 'groupSizes', self.__numberGrouping(system) digits = lookup(system)['digits'] assert len(digits) == 10 @@ -527,6 +528,36 @@ class LocaleScanner (object): cache.append(rest) yield it + def __numberGrouping(self, system): + """Sizes of groups of digits within a number. + + Returns a triple (least, higher, top) for which: + * least is the number of digits after the last grouping + separator; + * higher is the number of digits between grouping + separators; + * top is the fewest digits that can appear before the first + grouping separator. + + Thus (4, 3, 2) would want 1e7 as 1000,0000 but 1e8 as 10,000,0000. + + Note: CLDR does countenance the possibility of grouping also + in the fractional part. This is not presently attempted. 
Nor + is placement of the sign character anywhere but at the start + of the number (some formats may place it at the end, possibly + elsewhere).""" + top = int(self.find('numbers/minimumGroupingDigits')) + assert top < 4, top # We store it in a 2-bit field + grouping = self.find('numbers/decimalFormats[numberSystem=' + + system + ']/decimalFormatLength/decimalFormat/pattern') + groups = grouping.split('.')[0].split(',')[-3:] + assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields + if len(groups) > 2: + return len(groups[-1]), len(groups[-2]), top + + size = len(groups[-1]) if len(groups) == 2 else 3 + return size, size, top + @staticmethod def __currencyFormats(patterns, plus, minus): for p in patterns.split(';'): diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 550021ba01..d8a99f3f3d 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -476,6 +476,11 @@ class Locale (object): for k in cls.propsMonthDay('months'): data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars) + grouping = lookup('groupSizes').split(';') + data.update(groupLeast = int(grouping[0]), + groupHigher = int(grouping[1]), + groupTop = int(grouping[2])) + return cls(data) def toXml(self, write, calendars=('gregorian',)): @@ -515,6 +520,7 @@ class Locale (object): for cal in calendars): write(key, escape(get(key)).encode('utf-8')) + write('groupSizes', ';'.join(str(x) for x in get('groupSizes'))) for key in ('currencyDigits', 'currencyRounding'): write(key, get(key)) @@ -586,6 +592,7 @@ class Locale (object): language='C', language_code='0', languageEndonym='', script='AnyScript', script_code='0', country='AnyCountry', country_code='0', countryEndonym='', + groupSizes=(3, 3, 1), decimal='.', group=',', list=';', percent='%', zero='0', minus='-', plus='+', exp='e', quotationStart='"', quotationEnd='"', diff --git a/util/locale_database/qlocalexml2cpp.py 
b/util/locale_database/qlocalexml2cpp.py index c670672fc0..4235f1feb3 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -253,7 +253,10 @@ class LocaleDataWriter (LocaleSourceEditor): 'curRnd ' # Currencty rounding (unused: QTBUG-81343) 'dow1st ' # First day of week ' wknd+ ' # Week-end start/end days - ' wknd-' + ' wknd- ' + 'grpTop ' + 'grpMid ' + 'grpEnd' # No trailing space on last entry (be sure to # pad before adding anything after it). '\n') @@ -276,6 +279,8 @@ class LocaleDataWriter (LocaleSourceEditor): '{:6d},{:6d}', # Day of week and week-end ',{:6d}' * 3, + # Number group sizes + ',{:6d}' * 3, ' }}')).format for key in names: locale = locales[key] @@ -318,16 +323,14 @@ class LocaleDataWriter (LocaleSourceEditor): (currencyIsoCodeData(locale.currencyIsoCode), locale.currencyDigits, locale.currencyRounding, # unused (QTBUG-81343) - locale.firstDayOfWeek, - locale.weekendStart, - locale.weekendEnd) )) + locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd, + locale.groupTop, locale.groupHigher, locale.groupLeast) )) + ', // {}/{}/{}\n'.format( locale.language, locale.script, locale.country)) self.writer.write(formatLine(*( # All zeros, matching the format: (0,) * 3 + (0,) * 37 * 2 + (currencyIsoCodeData(0),) - + (0,) * 2 - + (0,) * 3 )) + + (0,) * 8 )) + ' // trailing zeros\n') self.writer.write('};\n') |