summaryrefslogtreecommitdiffstats
path: root/util/local_database
diff options
context:
space:
mode:
Diffstat (limited to 'util/local_database')
-rwxr-xr-xutil/local_database/cldr2qlocalexml.py69
-rw-r--r--util/local_database/localexml.py10
-rwxr-xr-xutil/local_database/qlocalexml2cpp.py14
3 files changed, 89 insertions, 4 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index fbc28ca712..58ea21edab 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -86,6 +86,47 @@ def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only.
return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
+def unit_quantifiers(find, path, stem, suffix, known,
+ # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
+ # 1000^7 < zebi = 2^{70}, the next quantifiers up:
+ si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
+ """Work out the unit quantifiers.
+
+ Unfortunately, the CLDR data only go up to terabytes and we want
+ all the way to exabytes; but we can recognize the SI quantifiers
+ as prefixes, strip and identify the tail as the localized
+ translation for 'B' (e.g. French has 'octet' for 'byte' and uses
+ ko, Mo, Go, To from which we can extrapolate Po, Eo).
+
+ Should be called first for the SI quantifiers, with suffix = 'B',
+ then for the IEC ones, with suffix = 'iB'; the list known
+ (initially empty before first call) is used to let the second call
+ know what the first learned about the localized unit.
+ """
+ if suffix == 'B': # first call, known = []
+ tail = suffix
+ for q in si_quantifiers:
+ it = find(path, stem % q)
+ # kB for kilobyte, in contrast with KiB for IEC:
+ q = q[0] if q == 'kilo' else q[0].upper()
+ if not it:
+ it = q + tail
+ elif it.startswith(q):
+ rest = it[1:]
+ tail = rest if all(rest == k for k in known) else suffix
+ known.append(rest)
+ yield it
+ else: # second call, re-using first's known
+ assert suffix == 'iB'
+ if known:
+ byte = known.pop()
+ if all(byte == k for k in known):
+ suffix = 'i' + byte
+ for q in si_quantifiers:
+ yield find(path, stem % q[:2],
+ # Those don't (yet, v31) exist in CLDR, so we always fall back to:
+ q[0].upper() + suffix)
+
def generateLocaleInfo(path):
if not path.endswith(".xml"):
return {}
@@ -261,6 +302,34 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
'[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
]) + ';'
+ def findUnitDef(path, stem, fallback=''):
+ # The displayName for a quantified unit in en.xml is kByte
+ # instead of kB (etc.), so prefer any unitPattern provided:
+ for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
+ try:
+ ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
+ except xpathlite.Error:
+ continue
+
+ # TODO: epxloit count-handling, instead of discarding placeholders
+ if ans.startswith('{0}'):
+ ans = ans[3:].lstrip()
+ if ans:
+ return ans
+
+ return findEntryDef(path, stem + 'displayName', fallback)
+
+ # First without quantifier, then quantified each way:
+ result['byte_unit'] = findEntryDef(
+ path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
+ 'bytes')
+ stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
+ known = [] # cases where we *do* have a given version:
+ result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
+ # IEC 60027-2
+ # http://physics.nist.gov/cuu/Units/binary.html
+ result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))
+
# Used for month and day data:
namings = (
('standaloneLong', 'stand-alone', 'wide'),
diff --git a/util/local_database/localexml.py b/util/local_database/localexml.py
index 6db10e2b9a..a47fa6a5ff 100644
--- a/util/local_database/localexml.py
+++ b/util/local_database/localexml.py
@@ -111,6 +111,7 @@ class Locale:
__astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
"listPatternPartStart", "listPatternPartMiddle",
"listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+ 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
"currencyIsoCode", "currencySymbol", "currencyDisplayName",
"currencyFormat", "currencyNegativeFormat"
) + tuple(propsMonthDay())
@@ -169,6 +170,7 @@ class Locale:
'alternateQuotationStart', 'alternateQuotationEnd',
'listPatternPartStart', 'listPatternPartMiddle',
'listPatternPartEnd', 'listPatternPartTwo',
+ 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
'am', 'pm', 'firstDayOfWeek',
'weekendStart', 'weekendEnd',
'longDateFormat', 'shortDateFormat',
@@ -180,7 +182,7 @@ class Locale:
'standaloneLongDays', 'standaloneShortDays', 'standaloneNarrowDays',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
'currencyFormat', 'currencyNegativeFormat'):
- ent = camelCase(key.split('_')) if '_' in key else key
+ ent = camelCase(key.split('_')) if key.endswith('_endonym') else key
print inner + "<%s>%s</%s>" % (ent, escape(get(key)).encode('utf-8'), ent)
for key in ('currencyDigits', 'currencyRounding'):
@@ -198,7 +200,8 @@ class Locale:
months = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December', ''),
days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
- 'Thursday', 'Friday', 'Saturday', '')):
+ 'Thursday', 'Friday', 'Saturday', ''),
+ quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
"""Returns an object representing the C locale."""
return cls(language='C', language_code='0', language_endonym='',
script='AnyScript', script_code='0',
@@ -211,6 +214,9 @@ class Locale:
listPatternPartMiddle='%1, %2',
listPatternPartEnd='%1, %2',
listPatternPartTwo='%1, %2',
+ byte_unit='bytes',
+ byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
+ byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
am='AM', pm='PM', firstDayOfWeek='mon',
weekendStart='sat', weekendEnd='sun',
longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index baa5a60263..0f10f8ce2d 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -445,6 +445,7 @@ def main():
days_data = StringData('days_data')
am_data = StringData('am_data')
pm_data = StringData('pm_data')
+ byte_unit_data = StringData('byte_unit_data')
currency_symbol_data = StringData('currency_symbol_data')
currency_display_name_data = StringData('currency_display_name_data')
currency_format_data = StringData('currency_format_data')
@@ -494,6 +495,10 @@ def main():
+ ' nDays '
+ ' am ' # am/pm indicators
+ ' pm '
+ # Width 8 + comma
+ + ' byte '
+ + ' siQuant '
+ + 'iecQuant '
# Width 8+4 + comma
+ ' currISO '
# Width 11 + comma:
@@ -527,6 +532,8 @@ def main():
+ '%8d,' * 4
# List patterns, date/time formats, month/day names, am/pm:
+ '%11s,' * 22
+ # SI/IEC byte-unit abbreviations:
+ + '%8s,' * 3
# Currency ISO code:
+ ' %10s, '
# Currency and endonyms
@@ -574,6 +581,9 @@ def main():
days_data.append(l.narrowDays),
am_data.append(l.am),
pm_data.append(l.pm),
+ byte_unit_data.append(l.byte_unit),
+ byte_unit_data.append(l.byte_si_quantified),
+ byte_unit_data.append(l.byte_iec_quantified),
currencyIsoCodeData(l.currencyIsoCode),
currency_symbol_data.append(l.currencySymbol),
currency_display_name_data.append(l.currencyDisplayName),
@@ -588,7 +598,7 @@ def main():
l.weekendEnd)
+ ", // %s/%s/%s\n" % (l.language, l.script, l.country))
data_temp_file.write(line_format # All zeros, matching the format:
- % ( (0,) * (3 + 8 + 4) + ("0,0",) * 22
+ % ( (0,) * (3 + 8 + 4) + ("0,0",) * (22 + 3)
+ (currencyIsoCodeData(0),)
+ ("0,0",) * 6 + (0,) * (2 + 3))
+ " // trailing 0s\n")
@@ -597,7 +607,7 @@ def main():
# StringData tables:
for data in (list_pattern_part_data, date_format_data,
time_format_data, months_data, days_data,
- am_data, pm_data, currency_symbol_data,
+ byte_unit_data, am_data, pm_data, currency_symbol_data,
currency_display_name_data, currency_format_data,
endonyms_data):
data_temp_file.write("\nstatic const ushort %s[] = {\n" % data.name)