diff options
Diffstat (limited to 'util/local_database')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 69 | ||||
-rw-r--r-- | util/local_database/localexml.py | 10 | ||||
-rwxr-xr-x | util/local_database/qlocalexml2cpp.py | 14 |
3 files changed, 89 insertions, 4 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index fbc28ca712..58ea21edab 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -86,6 +86,47 @@ def parse_list_pattern_part_format(pattern): # This is a very limited parsing of the format for list pattern part only. return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") +def unit_quantifiers(find, path, stem, suffix, known, + # Stop at exa/exbi: 16 exbi = 2^{64} < zetta = + # 1000^7 < zebi = 2^{70}, the next quantifiers up: + si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): + """Work out the unit quantifiers. + + Unfortunately, the CLDR data only go up to terabytes and we want + all the way to exabytes; but we can recognize the SI quantifiers + as prefixes, strip and identify the tail as the localized + translation for 'B' (e.g. French has 'octet' for 'byte' and uses + ko, Mo, Go, To from which we can extrapolate Po, Eo). + + Should be called first for the SI quantifiers, with suffix = 'B', + then for the IEC ones, with suffix = 'iB'; the list known + (initially empty before first call) is used to let the second call + know what the first learned about the localized unit. 
+ """ + if suffix == 'B': # first call, known = [] + tail = suffix + for q in si_quantifiers: + it = find(path, stem % q) + # kB for kilobyte, in contrast with KiB for IEC: + q = q[0] if q == 'kilo' else q[0].upper() + if not it: + it = q + tail + elif it.startswith(q): + rest = it[1:] + tail = rest if all(rest == k for k in known) else suffix + known.append(rest) + yield it + else: # second call, re-using first's known + assert suffix == 'iB' + if known: + byte = known.pop() + if all(byte == k for k in known): + suffix = 'i' + byte + for q in si_quantifiers: + yield find(path, stem % q[:2], + # Those don't (yet, v31) exist in CLDR, so we always fall back to: + q[0].upper() + suffix) + def generateLocaleInfo(path): if not path.endswith(".xml"): return {} @@ -261,6 +302,34 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') ]) + ';' + def findUnitDef(path, stem, fallback=''): + # The displayName for a quantified unit in en.xml is kByte + # instead of kB (etc.), so prefer any unitPattern provided: + for count in ('many', 'few', 'two', 'other', 'zero', 'one'): + try: + ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) + except xpathlite.Error: + continue + + # TODO: exploit count-handling, instead of discarding placeholders + if ans.startswith('{0}'): + ans = ans[3:].lstrip() + if ans: + return ans + + return findEntryDef(path, stem + 'displayName', fallback) + + # First without quantifier, then quantified each way: + result['byte_unit'] = findEntryDef( + path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName', + 'bytes') + stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/' + known = [] # cases where we *do* have a given version: + result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known)) + # IEC 60027-2 + # http://physics.nist.gov/cuu/Units/binary.html + result['byte_iec_quantified'] = 
';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known)) + # Used for month and day data: namings = ( ('standaloneLong', 'stand-alone', 'wide'), diff --git a/util/local_database/localexml.py b/util/local_database/localexml.py index 6db10e2b9a..a47fa6a5ff 100644 --- a/util/local_database/localexml.py +++ b/util/local_database/localexml.py @@ -111,6 +111,7 @@ class Locale: __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym", "listPatternPartStart", "listPatternPartMiddle", "listPatternPartEnd", "listPatternPartTwo", "am", "pm", + 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified', "currencyIsoCode", "currencySymbol", "currencyDisplayName", "currencyFormat", "currencyNegativeFormat" ) + tuple(propsMonthDay()) @@ -169,6 +170,7 @@ class Locale: 'alternateQuotationStart', 'alternateQuotationEnd', 'listPatternPartStart', 'listPatternPartMiddle', 'listPatternPartEnd', 'listPatternPartTwo', + 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified', 'am', 'pm', 'firstDayOfWeek', 'weekendStart', 'weekendEnd', 'longDateFormat', 'shortDateFormat', @@ -180,7 +182,7 @@ class Locale: 'standaloneLongDays', 'standaloneShortDays', 'standaloneNarrowDays', 'currencyIsoCode', 'currencySymbol', 'currencyDisplayName', 'currencyFormat', 'currencyNegativeFormat'): - ent = camelCase(key.split('_')) if '_' in key else key + ent = camelCase(key.split('_')) if key.endswith('_endonym') else key print inner + "<%s>%s</%s>" % (ent, escape(get(key)).encode('utf-8'), ent) for key in ('currencyDigits', 'currencyRounding'): @@ -198,7 +200,8 @@ class Locale: months = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December', ''), days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', - 'Thursday', 'Friday', 'Saturday', '')): + 'Thursday', 'Friday', 'Saturday', ''), + quantifiers=('k', 'M', 'G', 'T', 'P', 'E')): """Returns an object representing the C locale.""" return cls(language='C', 
language_code='0', language_endonym='', script='AnyScript', script_code='0', @@ -211,6 +214,9 @@ class Locale: listPatternPartMiddle='%1, %2', listPatternPartEnd='%1, %2', listPatternPartTwo='%1, %2', + byte_unit='bytes', + byte_si_quantified=';'.join(q + 'B' for q in quantifiers), + byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers), am='AM', pm='PM', firstDayOfWeek='mon', weekendStart='sat', weekendEnd='sun', longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy', diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py index baa5a60263..0f10f8ce2d 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/local_database/qlocalexml2cpp.py @@ -445,6 +445,7 @@ def main(): days_data = StringData('days_data') am_data = StringData('am_data') pm_data = StringData('pm_data') + byte_unit_data = StringData('byte_unit_data') currency_symbol_data = StringData('currency_symbol_data') currency_display_name_data = StringData('currency_display_name_data') currency_format_data = StringData('currency_format_data') @@ -494,6 +495,10 @@ def main(): + ' nDays ' + ' am ' # am/pm indicators + ' pm ' + # Width 8 + comma + + ' byte ' + + ' siQuant ' + + 'iecQuant ' # Width 8+4 + comma + ' currISO ' # Width 11 + comma: @@ -527,6 +532,8 @@ def main(): + '%8d,' * 4 # List patterns, date/time formats, month/day names, am/pm: + '%11s,' * 22 + # SI/IEC byte-unit abbreviations: + + '%8s,' * 3 # Currency ISO code: + ' %10s, ' # Currency and endonyms @@ -574,6 +581,9 @@ def main(): days_data.append(l.narrowDays), am_data.append(l.am), pm_data.append(l.pm), + byte_unit_data.append(l.byte_unit), + byte_unit_data.append(l.byte_si_quantified), + byte_unit_data.append(l.byte_iec_quantified), currencyIsoCodeData(l.currencyIsoCode), currency_symbol_data.append(l.currencySymbol), currency_display_name_data.append(l.currencyDisplayName), @@ -588,7 +598,7 @@ def main(): l.weekendEnd) + ", // %s/%s/%s\n" % (l.language, l.script, l.country)) 
data_temp_file.write(line_format # All zeros, matching the format: - % ( (0,) * (3 + 8 + 4) + ("0,0",) * 22 + % ( (0,) * (3 + 8 + 4) + ("0,0",) * (22 + 3) + (currencyIsoCodeData(0),) + ("0,0",) * 6 + (0,) * (2 + 3)) + " // trailing 0s\n") @@ -597,7 +607,7 @@ def main(): # StringData tables: for data in (list_pattern_part_data, date_format_data, time_format_data, months_data, days_data, - am_data, pm_data, currency_symbol_data, + byte_unit_data, am_data, pm_data, currency_symbol_data, currency_display_name_data, currency_format_data, endonyms_data): data_temp_file.write("\nstatic const ushort %s[] = {\n" % data.name) |