diff options
Diffstat (limited to 'util/local_database/cldr2qlocalexml.py')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 638 |
1 files changed, 201 insertions, 437 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index 41cfafab0d..58ea21edab 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 ############################################################################# ## -## Copyright (C) 2016 The Qt Company Ltd. +## Copyright (C) 2017 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -26,20 +26,31 @@ ## $QT_END_LICENSE$ ## ############################################################################# +"""Convert CLDR data to qLocaleXML + +The CLDR data can be downloaded from CLDR_, which has a sub-directory +for each version; you need the ``core.zip`` file for your version of +choice (typically the latest). This script has had updates to cope up +to v29; for later versions, we may need adaptations. Unpack the +downloaded ``core.zip`` and check it has a common/main/ sub-directory: +pass the path of that sub-directory to this script as its single +command-line argument. Save its standard output (but not error) to a +file for later processing by ``./qlocalexml2cpp.py`` + +.. _CLDR: ftp://unicode.org/Public/cldr/ +""" import os import sys +import re + import enumdata import xpathlite -from xpathlite import DraftResolution +from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile from dateconverter import convert_date -from xml.sax.saxutils import escape, unescape -import re +from localexml import Locale -findAlias = xpathlite.findAlias -findEntry = xpathlite.findEntry findEntryInFile = xpathlite._findEntryInFile -findTagsInFile = xpathlite.findTagsInFile def parse_number_format(patterns, data): # this is a very limited parsing of the number format for currency only. @@ -72,42 +83,49 @@ def parse_number_format(patterns, data): return result def parse_list_pattern_part_format(pattern): - # this is a very limited parsing of the format for list pattern part only. - result = "" - result = pattern.replace("{0}", "%1") - result = result.replace("{1}", "%2") - result = result.replace("{2}", "%3") - return result - -def ordStr(c): - if len(c) == 1: - return str(ord(c)) - raise xpathlite.Error("Unable to handle value \"%s\"" % addEscapes(c)) - return "##########" - -# the following functions are supposed to fix the problem with QLocale -# returning a character instead of strings for QLocale::exponential() -# and others. So we fallback to default values in these cases. -def fixOrdStrMinus(c): - if len(c) == 1: - return str(ord(c)) - return str(ord('-')) -def fixOrdStrPlus(c): - if len(c) == 1: - return str(ord(c)) - return str(ord('+')) -def fixOrdStrExp(c): - if len(c) == 1: - return str(ord(c)) - return str(ord('e')) -def fixOrdStrPercent(c): - if len(c) == 1: - return str(ord(c)) - return str(ord('%')) -def fixOrdStrList(c): - if len(c) == 1: - return str(ord(c)) - return str(ord(';')) + # This is a very limited parsing of the format for list pattern part only. + return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") + +def unit_quantifiers(find, path, stem, suffix, known, + # Stop at exa/exbi: 16 exbi = 2^{64} < zetta = + # 1000^7 < zebi = 2^{70}, the next quantifiers up: + si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): + """Work out the unit quantifiers. + + Unfortunately, the CLDR data only go up to terabytes and we want + all the way to exabytes; but we can recognize the SI quantifiers + as prefixes, strip and identify the tail as the localized + translation for 'B' (e.g. French has 'octet' for 'byte' and uses + ko, Mo, Go, To from which we can extrapolate Po, Eo). + + Should be called first for the SI quantifiers, with suffix = 'B', + then for the IEC ones, with suffix = 'iB'; the list known + (initially empty before first call) is used to let the second call + know what the first learned about the localized unit. + """ + if suffix == 'B': # first call, known = [] + tail = suffix + for q in si_quantifiers: + it = find(path, stem % q) + # kB for kilobyte, in contrast with KiB for IEC: + q = q[0] if q == 'kilo' else q[0].upper() + if not it: + it = q + tail + elif it.startswith(q): + rest = it[1:] + tail = rest if all(rest == k for k in known) else suffix + known.append(rest) + yield it + else: # second call, re-using first's known + assert suffix == 'iB' + if known: + byte = known.pop() + if all(byte == k for k in known): + suffix = 'i' + byte + for q in si_quantifiers: + yield find(path, stem % q[:2], + # Those don't (yet, v31) exist in CLDR, so we always fall back to: + q[0].upper() + suffix) def generateLocaleInfo(path): if not path.endswith(".xml"): @@ -116,14 +134,13 @@ def generateLocaleInfo(path): # skip legacy/compatibility ones alias = findAlias(path) if alias: - raise xpathlite.Error("alias to \"%s\"" % alias) + raise xpathlite.Error('alias to "%s"' % alias) - language_code = findEntryInFile(path, "identity/language", attribute="type")[0] - country_code = findEntryInFile(path, "identity/territory", attribute="type")[0] - script_code = findEntryInFile(path, "identity/script", attribute="type")[0] - variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0] + def code(tag): + return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] - return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code) + return _generateLocaleInfo(path, code('language'), code('script'), + code('territory'), code('variant')) def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""): if not path.endswith(".xml"): @@ -137,25 +154,22 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ # ### actually there is only one locale with variant: en_US_POSIX # does anybody care about it at all? if variant_code: - raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code) + raise xpathlite.Error('we do not support variants ("%s")' % variant_code) language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: - raise xpathlite.Error("unknown language code \"%s\"" % language_code) - language = enumdata.language_list[language_id][0] + raise xpathlite.Error('unknown language code "%s"' % language_code) script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: - raise xpathlite.Error("unknown script code \"%s\"" % script_code) - script = enumdata.script_list[script_id][0] + raise xpathlite.Error('unknown script code "%s"' % script_code) # we should handle fully qualified names with the territory if not country_code: return {} country_id = enumdata.countryCodeToId(country_code) if country_id <= 0: - raise xpathlite.Error("unknown country code \"%s\"" % country_code) - country = enumdata.country_list[country_id][0] + raise xpathlite.Error('unknown country code "%s"' % country_code) # So we say we accept only those values that have "contributed" or # "approved" resolution. see http://www.unicode.org/cldr/process.html @@ -163,39 +177,39 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ # compatibility. draft = DraftResolution.contributed - result = {} - result['language'] = language - result['script'] = script - result['country'] = country - result['language_code'] = language_code - result['country_code'] = country_code - result['script_code'] = script_code - result['variant_code'] = variant_code - result['language_id'] = language_id - result['script_id'] = script_id - result['country_id'] = country_id + result = dict( + language=enumdata.language_list[language_id][0], + language_code=language_code, language_id=language_id, + script=enumdata.script_list[script_id][0], + script_code=script_code, script_id=script_id, + country=enumdata.country_list[country_id][0], + country_code=country_code, country_id=country_id, + variant_code=variant_code) (dir_name, file_name) = os.path.split(path) - supplementalPath = dir_name + "/../supplemental/supplementalData.xml" - currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code); + def from_supplement(tag, + path=os.path.join(dir_name, '..', 'supplemental', + 'supplementalData.xml')): + return findTagsInFile(path, tag) + currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code) result['currencyIsoCode'] = '' result['currencyDigits'] = 2 result['currencyRounding'] = 1 if currencies: for e in currencies: if e[0] == 'currency': - tender = True - t = filter(lambda x: x[0] == 'tender', e[1]) - if t and t[0][1] == 'false': - tender = False; - if tender and not filter(lambda x: x[0] == 'to', e[1]): - result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1] + t = [x[1] == 'false' for x in e[1] if x[0] == 'tender'] + if t and t[0]: + pass + elif not any(x[0] == 'to' for x in e[1]): + result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next() break if result['currencyIsoCode']: - t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']); + t = from_supplement("currencyData/fractions/info[iso4217=%s]" + % result['currencyIsoCode']) if t and t[0][0] == 'info': - result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1]) - result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1]) + result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next() + result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next() numbering_system = None try: numbering_system = findEntry(path, "numbers/defaultNumberingSystem") @@ -226,7 +240,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) try: numbering_systems = {} - for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"): + for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'numberingSystems.xml'), + 'numberingSystems'): tmp = {} id = "" for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]] @@ -279,167 +295,70 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['currencyDisplayName'] = '' if result['currencyIsoCode']: result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) - display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode'] - result['currencyDisplayName'] \ - = findEntryDef(path, display_name_path) + ";" \ - + findEntryDef(path, display_name_path + "[count=zero]") + ";" \ - + findEntryDef(path, display_name_path + "[count=one]") + ";" \ - + findEntryDef(path, display_name_path + "[count=two]") + ";" \ - + findEntryDef(path, display_name_path + "[count=few]") + ";" \ - + findEntryDef(path, display_name_path + "[count=many]") + ";" \ - + findEntryDef(path, display_name_path + "[count=other]") + ";" - - standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month" - result['standaloneLongMonths'] \ - = findEntry(path, standalone_long_month_path + "[1]") + ";" \ - + findEntry(path, standalone_long_month_path + "[2]") + ";" \ - + findEntry(path, standalone_long_month_path + "[3]") + ";" \ - + findEntry(path, standalone_long_month_path + "[4]") + ";" \ - + findEntry(path, standalone_long_month_path + "[5]") + ";" \ - + findEntry(path, standalone_long_month_path + "[6]") + ";" \ - + findEntry(path, standalone_long_month_path + "[7]") + ";" \ - + findEntry(path, standalone_long_month_path + "[8]") + ";" \ - + findEntry(path, standalone_long_month_path + "[9]") + ";" \ - + findEntry(path, standalone_long_month_path + "[10]") + ";" \ - + findEntry(path, standalone_long_month_path + "[11]") + ";" \ - + findEntry(path, standalone_long_month_path + "[12]") + ";" - - standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month" - result['standaloneShortMonths'] \ - = findEntry(path, standalone_short_month_path + "[1]") + ";" \ - + findEntry(path, standalone_short_month_path + "[2]") + ";" \ - + findEntry(path, standalone_short_month_path + "[3]") + ";" \ - + findEntry(path, standalone_short_month_path + "[4]") + ";" \ - + findEntry(path, standalone_short_month_path + "[5]") + ";" \ - + findEntry(path, standalone_short_month_path + "[6]") + ";" \ - + findEntry(path, standalone_short_month_path + "[7]") + ";" \ - + findEntry(path, standalone_short_month_path + "[8]") + ";" \ - + findEntry(path, standalone_short_month_path + "[9]") + ";" \ - + findEntry(path, standalone_short_month_path + "[10]") + ";" \ - + findEntry(path, standalone_short_month_path + "[11]") + ";" \ - + findEntry(path, standalone_short_month_path + "[12]") + ";" - - standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month" - result['standaloneNarrowMonths'] \ - = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \ - + findEntry(path, standalone_narrow_month_path + "[12]") + ";" - - long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month" - result['longMonths'] \ - = findEntry(path, long_month_path + "[1]") + ";" \ - + findEntry(path, long_month_path + "[2]") + ";" \ - + findEntry(path, long_month_path + "[3]") + ";" \ - + findEntry(path, long_month_path + "[4]") + ";" \ - + findEntry(path, long_month_path + "[5]") + ";" \ - + findEntry(path, long_month_path + "[6]") + ";" \ - + findEntry(path, long_month_path + "[7]") + ";" \ - + findEntry(path, long_month_path + "[8]") + ";" \ - + findEntry(path, long_month_path + "[9]") + ";" \ - + findEntry(path, long_month_path + "[10]") + ";" \ - + findEntry(path, long_month_path + "[11]") + ";" \ - + findEntry(path, long_month_path + "[12]") + ";" - - short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month" - result['shortMonths'] \ - = findEntry(path, short_month_path + "[1]") + ";" \ - + findEntry(path, short_month_path + "[2]") + ";" \ - + findEntry(path, short_month_path + "[3]") + ";" \ - + findEntry(path, short_month_path + "[4]") + ";" \ - + findEntry(path, short_month_path + "[5]") + ";" \ - + findEntry(path, short_month_path + "[6]") + ";" \ - + findEntry(path, short_month_path + "[7]") + ";" \ - + findEntry(path, short_month_path + "[8]") + ";" \ - + findEntry(path, short_month_path + "[9]") + ";" \ - + findEntry(path, short_month_path + "[10]") + ";" \ - + findEntry(path, short_month_path + "[11]") + ";" \ - + findEntry(path, short_month_path + "[12]") + ";" - - narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month" - result['narrowMonths'] \ - = findEntry(path, narrow_month_path + "[1]") + ";" \ - + findEntry(path, narrow_month_path + "[2]") + ";" \ - + findEntry(path, narrow_month_path + "[3]") + ";" \ - + findEntry(path, narrow_month_path + "[4]") + ";" \ - + findEntry(path, narrow_month_path + "[5]") + ";" \ - + findEntry(path, narrow_month_path + "[6]") + ";" \ - + findEntry(path, narrow_month_path + "[7]") + ";" \ - + findEntry(path, narrow_month_path + "[8]") + ";" \ - + findEntry(path, narrow_month_path + "[9]") + ";" \ - + findEntry(path, narrow_month_path + "[10]") + ";" \ - + findEntry(path, narrow_month_path + "[11]") + ";" \ - + findEntry(path, narrow_month_path + "[12]") + ";" - - long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day" - result['longDays'] \ - = findEntry(path, long_day_path + "[sun]") + ";" \ - + findEntry(path, long_day_path + "[mon]") + ";" \ - + findEntry(path, long_day_path + "[tue]") + ";" \ - + findEntry(path, long_day_path + "[wed]") + ";" \ - + findEntry(path, long_day_path + "[thu]") + ";" \ - + findEntry(path, long_day_path + "[fri]") + ";" \ - + findEntry(path, long_day_path + "[sat]") + ";" - - short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day" - result['shortDays'] \ - = findEntry(path, short_day_path + "[sun]") + ";" \ - + findEntry(path, short_day_path + "[mon]") + ";" \ - + findEntry(path, short_day_path + "[tue]") + ";" \ - + findEntry(path, short_day_path + "[wed]") + ";" \ - + findEntry(path, short_day_path + "[thu]") + ";" \ - + findEntry(path, short_day_path + "[fri]") + ";" \ - + findEntry(path, short_day_path + "[sat]") + ";" - - narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day" - result['narrowDays'] \ - = findEntry(path, narrow_day_path + "[sun]") + ";" \ - + findEntry(path, narrow_day_path + "[mon]") + ";" \ - + findEntry(path, narrow_day_path + "[tue]") + ";" \ - + findEntry(path, narrow_day_path + "[wed]") + ";" \ - + findEntry(path, narrow_day_path + "[thu]") + ";" \ - + findEntry(path, narrow_day_path + "[fri]") + ";" \ - + findEntry(path, narrow_day_path + "[sat]") + ";" - - standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day" - result['standaloneLongDays'] \ - = findEntry(path, standalone_long_day_path + "[sun]") + ";" \ - + findEntry(path, standalone_long_day_path + "[mon]") + ";" \ - + findEntry(path, standalone_long_day_path + "[tue]") + ";" \ - + findEntry(path, standalone_long_day_path + "[wed]") + ";" \ - + findEntry(path, standalone_long_day_path + "[thu]") + ";" \ - + findEntry(path, standalone_long_day_path + "[fri]") + ";" \ - + findEntry(path, standalone_long_day_path + "[sat]") + ";" - - standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day" - result['standaloneShortDays'] \ - = findEntry(path, standalone_short_day_path + "[sun]") + ";" \ - + findEntry(path, standalone_short_day_path + "[mon]") + ";" \ - + findEntry(path, standalone_short_day_path + "[tue]") + ";" \ - + findEntry(path, standalone_short_day_path + "[wed]") + ";" \ - + findEntry(path, standalone_short_day_path + "[thu]") + ";" \ - + findEntry(path, standalone_short_day_path + "[fri]") + ";" \ - + findEntry(path, standalone_short_day_path + "[sat]") + ";" - - standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day" - result['standaloneNarrowDays'] \ - = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \ - + findEntry(path, standalone_narrow_day_path + "[sat]") + ";" - - return result + result['currencyDisplayName'] = ';'.join( + findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode'] + + ']/displayName' + tail) + for tail in ['',] + [ + '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') + ]) + ';' + + def findUnitDef(path, stem, fallback=''): + # The displayName for a quantified unit in en.xml is kByte + # instead of kB (etc.), so prefer any unitPattern provided: + for count in ('many', 'few', 'two', 'other', 'zero', 'one'): + try: + ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) + except xpathlite.Error: + continue + + # TODO: epxloit count-handling, instead of discarding placeholders + if ans.startswith('{0}'): + ans = ans[3:].lstrip() + if ans: + return ans + + return findEntryDef(path, stem + 'displayName', fallback) + + # First without quantifier, then quantified each way: + result['byte_unit'] = findEntryDef( + path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName', + 'bytes') + stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/' + known = [] # cases where we *do* have a given version: + result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known)) + # IEC 60027-2 + # http://physics.nist.gov/cuu/Units/binary.html + result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known)) + + # Used for month and day data: + namings = ( + ('standaloneLong', 'stand-alone', 'wide'), + ('standaloneShort', 'stand-alone', 'abbreviated'), + ('standaloneNarrow', 'stand-alone', 'narrow'), + ('long', 'format', 'wide'), + ('short', 'format', 'abbreviated'), + ('narrow', 'format', 'narrow'), + ) + + # Month data: + for cal in ('gregorian',): # We shall want to add to this + stem = 'dates/calendars/calendar[' + cal + ']/months/' + for (key, mode, size) in namings: + prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' + result[key + 'Months'] = ';'.join( + findEntry(path, stem + prop + "month[%d]" % i) + for i in range(1, 13)) + ';' + + # Day data (for Gregorian, at least): + stem = 'dates/calendars/calendar[gregorian]/days/' + days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat') + for (key, mode, size) in namings: + prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' + result[key + 'Days'] = ';'.join( + findEntry(path, stem + prop + '[' + day + ']') + for day in days) + ';' + + return Locale(result) def addEscapes(s): result = '' @@ -463,94 +382,42 @@ def usage(): def integrateWeekData(filePath): if not filePath.endswith(".xml"): return {} - monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ") - tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ") - wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ") - thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ") - friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ") - satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ") - sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ") - - monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ") - tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ") - wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ") - thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ") - friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ") - satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ") - sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ") - - monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ") - tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ") - wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ") - thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ") - friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ") - satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ") - sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ") + + def lookup(key): + return findEntryInFile(filePath, key, attribute='territories')[0].split() + days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') firstDayByCountryCode = {} - for countryCode in monFirstDayIn: - firstDayByCountryCode[countryCode] = "mon" - for countryCode in tueFirstDayIn: - firstDayByCountryCode[countryCode] = "tue" - for countryCode in wedFirstDayIn: - firstDayByCountryCode[countryCode] = "wed" - for countryCode in thuFirstDayIn: - firstDayByCountryCode[countryCode] = "thu" - for countryCode in friFirstDayIn: - firstDayByCountryCode[countryCode] = "fri" - for countryCode in satFirstDayIn: - firstDayByCountryCode[countryCode] = "sat" - for countryCode in sunFirstDayIn: - firstDayByCountryCode[countryCode] = "sun" + for day in days: + for countryCode in lookup('weekData/firstDay[day=%s]' % day): + firstDayByCountryCode[countryCode] = day weekendStartByCountryCode = {} - for countryCode in monWeekendStart: - weekendStartByCountryCode[countryCode] = "mon" - for countryCode in tueWeekendStart: - weekendStartByCountryCode[countryCode] = "tue" - for countryCode in wedWeekendStart: - weekendStartByCountryCode[countryCode] = "wed" - for countryCode in thuWeekendStart: - weekendStartByCountryCode[countryCode] = "thu" - for countryCode in friWeekendStart: - weekendStartByCountryCode[countryCode] = "fri" - for countryCode in satWeekendStart: - weekendStartByCountryCode[countryCode] = "sat" - for countryCode in sunWeekendStart: - weekendStartByCountryCode[countryCode] = "sun" + for day in days: + for countryCode in lookup('weekData/weekendStart[day=%s]' % day): + weekendStartByCountryCode[countryCode] = day weekendEndByCountryCode = {} - for countryCode in monWeekendEnd: - weekendEndByCountryCode[countryCode] = "mon" - for countryCode in tueWeekendEnd: - weekendEndByCountryCode[countryCode] = "tue" - for countryCode in wedWeekendEnd: - weekendEndByCountryCode[countryCode] = "wed" - for countryCode in thuWeekendEnd: - weekendEndByCountryCode[countryCode] = "thu" - for countryCode in friWeekendEnd: - weekendEndByCountryCode[countryCode] = "fri" - for countryCode in satWeekendEnd: - weekendEndByCountryCode[countryCode] = "sat" - for countryCode in sunWeekendEnd: - weekendEndByCountryCode[countryCode] = "sun" - - for (key,locale) in locale_database.iteritems(): - countryCode = locale['country_code'] + for day in days: + for countryCode in lookup('weekData/weekendEnd[day=%s]' % day): + weekendEndByCountryCode[countryCode] = day + + for (key, locale) in locale_database.iteritems(): + countryCode = locale.country_code if countryCode in firstDayByCountryCode: - locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode] + locale.firstDayOfWeek = firstDayByCountryCode[countryCode] else: - locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"] + locale.firstDayOfWeek = firstDayByCountryCode["001"] if countryCode in weekendStartByCountryCode: - locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode] + locale.weekendStart = weekendStartByCountryCode[countryCode] else: - locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"] + locale.weekendStart = weekendStartByCountryCode["001"] if countryCode in weekendEndByCountryCode: - locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode] + locale.weekendEnd = weekendEndByCountryCode[countryCode] else: - locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"] + locale.weekendEnd = weekendEndByCountryCode["001"] if len(sys.argv) != 2: usage() @@ -566,7 +433,9 @@ locale_database = {} # see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content defaultContent_locales = {} -for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"): +for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'supplementalMetadata.xml'), + 'metadata/defaultContent'): for data in ns[1:][0]: if data[0] == u"locales": defaultContent_locales = data[1].split() @@ -579,36 +448,36 @@ for file in defaultContent_locales: country_code = items[2] else: if len(items) != 2: - sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n") + sys.stderr.write('skipping defaultContent locale "' + file + '" [neither lang_script_country nor lang_country]\n') continue language_code = items[0] script_code = "" country_code = items[1] if len(country_code) == 4: - sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n") + sys.stderr.write('skipping defaultContent locale "' + file + '" [long country code]\n') continue try: l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code) if not l: - sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n") + sys.stderr.write('skipping defaultContent locale "' + file + '" [no locale info generated]\n') continue except xpathlite.Error as e: - sys.stderr.write("skipping defaultContent locale \"%s\" (%s)\n" % (file, str(e))) + sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e))) continue - locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l for file in cldr_files: try: l = generateLocaleInfo(cldr_dir + "/" + file) if not l: - sys.stderr.write("skipping file \"" + file + "\"\n") + sys.stderr.write('skipping file "' + file + '" [no locale info generated]\n') continue except xpathlite.Error as e: - sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e))) + sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e))) continue - locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") locale_keys = locale_database.keys() @@ -665,7 +534,7 @@ def _parseLocale(l): if language_code != "und": language_id = enumdata.languageCodeToId(language_code) if language_id == -1: - raise xpathlite.Error("unknown language code \"%s\"" % language_code) + raise xpathlite.Error('unknown language code "%s"' % language_code) language = enumdata.language_list[language_id][0] if len(items) > 1: @@ -676,14 +545,14 @@ def _parseLocale(l): if len(script_code) == 4: script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: - raise xpathlite.Error("unknown script code \"%s\"" % script_code) + raise xpathlite.Error('unknown script code "%s"' % script_code) script = enumdata.script_list[script_id][0] else: country_code = script_code if country_code: country_id = enumdata.countryCodeToId(country_code) if country_id == -1: - raise xpathlite.Error("unknown country code \"%s\"" % country_code) + raise xpathlite.Error('unknown country code "%s"' % country_code) country = enumdata.country_list[country_id][0] return (language, script, country) @@ -697,12 +566,12 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel try: (from_language, from_script, from_country) = _parseLocale(tmp[u"from"]) except xpathlite.Error as e: - sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e))) + sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) continue try: (to_language, to_script, to_country) = _parseLocale(tmp[u"to"]) except xpathlite.Error as e: - sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e))) + sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) continue # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags if to_country == "AnyCountry" and from_country != to_country: @@ -725,115 +594,10 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel print " </likelySubtags>" print " <localeList>" -print \ -" <locale>\n\ - <language>C</language>\n\ - <languageEndonym></languageEndonym>\n\ - <script>AnyScript</script>\n\ - <country>AnyCountry</country>\n\ - <countryEndonym></countryEndonym>\n\ - <decimal>46</decimal>\n\ - <group>44</group>\n\ - <list>59</list>\n\ - <percent>37</percent>\n\ - <zero>48</zero>\n\ - <minus>45</minus>\n\ - <plus>43</plus>\n\ - <exp>101</exp>\n\ - <quotationStart>\"</quotationStart>\n\ - <quotationEnd>\"</quotationEnd>\n\ - <alternateQuotationStart>\'</alternateQuotationStart>\n\ - <alternateQuotationEnd>\'</alternateQuotationEnd>\n\ - <listPatternPartStart>%1, %2</listPatternPartStart>\n\ - <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\ - <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\ - <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\ - <am>AM</am>\n\ - <pm>PM</pm>\n\ - <firstDayOfWeek>mon</firstDayOfWeek>\n\ - <weekendStart>sat</weekendStart>\n\ - <weekendEnd>sun</weekendEnd>\n\ - <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\ - <shortDateFormat>d MMM yyyy</shortDateFormat>\n\ - <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\ - <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\ - <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\ - <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\ - <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\ - <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\ - <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\ - <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\ - <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\ - <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\ - <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\ - <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\ - <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\ - <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\ - <currencyIsoCode></currencyIsoCode>\n\ - <currencySymbol></currencySymbol>\n\ - <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\ - <currencyDigits>2</currencyDigits>\n\ - <currencyRounding>1</currencyRounding>\n\ - <currencyFormat>%1%2</currencyFormat>\n\ - <currencyNegativeFormat></currencyNegativeFormat>\n\ - </locale>" +Locale.C().toXml() for key in locale_keys: - l = locale_database[key] - - print " <locale>" - print " <language>" + l['language'] + "</language>" - print " <languageEndonym>" + escape(l['language_endonym']).encode('utf-8') + "</languageEndonym>" - print " <script>" + l['script'] + "</script>" - print " <country>" + l['country'] + "</country>" - print " <countryEndonym>" + escape(l['country_endonym']).encode('utf-8') + "</countryEndonym>" - print " <languagecode>" + l['language_code'] + "</languagecode>" - print " <scriptcode>" + l['script_code'] + "</scriptcode>" - print " <countrycode>" + l['country_code'] + "</countrycode>" - print " <decimal>" + ordStr(l['decimal']) + "</decimal>" - print " <group>" + ordStr(l['group']) + "</group>" - print " <list>" + fixOrdStrList(l['list']) + "</list>" - print " <percent>" + fixOrdStrPercent(l['percent']) + "</percent>" - print " <zero>" + ordStr(l['zero']) + "</zero>" - print " <minus>" + fixOrdStrMinus(l['minus']) + "</minus>" - print " <plus>" + fixOrdStrPlus(l['plus']) + "</plus>" - print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>" - print " <quotationStart>" + escape(l['quotationStart']).encode('utf-8') + "</quotationStart>" - print " <quotationEnd>" + escape(l['quotationEnd']).encode('utf-8') + "</quotationEnd>" - print " <alternateQuotationStart>" + escape(l['alternateQuotationStart']).encode('utf-8') + "</alternateQuotationStart>" - print " <alternateQuotationEnd>" + escape(l['alternateQuotationEnd']).encode('utf-8') + "</alternateQuotationEnd>" - print " <listPatternPartStart>" + escape(l['listPatternPartStart']).encode('utf-8') + "</listPatternPartStart>" - print " <listPatternPartMiddle>" + escape(l['listPatternPartMiddle']).encode('utf-8') + "</listPatternPartMiddle>" - print " <listPatternPartEnd>" + escape(l['listPatternPartEnd']).encode('utf-8') + "</listPatternPartEnd>" - print " <listPatternPartTwo>" + escape(l['listPatternPartTwo']).encode('utf-8') + "</listPatternPartTwo>" - print " <am>" + escape(l['am']).encode('utf-8') + "</am>" - print " <pm>" + escape(l['pm']).encode('utf-8') + "</pm>" - print " <firstDayOfWeek>" + escape(l['firstDayOfWeek']).encode('utf-8') + "</firstDayOfWeek>" - print " <weekendStart>" + escape(l['weekendStart']).encode('utf-8') + "</weekendStart>" - print " <weekendEnd>" + escape(l['weekendEnd']).encode('utf-8') + "</weekendEnd>" - print " <longDateFormat>" + escape(l['longDateFormat']).encode('utf-8') + "</longDateFormat>" - print " <shortDateFormat>" + escape(l['shortDateFormat']).encode('utf-8') + "</shortDateFormat>" - print " <longTimeFormat>" + escape(l['longTimeFormat']).encode('utf-8') + "</longTimeFormat>" - print " <shortTimeFormat>" + escape(l['shortTimeFormat']).encode('utf-8') + "</shortTimeFormat>" - print " <standaloneLongMonths>" + escape(l['standaloneLongMonths']).encode('utf-8') + "</standaloneLongMonths>" - print " <standaloneShortMonths>"+ escape(l['standaloneShortMonths']).encode('utf-8') + "</standaloneShortMonths>" - print " <standaloneNarrowMonths>"+ escape(l['standaloneNarrowMonths']).encode('utf-8') + "</standaloneNarrowMonths>" - print " <longMonths>" + escape(l['longMonths']).encode('utf-8') + "</longMonths>" - print " <shortMonths>" + escape(l['shortMonths']).encode('utf-8') + "</shortMonths>" - print " <narrowMonths>" + escape(l['narrowMonths']).encode('utf-8') + "</narrowMonths>" - print " <longDays>" + escape(l['longDays']).encode('utf-8') + "</longDays>" - print " <shortDays>" + escape(l['shortDays']).encode('utf-8') + "</shortDays>" - print " <narrowDays>" + escape(l['narrowDays']).encode('utf-8') + "</narrowDays>" - print " <standaloneLongDays>" + escape(l['standaloneLongDays']).encode('utf-8') + "</standaloneLongDays>" - print " <standaloneShortDays>" + escape(l['standaloneShortDays']).encode('utf-8') + "</standaloneShortDays>" - print " <standaloneNarrowDays>" + escape(l['standaloneNarrowDays']).encode('utf-8') + "</standaloneNarrowDays>" - print " <currencyIsoCode>" + escape(l['currencyIsoCode']).encode('utf-8') + "</currencyIsoCode>" - print " <currencySymbol>" + escape(l['currencySymbol']).encode('utf-8') + "</currencySymbol>" - print " <currencyDisplayName>" + escape(l['currencyDisplayName']).encode('utf-8') + "</currencyDisplayName>" - print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>" - print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>" - print " <currencyFormat>" + escape(l['currencyFormat']).encode('utf-8') + "</currencyFormat>" - print " <currencyNegativeFormat>" + escape(l['currencyNegativeFormat']).encode('utf-8') + "</currencyNegativeFormat>" - print " </locale>" + locale_database[key].toXml() + print " </localeList>" print "</localeDatabase>" |