diff options
Diffstat (limited to 'util/locale_database/cldr2qlocalexml.py')
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 636 |
1 files changed, 45 insertions, 591 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index 41795ff634..b28dcecc45 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -2,7 +2,7 @@ # coding=utf8 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -31,15 +31,17 @@ The CLDR data can be downloaded from CLDR_, which has a sub-directory for each version; you need the ``core.zip`` file for your version of -choice (typically the latest). This script has had updates to cope up -to v35; for later versions, we may need adaptations. Unpack the +choice (typically the latest). This script has had updates to cope up +to v35; for later versions, we may need adaptations. Unpack the downloaded ``core.zip`` and check it has a common/main/ sub-directory: -pass the path of that sub-directory to this script as its single -command-line argument. Save its standard output (but not error) to a -file for later processing by ``./qlocalexml2cpp.py`` +pass the path of that root of the download to this script as its first +command-line argument. Pass the name of the file in which to write +output as the second argument; either omit it or use '-' to select the +standard output. This file is the input needed by +``./qlocalexml2cpp.py`` When you update the CLDR data, be sure to also update -src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check +src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check this script's output for unknown language, country or script messages; if any can be resolved, use their entry in common/main/en.xml to append new entries to enumdata.py's lists and update documentation in @@ -53,610 +55,62 @@ time zone names; see cldr2qtimezone.py for details. """ import os -import sys -import re -import textwrap -import enumdata from localetools import Error -from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \ - _findEntryInFile as findEntryInFile -from dateconverter import convert_date -from qlocalexml import Locale, QLocaleXmlWriter - -# TODO: make calendars a command-line option -calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' -def wrappedwarn(err, prefix, tokens): - return err.write( - '\n'.join(textwrap.wrap(prefix + ', '.join(tokens), - subsequent_indent=' ', width=80)) + '\n') - -def parse_number_format(patterns, data): - # this is a very limited parsing of the number format for currency only. - def skip_repeating_pattern(x): - p = x.replace('0', '#').replace(',', '').replace('.', '') - seen = False - result = '' - for c in p: - if c == '#': - if seen: - continue - seen = True - else: - seen = False - result = result + c - return result - patterns = patterns.split(';') - result = [] - for pattern in patterns: - pattern = skip_repeating_pattern(pattern) - pattern = pattern.replace('#', "%1") - # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns - # there can be doubled or trippled currency sign, however none of the - # locales use that. - pattern = pattern.replace(u'\xa4', "%2") - pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'") - pattern = pattern.replace('-', data['minus']) - pattern = pattern.replace('+', data['plus']) - result.append(pattern) - return result - -cldr_dir = None -def raiseUnknownCode(code, form, cache={}): - """Check whether an unknown code could be supported. - - We declare a language, script or country code unknown if it's not - known to enumdata.py; however, if it's present in main/en.xml's - mapping of codes to names, we have the option of adding support. - This caches the necessary look-up (so we only read main/en.xml - once) and returns the name we should use if we do add support. - - First parameter, code, is the unknown code. Second parameter, - form, is one of 'language', 'script' or 'country' to select the - type of code to look up. Do not pass further parameters (the next - will deprive you of the cache). - - Raises localetools.Error with a suitable message, that includes - the unknown code's full name if found. - - Relies on global cldr_dir being set before it's called; see tail - of this file. - """ - if not cache: - cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) - name = cache[form].get(code) - msg = 'unknown %s code "%s"' % (form, code) - if name: - msg += ' - could use "%s"' % name - raise Error(msg) - -def parse_list_pattern_part_format(pattern): - # This is a very limited parsing of the format for list pattern part only. - return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") - -def unit_quantifiers(find, path, stem, suffix, known, - # Stop at exa/exbi: 16 exbi = 2^{64} < zetta = - # 1000^7 < zebi = 2^{70}, the next quantifiers up: - si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): - """Work out the unit quantifiers. - - Unfortunately, the CLDR data only go up to terabytes and we want - all the way to exabytes; but we can recognize the SI quantifiers - as prefixes, strip and identify the tail as the localized - translation for 'B' (e.g. French has 'octet' for 'byte' and uses - ko, Mo, Go, To from which we can extrapolate Po, Eo). - - Should be called first for the SI quantifiers, with suffix = 'B', - then for the IEC ones, with suffix = 'iB'; the list known - (initially empty before first call) is used to let the second call - know what the first learned about the localized unit. - """ - if suffix == 'B': # first call, known = [] - tail = suffix - for q in si_quantifiers: - it = find(path, stem % q) - # kB for kilobyte, in contrast with KiB for IEC: - q = q[0] if q == 'kilo' else q[0].upper() - if not it: - it = q + tail - elif it.startswith(q): - rest = it[1:] - tail = rest if all(rest == k for k in known) else suffix - known.append(rest) - yield it - else: # second call, re-using first's known - assert suffix == 'iB' - if known: - byte = known.pop() - if all(byte == k for k in known): - suffix = 'i' + byte - for q in si_quantifiers: - yield find(path, stem % q[:2], - # Those don't (yet, v31) exist in CLDR, so we always fall back to: - q[0].upper() + suffix) - -def generateLocaleInfo(path): - if not path.endswith(".xml"): - return {} - - # skip legacy/compatibility ones - alias = findAlias(path) - if alias: - raise Error('Alias to "{}"'.format(alias)) - - def code(tag): - return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] - - return _generateLocaleInfo(path, code('language'), code('script'), - code('territory'), code('variant')) - -def getNumberSystems(cache={}): - """Cached look-up of number system information. - - Pass no arguments. Returns a mapping from number system names to, - for each system, a mapping with keys 'digits', 'type' and 'id'. - Relies on global cldr_dir being set before it's first called.\n""" - if not cache: - for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', - 'numberingSystems.xml'), - 'numberingSystems'): - # ns has form: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]] - entry = dict(ns[1]) - name = entry[u'id'] - if u'digits' in entry and ord(entry[u'digits'][0]) > 0xffff: - # FIXME, QTBUG-69324: make this redundant: - # omit number system if zero doesn't fit in single-char16 UTF-16 :-( - sys.stderr.write('skipping number system "%s" [can\'t represent its zero, U+%X]\n' - % (name, ord(entry[u'digits'][0]))) - else: - cache[name] = entry - return cache - -def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""): - if not path.endswith(".xml"): - return {} - - if language_code == 'root': - # just skip it - return {} - - # we do not support variants - # ### actually there is only one locale with variant: en_US_POSIX - # does anybody care about it at all? - if variant_code: - raise Error('We do not support variants ("{}")'.format(variant_code)) - - language_id = enumdata.languageCodeToId(language_code) - if language_id <= 0: - raiseUnknownCode(language_code, 'language') - - script_id = enumdata.scriptCodeToId(script_code) - if script_id == -1: - raiseUnknownCode(script_code, 'script') - - # we should handle fully qualified names with the territory - if not country_code: - return {} - country_id = enumdata.countryCodeToId(country_code) - if country_id <= 0: - raiseUnknownCode(country_code, 'country') - - # So we say we accept only those values that have "contributed" or - # "approved" resolution. see http://www.unicode.org/cldr/process.html - # But we only respect the resolution for new datas for backward - # compatibility. - draft = DraftResolution.contributed - - result = dict( - language=enumdata.language_list[language_id][0], - language_code=language_code, language_id=language_id, - script=enumdata.script_list[script_id][0], - script_code=script_code, script_id=script_id, - country=enumdata.country_list[country_id][0], - country_code=country_code, country_id=country_id, - variant_code=variant_code) - - (dir_name, file_name) = os.path.split(path) - def from_supplement(tag, - path=os.path.join(dir_name, '..', 'supplemental', - 'supplementalData.xml')): - return findTagsInFile(path, tag) - currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code) - result['currencyIsoCode'] = '' - result['currencyDigits'] = 2 - result['currencyRounding'] = 1 - if currencies: - for e in currencies: - if e[0] == 'currency': - t = [x[1] == 'false' for x in e[1] if x[0] == 'tender'] - if t and t[0]: - pass - elif not any(x[0] == 'to' for x in e[1]): - result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next() - break - if result['currencyIsoCode']: - t = from_supplement("currencyData/fractions/info[iso4217=%s]" - % result['currencyIsoCode']) - if t and t[0][0] == 'info': - result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next() - result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next() - numbering_system = None - try: - numbering_system = findEntry(path, "numbers/defaultNumberingSystem") - except Error: - pass - def findEntryDef(path, xpath, value=''): - try: - return findEntry(path, xpath) - except Error: - return value - def get_number_in_system(path, xpath, numbering_system): - if numbering_system: - try: - return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") - except Error: - # in CLDR 1.9 number system was refactored for numbers (but not for currency) - # so if previous findEntry doesn't work we should try this: - try: - return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/")) - except Error: - # fallback to default - pass - return findEntry(path, xpath) - - result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system) - result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system) - assert result['decimal'] != result['group'] - result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system) - result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) - try: - result['zero'] = getNumberSystems()[numbering_system][u"digits"][0] - except Exception as e: - sys.stderr.write("Native zero detection problem: %s\n" % repr(e)) - result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system) - result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system) - result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system) - result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower() - result['quotationStart'] = findEntry(path, "delimiters/quotationStart") - result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd") - result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart") - result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd") - result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]")) - result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]")) - result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]")) - result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]")) - result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft) - result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft) - result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern")) - result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern")) - result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern")) - result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern")) - - endonym = None - if country_code and script_code: - endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code)) - if not endonym and script_code: - endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code)) - if not endonym and country_code: - endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code)) - if not endonym: - endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code)) - result['languageEndonym'] = endonym - result['countryEndonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code)) - - currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system) - currency_format = parse_number_format(currency_format, result) - result['currencyFormat'] = currency_format[0] - result['currencyNegativeFormat'] = '' - if len(currency_format) > 1: - result['currencyNegativeFormat'] = currency_format[1] - - result['currencySymbol'] = '' - result['currencyDisplayName'] = '' - if result['currencyIsoCode']: - result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) - result['currencyDisplayName'] = ';'.join( - findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode'] - + ']/displayName' + tail) - for tail in ['',] + [ - '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') - ]) + ';' - - def findUnitDef(path, stem, fallback=''): - # The displayName for a quantified unit in en.xml is kByte - # instead of kB (etc.), so prefer any unitPattern provided: - for count in ('many', 'few', 'two', 'other', 'zero', 'one'): - try: - ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) - except Error: - continue - - # TODO: epxloit count-handling, instead of discarding placeholders - if ans.startswith('{0}'): - ans = ans[3:].lstrip() - if ans: - return ans - - return findEntryDef(path, stem + 'displayName', fallback) - - # First without quantifier, then quantified each way: - result['byte_unit'] = findEntryDef( - path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName', - 'bytes') - stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/' - known = [] # cases where we *do* have a given version: - result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known)) - # IEC 60027-2 - # http://physics.nist.gov/cuu/Units/binary.html - result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known)) - - # Used for month and day data: - namings = ( - ('standaloneLong', 'stand-alone', 'wide'), - ('standaloneShort', 'stand-alone', 'abbreviated'), - ('standaloneNarrow', 'stand-alone', 'narrow'), - ('long', 'format', 'wide'), - ('short', 'format', 'abbreviated'), - ('narrow', 'format', 'narrow'), - ) - - # Month names for 12-month calendars: - for cal in calendars: - stem = 'dates/calendars/calendar[' + cal + ']/months/' - for (key, mode, size) in namings: - prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' - result[key + 'Months_' + cal] = ';'.join( - findEntry(path, stem + prop + "month[%d]" % i) - for i in range(1, 13)) + ';' - - # Day data (for Gregorian, at least): - stem = 'dates/calendars/calendar[gregorian]/days/' - days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat') - for (key, mode, size) in namings: - prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' - result[key + 'Days'] = ';'.join( - findEntry(path, stem + prop + '[' + day + ']') - for day in days) + ';' - - return Locale(result) - -def integrateWeekData(filePath, locale_database): - if not filePath.endswith(".xml"): - return {} - - def lookup(key): - return findEntryInFile(filePath, key, attribute='territories')[0].split() - days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') - - firstDayByCountryCode = {} - for day in days: - for countryCode in lookup('weekData/firstDay[day=%s]' % day): - firstDayByCountryCode[countryCode] = day - - weekendStartByCountryCode = {} - for day in days: - for countryCode in lookup('weekData/weekendStart[day=%s]' % day): - weekendStartByCountryCode[countryCode] = day - - weekendEndByCountryCode = {} - for day in days: - for countryCode in lookup('weekData/weekendEnd[day=%s]' % day): - weekendEndByCountryCode[countryCode] = day - - for (key, locale) in locale_database.iteritems(): - countryCode = locale.country_code - if countryCode in firstDayByCountryCode: - locale.firstDayOfWeek = firstDayByCountryCode[countryCode] - else: - locale.firstDayOfWeek = firstDayByCountryCode["001"] - - if countryCode in weekendStartByCountryCode: - locale.weekendStart = weekendStartByCountryCode[countryCode] - else: - locale.weekendStart = weekendStartByCountryCode["001"] - - if countryCode in weekendEndByCountryCode: - locale.weekendEnd = weekendEndByCountryCode[countryCode] - else: - locale.weekendEnd = weekendEndByCountryCode["001"] - -def splitLocale(name): - """Split name into (language, script, territory) triple as generator. - - Ignores any trailing fields (with a warning), leaves script (a capitalised - four-letter token) or territory (either a number or an all-uppercase token) - empty if unspecified, returns a single-entry generator if name is a single - tag (i.e. contains no underscores). Always yields 1 or 3 values, never 2.""" - tags = iter(name.split('_')) - yield tags.next() # Language - tag = tags.next() - - # Script is always four letters, always capitalised: - if len(tag) == 4 and tag[0].isupper() and tag[1:].islower(): - yield tag - try: - tag = tags.next() - except StopIteration: - tag = '' - else: - yield '' - - # Territory is upper-case or numeric: - if tag and tag.isupper() or tag.isdigit(): - yield tag - tag = '' - else: - yield '' - - # If nothing is left, StopIteration will avoid the warning: - tag = (tag if tag else tags.next(),) - sys.stderr.write('Ignoring unparsed cruft %s in %s\n' % ('_'.join(tag + tuple(tags)), name)) - -def _parseLocale(l): - language = "AnyLanguage" - script = "AnyScript" - country = "AnyCountry" - - if l == "und": - raise Error('We treat unknown locale like C') - - parsed = splitLocale(l) - language_code = parsed.next() - script_code = country_code = '' - try: - script_code, country_code = parsed - except ValueError: - pass - - if language_code != "und": - language_id = enumdata.languageCodeToId(language_code) - if language_id == -1: - raise Error('Unknown language code "{}"'.format(language_code)) - language = enumdata.language_list[language_id][0] - - if script_code: - script_id = enumdata.scriptCodeToId(script_code) - if script_id == -1: - raise Error('Unknown script code "{}"'.format(script_code)) - script = enumdata.script_list[script_id][0] - - if country_code: - country_id = enumdata.countryCodeToId(country_code) - if country_id == -1: - raise Error('Unknown country code "{}"'.format(country_code)) - country = enumdata.country_list[country_id][0] - - return (language, script, country) - -def likelySubtags(root, err): - skips = [] - for ns in findTagsInFile(os.path.join(root, 'supplemental', 'likelySubtags.xml'), "likelySubtags"): - tmp = {} - for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] - tmp[data[0]] = data[1] - - try: - from_language, from_script, from_country = _parseLocale(tmp[u"from"]) - to_language, to_script, to_country = _parseLocale(tmp[u"to"]) - except Error as e: - if (tmp['to'].startswith(tmp['from']) - and e.message == 'Unknown language code "{}"'.format(tmp['from'])): - skips.append(tmp['to']) - else: - sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format( - tmp[u"from"], tmp[u"to"], e.message)) - continue - # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags - if to_country == "AnyCountry" and from_country != to_country: - to_country = from_country - if to_script == "AnyScript" and from_script != to_script: - to_script = from_script - - yield ((from_language, from_script, from_country), - (to_language, to_script, to_country)) - if skips: - wrappedwarn(err, 'skipping likelySubtags (for unknown language codes): ', skips) +from cldr import CldrReader +from qlocalexml import QLocaleXmlWriter +from enumdata import language_list, script_list, country_list def usage(err, name, message = ''): - err.write("""Usage: {} <path-to-cldr-main> [out-file.xml] -""".format(name)) # TODO: expand + err.write("""Usage: {} path/to/cldr/common/main [out-file.xml] +""".format(name)) # TODO: expand command-line, improve help message if message: err.write('\n' + message + '\n') def main(args, out, err): - name = args.pop(0) + # TODO: make calendars a command-line option + calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' - if len(args) < 1: - usage(err, name) + # TODO: make argument parsing more sophisticated + name = args.pop(0) + if not args: + usage(name, err, 'Where is your CLDR data tree ?') return 1 - global cldr_dir - cldr_dir = args.pop(0) - if not os.path.isdir(cldr_dir): - usage(err, name, 'Where did you unpack the CLDR data files ?') + root = args.pop(0) + if not os.path.exists(os.path.join(root, 'common', 'main', 'root.xml')): + usage(name, err, + 'First argument is the root of the CLDR tree: found no common/main/root.xml under ' + + root) return 1 - if len(args) > 1: - usage(err, name, 'Too many arguments passed') + xml = args.pop(0) if args else None + if not xml or xml == '-': + emit = out + elif not xml.endswith('.xml'): + usage(name, err, 'Please use a .xml extension on your output file name, not ' + xml) return 1 - if args: - qxml = open(args.pop(0), 'w') else: - qxml = out - - getNumberSystems(cldr_dir) - cldr_files = os.listdir(cldr_dir) - locale_database = {} - - # see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content - defaultContent_locales = [] - for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', - 'supplementalMetadata.xml'), - 'metadata/defaultContent'): - for data in ns[1:][0]: - if data[0] == u"locales": - defaultContent_locales += data[1].split() - - skips = [] - for file in defaultContent_locales: try: - language_code, script_code, country_code = splitLocale(file) - except ValueError: - sys.stderr.write('skipping defaultContent locale "' + file + '" [neither two nor three tags]\n') - continue + emit = open(xml, 'w') + except IOError as e: + usage(name, err, 'Failed to open "{}" to write output to it\n'.format(xml)) + return 1 - if not (script_code or country_code): - sys.stderr.write('skipping defaultContent locale "' + file + '" [second tag is neither script nor territory]\n') - continue - - try: - l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code) - if not l: - skips.append(file) - continue - except Error as e: - sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message)) - continue - - locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l - - if skips: - wrappedwarn(err, 'skipping defaultContent locales [no locale info generated]: ', skips) - skips = [] - - for file in cldr_files: - try: - l = generateLocaleInfo(cldr_dir + "/" + file) - if not l: - skips.append(file) - continue - except Error as e: - sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message)) - continue - - locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l - - if skips: - wrappedwarn(err, 'skipping files [no locale info generated]: ', skips) + if args: + usage(name, err, 'Too many arguments - excess: ' + ' '.join(args)) + return 1 - integrateWeekData(cldr_dir + "/../supplemental/supplementalData.xml", locale_database) - cldr_version = 'unknown' - with open(cldr_dir+"/../dtd/ldml.dtd", "r") as ldml: - for line in ldml: - if 'version cldrVersion CDATA #FIXED' in line: - cldr_version = line.split('"')[1] + # TODO - command line options to tune choice of grumble and whitter: + reader = CldrReader(root, err.write, err.write) + writer = QLocaleXmlWriter(emit.write) - xmlOut = QLocaleXmlWriter(qxml.write) - xmlOut.version(cldr_version) - xmlOut.enumData(enumdata.language_list, - enumdata.script_list, - enumdata.country_list) - xmlOut.likelySubTags(likelySubtags(os.path.split(cldr_dir)[0], err)) - xmlOut.locales(locale_database, calendars) - xmlOut.close() - if qxml is not out: - qxml.close() + writer.version(reader.root.cldrVersion) + writer.enumData(language_list, script_list, country_list) + writer.likelySubTags(reader.likelySubTags()) + writer.locales(reader.readLocales(calendars), calendars) + writer.close() return 0 if __name__ == '__main__': |