diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2020-02-19 15:17:16 +0100 |
---|---|---|
committer | Edward Welbourne <eddy@chaos.org.uk> | 2020-04-02 19:42:28 +0100 |
commit | a20697a3940ede60b2fd5eac0ffd1a57b132191a (patch) | |
tree | 76e021eae0a65162b4f62f83cd7c22f4658eec3e /util/locale_database/cldr2qlocalexml.py | |
parent | 54886d7f81175ac6bc39a0b40efd18c886b8bf8f (diff) |
Rework cldr2qlocalexml.py in terms of a QLocaleXmlWriter class
Delegate the output of XML to a helper class provided by qlocalexml.py
and restructure the driver script so that it can be imported without
running anything. It now has a minimal `__name__ == '__main__'` block
that calls a main() function. For the moment, main() has to set a global,
cldr_dir, through which it shares the CLDR directory with various other
functions (e.g. getNumberSystems()); that global shall go away in a later commit.
Task-number: QTBUG-81344
Change-Id: Ica2d3ec09f2d38ba42fd930258cc765283f29a71
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util/locale_database/cldr2qlocalexml.py')
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 312 |
1 files changed, 138 insertions, 174 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index ee53381b22..fba8d7fdd5 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -61,13 +61,13 @@ import enumdata import xpathlite from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile from dateconverter import convert_date -from qlocalexml import Locale +from qlocalexml import Locale, QLocaleXmlWriter # TODO: make calendars a command-line option calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' findEntryInFile = xpathlite._findEntryInFile -def wrappedwarn(prefix, tokens): - return sys.stderr.write( +def wrappedwarn(err, prefix, tokens): + return err.write( '\n'.join(textwrap.wrap(prefix + ', '.join(tokens), subsequent_indent=' ', width=80)) + '\n') @@ -101,6 +101,7 @@ def parse_number_format(patterns, data): result.append(pattern) return result +cldr_dir = None def raiseUnknownCode(code, form, cache={}): """Check whether an unknown code could be supported. @@ -193,8 +194,8 @@ def getNumberSystems(cache={}): """Cached look-up of number system information. Pass no arguments. Returns a mapping from number system names to, - for each system, a mapping with keys u'digits', u'type' and - u'id'\n""" + for each system, a mapping with keys 'digits', 'type' and 'id'. 
+ Relies on global cldr_dir being set before it's first called.\n""" if not cache: for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', 'numberingSystems.xml'), @@ -419,26 +420,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ return Locale(result) -def addEscapes(s): - result = '' - for c in s: - n = ord(c) - if n < 128: - result += c - else: - result += "\\x" - result += "%02x" % (n) - return result - -def unicodeStr(s): - utf8 = s.encode('utf-8') - return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>" - -def usage(): - print "Usage: cldr2qlocalexml.py <path-to-cldr-main>" - sys.exit() - -def integrateWeekData(filePath): +def integrateWeekData(filePath, locale_database): if not filePath.endswith(".xml"): return {} @@ -510,111 +492,6 @@ def splitLocale(name): tag = (tag if tag else tags.next(),) sys.stderr.write('Ignoring unparsed cruft %s in %s\n' % ('_'.join(tag + tuple(tags)), name)) -if len(sys.argv) != 2: - usage() - -cldr_dir = sys.argv[1] - -if not os.path.isdir(cldr_dir): - usage() - -cldr_files = os.listdir(cldr_dir) - -locale_database = {} - -# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content -defaultContent_locales = [] -for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', - 'supplementalMetadata.xml'), - 'metadata/defaultContent'): - for data in ns[1:][0]: - if data[0] == u"locales": - defaultContent_locales += data[1].split() - -skips = [] -for file in defaultContent_locales: - try: - language_code, script_code, country_code = splitLocale(file) - except ValueError: - sys.stderr.write('skipping defaultContent locale "' + file + '" [neither two nor three tags]\n') - continue - - if not (script_code or country_code): - sys.stderr.write('skipping defaultContent locale "' + file + '" [second tag is neither script nor territory]\n') - continue - - try: - l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, 
country_code) - if not l: - skips.append(file) - continue - except xpathlite.Error as e: - sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e))) - continue - - locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l - -if skips: - wrappedwarn('skipping defaultContent locales [no locale info generated]: ', skips) - skips = [] - -for file in cldr_files: - try: - l = generateLocaleInfo(cldr_dir + "/" + file) - if not l: - skips.append(file) - continue - except xpathlite.Error as e: - sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e))) - continue - - locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l - -if skips: - wrappedwarn('skipping files [no locale info generated]: ', skips) - -integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") -locale_keys = locale_database.keys() -locale_keys.sort() - -cldr_version = 'unknown' -ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r") -for line in ldml: - if 'version cldrVersion CDATA #FIXED' in line: - cldr_version = line.split('"')[1] - -print "<localeDatabase>" -print " <version>" + cldr_version + "</version>" -print " <languageList>" -for id in enumdata.language_list: - l = enumdata.language_list[id] - print " <language>" - print " <name>" + l[0] + "</name>" - print " <id>" + str(id) + "</id>" - print " <code>" + l[1] + "</code>" - print " </language>" -print " </languageList>" - -print " <scriptList>" -for id in enumdata.script_list: - l = enumdata.script_list[id] - print " <script>" - print " <name>" + l[0] + "</name>" - print " <id>" + str(id) + "</id>" - print " <code>" + l[1] + "</code>" - print " </script>" -print " </scriptList>" - -print " <countryList>" -for id in enumdata.country_list: - l = enumdata.country_list[id] - print " <country>" - print " <name>" + l[0] + "</name>" - print " <id>" + str(id) + "</id>" - print " <code>" + l[1] + "</code>" - print " </country>" -print " </countryList>" - def _parseLocale(l): 
language = "AnyLanguage" script = "AnyScript" @@ -651,48 +528,135 @@ def _parseLocale(l): return (language, script, country) -skips = [] -print " <likelySubtags>" -for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"): - tmp = {} - for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] - tmp[data[0]] = data[1] +def likelySubtags(root, err): + skips = [] + for ns in findTagsInFile(os.path.join(root, 'supplemental', 'likelySubtags.xml'), "likelySubtags"): + tmp = {} + for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] + tmp[data[0]] = data[1] + + try: + from_language, from_script, from_country = _parseLocale(tmp[u"from"]) + to_language, to_script, to_country = _parseLocale(tmp[u"to"]) + except xpathlite.Error as e: + if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']: + skips.append(tmp[u'to']) + else: + sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) + continue + # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags + if to_country == "AnyCountry" and from_country != to_country: + to_country = from_country + if to_script == "AnyScript" and from_script != to_script: + to_script = from_script + + yield ((from_language, from_script, from_country), + (to_language, to_script, to_country)) + if skips: + wrappedwarn(err, 'skipping likelySubtags (for unknown language codes): ', skips) + +def usage(err, name, message = ''): + err.write("""Usage: {} <path-to-cldr-main> [out-file.xml] +""".format(name)) # TODO: expand + if message: + err.write('\n' + message + '\n') + +def main(args, out, err): + name = args.pop(0) + + if len(args) < 1: + usage(err, name) + return 1 + + global cldr_dir + cldr_dir = args.pop(0) + if not os.path.isdir(cldr_dir): + usage(err, name, 'Where did you unpack the CLDR data files ?') + return 
1 + + if len(args) > 1: + usage(err, name, 'Too many arguments passed') + return 1 + if args: + qxml = open(args.pop(0), 'w') + else: + qxml = out - try: - from_language, from_script, from_country = _parseLocale(tmp[u"from"]) - to_language, to_script, to_country = _parseLocale(tmp[u"to"]) - except xpathlite.Error as e: - if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']: - skips.append(tmp[u'to']) - else: - sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) - continue - # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags - if to_country == "AnyCountry" and from_country != to_country: - to_country = from_country - if to_script == "AnyScript" and from_script != to_script: - to_script = from_script - - print " <likelySubtag>" - print " <from>" - print " <language>" + from_language + "</language>" - print " <script>" + from_script + "</script>" - print " <country>" + from_country + "</country>" - print " </from>" - print " <to>" - print " <language>" + to_language + "</language>" - print " <script>" + to_script + "</script>" - print " <country>" + to_country + "</country>" - print " </to>" - print " </likelySubtag>" -print " </likelySubtags>" -if skips: - wrappedwarn('skipping likelySubtags (for unknown language codes): ', skips) -print " <localeList>" - -Locale.C(calendars).toXml(calendars) -for key in locale_keys: - locale_database[key].toXml(calendars) - -print " </localeList>" -print "</localeDatabase>" + getNumberSystems(cldr_dir) + cldr_files = os.listdir(cldr_dir) + locale_database = {} + + # see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content + defaultContent_locales = [] + for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'supplementalMetadata.xml'), + 'metadata/defaultContent'): + for data in ns[1:][0]: + if data[0] == u"locales": + defaultContent_locales += data[1].split() + + skips = [] + for file 
in defaultContent_locales: + try: + language_code, script_code, country_code = splitLocale(file) + except ValueError: + sys.stderr.write('skipping defaultContent locale "' + file + '" [neither two nor three tags]\n') + continue + + if not (script_code or country_code): + sys.stderr.write('skipping defaultContent locale "' + file + '" [second tag is neither script nor territory]\n') + continue + + try: + l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code) + if not l: + skips.append(file) + continue + except xpathlite.Error as e: + sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e))) + continue + + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l + + if skips: + wrappedwarn(err, 'skipping defaultContent locales [no locale info generated]: ', skips) + skips = [] + + for file in cldr_files: + try: + l = generateLocaleInfo(cldr_dir + "/" + file) + if not l: + skips.append(file) + continue + except xpathlite.Error as e: + sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e))) + continue + + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l + + if skips: + wrappedwarn(err, 'skipping files [no locale info generated]: ', skips) + + integrateWeekData(cldr_dir + "/../supplemental/supplementalData.xml", locale_database) + cldr_version = 'unknown' + with open(cldr_dir+"/../dtd/ldml.dtd", "r") as ldml: + for line in ldml: + if 'version cldrVersion CDATA #FIXED' in line: + cldr_version = line.split('"')[1] + + xmlOut = QLocaleXmlWriter(qxml.write) + xmlOut.version(cldr_version) + xmlOut.enumData(enumdata.language_list, + enumdata.script_list, + enumdata.country_list) + xmlOut.likelySubTags(likelySubtags(os.path.split(cldr_dir)[0], err)) + xmlOut.locales(locale_database, calendars) + xmlOut.close() + if qxml is not out: + qxml.close() + + return 0 + +if __name__ == '__main__': + import sys + sys.exit(main(sys.argv, sys.stdout, 
sys.stderr)) |