diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2017-05-31 21:42:11 +0200 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2017-06-09 08:24:55 +0000 |
commit | 267edbec198a0cedbf7bed4c3c5fa93c1dbc86bd (patch) | |
tree | eda4439aa046a190d7ff72b973761375f78d03fc /util | |
parent | 6a4875f0d17f7ab282eebfd2870918a1abf032a5 (diff) |
Clean up in CLDR scripts for QLocale
Use python more fluently; DRY - use functions to avoid repetition; use
dict-comprehensions; use os.path.join() in preference to arithmetic
with path strings; use elif to avoid the need for a local variable;
set() can take a generator directly, no need to go via a list; don't
end lines with a semicolon (this is Python). Test isdir() once instead of
exists() twice on the same name.
Just, generally, use python's feature-set.
Change-Id: Ib114aa016f70b3be09e968d9cfc069b057f49d41
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'util')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 71 | ||||
-rwxr-xr-x | util/local_database/qlocalexml2cpp.py | 71 |
2 files changed, 57 insertions, 85 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index 2f68b69a9f..fbc28ca712 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -46,14 +46,11 @@ import re import enumdata import xpathlite -from xpathlite import DraftResolution +from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile from dateconverter import convert_date from localexml import Locale -findAlias = xpathlite.findAlias -findEntry = xpathlite.findEntry findEntryInFile = xpathlite._findEntryInFile -findTagsInFile = xpathlite.findTagsInFile def parse_number_format(patterns, data): # this is a very limited parsing of the number format for currency only. @@ -86,12 +83,8 @@ def parse_number_format(patterns, data): return result def parse_list_pattern_part_format(pattern): - # this is a very limited parsing of the format for list pattern part only. - result = "" - result = pattern.replace("{0}", "%1") - result = result.replace("{1}", "%2") - result = result.replace("{2}", "%3") - return result + # This is a very limited parsing of the format for list pattern part only. 
+ return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") def generateLocaleInfo(path): if not path.endswith(".xml"): @@ -102,12 +95,11 @@ def generateLocaleInfo(path): if alias: raise xpathlite.Error('alias to "%s"' % alias) - language_code = findEntryInFile(path, "identity/language", attribute="type")[0] - country_code = findEntryInFile(path, "identity/territory", attribute="type")[0] - script_code = findEntryInFile(path, "identity/script", attribute="type")[0] - variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0] + def code(tag): + return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] - return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code) + return _generateLocaleInfo(path, code('language'), code('script'), + code('territory'), code('variant')) def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""): if not path.endswith(".xml"): @@ -126,12 +118,10 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: raise xpathlite.Error('unknown language code "%s"' % language_code) - language = enumdata.language_list[language_id][0] script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: raise xpathlite.Error('unknown script code "%s"' % script_code) - script = enumdata.script_list[script_id][0] # we should handle fully qualified names with the territory if not country_code: @@ -139,7 +129,6 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ country_id = enumdata.countryCodeToId(country_code) if country_id <= 0: raise xpathlite.Error('unknown country code "%s"' % country_code) - country = enumdata.country_list[country_id][0] # So we say we accept only those values that have "contributed" or # "approved" resolution. 
see http://www.unicode.org/cldr/process.html @@ -147,36 +136,36 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ # compatibility. draft = DraftResolution.contributed - result = {} - result['language'] = language - result['script'] = script - result['country'] = country - result['language_code'] = language_code - result['country_code'] = country_code - result['script_code'] = script_code - result['variant_code'] = variant_code - result['language_id'] = language_id - result['script_id'] = script_id - result['country_id'] = country_id + result = dict( + language=enumdata.language_list[language_id][0], + language_code=language_code, language_id=language_id, + script=enumdata.script_list[script_id][0], + script_code=script_code, script_id=script_id, + country=enumdata.country_list[country_id][0], + country_code=country_code, country_id=country_id, + variant_code=variant_code) (dir_name, file_name) = os.path.split(path) - supplementalPath = dir_name + "/../supplemental/supplementalData.xml" - currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code); + def from_supplement(tag, + path=os.path.join(dir_name, '..', 'supplemental', + 'supplementalData.xml')): + return findTagsInFile(path, tag) + currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code) result['currencyIsoCode'] = '' result['currencyDigits'] = 2 result['currencyRounding'] = 1 if currencies: for e in currencies: if e[0] == 'currency': - tender = True - t = [x for x in e[1] if x[0] == 'tender'] - if t and t[0][1] == 'false': - tender = False; - if tender and not any(x[0] == 'to' for x in e[1]): + t = [x[1] == 'false' for x in e[1] if x[0] == 'tender'] + if t and t[0]: + pass + elif not any(x[0] == 'to' for x in e[1]): result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next() break if result['currencyIsoCode']: - t = findTagsInFile(supplementalPath, 
"currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']); + t = from_supplement("currencyData/fractions/info[iso4217=%s]" + % result['currencyIsoCode']) if t and t[0][0] == 'info': result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next() result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next() @@ -210,7 +199,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) try: numbering_systems = {} - for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"): + for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'numberingSystems.xml'), + 'numberingSystems'): tmp = {} id = "" for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]] @@ -373,7 +364,9 @@ locale_database = {} # see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content defaultContent_locales = {} -for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"): +for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'supplementalMetadata.xml'), + 'metadata/defaultContent'): for data in ns[1:][0]: if data[0] == u"locales": defaultContent_locales = data[1].split() diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py index 44187ede59..baa5a60263 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/local_database/qlocalexml2cpp.py @@ -77,54 +77,40 @@ def eachEltInGroup(parent, group, key): yield element element = element.nextSibling -def eltText(elt): - result = "" +def eltWords(elt): child = elt.firstChild while child: if child.nodeType == elt.TEXT_NODE: - if result: - result += " " - result += child.nodeValue + yield child.nodeValue child = child.nextSibling - return 
result + +def firstChildText(elt, key): + return ' '.join(eltWords(firstChildElt(elt, key))) def loadMap(doc, category): - return dict((int(eltText(firstChildElt(element, 'id'))), - (eltText(firstChildElt(element, 'name')), - eltText(firstChildElt(element, 'code')))) + return dict((int(firstChildText(element, 'id')), + (firstChildText(element, 'name'), + firstChildText(element, 'code'))) for element in eachEltInGroup(doc.documentElement, category + 'List', category)) def loadLikelySubtagsMap(doc): - result = {} + def triplet(element, keys=('language', 'script', 'country')): + return tuple(firstChildText(element, key) for key in keys) - i = 0 - for elt in eachEltInGroup(doc.documentElement, "likelySubtags", "likelySubtag"): - elt_from = firstChildElt(elt, "from") - from_language = eltText(firstChildElt(elt_from, "language")); - from_script = eltText(firstChildElt(elt_from, "script")); - from_country = eltText(firstChildElt(elt_from, "country")); - - elt_to = firstChildElt(elt, "to") - to_language = eltText(firstChildElt(elt_to, "language")); - to_script = eltText(firstChildElt(elt_to, "script")); - to_country = eltText(firstChildElt(elt_to, "country")); - - tmp = {} - tmp["from"] = (from_language, from_script, from_country) - tmp["to"] = (to_language, to_script, to_country) - result[i] = tmp; - i += 1 - return result + return dict((i, {'from': triplet(firstChildElt(elt, "from")), + 'to': triplet(firstChildElt(elt, "to"))}) + for i, elt in enumerate(eachEltInGroup(doc.documentElement, + 'likelySubtags', 'likelySubtag'))) def fixedScriptName(name, dupes): # Don't .capitalize() as some names are already camel-case (see enumdata.py): name = ''.join(word[0].upper() + word[1:] for word in name.split()) if name[-6:] != "Script": - name = name + "Script"; + name = name + "Script" if name in dupes: sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name) - sys.exit(1); + sys.exit(1) return name def fixedCountryName(name, dupes): @@ -138,8 +124,8 @@ def 
fixedLanguageName(name, dupes): return name.replace(" ", "") def findDupes(country_map, language_map): - country_set = set([ v[0] for a, v in country_map.iteritems() ]) - language_set = set([ v[0] for a, v in language_map.iteritems() ]) + country_set = set(v[0] for a, v in country_map.iteritems()) + language_set = set(v[0] for a, v in language_map.iteritems()) return country_set & language_set def languageNameToId(name, language_map): @@ -164,7 +150,7 @@ def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map result = {} for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"): - locale = Locale.fromXmlData(lambda k: eltText(firstChildElt(locale_elt, k))) + locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k)) language_id = languageNameToId(locale.language, language_map) if language_id == -1: sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language) @@ -324,8 +310,7 @@ def escapedString(s): return result def printEscapedString(s): - print escapedString(s); - + print escapedString(s) def currencyIsoCodeData(s): if s: @@ -346,13 +331,9 @@ def main(): localexml = sys.argv[1] qtsrcdir = sys.argv[2] - if not os.path.exists(qtsrcdir) or not os.path.exists(qtsrcdir): - usage() - if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h"): - usage() - if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.h"): - usage() - if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.qdoc"): + if not (os.path.isdir(qtsrcdir) + and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'tools', leaf)) + for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))): usage() (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir) @@ -377,7 +358,7 @@ def main(): locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map) dupes = findDupes(language_map, country_map) - cldr_version = 
eltText(firstChildElt(doc.documentElement, "version")) + cldr_version = firstChildText(doc.documentElement, "version") data_temp_file.write(""" /* @@ -455,9 +436,7 @@ def main(): index += count data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0])) data_temp_file.write(" 0 // trailing 0\n") - data_temp_file.write("};\n") - - data_temp_file.write("\n") + data_temp_file.write("};\n\n") list_pattern_part_data = StringData('list_pattern_part_data') date_format_data = StringData('date_format_data') |