diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2019-05-08 15:20:30 +0200 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2019-05-20 20:42:11 +0200 |
commit | b7d8169f02c43f2726eae46e20b8ccb6e8b18ee4 (patch) | |
tree | 26261c13f50322e6d8b3716743835121d93ba23b /util | |
parent | 248b6756da0d31c58672c0e356c3ec16e9088234 (diff) |
Suggest name, when available, for unknown codes
When parsing the CLDR data, we only handle language, script and
territory (which we call country) codes if they are known to our
enumdata.py tables. When reporting the rest as unknown, in the
content of an actual locale definition (not the likely subtag data),
check whether en.xml can resolve the code for us; if it can, report
the full name it provides, as a hint to whoever's running the script
that an update to enumdata.py may be in order.
Change-Id: I9ca1d6922a91d45bc436f4b622e5557261897d7f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'util')
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 34 | ||||
-rw-r--r-- | util/locale_database/xpathlite.py | 28 |
2 files changed, 57 insertions, 5 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index 4ce0a6e3b1..d75ef282f9 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -95,6 +95,34 @@ def parse_number_format(patterns, data): result.append(pattern) return result +def raiseUnknownCode(code, form, cache={}): + """Check whether an unknown code could be supported. + + We declare a language, script or country code unknown if it's not + known to enumdata.py; however, if it's present in main/en.xml's + mapping of codes to names, we have the option of adding support. + This caches the necessary look-up (so we only read main/en.xml + once) and returns the name we should use if we do add support. + + First parameter, code, is the unknown code. Second parameter, + form, is one of 'language', 'script' or 'country' to select the + type of code to look up. Do not pass further parameters (the next + will deprive you of the cache). + + Raises xpathlite.Error with a suitable message, that includes the + unknown code's full name if found. + + Relies on global cldr_dir being set before it's called; see tail + of this file. + """ + if not cache: + cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) + name = cache[form].get(code) + msg = 'unknown %s code "%s"' % (form, code) + if name: + msg += ' - could use "%s"' % name + raise xpathlite.Error(msg) + def parse_list_pattern_part_format(pattern): # This is a very limited parsing of the format for list pattern part only. 
return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") @@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: - raise xpathlite.Error('unknown language code "%s"' % language_code) + raiseUnknownCode(language_code, 'language') script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: - raise xpathlite.Error('unknown script code "%s"' % script_code) + raiseUnknownCode(script_code, 'script') # we should handle fully qualified names with the territory if not country_code: return {} country_id = enumdata.countryCodeToId(country_code) if country_id <= 0: - raise xpathlite.Error('unknown country code "%s"' % country_code) + raiseUnknownCode(country_code, 'country') # So we say we accept only those values that have "contributed" or # "approved" resolution. see http://www.unicode.org/cldr/process.html diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py index 218135d7a7..97efaaab41 100644 --- a/util/locale_database/xpathlite.py +++ b/util/locale_database/xpathlite.py @@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None): return node return False +def codeMapsFromFile(file): + """Extract mappings of language, script and country codes to names. + + The file shall typically be common/main/en.xml, which contains a + localeDisplayNames element with children languages, scripts and + territories; each element in each of these has a code as its type + attribute and its name as element content. This returns a mapping + with keys 'language', 'script' and 'country', each of which + has, as value, a mapping of the relevant codes to names. 
+ """ + parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames') + keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {} + for src, dst in keys.items(): + child = findChild(parent, src) + data = result[dst] = {} + for elt in child.childNodes: + if elt.attributes and elt.attributes.has_key('type'): + key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText + # Don't over-write previously-read data for an alt form: + if elt.attributes.has_key('alt') and data.has_key(key): + continue + data[key] = value + + return result + def findTagsInFile(file, path): doc = parseDoc(file) elt = doc.documentElement tag_spec_list = path.split("/") last_entry = None - for i in range(len(tag_spec_list)): - tag_spec = tag_spec_list[i] + for tag_spec in tag_spec_list: tag_name = tag_spec arg_name = 'type' arg_value = '' |