summaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
author: Edward Welbourne <edward.welbourne@qt.io> 2019-05-08 15:20:30 +0200
committer: Edward Welbourne <edward.welbourne@qt.io> 2019-05-20 20:42:11 +0200
commit: b7d8169f02c43f2726eae46e20b8ccb6e8b18ee4 (patch)
tree: 26261c13f50322e6d8b3716743835121d93ba23b /util
parent: 248b6756da0d31c58672c0e356c3ec16e9088234 (diff)
Suggest name, when available, for unknown codes
When parsing the CLDR data, we only handle language, script and territory (which we call country) codes if they are known to our enumdata.py tables. When reporting the rest as unknown, in the context of an actual locale definition (not the likely subtag data), check whether en.xml can resolve the code for us; if it can, report the full name it provides, as a hint to whoever's running the script that an update to enumdata.py may be in order.

Change-Id: I9ca1d6922a91d45bc436f4b622e5557261897d7f
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'util')
-rwxr-xr-x util/locale_database/cldr2qlocalexml.py 34
-rw-r--r-- util/locale_database/xpathlite.py 28
2 files changed, 57 insertions, 5 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index 4ce0a6e3b1..d75ef282f9 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -95,6 +95,34 @@ def parse_number_format(patterns, data):
result.append(pattern)
return result
+def raiseUnknownCode(code, form, cache={}):
+ """Report an unknown code, hinting at a name if one is available.
+
+ We declare a language, script or country code unknown if it's not
+ known to enumdata.py; however, if it's present in main/en.xml's
+ mapping of codes to names, we have the option of adding support.
+ This caches the necessary look-up (so we only read main/en.xml
+ once) and reports the name we should use if we do add support.
+
+ First parameter, code, is the unknown code. Second parameter,
+ form, is one of 'language', 'script' or 'country' to select the
+ type of code to look up. Do not pass further parameters (the next
+ will deprive you of the cache).
+
+ Never returns: always raises xpathlite.Error, with a message that
+ includes the unknown code's full name if found.
+
+ Relies on global cldr_dir being set before it's called; see tail
+ of this file.
+ """
+ if not cache:
+ cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
+ name = cache[form].get(code)
+ msg = 'unknown %s code "%s"' % (form, code)
+ if name:
+ msg += ' - could use "%s"' % name
+ raise xpathlite.Error(msg)
+
def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only.
return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
@@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
- raise xpathlite.Error('unknown language code "%s"' % language_code)
+ raiseUnknownCode(language_code, 'language')
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error('unknown script code "%s"' % script_code)
+ raiseUnknownCode(script_code, 'script')
# we should handle fully qualified names with the territory
if not country_code:
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
- raise xpathlite.Error('unknown country code "%s"' % country_code)
+ raiseUnknownCode(country_code, 'country')
# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py
index 218135d7a7..97efaaab41 100644
--- a/util/locale_database/xpathlite.py
+++ b/util/locale_database/xpathlite.py
@@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
return node
return False
+def codeMapsFromFile(file):
+ """Extract mappings of language, script and country codes to names.
+
+ The file shall typically be common/main/en.xml, which contains a
+ localeDisplayNames element with children languages, scripts and
+ territories; each element in each of these has a code as its type
+ attribute and its name as element content. This returns a mapping
+ with the keys 'language', 'script' and 'country', each of which
+ has, as value, a mapping of the relevant codes to names.
+ """
+ parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
+ keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
+ for src, dst in keys.items():
+ child = findChild(parent, src)
+ data = result[dst] = {}
+ for elt in child.childNodes:
+ if elt.attributes and elt.attributes.has_key('type'):
+ key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText
+ # Don't over-write previously-read data for an alt form:
+ if elt.attributes.has_key('alt') and data.has_key(key):
+ continue
+ data[key] = value
+
+ return result
+
def findTagsInFile(file, path):
doc = parseDoc(file)
elt = doc.documentElement
tag_spec_list = path.split("/")
last_entry = None
- for i in range(len(tag_spec_list)):
- tag_spec = tag_spec_list[i]
+ for tag_spec in tag_spec_list:
tag_name = tag_spec
arg_name = 'type'
arg_value = ''