2 files changed, 57 insertions, 5 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index 4ce0a6e3b1..d75ef282f9 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -95,6 +95,34 @@ def parse_number_format(patterns, data):
         result.append(pattern)
     return result
 
+def raiseUnknownCode(code, form, cache={}):
+    """Report an unknown code, hinting at a name for it if CLDR has one.
+
+    A language, script or country code counts as unknown when
+    enumdata.py has no entry for it.  If en.xml's mapping of codes
+    to names does know it, support could be added, so the message
+    suggests the English name to use.  The mappings are read from
+    en.xml on the first call only, then kept in cache.
+
+    First parameter, code, is the unknown code.  Second parameter,
+    form, is one of 'language', 'script' or 'country' to select the
+    type of code to look up.  Do not pass further parameters (the
+    next would replace the shared cache).
+
+    Always raises xpathlite.Error, never returns; the message names
+    the kind of code and the code itself, plus the full name en.xml
+    gives for it, if any.
+
+    Relies on global cldr_dir being set before it's called; see tail
+    of this file.
+    """
+    if not cache:
+        english = os.path.join(cldr_dir, 'en.xml')
+        cache.update(xpathlite.codeMapsFromFile(english))
+    name = cache[form].get(code)
+    hint = ' - could use "%s"' % name if name else ''
+    raise xpathlite.Error('unknown %s code "%s"' % (form, code) + hint)
+
 def parse_list_pattern_part_format(pattern):
     # This is a very limited parsing of the format for list pattern part only.
     return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
@@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
 
     language_id = enumdata.languageCodeToId(language_code)
     if language_id <= 0:
-        raise xpathlite.Error('unknown language code "%s"' % language_code)
+        raiseUnknownCode(language_code, 'language')
 
     script_id = enumdata.scriptCodeToId(script_code)
     if script_id == -1:
-        raise xpathlite.Error('unknown script code "%s"' % script_code)
+        raiseUnknownCode(script_code, 'script')
 
     # we should handle fully qualified names with the territory
     if not country_code:
         return {}
     country_id = enumdata.countryCodeToId(country_code)
     if country_id <= 0:
-        raise xpathlite.Error('unknown country code "%s"' % country_code)
+        raiseUnknownCode(country_code, 'country')
 
     # So we say we accept only those values that have "contributed" or
     # "approved" resolution. see http://www.unicode.org/cldr/process.html
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py
index 218135d7a7..97efaaab41 100644
--- a/util/locale_database/xpathlite.py
+++ b/util/locale_database/xpathlite.py
@@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
         return node
     return False
 
+def codeMapsFromFile(file):
+    """Extract mappings of language, script and country codes to names.
+
+    The file shall typically be common/main/en.xml, which contains a
+    localeDisplayNames element with children languages, scripts and
+    territories; each element in each of these has a code as its type
+    attribute and its name as element content.  Returns a mapping with
+    keys 'language', 'script' and 'country', each mapping the relevant
+    codes to names; elements with no content contribute no entry.
+    """
+    parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
+    keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
+    for src, dst in keys.items():
+        data = result[dst] = {}
+        for elt in findChild(parent, src).childNodes:
+            # Only elements have attributes; empty ones have no name to record.
+            if not (elt.attributes and elt.childNodes and elt.hasAttribute('type')):
+                continue
+            key = elt.getAttribute('type')
+            # Don't over-write previously-read data for an alt form:
+            if not (elt.hasAttribute('alt') and key in data):
+                data[key] = elt.childNodes[0].wholeText
+
+    return result
+
 def findTagsInFile(file, path):
     doc = parseDoc(file)
 
     elt = doc.documentElement
     tag_spec_list = path.split("/")
     last_entry = None
-    for i in range(len(tag_spec_list)):
-        tag_spec = tag_spec_list[i]
+    for tag_spec in tag_spec_list:
         tag_name = tag_spec
         arg_name = 'type'
         arg_value = ''