summary | refs | log | tree | commit | diff | stats
path: root/util/local_database
diff options
context:
space:
mode:
Diffstat (limited to 'util/local_database')
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 175
-rwxr-xr-x util/local_database/qlocalexml2cpp.py 34
2 files changed, 98 insertions, 111 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 2ad41cbec9..b5c4a79d3c 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -607,109 +607,78 @@ for id in enumdata.country_list:
print " </country>"
print " </countryList>"
-print \
-" <defaultCountryList>\n\
- <defaultCountry>\n\
- <language>Afrikaans</language>\n\
- <country>SouthAfrica</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Afan</language>\n\
- <country>Ethiopia</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Afar</language>\n\
- <country>Djibouti</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Arabic</language>\n\
- <country>SaudiArabia</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Chinese</language>\n\
- <country>China</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Dutch</language>\n\
- <country>Netherlands</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>English</language>\n\
- <country>UnitedStates</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>French</language>\n\
- <country>France</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>German</language>\n\
- <country>Germany</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Greek</language>\n\
- <country>Greece</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Italian</language>\n\
- <country>Italy</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Malay</language>\n\
- <country>Malaysia</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Portuguese</language>\n\
- <country>Portugal</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Russian</language>\n\
- <country>RussianFederation</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Serbian</language>\n\
- <country>SerbiaAndMontenegro</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>SerboCroatian</language>\n\
- <country>SerbiaAndMontenegro</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Somali</language>\n\
- <country>Somalia</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Spanish</language>\n\
- <country>Spain</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Swahili</language>\n\
- <country>Kenya</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Swedish</language>\n\
- <country>Sweden</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Tigrinya</language>\n\
- <country>Eritrea</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Uzbek</language>\n\
- <country>Uzbekistan</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Persian</language>\n\
- <country>Iran</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Mongolian</language>\n\
- <country>Mongolia</country>\n\
- </defaultCountry>\n\
- <defaultCountry>\n\
- <language>Nepali</language>\n\
- <country>Nepal</country>\n\
- </defaultCountry>\n\
- </defaultCountryList>"
+def _parseLocale(l):
+ language = "AnyLanguage"
+ script = "AnyScript"
+ country = "AnyCountry"
+
+ if l == "und": # we are treating unknown locale like C
+ return (None, None, None)
+
+ items = l.split("_")
+ language_code = items[0]
+ if language_code != "und":
+ language_id = enumdata.languageCodeToId(language_code)
+ if language_id == -1:
+ sys.stderr.write("unknown language code \"" + language_code + "\"\n")
+ return (None, None, None)
+ language = enumdata.language_list[language_id][0]
+
+ if len(items) > 1:
+ script_code = items[1]
+ country_code = ""
+ if len(items) > 2:
+ country_code = items[2]
+ if len(script_code) == 4:
+ script_id = enumdata.scriptCodeToId(script_code)
+ if script_id == -1:
+ sys.stderr.write("unknown script code \"" + script_code + "\"\n")
+ return (None, None, None)
+ script = enumdata.script_list[script_id][0]
+ else:
+ country_code = script_code
+ if country_code:
+ country_id = enumdata.countryCodeToId(country_code)
+ if country_id == -1:
+ sys.stderr.write("unknown country code \"" + country_code + "\"\n")
+ return (None, None, None)
+ country = enumdata.country_list[country_id][0]
+
+ return (language, script, country)
+
+print " <likelySubtags>"
+for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"):
+ tmp = {}
+ for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]]
+ tmp[data[0]] = data[1]
+
+ (from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
+ if not from_language:
+ sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
+ continue
+ (to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
+ if not to_language:
+ sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
+ continue
+ # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
+ if to_country == "AnyCountry" and from_country != to_country:
+ to_country = from_country
+ if to_script == "AnyScript" and from_script != to_script:
+ to_script = from_script
+
+ print " <likelySubtag>"
+ print " <from>"
+ print " <language>" + from_language + "</language>"
+ print " <script>" + from_script + "</script>"
+ print " <country>" + from_country + "</country>"
+ print " </from>"
+ print " <to>"
+ print " <language>" + to_language + "</language>"
+ print " <script>" + to_script + "</script>"
+ print " <country>" + to_country + "</country>"
+ print " </to>"
+ print " </likelySubtag>"
+print " </likelySubtags>"
print " <localeList>"
print \
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index cb243652fc..b225d4fba5 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -129,16 +129,29 @@ def loadCountryMap(doc):
return result
-def loadDefaultMap(doc):
+def loadLikelySubtagsMap(doc):
result = {}
- list_elt = firstChildElt(doc.documentElement, "defaultCountryList")
- elt = firstChildElt(list_elt, "defaultCountry")
+ i = 0
+ list_elt = firstChildElt(doc.documentElement, "likelySubtags")
+ elt = firstChildElt(list_elt, "likelySubtag")
while elt:
- country = eltText(firstChildElt(elt, "country"));
- language = eltText(firstChildElt(elt, "language"));
- result[language] = country;
- elt = nextSiblingElt(elt, "defaultCountry");
+ elt_from = firstChildElt(elt, "from")
+ from_language = eltText(firstChildElt(elt_from, "language"));
+ from_script = eltText(firstChildElt(elt_from, "script"));
+ from_country = eltText(firstChildElt(elt_from, "country"));
+
+ elt_to = firstChildElt(elt, "to")
+ to_language = eltText(firstChildElt(elt_to, "language"));
+ to_script = eltText(firstChildElt(elt_to, "script"));
+ to_country = eltText(firstChildElt(elt_to, "country"));
+
+ tmp = {}
+ tmp["from"] = (from_language, from_script, from_country)
+ tmp["to"] = (to_language, to_script, to_country)
+ result[i] = tmp;
+ i += 1
+ elt = nextSiblingElt(elt, "likelySubtag");
return result
def fixedScriptName(name, dupes):
@@ -459,7 +472,12 @@ def main():
language_map = loadLanguageMap(doc)
script_map = loadScriptMap(doc)
country_map = loadCountryMap(doc)
- default_map = loadDefaultMap(doc)
+ likely_subtags_map = loadLikelySubtagsMap(doc)
+ default_map = {}
+ for key in likely_subtags_map.keys():
+ tmp = likely_subtags_map[key]
+ if tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry" and tmp["from"][1] == "AnyScript":
+ default_map[tmp["to"][0]] = tmp["to"][2]
locale_map = loadLocaleMap(doc, language_map, script_map, country_map)
dupes = findDupes(language_map, country_map)