diff options
author | Konstantin Ritt <ritt.ks@gmail.com> | 2012-11-19 19:12:58 +0200 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2012-11-19 21:18:03 +0100 |
commit | da90a3a490b452154391676857ac2ffc269d4843 (patch) | |
tree | 9671ccef3d69ea168425d703c08ead4809bc5f4c /util | |
parent | b256c47d625c519a88fe9dd96611015445776434 (diff) |
QLocale: replace hard-coded default country-for-language map
...with a generated one in a way similar to what
http://www.unicode.org/reports/tr35/#Likely_Subtags suggests.
The supplemental/likelySubtags.xml contains all the required data.
This changes some default countries to the most-expected ones.
Change-Id: I920a5623601d8661a943e78197d3bcc838191483
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'util')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 175 | ||||
-rwxr-xr-x | util/local_database/qlocalexml2cpp.py | 34 |
2 files changed, 98 insertions, 111 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index 2ad41cbec9..b5c4a79d3c 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -607,109 +607,78 @@ for id in enumdata.country_list: print " </country>" print " </countryList>" -print \ -" <defaultCountryList>\n\ - <defaultCountry>\n\ - <language>Afrikaans</language>\n\ - <country>SouthAfrica</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Afan</language>\n\ - <country>Ethiopia</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Afar</language>\n\ - <country>Djibouti</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Arabic</language>\n\ - <country>SaudiArabia</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Chinese</language>\n\ - <country>China</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Dutch</language>\n\ - <country>Netherlands</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>English</language>\n\ - <country>UnitedStates</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>French</language>\n\ - <country>France</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>German</language>\n\ - <country>Germany</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Greek</language>\n\ - <country>Greece</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Italian</language>\n\ - <country>Italy</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Malay</language>\n\ - <country>Malaysia</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Portuguese</language>\n\ - <country>Portugal</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Russian</language>\n\ - <country>RussianFederation</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Serbian</language>\n\ - 
<country>SerbiaAndMontenegro</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>SerboCroatian</language>\n\ - <country>SerbiaAndMontenegro</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Somali</language>\n\ - <country>Somalia</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Spanish</language>\n\ - <country>Spain</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Swahili</language>\n\ - <country>Kenya</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Swedish</language>\n\ - <country>Sweden</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Tigrinya</language>\n\ - <country>Eritrea</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Uzbek</language>\n\ - <country>Uzbekistan</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Persian</language>\n\ - <country>Iran</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Mongolian</language>\n\ - <country>Mongolia</country>\n\ - </defaultCountry>\n\ - <defaultCountry>\n\ - <language>Nepali</language>\n\ - <country>Nepal</country>\n\ - </defaultCountry>\n\ - </defaultCountryList>" +def _parseLocale(l): + language = "AnyLanguage" + script = "AnyScript" + country = "AnyCountry" + + if l == "und": # we are treating unknown locale like C + return (None, None, None) + + items = l.split("_") + language_code = items[0] + if language_code != "und": + language_id = enumdata.languageCodeToId(language_code) + if language_id == -1: + sys.stderr.write("unknown language code \"" + language_code + "\"\n") + return (None, None, None) + language = enumdata.language_list[language_id][0] + + if len(items) > 1: + script_code = items[1] + country_code = "" + if len(items) > 2: + country_code = items[2] + if len(script_code) == 4: + script_id = enumdata.scriptCodeToId(script_code) + if script_id == -1: + sys.stderr.write("unknown script code \"" + script_code + "\"\n") + 
return (None, None, None) + script = enumdata.script_list[script_id][0] + else: + country_code = script_code + if country_code: + country_id = enumdata.countryCodeToId(country_code) + if country_id == -1: + sys.stderr.write("unknown country code \"" + country_code + "\"\n") + return (None, None, None) + country = enumdata.country_list[country_id][0] + + return (language, script, country) + +print " <likelySubtags>" +for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"): + tmp = {} + for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] + tmp[data[0]] = data[1] + + (from_language, from_script, from_country) = _parseLocale(tmp[u"from"]) + if not from_language: + sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") + continue + (to_language, to_script, to_country) = _parseLocale(tmp[u"to"]) + if not to_language: + sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") + continue + # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags + if to_country == "AnyCountry" and from_country != to_country: + to_country = from_country + if to_script == "AnyScript" and from_script != to_script: + to_script = from_script + + print " <likelySubtag>" + print " <from>" + print " <language>" + from_language + "</language>" + print " <script>" + from_script + "</script>" + print " <country>" + from_country + "</country>" + print " </from>" + print " <to>" + print " <language>" + to_language + "</language>" + print " <script>" + to_script + "</script>" + print " <country>" + to_country + "</country>" + print " </to>" + print " </likelySubtag>" +print " </likelySubtags>" print " <localeList>" print \ diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py index cb243652fc..b225d4fba5 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/local_database/qlocalexml2cpp.py 
@@ -129,16 +129,29 @@ def loadCountryMap(doc): return result -def loadDefaultMap(doc): +def loadLikelySubtagsMap(doc): result = {} - list_elt = firstChildElt(doc.documentElement, "defaultCountryList") - elt = firstChildElt(list_elt, "defaultCountry") + i = 0 + list_elt = firstChildElt(doc.documentElement, "likelySubtags") + elt = firstChildElt(list_elt, "likelySubtag") while elt: - country = eltText(firstChildElt(elt, "country")); - language = eltText(firstChildElt(elt, "language")); - result[language] = country; - elt = nextSiblingElt(elt, "defaultCountry"); + elt_from = firstChildElt(elt, "from") + from_language = eltText(firstChildElt(elt_from, "language")); + from_script = eltText(firstChildElt(elt_from, "script")); + from_country = eltText(firstChildElt(elt_from, "country")); + + elt_to = firstChildElt(elt, "to") + to_language = eltText(firstChildElt(elt_to, "language")); + to_script = eltText(firstChildElt(elt_to, "script")); + to_country = eltText(firstChildElt(elt_to, "country")); + + tmp = {} + tmp["from"] = (from_language, from_script, from_country) + tmp["to"] = (to_language, to_script, to_country) + result[i] = tmp; + i += 1 + elt = nextSiblingElt(elt, "likelySubtag"); return result def fixedScriptName(name, dupes): @@ -459,7 +472,12 @@ def main(): language_map = loadLanguageMap(doc) script_map = loadScriptMap(doc) country_map = loadCountryMap(doc) - default_map = loadDefaultMap(doc) + likely_subtags_map = loadLikelySubtagsMap(doc) + default_map = {} + for key in likely_subtags_map.keys(): + tmp = likely_subtags_map[key] + if tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry" and tmp["from"][1] == "AnyScript": + default_map[tmp["to"][0]] = tmp["to"][2] locale_map = loadLocaleMap(doc, language_map, script_map, country_map) dupes = findDupes(language_map, country_map) |