From da90a3a490b452154391676857ac2ffc269d4843 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 19 Nov 2012 19:12:58 +0200 Subject: QLocale: replace hard-coded default country-for-language map ...with a generated one in a way similar to what http://www.unicode.org/reports/tr35/#Likely_Subtags suggests. The supplemental/likelySubtags.xml contains all the required data. This changes some default countries to the most-expected ones. Change-Id: I920a5623601d8661a943e78197d3bcc838191483 Reviewed-by: Lars Knoll --- util/local_database/cldr2qlocalexml.py | 175 ++++++++++++++------------------- 1 file changed, 72 insertions(+), 103 deletions(-) (limited to 'util/local_database/cldr2qlocalexml.py') diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py index 2ad41cbec9..b5c4a79d3c 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/local_database/cldr2qlocalexml.py @@ -607,109 +607,78 @@ for id in enumdata.country_list: print " " print " " -print \ -" \n\ - \n\ - Afrikaans\n\ - SouthAfrica\n\ - \n\ - \n\ - Afan\n\ - Ethiopia\n\ - \n\ - \n\ - Afar\n\ - Djibouti\n\ - \n\ - \n\ - Arabic\n\ - SaudiArabia\n\ - \n\ - \n\ - Chinese\n\ - China\n\ - \n\ - \n\ - Dutch\n\ - Netherlands\n\ - \n\ - \n\ - English\n\ - UnitedStates\n\ - \n\ - \n\ - French\n\ - France\n\ - \n\ - \n\ - German\n\ - Germany\n\ - \n\ - \n\ - Greek\n\ - Greece\n\ - \n\ - \n\ - Italian\n\ - Italy\n\ - \n\ - \n\ - Malay\n\ - Malaysia\n\ - \n\ - \n\ - Portuguese\n\ - Portugal\n\ - \n\ - \n\ - Russian\n\ - RussianFederation\n\ - \n\ - \n\ - Serbian\n\ - SerbiaAndMontenegro\n\ - \n\ - \n\ - SerboCroatian\n\ - SerbiaAndMontenegro\n\ - \n\ - \n\ - Somali\n\ - Somalia\n\ - \n\ - \n\ - Spanish\n\ - Spain\n\ - \n\ - \n\ - Swahili\n\ - Kenya\n\ - \n\ - \n\ - Swedish\n\ - Sweden\n\ - \n\ - \n\ - Tigrinya\n\ - Eritrea\n\ - \n\ - \n\ - Uzbek\n\ - Uzbekistan\n\ - \n\ - \n\ - Persian\n\ - Iran\n\ - \n\ - \n\ - Mongolian\n\ - Mongolia\n\ - \n\ - \n\ - Nepali\n\ - Nepal\n\ - 
\n\ - " +def _parseLocale(l): + language = "AnyLanguage" + script = "AnyScript" + country = "AnyCountry" + + if l == "und": # we are treating unknown locale like C + return (None, None, None) + + items = l.split("_") + language_code = items[0] + if language_code != "und": + language_id = enumdata.languageCodeToId(language_code) + if language_id == -1: + sys.stderr.write("unknown language code \"" + language_code + "\"\n") + return (None, None, None) + language = enumdata.language_list[language_id][0] + + if len(items) > 1: + script_code = items[1] + country_code = "" + if len(items) > 2: + country_code = items[2] + if len(script_code) == 4: + script_id = enumdata.scriptCodeToId(script_code) + if script_id == -1: + sys.stderr.write("unknown script code \"" + script_code + "\"\n") + return (None, None, None) + script = enumdata.script_list[script_id][0] + else: + country_code = script_code + if country_code: + country_id = enumdata.countryCodeToId(country_code) + if country_id == -1: + sys.stderr.write("unknown country code \"" + country_code + "\"\n") + return (None, None, None) + country = enumdata.country_list[country_id][0] + + return (language, script, country) + +print " " +for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"): + tmp = {} + for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] + tmp[data[0]] = data[1] + + (from_language, from_script, from_country) = _parseLocale(tmp[u"from"]) + if not from_language: + sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") + continue + (to_language, to_script, to_country) = _parseLocale(tmp[u"to"]) + if not to_language: + sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n") + continue + # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags + if to_country == "AnyCountry" and from_country != to_country: + to_country = from_country + if 
to_script == "AnyScript" and from_script != to_script: + to_script = from_script + + print " " + print " " + print " " + from_language + "" + print " " + print " " + from_country + "" + print " " + print " " + print " " + to_language + "" + print " " + print " " + to_country + "" + print " " + print " " +print " " print " " print \ -- cgit v1.2.3