#!/usr/bin/env python ############################################################################# ## ## Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). ## Contact: http://www.qt-project.org/legal ## ## This file is part of the test suite of the Qt Toolkit. ## ## $QT_BEGIN_LICENSE:LGPL$ ## Commercial License Usage ## Licensees holding valid commercial Qt licenses may use this file in ## accordance with the commercial license agreement provided with the ## Software or, alternatively, in accordance with the terms contained in ## a written agreement between you and Digia. For licensing terms and ## conditions see http://qt.digia.com/licensing. For further information ## use the contact form at http://qt.digia.com/contact-us. ## ## GNU Lesser General Public License Usage ## Alternatively, this file may be used under the terms of the GNU Lesser ## General Public License version 2.1 as published by the Free Software ## Foundation and appearing in the file LICENSE.LGPL included in the ## packaging of this file. Please review the following information to ## ensure the GNU Lesser General Public License version 2.1 requirements ## will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. ## ## In addition, as a special exception, Digia gives you certain additional ## rights. These rights are described in the Digia Qt LGPL Exception ## version 1.1, included in the file LGPL_EXCEPTION.txt in this package. ## ## GNU General Public License Usage ## Alternatively, this file may be used under the terms of the GNU ## General Public License version 3.0 as published by the Free Software ## Foundation and appearing in the file LICENSE.GPL included in the ## packaging of this file. Please review the following information to ## ensure the GNU General Public License version 3.0 requirements will be ## met: http://www.gnu.org/copyleft/gpl.html. ## ## ## $QT_END_LICENSE$ ## ############################################################################# import os import sys import enumdata import xpathlite from xpathlite import DraftResolution from dateconverter import convert_date import re findAlias = xpathlite.findAlias findEntry = xpathlite.findEntry findEntryInFile = xpathlite._findEntryInFile findTagsInFile = xpathlite.findTagsInFile def parse_number_format(patterns, data): # this is a very limited parsing of the number format for currency only. def skip_repeating_pattern(x): p = x.replace('0', '#').replace(',', '').replace('.', '') seen = False result = '' for c in p: if c == '#': if seen: continue seen = True else: seen = False result = result + c return result patterns = patterns.split(';') result = [] for pattern in patterns: pattern = skip_repeating_pattern(pattern) pattern = pattern.replace('#', "%1") # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns # there can be doubled or trippled currency sign, however none of the # locales use that. pattern = pattern.replace(u'\xa4', "%2") pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'") pattern = pattern.replace('-', data['minus']) pattern = pattern.replace('+', data['plus']) result.append(pattern) return result def parse_list_pattern_part_format(pattern): # this is a very limited parsing of the format for list pattern part only. result = "" result = pattern.replace("{0}", "%1") result = result.replace("{1}", "%2") result = result.replace("{2}", "%3") return result def ordStr(c): if len(c) == 1: return str(ord(c)) return "##########" # the following functions are supposed to fix the problem with QLocale # returning a character instead of strings for QLocale::exponential() # and others. So we fallback to default values in these cases. def fixOrdStrExp(c): if len(c) == 1: return str(ord(c)) return str(ord('e')) def fixOrdStrPercent(c): if len(c) == 1: return str(ord(c)) return str(ord('%')) def fixOrdStrList(c): if len(c) == 1: return str(ord(c)) return str(ord(';')) def generateLocaleInfo(path): (dir_name, file_name) = os.path.split(path) if not path.endswith(".xml"): return {} # skip legacy/compatibility ones alias = findAlias(path) if alias: raise xpathlite.Error("alias to \"%s\"" % alias) language_code = findEntryInFile(path, "identity/language", attribute="type")[0] if language_code == 'root': # just skip it return {} country_code = findEntryInFile(path, "identity/territory", attribute="type")[0] script_code = findEntryInFile(path, "identity/script", attribute="type")[0] variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0] # we do not support variants # ### actually there is only one locale with variant: en_US_POSIX # does anybody care about it at all? if variant_code: raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code) language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: raise xpathlite.Error("unknown language code \"%s\"" % language_code) language = enumdata.language_list[language_id][0] script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: raise xpathlite.Error("unknown script code \"%s\"" % script_code) script = enumdata.script_list[script_id][0] # we should handle fully qualified names with the territory if not country_code: return {} country_id = enumdata.countryCodeToId(country_code) if country_id <= 0: raise xpathlite.Error("unknown country code \"%s\"" % country_code) country = enumdata.country_list[country_id][0] # So we say we accept only those values that have "contributed" or # "approved" resolution. see http://www.unicode.org/cldr/process.html # But we only respect the resolution for new datas for backward # compatibility. draft = DraftResolution.contributed result = {} result['language'] = language result['script'] = script result['country'] = country result['language_code'] = language_code result['country_code'] = country_code result['script_code'] = script_code result['variant_code'] = variant_code result['language_id'] = language_id result['script_id'] = script_id result['country_id'] = country_id supplementalPath = dir_name + "/../supplemental/supplementalData.xml" currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code); result['currencyIsoCode'] = '' result['currencyDigits'] = 2 result['currencyRounding'] = 1 if currencies: for e in currencies: if e[0] == 'currency': tender = True t = filter(lambda x: x[0] == 'tender', e[1]) if t and t[0][1] == 'false': tender = False; if tender and not filter(lambda x: x[0] == 'to', e[1]): result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1] break if result['currencyIsoCode']: t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']); if t and t[0][0] == 'info': result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1]) result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1]) numbering_system = None try: numbering_system = findEntry(path, "numbers/defaultNumberingSystem") except: pass def findEntryDef(path, xpath, value=''): try: return findEntry(path, xpath) except xpathlite.Error: return value def get_number_in_system(path, xpath, numbering_system): if numbering_system: try: return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") except xpathlite.Error: # in CLDR 1.9 number system was refactored for numbers (but not for currency) # so if previous findEntry doesn't work we should try this: try: return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/")) except xpathlite.Error: # fallback to default pass return findEntry(path, xpath) result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system) result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system) result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system) result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) try: numbering_systems = {} for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"): tmp = {} id = "" for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]] tmp[data[0]] = data[1] if data[0] == u"id": id = data[1] numbering_systems[id] = tmp result['zero'] = numbering_systems[numbering_system][u"digits"][0] except e: sys.stderr.write("Native zero detection problem:\n" + str(e) + "\n") result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system) result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system) result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system) result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower() result['quotationStart'] = findEntry(path, "delimiters/quotationStart") result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd") result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart") result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd") result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]")) result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]")) result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]")) result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]")) result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft) result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft) result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern")) result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern")) result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern")) result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern")) endonym = None if country_code and script_code: endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code)) if not endonym and script_code: endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code)) if not endonym and country_code: endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code)) if not endonym: endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code)) result['language_endonym'] = endonym result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code)) currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system) currency_format = parse_number_format(currency_format, result) result['currencyFormat'] = currency_format[0] result['currencyNegativeFormat'] = '' if len(currency_format) > 1: result['currencyNegativeFormat'] = currency_format[1] result['currencySymbol'] = '' result['currencyDisplayName'] = '' if result['currencyIsoCode']: result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode'] result['currencyDisplayName'] \ = findEntryDef(path, display_name_path) + ";" \ + findEntryDef(path, display_name_path + "[count=zero]") + ";" \ + findEntryDef(path, display_name_path + "[count=one]") + ";" \ + findEntryDef(path, display_name_path + "[count=two]") + ";" \ + findEntryDef(path, display_name_path + "[count=few]") + ";" \ + findEntryDef(path, display_name_path + "[count=many]") + ";" \ + findEntryDef(path, display_name_path + "[count=other]") + ";" standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month" result['standaloneLongMonths'] \ = findEntry(path, standalone_long_month_path + "[1]") + ";" \ + findEntry(path, standalone_long_month_path + "[2]") + ";" \ + findEntry(path, standalone_long_month_path + "[3]") + ";" \ + findEntry(path, standalone_long_month_path + "[4]") + ";" \ + findEntry(path, standalone_long_month_path + "[5]") + ";" \ + findEntry(path, standalone_long_month_path + "[6]") + ";" \ + findEntry(path, standalone_long_month_path + "[7]") + ";" \ + findEntry(path, standalone_long_month_path + "[8]") + ";" \ + findEntry(path, standalone_long_month_path + "[9]") + ";" \ + findEntry(path, standalone_long_month_path + "[10]") + ";" \ + findEntry(path, standalone_long_month_path + "[11]") + ";" \ + findEntry(path, standalone_long_month_path + "[12]") + ";" standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month" result['standaloneShortMonths'] \ = findEntry(path, standalone_short_month_path + "[1]") + ";" \ + findEntry(path, standalone_short_month_path + "[2]") + ";" \ + findEntry(path, standalone_short_month_path + "[3]") + ";" \ + findEntry(path, standalone_short_month_path + "[4]") + ";" \ + findEntry(path, standalone_short_month_path + "[5]") + ";" \ + findEntry(path, standalone_short_month_path + "[6]") + ";" \ + findEntry(path, standalone_short_month_path + "[7]") + ";" \ + findEntry(path, standalone_short_month_path + "[8]") + ";" \ + findEntry(path, standalone_short_month_path + "[9]") + ";" \ + findEntry(path, standalone_short_month_path + "[10]") + ";" \ + findEntry(path, standalone_short_month_path + "[11]") + ";" \ + findEntry(path, standalone_short_month_path + "[12]") + ";" standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month" result['standaloneNarrowMonths'] \ = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \ + findEntry(path, standalone_narrow_month_path + "[12]") + ";" long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month" result['longMonths'] \ = findEntry(path, long_month_path + "[1]") + ";" \ + findEntry(path, long_month_path + "[2]") + ";" \ + findEntry(path, long_month_path + "[3]") + ";" \ + findEntry(path, long_month_path + "[4]") + ";" \ + findEntry(path, long_month_path + "[5]") + ";" \ + findEntry(path, long_month_path + "[6]") + ";" \ + findEntry(path, long_month_path + "[7]") + ";" \ + findEntry(path, long_month_path + "[8]") + ";" \ + findEntry(path, long_month_path + "[9]") + ";" \ + findEntry(path, long_month_path + "[10]") + ";" \ + findEntry(path, long_month_path + "[11]") + ";" \ + findEntry(path, long_month_path + "[12]") + ";" short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month" result['shortMonths'] \ = findEntry(path, short_month_path + "[1]") + ";" \ + findEntry(path, short_month_path + "[2]") + ";" \ + findEntry(path, short_month_path + "[3]") + ";" \ + findEntry(path, short_month_path + "[4]") + ";" \ + findEntry(path, short_month_path + "[5]") + ";" \ + findEntry(path, short_month_path + "[6]") + ";" \ + findEntry(path, short_month_path + "[7]") + ";" \ + findEntry(path, short_month_path + "[8]") + ";" \ + findEntry(path, short_month_path + "[9]") + ";" \ + findEntry(path, short_month_path + "[10]") + ";" \ + findEntry(path, short_month_path + "[11]") + ";" \ + findEntry(path, short_month_path + "[12]") + ";" narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month" result['narrowMonths'] \ = findEntry(path, narrow_month_path + "[1]") + ";" \ + findEntry(path, narrow_month_path + "[2]") + ";" \ + findEntry(path, narrow_month_path + "[3]") + ";" \ + findEntry(path, narrow_month_path + "[4]") + ";" \ + findEntry(path, narrow_month_path + "[5]") + ";" \ + findEntry(path, narrow_month_path + "[6]") + ";" \ + findEntry(path, narrow_month_path + "[7]") + ";" \ + findEntry(path, narrow_month_path + "[8]") + ";" \ + findEntry(path, narrow_month_path + "[9]") + ";" \ + findEntry(path, narrow_month_path + "[10]") + ";" \ + findEntry(path, narrow_month_path + "[11]") + ";" \ + findEntry(path, narrow_month_path + "[12]") + ";" long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day" result['longDays'] \ = findEntry(path, long_day_path + "[sun]") + ";" \ + findEntry(path, long_day_path + "[mon]") + ";" \ + findEntry(path, long_day_path + "[tue]") + ";" \ + findEntry(path, long_day_path + "[wed]") + ";" \ + findEntry(path, long_day_path + "[thu]") + ";" \ + findEntry(path, long_day_path + "[fri]") + ";" \ + findEntry(path, long_day_path + "[sat]") + ";" short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day" result['shortDays'] \ = findEntry(path, short_day_path + "[sun]") + ";" \ + findEntry(path, short_day_path + "[mon]") + ";" \ + findEntry(path, short_day_path + "[tue]") + ";" \ + findEntry(path, short_day_path + "[wed]") + ";" \ + findEntry(path, short_day_path + "[thu]") + ";" \ + findEntry(path, short_day_path + "[fri]") + ";" \ + findEntry(path, short_day_path + "[sat]") + ";" narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day" result['narrowDays'] \ = findEntry(path, narrow_day_path + "[sun]") + ";" \ + findEntry(path, narrow_day_path + "[mon]") + ";" \ + findEntry(path, narrow_day_path + "[tue]") + ";" \ + findEntry(path, narrow_day_path + "[wed]") + ";" \ + findEntry(path, narrow_day_path + "[thu]") + ";" \ + findEntry(path, narrow_day_path + "[fri]") + ";" \ + findEntry(path, narrow_day_path + "[sat]") + ";" standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day" result['standaloneLongDays'] \ = findEntry(path, standalone_long_day_path + "[sun]") + ";" \ + findEntry(path, standalone_long_day_path + "[mon]") + ";" \ + findEntry(path, standalone_long_day_path + "[tue]") + ";" \ + findEntry(path, standalone_long_day_path + "[wed]") + ";" \ + findEntry(path, standalone_long_day_path + "[thu]") + ";" \ + findEntry(path, standalone_long_day_path + "[fri]") + ";" \ + findEntry(path, standalone_long_day_path + "[sat]") + ";" standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day" result['standaloneShortDays'] \ = findEntry(path, standalone_short_day_path + "[sun]") + ";" \ + findEntry(path, standalone_short_day_path + "[mon]") + ";" \ + findEntry(path, standalone_short_day_path + "[tue]") + ";" \ + findEntry(path, standalone_short_day_path + "[wed]") + ";" \ + findEntry(path, standalone_short_day_path + "[thu]") + ";" \ + findEntry(path, standalone_short_day_path + "[fri]") + ";" \ + findEntry(path, standalone_short_day_path + "[sat]") + ";" standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day" result['standaloneNarrowDays'] \ = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \ + findEntry(path, standalone_narrow_day_path + "[sat]") + ";" return result def addEscapes(s): result = '' for c in s: n = ord(c) if n < 128: result += c else: result += "\\x" result += "%02x" % (n) return result def unicodeStr(s): utf8 = s.encode('utf-8') return "" + str(len(utf8)) + "" + addEscapes(utf8) + "" def usage(): print "Usage: cldr2qlocalexml.py " sys.exit() def integrateWeekData(filePath): if not filePath.endswith(".xml"): return {} monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ") tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ") wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ") thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ") friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ") satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ") sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ") monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ") tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ") wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ") thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ") friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ") satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ") sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ") monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ") tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ") wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ") thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ") friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ") satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ") sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ") firstDayByCountryCode = {} for countryCode in monFirstDayIn: firstDayByCountryCode[countryCode] = "mon" for countryCode in tueFirstDayIn: firstDayByCountryCode[countryCode] = "tue" for countryCode in wedFirstDayIn: firstDayByCountryCode[countryCode] = "wed" for countryCode in thuFirstDayIn: firstDayByCountryCode[countryCode] = "thu" for countryCode in friFirstDayIn: firstDayByCountryCode[countryCode] = "fri" for countryCode in satFirstDayIn: firstDayByCountryCode[countryCode] = "sat" for countryCode in sunFirstDayIn: firstDayByCountryCode[countryCode] = "sun" weekendStartByCountryCode = {} for countryCode in monWeekendStart: weekendStartByCountryCode[countryCode] = "mon" for countryCode in tueWeekendStart: weekendStartByCountryCode[countryCode] = "tue" for countryCode in wedWeekendStart: weekendStartByCountryCode[countryCode] = "wed" for countryCode in thuWeekendStart: weekendStartByCountryCode[countryCode] = "thu" for countryCode in friWeekendStart: weekendStartByCountryCode[countryCode] = "fri" for countryCode in satWeekendStart: weekendStartByCountryCode[countryCode] = "sat" for countryCode in sunWeekendStart: weekendStartByCountryCode[countryCode] = "sun" weekendEndByCountryCode = {} for countryCode in monWeekendEnd: weekendEndByCountryCode[countryCode] = "mon" for countryCode in tueWeekendEnd: weekendEndByCountryCode[countryCode] = "tue" for countryCode in wedWeekendEnd: weekendEndByCountryCode[countryCode] = "wed" for countryCode in thuWeekendEnd: weekendEndByCountryCode[countryCode] = "thu" for countryCode in friWeekendEnd: weekendEndByCountryCode[countryCode] = "fri" for countryCode in satWeekendEnd: weekendEndByCountryCode[countryCode] = "sat" for countryCode in sunWeekendEnd: weekendEndByCountryCode[countryCode] = "sun" for (key,locale) in locale_database.iteritems(): countryCode = locale['country_code'] if countryCode in firstDayByCountryCode: locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode] else: locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"] if countryCode in weekendStartByCountryCode: locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode] else: locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"] if countryCode in weekendEndByCountryCode: locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode] else: locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"] if len(sys.argv) != 2: usage() cldr_dir = sys.argv[1] if not os.path.isdir(cldr_dir): usage() cldr_files = os.listdir(cldr_dir) locale_database = {} for file in cldr_files: try: l = generateLocaleInfo(cldr_dir + "/" + file) if not l: sys.stderr.write("skipping file \"" + file + "\"\n") continue except xpathlite.Error as e: sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e))) continue locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") locale_keys = locale_database.keys() locale_keys.sort() cldr_version = 'unknown' ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r") for line in ldml: if 'version cldrVersion CDATA #FIXED' in line: cldr_version = line.split('"')[1] print "" print " " + cldr_version + "" print " " for id in enumdata.language_list: l = enumdata.language_list[id] print " " print " " + l[0] + "" print " " + str(id) + "" print " " + l[1] + "" print " " print " " print " " for id in enumdata.script_list: l = enumdata.script_list[id] print " " print " " print " " for id in enumdata.country_list: l = enumdata.country_list[id] print " " print " " + l[0] + "" print " " + str(id) + "" print " " + l[1] + "" print " " print " " def _parseLocale(l): language = "AnyLanguage" script = "AnyScript" country = "AnyCountry" if l == "und": raise xpathlite.Error("we are treating unknown locale like C") items = l.split("_") language_code = items[0] if language_code != "und": language_id = enumdata.languageCodeToId(language_code) if language_id == -1: raise xpathlite.Error("unknown language code \"%s\"" % language_code) language = enumdata.language_list[language_id][0] if len(items) > 1: script_code = items[1] country_code = "" if len(items) > 2: country_code = items[2] if len(script_code) == 4: script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: raise xpathlite.Error("unknown script code \"%s\"" % script_code) script = enumdata.script_list[script_id][0] else: country_code = script_code if country_code: country_id = enumdata.countryCodeToId(country_code) if country_id == -1: raise xpathlite.Error("unknown country code \"%s\"" % country_code) country = enumdata.country_list[country_id][0] return (language, script, country) print " " for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"): tmp = {} for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] tmp[data[0]] = data[1] try: (from_language, from_script, from_country) = _parseLocale(tmp[u"from"]) except xpathlite.Error as e: sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e))) continue try: (to_language, to_script, to_country) = _parseLocale(tmp[u"to"]) except xpathlite.Error as e: sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e))) continue # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags if to_country == "AnyCountry" and from_country != to_country: to_country = from_country if to_script == "AnyScript" and from_script != to_script: to_script = from_script print " " print " " print " " + from_language + "" print " " print " " + from_country + "" print " " print " " print " " + to_language + "" print " " print " " + to_country + "" print " " print " " print " " print " " print \ " \n\ C\n\ \n\ \n\ AnyCountry\n\ \n\ 46\n\ 44\n\ 59\n\ 37\n\ 48\n\ 45\n\ 43\n\ 101\n\ \"\n\ \"\n\ \'\n\ \'\n\ %1, %2\n\ %1, %2\n\ %1, %2\n\ %1, %2\n\ AM\n\ PM\n\ mon\n\ sat\n\ sun\n\ EEEE, d MMMM yyyy\n\ d MMM yyyy\n\ HH:mm:ss z\n\ HH:mm:ss\n\ January;February;March;April;May;June;July;August;September;October;November;December;\n\ Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;\n\ J;F;M;A;M;J;J;A;S;O;N;D;\n\ January;February;March;April;May;June;July;August;September;October;November;December;\n\ Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;\n\ 1;2;3;4;5;6;7;8;9;10;11;12;\n\ Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;\n\ Sun;Mon;Tue;Wed;Thu;Fri;Sat;\n\ 7;1;2;3;4;5;6;\n\ Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;\n\ Sun;Mon;Tue;Wed;Thu;Fri;Sat;\n\ S;M;T;W;T;F;S;\n\ \n\ \n\ ;;;;;;;\n\ 2\n\ 1\n\ %1%2\n\ \n\ " for key in locale_keys: l = locale_database[key] print " " print " " + l['language'] + "" print " " + l['language_endonym'].encode('utf-8') + "" print " " print " " + l['country'] + "" print " " + l['country_endonym'].encode('utf-8') + "" print " " + l['language_code'] + "" print " " + l['script_code'] + "" print " " + l['country_code'] + "" print " " + ordStr(l['decimal']) + "" print " " + ordStr(l['group']) + "" print " " + fixOrdStrList(l['list']) + "" print " " + fixOrdStrPercent(l['percent']) + "" print " " + ordStr(l['zero']) + "" print " " + ordStr(l['minus']) + "" print " " + ordStr(l['plus']) + "" print " " + fixOrdStrExp(l['exp']) + "" print " " + l['quotationStart'].encode('utf-8') + "" print " " + l['quotationEnd'].encode('utf-8') + "" print " " + l['alternateQuotationStart'].encode('utf-8') + "" print " " + l['alternateQuotationEnd'].encode('utf-8') + "" print " " + l['listPatternPartStart'].encode('utf-8') + "" print " " + l['listPatternPartMiddle'].encode('utf-8') + "" print " " + l['listPatternPartEnd'].encode('utf-8') + "" print " " + l['listPatternPartTwo'].encode('utf-8') + "" print " " + l['am'].encode('utf-8') + "" print " " + l['pm'].encode('utf-8') + "" print " " + l['firstDayOfWeek'].encode('utf-8') + "" print " " + l['weekendStart'].encode('utf-8') + "" print " " + l['weekendEnd'].encode('utf-8') + "" print " " + l['longDateFormat'].encode('utf-8') + "" print " " + l['shortDateFormat'].encode('utf-8') + "" print " " + l['longTimeFormat'].encode('utf-8') + "" print " " + l['shortTimeFormat'].encode('utf-8') + "" print " " + l['standaloneLongMonths'].encode('utf-8') + "" print " "+ l['standaloneShortMonths'].encode('utf-8') + "" print " "+ l['standaloneNarrowMonths'].encode('utf-8') + "" print " " + l['longMonths'].encode('utf-8') + "" print " " + l['shortMonths'].encode('utf-8') + "" print " " + l['narrowMonths'].encode('utf-8') + "" print " " + l['longDays'].encode('utf-8') + "" print " " + l['shortDays'].encode('utf-8') + "" print " " + l['narrowDays'].encode('utf-8') + "" print " " + l['standaloneLongDays'].encode('utf-8') + "" print " " + l['standaloneShortDays'].encode('utf-8') + "" print " " + l['standaloneNarrowDays'].encode('utf-8') + "" print " " + l['currencyIsoCode'].encode('utf-8') + "" print " " + l['currencySymbol'].encode('utf-8') + "" print " " + l['currencyDisplayName'].encode('utf-8') + "" print " " + str(l['currencyDigits']) + "" print " " + str(l['currencyRounding']) + "" print " " + l['currencyFormat'].encode('utf-8') + "" print " " + l['currencyNegativeFormat'].encode('utf-8') + "" print " " print " " print ""