diff options
Diffstat (limited to 'util/local_database/cldr2qlocalexml.py')
-rwxr-xr-x | util/local_database/cldr2qlocalexml.py | 810 |
1 files changed, 810 insertions, 0 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py new file mode 100755 index 0000000000..f0f9cb1ead --- /dev/null +++ b/util/local_database/cldr2qlocalexml.py @@ -0,0 +1,810 @@ +#!/usr/bin/env python +############################################################################# +## +## Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). +## All rights reserved. +## Contact: Nokia Corporation (qt-info@nokia.com) +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:LGPL$ +## No Commercial Usage +## This file contains pre-release code and may not be distributed. +## You may use this file in accordance with the terms and conditions +## contained in the Technology Preview License Agreement accompanying +## this package. +## +## GNU Lesser General Public License Usage +## Alternatively, this file may be used under the terms of the GNU Lesser +## General Public License version 2.1 as published by the Free Software +## Foundation and appearing in the file LICENSE.LGPL included in the +## packaging of this file. Please review the following information to +## ensure the GNU Lesser General Public License version 2.1 requirements +## will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +## +## In addition, as a special exception, Nokia gives you certain additional +## rights. These rights are described in the Nokia Qt LGPL Exception +## version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +## +## If you have questions regarding the use of this file, please contact +## Nokia at qt-info@nokia.com. +## +## +## +## +## +## +## +## +## $QT_END_LICENSE$ +## +############################################################################# + +import os +import sys +import enumdata +import xpathlite +from xpathlite import DraftResolution +from dateconverter import convert_date +import re + +findEntry = xpathlite.findEntry +findEntryInFile = xpathlite._findEntryInFile +findTagsInFile = xpathlite.findTagsInFile + +def parse_number_format(patterns, data): + # this is a very limited parsing of the number format for currency only. + def skip_repeating_pattern(x): + p = x.replace('0', '#').replace(',', '').replace('.', '') + seen = False + result = '' + for c in p: + if c == '#': + if seen: + continue + seen = True + else: + seen = False + result = result + c + return result + patterns = patterns.split(';') + result = [] + for pattern in patterns: + pattern = skip_repeating_pattern(pattern) + pattern = pattern.replace('#', "%1") + # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns + # there can be doubled or trippled currency sign, however none of the + # locales use that. + pattern = pattern.replace(u'\xa4', "%2") + pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'") + pattern = pattern.replace('-', data['minus']) + pattern = pattern.replace('+', data['plus']) + result.append(pattern) + return result + +def parse_list_pattern_part_format(pattern): + # this is a very limited parsing of the format for list pattern part only. + result = "" + result = pattern.replace("{0}", "%1") + result = result.replace("{1}", "%2") + result = result.replace("{2}", "%3") + return result + +def ordStr(c): + if len(c) == 1: + return str(ord(c)) + return "##########" + +# the following functions are supposed to fix the problem with QLocale +# returning a character instead of strings for QLocale::exponential() +# and others. So we fallback to default values in these cases. +def fixOrdStrExp(c): + if len(c) == 1: + return str(ord(c)) + return str(ord('e')) +def fixOrdStrPercent(c): + if len(c) == 1: + return str(ord(c)) + return str(ord('%')) +def fixOrdStrList(c): + if len(c) == 1: + return str(ord(c)) + return str(ord(';')) + +def generateLocaleInfo(path): + (dir_name, file_name) = os.path.split(path) + + if not path.endswith(".xml"): + return {} + language_code = findEntryInFile(path, "identity/language", attribute="type")[0] + if language_code == 'root': + # just skip it + return {} + country_code = findEntryInFile(path, "identity/territory", attribute="type")[0] + script_code = findEntryInFile(path, "identity/script", attribute="type")[0] + variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0] + + # we should handle fully qualified names with the territory + if not country_code: + return {} + + # we do not support variants + # ### actually there is only one locale with variant: en_US_POSIX + # does anybody care about it at all? + if variant_code: + return {} + + language_id = enumdata.languageCodeToId(language_code) + if language_id == -1: + sys.stderr.write("unnknown language code \"" + language_code + "\"\n") + return {} + language = enumdata.language_list[language_id][0] + + script_id = enumdata.scriptCodeToId(script_code) + if script_code == -1: + sys.stderr.write("unnknown script code \"" + script_code + "\"\n") + return {} + script = "AnyScript" + if script_id != -1: + script = enumdata.script_list[script_id][0] + + country_id = enumdata.countryCodeToId(country_code) + country = "" + if country_id != -1: + country = enumdata.country_list[country_id][0] + if country == "": + sys.stderr.write("unnknown country code \"" + country_code + "\"\n") + return {} + + # So we say we accept only those values that have "contributed" or + # "approved" resolution. see http://www.unicode.org/cldr/process.html + # But we only respect the resolution for new datas for backward + # compatibility. + draft = DraftResolution.contributed + + result = {} + result['language'] = language + result['script'] = script + result['country'] = country + result['language_code'] = language_code + result['country_code'] = country_code + result['script_code'] = script_code + result['variant_code'] = variant_code + result['language_id'] = language_id + result['script_id'] = script_id + result['country_id'] = country_id + + supplementalPath = dir_name + "/../supplemental/supplementalData.xml" + currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code); + result['currencyIsoCode'] = '' + result['currencyDigits'] = 2 + result['currencyRounding'] = 1 + if currencies: + for e in currencies: + if e[0] == 'currency': + tender = True + t = filter(lambda x: x[0] == 'tender', e[1]) + if t and t[0][1] == 'false': + tender = False; + if tender and not filter(lambda x: x[0] == 'to', e[1]): + result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1] + break + if result['currencyIsoCode']: + t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']); + if t and t[0][0] == 'info': + result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1]) + result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1]) + numbering_system = None + try: + numbering_system = findEntry(path, "numbers/defaultNumberingSystem") + except: + pass + def findEntryDef(path, xpath, value=''): + try: + return findEntry(path, xpath) + except xpathlite.Error: + return value + def get_number_in_system(path, xpath, numbering_system): + if numbering_system: + try: + return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") + except xpathlite.Error: + pass + return findEntry(path, xpath) + result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system) + result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system) + result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system) + result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) + result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system) + result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system) + result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system) + result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower() + result['quotationStart'] = findEntry(path, "delimiters/quotationStart") + result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd") + result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart") + result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd") + result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]")) + result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]")) + result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]")) + result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]")) + result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft) + result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft) + result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern")) + result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern")) + result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern")) + result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern")) + + endonym = None + if country_code and script_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code)) + if not endonym and script_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code)) + if not endonym and country_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code)) + if not endonym: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code)) + result['language_endonym'] = endonym + result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code)) + + currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system) + currency_format = parse_number_format(currency_format, result) + result['currencyFormat'] = currency_format[0] + result['currencyNegativeFormat'] = '' + if len(currency_format) > 1: + result['currencyNegativeFormat'] = currency_format[1] + + result['currencySymbol'] = '' + result['currencyDisplayName'] = '' + if result['currencyIsoCode']: + result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) + display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode'] + result['currencyDisplayName'] \ + = findEntryDef(path, display_name_path) + ";" \ + + findEntryDef(path, display_name_path + "[count=zero]") + ";" \ + + findEntryDef(path, display_name_path + "[count=one]") + ";" \ + + findEntryDef(path, display_name_path + "[count=two]") + ";" \ + + findEntryDef(path, display_name_path + "[count=few]") + ";" \ + + findEntryDef(path, display_name_path + "[count=many]") + ";" \ + + findEntryDef(path, display_name_path + "[count=other]") + ";" + + standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month" + result['standaloneLongMonths'] \ + = findEntry(path, standalone_long_month_path + "[1]") + ";" \ + + findEntry(path, standalone_long_month_path + "[2]") + ";" \ + + findEntry(path, standalone_long_month_path + "[3]") + ";" \ + + findEntry(path, standalone_long_month_path + "[4]") + ";" \ + + findEntry(path, standalone_long_month_path + "[5]") + ";" \ + + findEntry(path, standalone_long_month_path + "[6]") + ";" \ + + findEntry(path, standalone_long_month_path + "[7]") + ";" \ + + findEntry(path, standalone_long_month_path + "[8]") + ";" \ + + findEntry(path, standalone_long_month_path + "[9]") + ";" \ + + findEntry(path, standalone_long_month_path + "[10]") + ";" \ + + findEntry(path, standalone_long_month_path + "[11]") + ";" \ + + findEntry(path, standalone_long_month_path + "[12]") + ";" + + standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month" + result['standaloneShortMonths'] \ + = findEntry(path, standalone_short_month_path + "[1]") + ";" \ + + findEntry(path, standalone_short_month_path + "[2]") + ";" \ + + findEntry(path, standalone_short_month_path + "[3]") + ";" \ + + findEntry(path, standalone_short_month_path + "[4]") + ";" \ + + findEntry(path, standalone_short_month_path + "[5]") + ";" \ + + findEntry(path, standalone_short_month_path + "[6]") + ";" \ + + findEntry(path, standalone_short_month_path + "[7]") + ";" \ + + findEntry(path, standalone_short_month_path + "[8]") + ";" \ + + findEntry(path, standalone_short_month_path + "[9]") + ";" \ + + findEntry(path, standalone_short_month_path + "[10]") + ";" \ + + findEntry(path, standalone_short_month_path + "[11]") + ";" \ + + findEntry(path, standalone_short_month_path + "[12]") + ";" + + standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month" + result['standaloneNarrowMonths'] \ + = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \ + + findEntry(path, standalone_narrow_month_path + "[12]") + ";" + + long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month" + result['longMonths'] \ + = findEntry(path, long_month_path + "[1]") + ";" \ + + findEntry(path, long_month_path + "[2]") + ";" \ + + findEntry(path, long_month_path + "[3]") + ";" \ + + findEntry(path, long_month_path + "[4]") + ";" \ + + findEntry(path, long_month_path + "[5]") + ";" \ + + findEntry(path, long_month_path + "[6]") + ";" \ + + findEntry(path, long_month_path + "[7]") + ";" \ + + findEntry(path, long_month_path + "[8]") + ";" \ + + findEntry(path, long_month_path + "[9]") + ";" \ + + findEntry(path, long_month_path + "[10]") + ";" \ + + findEntry(path, long_month_path + "[11]") + ";" \ + + findEntry(path, long_month_path + "[12]") + ";" + + short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month" + result['shortMonths'] \ + = findEntry(path, short_month_path + "[1]") + ";" \ + + findEntry(path, short_month_path + "[2]") + ";" \ + + findEntry(path, short_month_path + "[3]") + ";" \ + + findEntry(path, short_month_path + "[4]") + ";" \ + + findEntry(path, short_month_path + "[5]") + ";" \ + + findEntry(path, short_month_path + "[6]") + ";" \ + + findEntry(path, short_month_path + "[7]") + ";" \ + + findEntry(path, short_month_path + "[8]") + ";" \ + + findEntry(path, short_month_path + "[9]") + ";" \ + + findEntry(path, short_month_path + "[10]") + ";" \ + + findEntry(path, short_month_path + "[11]") + ";" \ + + findEntry(path, short_month_path + "[12]") + ";" + + narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month" + result['narrowMonths'] \ + = findEntry(path, narrow_month_path + "[1]") + ";" \ + + findEntry(path, narrow_month_path + "[2]") + ";" \ + + findEntry(path, narrow_month_path + "[3]") + ";" \ + + findEntry(path, narrow_month_path + "[4]") + ";" \ + + findEntry(path, narrow_month_path + "[5]") + ";" \ + + findEntry(path, narrow_month_path + "[6]") + ";" \ + + findEntry(path, narrow_month_path + "[7]") + ";" \ + + findEntry(path, narrow_month_path + "[8]") + ";" \ + + findEntry(path, narrow_month_path + "[9]") + ";" \ + + findEntry(path, narrow_month_path + "[10]") + ";" \ + + findEntry(path, narrow_month_path + "[11]") + ";" \ + + findEntry(path, narrow_month_path + "[12]") + ";" + + long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day" + result['longDays'] \ + = findEntry(path, long_day_path + "[sun]") + ";" \ + + findEntry(path, long_day_path + "[mon]") + ";" \ + + findEntry(path, long_day_path + "[tue]") + ";" \ + + findEntry(path, long_day_path + "[wed]") + ";" \ + + findEntry(path, long_day_path + "[thu]") + ";" \ + + findEntry(path, long_day_path + "[fri]") + ";" \ + + findEntry(path, long_day_path + "[sat]") + ";" + + short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day" + result['shortDays'] \ + = findEntry(path, short_day_path + "[sun]") + ";" \ + + findEntry(path, short_day_path + "[mon]") + ";" \ + + findEntry(path, short_day_path + "[tue]") + ";" \ + + findEntry(path, short_day_path + "[wed]") + ";" \ + + findEntry(path, short_day_path + "[thu]") + ";" \ + + findEntry(path, short_day_path + "[fri]") + ";" \ + + findEntry(path, short_day_path + "[sat]") + ";" + + narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day" + result['narrowDays'] \ + = findEntry(path, narrow_day_path + "[sun]") + ";" \ + + findEntry(path, narrow_day_path + "[mon]") + ";" \ + + findEntry(path, narrow_day_path + "[tue]") + ";" \ + + findEntry(path, narrow_day_path + "[wed]") + ";" \ + + findEntry(path, narrow_day_path + "[thu]") + ";" \ + + findEntry(path, narrow_day_path + "[fri]") + ";" \ + + findEntry(path, narrow_day_path + "[sat]") + ";" + + standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day" + result['standaloneLongDays'] \ + = findEntry(path, standalone_long_day_path + "[sun]") + ";" \ + + findEntry(path, standalone_long_day_path + "[mon]") + ";" \ + + findEntry(path, standalone_long_day_path + "[tue]") + ";" \ + + findEntry(path, standalone_long_day_path + "[wed]") + ";" \ + + findEntry(path, standalone_long_day_path + "[thu]") + ";" \ + + findEntry(path, standalone_long_day_path + "[fri]") + ";" \ + + findEntry(path, standalone_long_day_path + "[sat]") + ";" + + standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day" + result['standaloneShortDays'] \ + = findEntry(path, standalone_short_day_path + "[sun]") + ";" \ + + findEntry(path, standalone_short_day_path + "[mon]") + ";" \ + + findEntry(path, standalone_short_day_path + "[tue]") + ";" \ + + findEntry(path, standalone_short_day_path + "[wed]") + ";" \ + + findEntry(path, standalone_short_day_path + "[thu]") + ";" \ + + findEntry(path, standalone_short_day_path + "[fri]") + ";" \ + + findEntry(path, standalone_short_day_path + "[sat]") + ";" + + standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day" + result['standaloneNarrowDays'] \ + = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \ + + findEntry(path, standalone_narrow_day_path + "[sat]") + ";" + + return result + +def addEscapes(s): + result = '' + for c in s: + n = ord(c) + if n < 128: + result += c + else: + result += "\\x" + result += "%02x" % (n) + return result + +def unicodeStr(s): + utf8 = s.encode('utf-8') + return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>" + +def usage(): + print "Usage: cldr2qlocalexml.py <path-to-cldr-main>" + sys.exit() + +def integrateWeekData(filePath): + if not filePath.endswith(".xml"): + return {} + monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ") + tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ") + wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ") + thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ") + friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ") + satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ") + sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ") + + monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ") + tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ") + wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ") + thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ") + friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ") + satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ") + sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ") + + monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ") + tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ") + wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ") + thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ") + friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ") + satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ") + sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ") + + firstDayByCountryCode = {} + for countryCode in monFirstDayIn: + firstDayByCountryCode[countryCode] = "mon" + for countryCode in tueFirstDayIn: + firstDayByCountryCode[countryCode] = "tue" + for countryCode in wedFirstDayIn: + firstDayByCountryCode[countryCode] = "wed" + for countryCode in thuFirstDayIn: + firstDayByCountryCode[countryCode] = "thu" + for countryCode in friFirstDayIn: + firstDayByCountryCode[countryCode] = "fri" + for countryCode in satFirstDayIn: + firstDayByCountryCode[countryCode] = "sat" + for countryCode in sunFirstDayIn: + firstDayByCountryCode[countryCode] = "sun" + + weekendStartByCountryCode = {} + for countryCode in monWeekendStart: + weekendStartByCountryCode[countryCode] = "mon" + for countryCode in tueWeekendStart: + weekendStartByCountryCode[countryCode] = "tue" + for countryCode in wedWeekendStart: + weekendStartByCountryCode[countryCode] = "wed" + for countryCode in thuWeekendStart: + weekendStartByCountryCode[countryCode] = "thu" + for countryCode in friWeekendStart: + weekendStartByCountryCode[countryCode] = "fri" + for countryCode in satWeekendStart: + weekendStartByCountryCode[countryCode] = "sat" + for countryCode in sunWeekendStart: + weekendStartByCountryCode[countryCode] = "sun" + + weekendEndByCountryCode = {} + for countryCode in monWeekendEnd: + weekendEndByCountryCode[countryCode] = "mon" + for countryCode in tueWeekendEnd: + weekendEndByCountryCode[countryCode] = "tue" + for countryCode in wedWeekendEnd: + weekendEndByCountryCode[countryCode] = "wed" + for countryCode in thuWeekendEnd: + weekendEndByCountryCode[countryCode] = "thu" + for countryCode in friWeekendEnd: + weekendEndByCountryCode[countryCode] = "fri" + for countryCode in satWeekendEnd: + weekendEndByCountryCode[countryCode] = "sat" + for countryCode in sunWeekendEnd: + weekendEndByCountryCode[countryCode] = "sun" + + for (key,locale) in locale_database.iteritems(): + countryCode = locale['country_code'] + if countryCode in firstDayByCountryCode: + locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode] + else: + locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"] + + if countryCode in weekendStartByCountryCode: + locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode] + else: + locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"] + + if countryCode in weekendEndByCountryCode: + locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode] + else: + locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"] + +if len(sys.argv) != 2: + usage() + +cldr_dir = sys.argv[1] + +if not os.path.isdir(cldr_dir): + usage() + +cldr_files = os.listdir(cldr_dir) + +locale_database = {} +for file in cldr_files: + l = generateLocaleInfo(cldr_dir + "/" + file) + if not l: + sys.stderr.write("skipping file \"" + file + "\"\n") + continue + + locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l + +integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") +locale_keys = locale_database.keys() +locale_keys.sort() + +cldr_version = 'unknown' +ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r") +for line in ldml: + if 'version cldrVersion CDATA #FIXED' in line: + cldr_version = line.split('"')[1] + +print "<localeDatabase>" +print " <version>" + cldr_version + "</version>" +print " <languageList>" +for id in enumdata.language_list: + l = enumdata.language_list[id] + print " <language>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </language>" +print " </languageList>" + +print " <scriptList>" +for id in enumdata.script_list: + l = enumdata.script_list[id] + print " <script>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </script>" +print " </scriptList>" + +print " <countryList>" +for id in enumdata.country_list: + l = enumdata.country_list[id] + print " <country>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </country>" +print " </countryList>" + +print \ +" <defaultCountryList>\n\ + <defaultCountry>\n\ + <language>Afrikaans</language>\n\ + <country>SouthAfrica</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Afan</language>\n\ + <country>Ethiopia</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Afar</language>\n\ + <country>Djibouti</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Arabic</language>\n\ + <country>SaudiArabia</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Chinese</language>\n\ + <country>China</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Dutch</language>\n\ + <country>Netherlands</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>English</language>\n\ + <country>UnitedStates</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>French</language>\n\ + <country>France</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>German</language>\n\ + <country>Germany</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Greek</language>\n\ + <country>Greece</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Italian</language>\n\ + <country>Italy</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Malay</language>\n\ + <country>Malaysia</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Portuguese</language>\n\ + <country>Portugal</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Russian</language>\n\ + <country>RussianFederation</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Serbian</language>\n\ + <country>SerbiaAndMontenegro</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>SerboCroatian</language>\n\ + <country>SerbiaAndMontenegro</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Somali</language>\n\ + <country>Somalia</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Spanish</language>\n\ + <country>Spain</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Swahili</language>\n\ + <country>Kenya</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Swedish</language>\n\ + <country>Sweden</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Tigrinya</language>\n\ + <country>Eritrea</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Uzbek</language>\n\ + <country>Uzbekistan</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Persian</language>\n\ + <country>Iran</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Mongolian</language>\n\ + <country>Mongolia</country>\n\ + </defaultCountry>\n\ + <defaultCountry>\n\ + <language>Nepali</language>\n\ + <country>Nepal</country>\n\ + </defaultCountry>\n\ + </defaultCountryList>" + +print " <localeList>" +print \ +" <locale>\n\ + <language>C</language>\n\ + <languageEndonym></languageEndonym>\n\ + <script>AnyScript</script>\n\ + <country>AnyCountry</country>\n\ + <countryEndonym></countryEndonym>\n\ + <decimal>46</decimal>\n\ + <group>44</group>\n\ + <list>59</list>\n\ + <percent>37</percent>\n\ + <zero>48</zero>\n\ + <minus>45</minus>\n\ + <plus>43</plus>\n\ + <exp>101</exp>\n\ + <quotationStart>\"</quotationStart>\n\ + <quotationEnd>\"</quotationEnd>\n\ + <alternateQuotationStart>\'</alternateQuotationStart>\n\ + <alternateQuotationEnd>\'</alternateQuotationEnd>\n\ + <listPatternPartStart>%1, %2</listPatternPartStart>\n\ + <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\ + <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\ + <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\ + <am>AM</am>\n\ + <pm>PM</pm>\n\ + <firstDayOfWeek>mon</firstDayOfWeek>\n\ + <weekendStart>sat</weekendStart>\n\ + <weekendEnd>sun</weekendEnd>\n\ + <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\ + <shortDateFormat>d MMM yyyy</shortDateFormat>\n\ + <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\ + <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\ + <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\ + <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\ + <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\ + <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\ + <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\ + <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\ + <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\ + <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\ + <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\ + <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\ + <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\ + <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\ + <currencyIsoCode></currencyIsoCode>\n\ + <currencySymbol></currencySymbol>\n\ + <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\ + <currencyDigits>2</currencyDigits>\n\ + <currencyRounding>1</currencyRounding>\n\ + <currencyFormat>%1%2</currencyFormat>\n\ + <currencyNegativeFormat></currencyNegativeFormat>\n\ + </locale>" + +for key in locale_keys: + l = locale_database[key] + + print " <locale>" + print " <language>" + l['language'] + "</language>" + print " <languageEndonym>" + l['language_endonym'].encode('utf-8') + "</languageEndonym>" + print " <script>" + l['script'] + "</script>" + print " <country>" + l['country'] + "</country>" + print " <countryEndonym>" + l['country_endonym'].encode('utf-8') + "</countryEndonym>" + print " <languagecode>" + l['language_code'] + "</languagecode>" + print " <scriptcode>" + l['script_code'] + "</scriptcode>" + print " <countrycode>" + l['country_code'] + "</countrycode>" + print " <decimal>" + ordStr(l['decimal']) + "</decimal>" + print " <group>" + ordStr(l['group']) + "</group>" + print " <list>" + fixOrdStrList(l['list']) + "</list>" + print " <percent>" + fixOrdStrPercent(l['percent']) + "</percent>" + print " <zero>" + ordStr(l['zero']) + "</zero>" + print " <minus>" + ordStr(l['minus']) + "</minus>" + print " <plus>" + ordStr(l['plus']) + "</plus>" + print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>" + print " <quotationStart>" + l['quotationStart'].encode('utf-8') + "</quotationStart>" + print " <quotationEnd>" + l['quotationEnd'].encode('utf-8') + "</quotationEnd>" + print " <alternateQuotationStart>" + l['alternateQuotationStart'].encode('utf-8') + "</alternateQuotationStart>" + print " <alternateQuotationEnd>" + l['alternateQuotationEnd'].encode('utf-8') + "</alternateQuotationEnd>" + print " <listPatternPartStart>" + l['listPatternPartStart'].encode('utf-8') + "</listPatternPartStart>" + print " <listPatternPartMiddle>" + l['listPatternPartMiddle'].encode('utf-8') + "</listPatternPartMiddle>" + print " <listPatternPartEnd>" + l['listPatternPartEnd'].encode('utf-8') + "</listPatternPartEnd>" + print " <listPatternPartTwo>" + l['listPatternPartTwo'].encode('utf-8') + "</listPatternPartTwo>" + print " <am>" + l['am'].encode('utf-8') + "</am>" + print " <pm>" + l['pm'].encode('utf-8') + "</pm>" + print " <firstDayOfWeek>" + l['firstDayOfWeek'].encode('utf-8') + "</firstDayOfWeek>" + print " <weekendStart>" + l['weekendStart'].encode('utf-8') + "</weekendStart>" + print " <weekendEnd>" + l['weekendEnd'].encode('utf-8') + "</weekendEnd>" + print " <longDateFormat>" + l['longDateFormat'].encode('utf-8') + "</longDateFormat>" + print " <shortDateFormat>" + l['shortDateFormat'].encode('utf-8') + "</shortDateFormat>" + print " <longTimeFormat>" + l['longTimeFormat'].encode('utf-8') + "</longTimeFormat>" + print " <shortTimeFormat>" + l['shortTimeFormat'].encode('utf-8') + "</shortTimeFormat>" + print " <standaloneLongMonths>" + l['standaloneLongMonths'].encode('utf-8') + "</standaloneLongMonths>" + print " <standaloneShortMonths>"+ l['standaloneShortMonths'].encode('utf-8') + "</standaloneShortMonths>" + print " <standaloneNarrowMonths>"+ l['standaloneNarrowMonths'].encode('utf-8') + "</standaloneNarrowMonths>" + print " <longMonths>" + l['longMonths'].encode('utf-8') + "</longMonths>" + print " <shortMonths>" + l['shortMonths'].encode('utf-8') + "</shortMonths>" + print " <narrowMonths>" + l['narrowMonths'].encode('utf-8') + "</narrowMonths>" + print " <longDays>" + l['longDays'].encode('utf-8') + "</longDays>" + print " <shortDays>" + l['shortDays'].encode('utf-8') + "</shortDays>" + print " <narrowDays>" + l['narrowDays'].encode('utf-8') + "</narrowDays>" + print " <standaloneLongDays>" + l['standaloneLongDays'].encode('utf-8') + "</standaloneLongDays>" + print " <standaloneShortDays>" + l['standaloneShortDays'].encode('utf-8') + "</standaloneShortDays>" + print " <standaloneNarrowDays>" + l['standaloneNarrowDays'].encode('utf-8') + "</standaloneNarrowDays>" + print " <currencyIsoCode>" + l['currencyIsoCode'].encode('utf-8') + "</currencyIsoCode>" + print " <currencySymbol>" + l['currencySymbol'].encode('utf-8') + "</currencySymbol>" + print " <currencyDisplayName>" + l['currencyDisplayName'].encode('utf-8') + "</currencyDisplayName>" + print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>" + print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>" + print " <currencyFormat>" + l['currencyFormat'].encode('utf-8') + "</currencyFormat>" + print " <currencyNegativeFormat>" + l['currencyNegativeFormat'].encode('utf-8') + "</currencyNegativeFormat>" + print " </locale>" +print " </localeList>" +print "</localeDatabase>" |