summary refs log tree commit diff stats
path: root/util/local_database/cldr2qlocalexml.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/local_database/cldr2qlocalexml.py')
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 638
1 files changed, 201 insertions, 437 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 41cfafab0d..58ea21edab 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
#############################################################################
##
-## Copyright (C) 2016 The Qt Company Ltd.
+## Copyright (C) 2017 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -26,20 +26,31 @@
## $QT_END_LICENSE$
##
#############################################################################
+"""Convert CLDR data to qLocaleXML
+
+The CLDR data can be downloaded from CLDR_, which has a sub-directory
+for each version; you need the ``core.zip`` file for your version of
+choice (typically the latest). This script has been updated to cope with
+versions up to v29; later versions may need further adaptations. Unpack the
+downloaded ``core.zip`` and check it has a common/main/ sub-directory:
+pass the path of that sub-directory to this script as its single
+command-line argument. Save its standard output (but not error) to a
+file for later processing by ``./qlocalexml2cpp.py``.
+
+.. _CLDR: ftp://unicode.org/Public/cldr/
+"""
import os
import sys
+import re
+
import enumdata
import xpathlite
-from xpathlite import DraftResolution
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
from dateconverter import convert_date
-from xml.sax.saxutils import escape, unescape
-import re
+from localexml import Locale
-findAlias = xpathlite.findAlias
-findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
-findTagsInFile = xpathlite.findTagsInFile
def parse_number_format(patterns, data):
# this is a very limited parsing of the number format for currency only.
@@ -72,42 +83,49 @@ def parse_number_format(patterns, data):
return result
def parse_list_pattern_part_format(pattern):
- # this is a very limited parsing of the format for list pattern part only.
- result = ""
- result = pattern.replace("{0}", "%1")
- result = result.replace("{1}", "%2")
- result = result.replace("{2}", "%3")
- return result
-
-def ordStr(c):
- if len(c) == 1:
- return str(ord(c))
- raise xpathlite.Error("Unable to handle value \"%s\"" % addEscapes(c))
- return "##########"
-
-# the following functions are supposed to fix the problem with QLocale
-# returning a character instead of strings for QLocale::exponential()
-# and others. So we fallback to default values in these cases.
-def fixOrdStrMinus(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('-'))
-def fixOrdStrPlus(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('+'))
-def fixOrdStrExp(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('e'))
-def fixOrdStrPercent(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('%'))
-def fixOrdStrList(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord(';'))
+ # This is a very limited parsing of the format for list pattern part only.
+ return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
+
+def unit_quantifiers(find, path, stem, suffix, known,
+ # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
+ # 1000^7 < zebi = 2^{70}, the next quantifiers up:
+ si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
+ """Work out the unit quantifiers.
+
+ Unfortunately, the CLDR data only go up to terabytes and we want
+ all the way to exabytes; but we can recognize the SI quantifiers
+ as prefixes, strip and identify the tail as the localized
+ translation for 'B' (e.g. French has 'octet' for 'byte' and uses
+ ko, Mo, Go, To from which we can extrapolate Po, Eo).
+
+ Should be called first for the SI quantifiers, with suffix = 'B',
+ then for the IEC ones, with suffix = 'iB'; the list known
+ (initially empty before first call) is used to let the second call
+ know what the first learned about the localized unit.
+ """
+ if suffix == 'B': # first call, known = []
+ tail = suffix
+ for q in si_quantifiers:
+ it = find(path, stem % q)
+ # kB for kilobyte, in contrast with KiB for IEC:
+ q = q[0] if q == 'kilo' else q[0].upper()
+ if not it:
+ it = q + tail
+ elif it.startswith(q):
+ rest = it[1:]
+ tail = rest if all(rest == k for k in known) else suffix
+ known.append(rest)
+ yield it
+ else: # second call, re-using first's known
+ assert suffix == 'iB'
+ if known:
+ byte = known.pop()
+ if all(byte == k for k in known):
+ suffix = 'i' + byte
+ for q in si_quantifiers:
+ yield find(path, stem % q[:2],
+ # Those don't (yet, v31) exist in CLDR, so we always fall back to:
+ q[0].upper() + suffix)
def generateLocaleInfo(path):
if not path.endswith(".xml"):
@@ -116,14 +134,13 @@ def generateLocaleInfo(path):
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
- raise xpathlite.Error("alias to \"%s\"" % alias)
+ raise xpathlite.Error('alias to "%s"' % alias)
- language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
- country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
- script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
- variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
+ def code(tag):
+ return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
- return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code)
+ return _generateLocaleInfo(path, code('language'), code('script'),
+ code('territory'), code('variant'))
def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
if not path.endswith(".xml"):
@@ -137,25 +154,22 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
- raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
+ raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
- raise xpathlite.Error("unknown language code \"%s\"" % language_code)
- language = enumdata.language_list[language_id][0]
+ raise xpathlite.Error('unknown language code "%s"' % language_code)
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error("unknown script code \"%s\"" % script_code)
- script = enumdata.script_list[script_id][0]
+ raise xpathlite.Error('unknown script code "%s"' % script_code)
# we should handle fully qualified names with the territory
if not country_code:
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
- raise xpathlite.Error("unknown country code \"%s\"" % country_code)
- country = enumdata.country_list[country_id][0]
+ raise xpathlite.Error('unknown country code "%s"' % country_code)
# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
@@ -163,39 +177,39 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# compatibility.
draft = DraftResolution.contributed
- result = {}
- result['language'] = language
- result['script'] = script
- result['country'] = country
- result['language_code'] = language_code
- result['country_code'] = country_code
- result['script_code'] = script_code
- result['variant_code'] = variant_code
- result['language_id'] = language_id
- result['script_id'] = script_id
- result['country_id'] = country_id
+ result = dict(
+ language=enumdata.language_list[language_id][0],
+ language_code=language_code, language_id=language_id,
+ script=enumdata.script_list[script_id][0],
+ script_code=script_code, script_id=script_id,
+ country=enumdata.country_list[country_id][0],
+ country_code=country_code, country_id=country_id,
+ variant_code=variant_code)
(dir_name, file_name) = os.path.split(path)
- supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
- currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
+ def from_supplement(tag,
+ path=os.path.join(dir_name, '..', 'supplemental',
+ 'supplementalData.xml')):
+ return findTagsInFile(path, tag)
+ currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
result['currencyIsoCode'] = ''
result['currencyDigits'] = 2
result['currencyRounding'] = 1
if currencies:
for e in currencies:
if e[0] == 'currency':
- tender = True
- t = filter(lambda x: x[0] == 'tender', e[1])
- if t and t[0][1] == 'false':
- tender = False;
- if tender and not filter(lambda x: x[0] == 'to', e[1]):
- result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
+ t = [x[1] == 'false' for x in e[1] if x[0] == 'tender']
+ if t and t[0]:
+ pass
+ elif not any(x[0] == 'to' for x in e[1]):
+ result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next()
break
if result['currencyIsoCode']:
- t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
+ t = from_supplement("currencyData/fractions/info[iso4217=%s]"
+ % result['currencyIsoCode'])
if t and t[0][0] == 'info':
- result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
- result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
+ result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next()
+ result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next()
numbering_system = None
try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
@@ -226,7 +240,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
try:
numbering_systems = {}
- for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"):
+ for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+ 'numberingSystems.xml'),
+ 'numberingSystems'):
tmp = {}
id = ""
for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
@@ -279,167 +295,70 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['currencyDisplayName'] = ''
if result['currencyIsoCode']:
result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
- display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
- result['currencyDisplayName'] \
- = findEntryDef(path, display_name_path) + ";" \
- + findEntryDef(path, display_name_path + "[count=zero]") + ";" \
- + findEntryDef(path, display_name_path + "[count=one]") + ";" \
- + findEntryDef(path, display_name_path + "[count=two]") + ";" \
- + findEntryDef(path, display_name_path + "[count=few]") + ";" \
- + findEntryDef(path, display_name_path + "[count=many]") + ";" \
- + findEntryDef(path, display_name_path + "[count=other]") + ";"
-
- standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
- result['standaloneLongMonths'] \
- = findEntry(path, standalone_long_month_path + "[1]") + ";" \
- + findEntry(path, standalone_long_month_path + "[2]") + ";" \
- + findEntry(path, standalone_long_month_path + "[3]") + ";" \
- + findEntry(path, standalone_long_month_path + "[4]") + ";" \
- + findEntry(path, standalone_long_month_path + "[5]") + ";" \
- + findEntry(path, standalone_long_month_path + "[6]") + ";" \
- + findEntry(path, standalone_long_month_path + "[7]") + ";" \
- + findEntry(path, standalone_long_month_path + "[8]") + ";" \
- + findEntry(path, standalone_long_month_path + "[9]") + ";" \
- + findEntry(path, standalone_long_month_path + "[10]") + ";" \
- + findEntry(path, standalone_long_month_path + "[11]") + ";" \
- + findEntry(path, standalone_long_month_path + "[12]") + ";"
-
- standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month"
- result['standaloneShortMonths'] \
- = findEntry(path, standalone_short_month_path + "[1]") + ";" \
- + findEntry(path, standalone_short_month_path + "[2]") + ";" \
- + findEntry(path, standalone_short_month_path + "[3]") + ";" \
- + findEntry(path, standalone_short_month_path + "[4]") + ";" \
- + findEntry(path, standalone_short_month_path + "[5]") + ";" \
- + findEntry(path, standalone_short_month_path + "[6]") + ";" \
- + findEntry(path, standalone_short_month_path + "[7]") + ";" \
- + findEntry(path, standalone_short_month_path + "[8]") + ";" \
- + findEntry(path, standalone_short_month_path + "[9]") + ";" \
- + findEntry(path, standalone_short_month_path + "[10]") + ";" \
- + findEntry(path, standalone_short_month_path + "[11]") + ";" \
- + findEntry(path, standalone_short_month_path + "[12]") + ";"
-
- standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month"
- result['standaloneNarrowMonths'] \
- = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[12]") + ";"
-
- long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month"
- result['longMonths'] \
- = findEntry(path, long_month_path + "[1]") + ";" \
- + findEntry(path, long_month_path + "[2]") + ";" \
- + findEntry(path, long_month_path + "[3]") + ";" \
- + findEntry(path, long_month_path + "[4]") + ";" \
- + findEntry(path, long_month_path + "[5]") + ";" \
- + findEntry(path, long_month_path + "[6]") + ";" \
- + findEntry(path, long_month_path + "[7]") + ";" \
- + findEntry(path, long_month_path + "[8]") + ";" \
- + findEntry(path, long_month_path + "[9]") + ";" \
- + findEntry(path, long_month_path + "[10]") + ";" \
- + findEntry(path, long_month_path + "[11]") + ";" \
- + findEntry(path, long_month_path + "[12]") + ";"
-
- short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month"
- result['shortMonths'] \
- = findEntry(path, short_month_path + "[1]") + ";" \
- + findEntry(path, short_month_path + "[2]") + ";" \
- + findEntry(path, short_month_path + "[3]") + ";" \
- + findEntry(path, short_month_path + "[4]") + ";" \
- + findEntry(path, short_month_path + "[5]") + ";" \
- + findEntry(path, short_month_path + "[6]") + ";" \
- + findEntry(path, short_month_path + "[7]") + ";" \
- + findEntry(path, short_month_path + "[8]") + ";" \
- + findEntry(path, short_month_path + "[9]") + ";" \
- + findEntry(path, short_month_path + "[10]") + ";" \
- + findEntry(path, short_month_path + "[11]") + ";" \
- + findEntry(path, short_month_path + "[12]") + ";"
-
- narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month"
- result['narrowMonths'] \
- = findEntry(path, narrow_month_path + "[1]") + ";" \
- + findEntry(path, narrow_month_path + "[2]") + ";" \
- + findEntry(path, narrow_month_path + "[3]") + ";" \
- + findEntry(path, narrow_month_path + "[4]") + ";" \
- + findEntry(path, narrow_month_path + "[5]") + ";" \
- + findEntry(path, narrow_month_path + "[6]") + ";" \
- + findEntry(path, narrow_month_path + "[7]") + ";" \
- + findEntry(path, narrow_month_path + "[8]") + ";" \
- + findEntry(path, narrow_month_path + "[9]") + ";" \
- + findEntry(path, narrow_month_path + "[10]") + ";" \
- + findEntry(path, narrow_month_path + "[11]") + ";" \
- + findEntry(path, narrow_month_path + "[12]") + ";"
-
- long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day"
- result['longDays'] \
- = findEntry(path, long_day_path + "[sun]") + ";" \
- + findEntry(path, long_day_path + "[mon]") + ";" \
- + findEntry(path, long_day_path + "[tue]") + ";" \
- + findEntry(path, long_day_path + "[wed]") + ";" \
- + findEntry(path, long_day_path + "[thu]") + ";" \
- + findEntry(path, long_day_path + "[fri]") + ";" \
- + findEntry(path, long_day_path + "[sat]") + ";"
-
- short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day"
- result['shortDays'] \
- = findEntry(path, short_day_path + "[sun]") + ";" \
- + findEntry(path, short_day_path + "[mon]") + ";" \
- + findEntry(path, short_day_path + "[tue]") + ";" \
- + findEntry(path, short_day_path + "[wed]") + ";" \
- + findEntry(path, short_day_path + "[thu]") + ";" \
- + findEntry(path, short_day_path + "[fri]") + ";" \
- + findEntry(path, short_day_path + "[sat]") + ";"
-
- narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day"
- result['narrowDays'] \
- = findEntry(path, narrow_day_path + "[sun]") + ";" \
- + findEntry(path, narrow_day_path + "[mon]") + ";" \
- + findEntry(path, narrow_day_path + "[tue]") + ";" \
- + findEntry(path, narrow_day_path + "[wed]") + ";" \
- + findEntry(path, narrow_day_path + "[thu]") + ";" \
- + findEntry(path, narrow_day_path + "[fri]") + ";" \
- + findEntry(path, narrow_day_path + "[sat]") + ";"
-
- standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day"
- result['standaloneLongDays'] \
- = findEntry(path, standalone_long_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_long_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_long_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_long_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_long_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_long_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_long_day_path + "[sat]") + ";"
-
- standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day"
- result['standaloneShortDays'] \
- = findEntry(path, standalone_short_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_short_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_short_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_short_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_short_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_short_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_short_day_path + "[sat]") + ";"
-
- standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day"
- result['standaloneNarrowDays'] \
- = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[sat]") + ";"
-
- return result
+ result['currencyDisplayName'] = ';'.join(
+ findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
+ + ']/displayName' + tail)
+ for tail in ['',] + [
+ '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
+ ]) + ';'
+
+ def findUnitDef(path, stem, fallback=''):
+ # The displayName for a quantified unit in en.xml is kByte
+ # instead of kB (etc.), so prefer any unitPattern provided:
+ for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
+ try:
+ ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
+ except xpathlite.Error:
+ continue
+
+ # TODO: exploit count-handling, instead of discarding placeholders
+ if ans.startswith('{0}'):
+ ans = ans[3:].lstrip()
+ if ans:
+ return ans
+
+ return findEntryDef(path, stem + 'displayName', fallback)
+
+ # First without quantifier, then quantified each way:
+ result['byte_unit'] = findEntryDef(
+ path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
+ 'bytes')
+ stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
+ known = [] # cases where we *do* have a given version:
+ result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
+ # IEC 60027-2
+ # http://physics.nist.gov/cuu/Units/binary.html
+ result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))
+
+ # Used for month and day data:
+ namings = (
+ ('standaloneLong', 'stand-alone', 'wide'),
+ ('standaloneShort', 'stand-alone', 'abbreviated'),
+ ('standaloneNarrow', 'stand-alone', 'narrow'),
+ ('long', 'format', 'wide'),
+ ('short', 'format', 'abbreviated'),
+ ('narrow', 'format', 'narrow'),
+ )
+
+ # Month data:
+ for cal in ('gregorian',): # We shall want to add to this
+ stem = 'dates/calendars/calendar[' + cal + ']/months/'
+ for (key, mode, size) in namings:
+ prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
+ result[key + 'Months'] = ';'.join(
+ findEntry(path, stem + prop + "month[%d]" % i)
+ for i in range(1, 13)) + ';'
+
+ # Day data (for Gregorian, at least):
+ stem = 'dates/calendars/calendar[gregorian]/days/'
+ days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
+ for (key, mode, size) in namings:
+ prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
+ result[key + 'Days'] = ';'.join(
+ findEntry(path, stem + prop + '[' + day + ']')
+ for day in days) + ';'
+
+ return Locale(result)
def addEscapes(s):
result = ''
@@ -463,94 +382,42 @@ def usage():
def integrateWeekData(filePath):
if not filePath.endswith(".xml"):
return {}
- monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")
- tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
- wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
- thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
- friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
- satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
- sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")
-
- monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")
- tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
- wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
- thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
- friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
- satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
- sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")
-
- monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")
- tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
- wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
- thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
- friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
- satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
- sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")
+
+ def lookup(key):
+ return findEntryInFile(filePath, key, attribute='territories')[0].split()
+ days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
firstDayByCountryCode = {}
- for countryCode in monFirstDayIn:
- firstDayByCountryCode[countryCode] = "mon"
- for countryCode in tueFirstDayIn:
- firstDayByCountryCode[countryCode] = "tue"
- for countryCode in wedFirstDayIn:
- firstDayByCountryCode[countryCode] = "wed"
- for countryCode in thuFirstDayIn:
- firstDayByCountryCode[countryCode] = "thu"
- for countryCode in friFirstDayIn:
- firstDayByCountryCode[countryCode] = "fri"
- for countryCode in satFirstDayIn:
- firstDayByCountryCode[countryCode] = "sat"
- for countryCode in sunFirstDayIn:
- firstDayByCountryCode[countryCode] = "sun"
+ for day in days:
+ for countryCode in lookup('weekData/firstDay[day=%s]' % day):
+ firstDayByCountryCode[countryCode] = day
weekendStartByCountryCode = {}
- for countryCode in monWeekendStart:
- weekendStartByCountryCode[countryCode] = "mon"
- for countryCode in tueWeekendStart:
- weekendStartByCountryCode[countryCode] = "tue"
- for countryCode in wedWeekendStart:
- weekendStartByCountryCode[countryCode] = "wed"
- for countryCode in thuWeekendStart:
- weekendStartByCountryCode[countryCode] = "thu"
- for countryCode in friWeekendStart:
- weekendStartByCountryCode[countryCode] = "fri"
- for countryCode in satWeekendStart:
- weekendStartByCountryCode[countryCode] = "sat"
- for countryCode in sunWeekendStart:
- weekendStartByCountryCode[countryCode] = "sun"
+ for day in days:
+ for countryCode in lookup('weekData/weekendStart[day=%s]' % day):
+ weekendStartByCountryCode[countryCode] = day
weekendEndByCountryCode = {}
- for countryCode in monWeekendEnd:
- weekendEndByCountryCode[countryCode] = "mon"
- for countryCode in tueWeekendEnd:
- weekendEndByCountryCode[countryCode] = "tue"
- for countryCode in wedWeekendEnd:
- weekendEndByCountryCode[countryCode] = "wed"
- for countryCode in thuWeekendEnd:
- weekendEndByCountryCode[countryCode] = "thu"
- for countryCode in friWeekendEnd:
- weekendEndByCountryCode[countryCode] = "fri"
- for countryCode in satWeekendEnd:
- weekendEndByCountryCode[countryCode] = "sat"
- for countryCode in sunWeekendEnd:
- weekendEndByCountryCode[countryCode] = "sun"
-
- for (key,locale) in locale_database.iteritems():
- countryCode = locale['country_code']
+ for day in days:
+ for countryCode in lookup('weekData/weekendEnd[day=%s]' % day):
+ weekendEndByCountryCode[countryCode] = day
+
+ for (key, locale) in locale_database.iteritems():
+ countryCode = locale.country_code
if countryCode in firstDayByCountryCode:
- locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
+ locale.firstDayOfWeek = firstDayByCountryCode[countryCode]
else:
- locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]
+ locale.firstDayOfWeek = firstDayByCountryCode["001"]
if countryCode in weekendStartByCountryCode:
- locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
+ locale.weekendStart = weekendStartByCountryCode[countryCode]
else:
- locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]
+ locale.weekendStart = weekendStartByCountryCode["001"]
if countryCode in weekendEndByCountryCode:
- locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
+ locale.weekendEnd = weekendEndByCountryCode[countryCode]
else:
- locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]
+ locale.weekendEnd = weekendEndByCountryCode["001"]
if len(sys.argv) != 2:
usage()
@@ -566,7 +433,9 @@ locale_database = {}
# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
defaultContent_locales = {}
-for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"):
+for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+ 'supplementalMetadata.xml'),
+ 'metadata/defaultContent'):
for data in ns[1:][0]:
if data[0] == u"locales":
defaultContent_locales = data[1].split()
@@ -579,36 +448,36 @@ for file in defaultContent_locales:
country_code = items[2]
else:
if len(items) != 2:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [neither lang_script_country nor lang_country]\n')
continue
language_code = items[0]
script_code = ""
country_code = items[1]
if len(country_code) == 4:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [long country code]\n')
continue
try:
l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code)
if not l:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [no locale info generated]\n')
continue
except xpathlite.Error as e:
- sys.stderr.write("skipping defaultContent locale \"%s\" (%s)\n" % (file, str(e)))
+ sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e)))
continue
- locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+ locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
for file in cldr_files:
try:
l = generateLocaleInfo(cldr_dir + "/" + file)
if not l:
- sys.stderr.write("skipping file \"" + file + "\"\n")
+ sys.stderr.write('skipping file "' + file + '" [no locale info generated]\n')
continue
except xpathlite.Error as e:
- sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
+ sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e)))
continue
- locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+ locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
locale_keys = locale_database.keys()
@@ -665,7 +534,7 @@ def _parseLocale(l):
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
- raise xpathlite.Error("unknown language code \"%s\"" % language_code)
+ raise xpathlite.Error('unknown language code "%s"' % language_code)
language = enumdata.language_list[language_id][0]
if len(items) > 1:
@@ -676,14 +545,14 @@ def _parseLocale(l):
if len(script_code) == 4:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error("unknown script code \"%s\"" % script_code)
+ raise xpathlite.Error('unknown script code "%s"' % script_code)
script = enumdata.script_list[script_id][0]
else:
country_code = script_code
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
- raise xpathlite.Error("unknown country code \"%s\"" % country_code)
+ raise xpathlite.Error('unknown country code "%s"' % country_code)
country = enumdata.country_list[country_id][0]
return (language, script, country)
@@ -697,12 +566,12 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
try:
(from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
except xpathlite.Error as e:
- sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+ sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
continue
try:
(to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
except xpathlite.Error as e:
- sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+ sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:
@@ -725,115 +594,10 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
print " </likelySubtags>"
print " <localeList>"
-print \
-" <locale>\n\
- <language>C</language>\n\
- <languageEndonym></languageEndonym>\n\
- <script>AnyScript</script>\n\
- <country>AnyCountry</country>\n\
- <countryEndonym></countryEndonym>\n\
- <decimal>46</decimal>\n\
- <group>44</group>\n\
- <list>59</list>\n\
- <percent>37</percent>\n\
- <zero>48</zero>\n\
- <minus>45</minus>\n\
- <plus>43</plus>\n\
- <exp>101</exp>\n\
- <quotationStart>\"</quotationStart>\n\
- <quotationEnd>\"</quotationEnd>\n\
- <alternateQuotationStart>\'</alternateQuotationStart>\n\
- <alternateQuotationEnd>\'</alternateQuotationEnd>\n\
- <listPatternPartStart>%1, %2</listPatternPartStart>\n\
- <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
- <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
- <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
- <am>AM</am>\n\
- <pm>PM</pm>\n\
- <firstDayOfWeek>mon</firstDayOfWeek>\n\
- <weekendStart>sat</weekendStart>\n\
- <weekendEnd>sun</weekendEnd>\n\
- <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
- <shortDateFormat>d MMM yyyy</shortDateFormat>\n\
- <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
- <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\
- <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\
- <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\
- <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\
- <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\
- <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\
- <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\
- <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\
- <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\
- <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\
- <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
- <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
- <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
- <currencyIsoCode></currencyIsoCode>\n\
- <currencySymbol></currencySymbol>\n\
- <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
- <currencyDigits>2</currencyDigits>\n\
- <currencyRounding>1</currencyRounding>\n\
- <currencyFormat>%1%2</currencyFormat>\n\
- <currencyNegativeFormat></currencyNegativeFormat>\n\
- </locale>"
+Locale.C().toXml()
for key in locale_keys:
- l = locale_database[key]
-
- print " <locale>"
- print " <language>" + l['language'] + "</language>"
- print " <languageEndonym>" + escape(l['language_endonym']).encode('utf-8') + "</languageEndonym>"
- print " <script>" + l['script'] + "</script>"
- print " <country>" + l['country'] + "</country>"
- print " <countryEndonym>" + escape(l['country_endonym']).encode('utf-8') + "</countryEndonym>"
- print " <languagecode>" + l['language_code'] + "</languagecode>"
- print " <scriptcode>" + l['script_code'] + "</scriptcode>"
- print " <countrycode>" + l['country_code'] + "</countrycode>"
- print " <decimal>" + ordStr(l['decimal']) + "</decimal>"
- print " <group>" + ordStr(l['group']) + "</group>"
- print " <list>" + fixOrdStrList(l['list']) + "</list>"
- print " <percent>" + fixOrdStrPercent(l['percent']) + "</percent>"
- print " <zero>" + ordStr(l['zero']) + "</zero>"
- print " <minus>" + fixOrdStrMinus(l['minus']) + "</minus>"
- print " <plus>" + fixOrdStrPlus(l['plus']) + "</plus>"
- print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>"
- print " <quotationStart>" + escape(l['quotationStart']).encode('utf-8') + "</quotationStart>"
- print " <quotationEnd>" + escape(l['quotationEnd']).encode('utf-8') + "</quotationEnd>"
- print " <alternateQuotationStart>" + escape(l['alternateQuotationStart']).encode('utf-8') + "</alternateQuotationStart>"
- print " <alternateQuotationEnd>" + escape(l['alternateQuotationEnd']).encode('utf-8') + "</alternateQuotationEnd>"
- print " <listPatternPartStart>" + escape(l['listPatternPartStart']).encode('utf-8') + "</listPatternPartStart>"
- print " <listPatternPartMiddle>" + escape(l['listPatternPartMiddle']).encode('utf-8') + "</listPatternPartMiddle>"
- print " <listPatternPartEnd>" + escape(l['listPatternPartEnd']).encode('utf-8') + "</listPatternPartEnd>"
- print " <listPatternPartTwo>" + escape(l['listPatternPartTwo']).encode('utf-8') + "</listPatternPartTwo>"
- print " <am>" + escape(l['am']).encode('utf-8') + "</am>"
- print " <pm>" + escape(l['pm']).encode('utf-8') + "</pm>"
- print " <firstDayOfWeek>" + escape(l['firstDayOfWeek']).encode('utf-8') + "</firstDayOfWeek>"
- print " <weekendStart>" + escape(l['weekendStart']).encode('utf-8') + "</weekendStart>"
- print " <weekendEnd>" + escape(l['weekendEnd']).encode('utf-8') + "</weekendEnd>"
- print " <longDateFormat>" + escape(l['longDateFormat']).encode('utf-8') + "</longDateFormat>"
- print " <shortDateFormat>" + escape(l['shortDateFormat']).encode('utf-8') + "</shortDateFormat>"
- print " <longTimeFormat>" + escape(l['longTimeFormat']).encode('utf-8') + "</longTimeFormat>"
- print " <shortTimeFormat>" + escape(l['shortTimeFormat']).encode('utf-8') + "</shortTimeFormat>"
- print " <standaloneLongMonths>" + escape(l['standaloneLongMonths']).encode('utf-8') + "</standaloneLongMonths>"
- print " <standaloneShortMonths>"+ escape(l['standaloneShortMonths']).encode('utf-8') + "</standaloneShortMonths>"
- print " <standaloneNarrowMonths>"+ escape(l['standaloneNarrowMonths']).encode('utf-8') + "</standaloneNarrowMonths>"
- print " <longMonths>" + escape(l['longMonths']).encode('utf-8') + "</longMonths>"
- print " <shortMonths>" + escape(l['shortMonths']).encode('utf-8') + "</shortMonths>"
- print " <narrowMonths>" + escape(l['narrowMonths']).encode('utf-8') + "</narrowMonths>"
- print " <longDays>" + escape(l['longDays']).encode('utf-8') + "</longDays>"
- print " <shortDays>" + escape(l['shortDays']).encode('utf-8') + "</shortDays>"
- print " <narrowDays>" + escape(l['narrowDays']).encode('utf-8') + "</narrowDays>"
- print " <standaloneLongDays>" + escape(l['standaloneLongDays']).encode('utf-8') + "</standaloneLongDays>"
- print " <standaloneShortDays>" + escape(l['standaloneShortDays']).encode('utf-8') + "</standaloneShortDays>"
- print " <standaloneNarrowDays>" + escape(l['standaloneNarrowDays']).encode('utf-8') + "</standaloneNarrowDays>"
- print " <currencyIsoCode>" + escape(l['currencyIsoCode']).encode('utf-8') + "</currencyIsoCode>"
- print " <currencySymbol>" + escape(l['currencySymbol']).encode('utf-8') + "</currencySymbol>"
- print " <currencyDisplayName>" + escape(l['currencyDisplayName']).encode('utf-8') + "</currencyDisplayName>"
- print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
- print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
- print " <currencyFormat>" + escape(l['currencyFormat']).encode('utf-8') + "</currencyFormat>"
- print " <currencyNegativeFormat>" + escape(l['currencyNegativeFormat']).encode('utf-8') + "</currencyNegativeFormat>"
- print " </locale>"
+ locale_database[key].toXml()
+
print " </localeList>"
print "</localeDatabase>"