1 files changed, 201 insertions, 437 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 41cfafab0d..58ea21edab 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
 #############################################################################
 ##
-## Copyright (C) 2016 The Qt Company Ltd.
+## Copyright (C) 2017 The Qt Company Ltd.
 ## Contact: https://www.qt.io/licensing/
 ##
 ## This file is part of the test suite of the Qt Toolkit.
@@ -26,20 +26,31 @@
 ## $QT_END_LICENSE$
 ##
 #############################################################################
+"""Convert CLDR data to qLocaleXML
+
+The CLDR data can be downloaded from CLDR_, which has a sub-directory
+for each version; you need the ``core.zip`` file for your version of
+choice (typically the latest).  This script has been updated to cope
+with versions up to v29; later versions may need adaptations.  Unpack the
+downloaded ``core.zip`` and check it has a common/main/ sub-directory:
+pass the path of that sub-directory to this script as its single
+command-line argument.  Save its standard output (but not error) to a
+file for later processing by ``./qlocalexml2cpp.py``
+
+.. _CLDR: ftp://unicode.org/Public/cldr/
+"""
 
 import os
 import sys
+import re
+
 import enumdata
 import xpathlite
-from  xpathlite import DraftResolution
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
 from dateconverter import convert_date
-from xml.sax.saxutils import escape, unescape
-import re
+from localexml import Locale
 
-findAlias = xpathlite.findAlias
-findEntry = xpathlite.findEntry
 findEntryInFile = xpathlite._findEntryInFile
-findTagsInFile = xpathlite.findTagsInFile
 
 def parse_number_format(patterns, data):
     # this is a very limited parsing of the number format for currency only.
@@ -72,42 +83,49 @@ def parse_number_format(patterns, data):
     return result
 
 def parse_list_pattern_part_format(pattern):
-    # this is a very limited parsing of the format for list pattern part only.
-    result = ""
-    result = pattern.replace("{0}", "%1")
-    result = result.replace("{1}", "%2")
-    result = result.replace("{2}", "%3")
-    return result
-
-def ordStr(c):
-    if len(c) == 1:
-        return str(ord(c))
-    raise xpathlite.Error("Unable to handle value \"%s\"" % addEscapes(c))
-    return "##########"
-
-# the following functions are supposed to fix the problem with QLocale
-# returning a character instead of strings for QLocale::exponential()
-# and others. So we fallback to default values in these cases.
-def fixOrdStrMinus(c):
-    if len(c) == 1:
-        return str(ord(c))
-    return str(ord('-'))
-def fixOrdStrPlus(c):
-    if len(c) == 1:
-        return str(ord(c))
-    return str(ord('+'))
-def fixOrdStrExp(c):
-    if len(c) == 1:
-        return str(ord(c))
-    return str(ord('e'))
-def fixOrdStrPercent(c):
-    if len(c) == 1:
-        return str(ord(c))
-    return str(ord('%'))
-def fixOrdStrList(c):
-    if len(c) == 1:
-        return str(ord(c))
-    return str(ord(';'))
+    # This is a very limited parsing of the format for list pattern part only.
+    return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
+
+def unit_quantifiers(find, path, stem, suffix, known,
+                     # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
+                     # 1000^7 < zebi = 2^{70}, the next quantifiers up:
+                     si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
+    """Work out the unit quantifiers.
+
+    Unfortunately, the CLDR data only go up to terabytes and we want
+    all the way to exabytes; but we can recognize the SI quantifiers
+    as prefixes, strip and identify the tail as the localized
+    translation for 'B' (e.g. French has 'octet' for 'byte' and uses
+    ko, Mo, Go, To from which we can extrapolate Po, Eo).
+
+    Should be called first for the SI quantifiers, with suffix = 'B',
+    then for the IEC ones, with suffix = 'iB'; the list known
+    (initially empty before first call) is used to let the second call
+    know what the first learned about the localized unit.
+    """
+    if suffix == 'B': # first call, known = []
+        tail = suffix
+        for q in si_quantifiers:
+            it = find(path, stem % q)
+            # kB for kilobyte, in contrast with KiB for IEC:
+            q = q[0] if q == 'kilo' else q[0].upper()
+            if not it:
+                it = q + tail
+            elif it.startswith(q):
+                rest = it[1:]
+                tail = rest if all(rest == k for k in known) else suffix
+                known.append(rest)
+            yield it
+    else: # second call, re-using first's known
+        assert suffix == 'iB'
+        if known:
+            byte = known.pop()
+            if all(byte == k for k in known):
+                suffix = 'i' + byte
+        for q in si_quantifiers:
+            yield find(path, stem % q[:2],
+                       # Those don't (yet, v31) exist in CLDR, so we always fall back to:
+                       q[0].upper() + suffix)
 
 def generateLocaleInfo(path):
     if not path.endswith(".xml"):
@@ -116,14 +134,13 @@ def generateLocaleInfo(path):
     # skip legacy/compatibility ones
     alias = findAlias(path)
     if alias:
-        raise xpathlite.Error("alias to \"%s\"" % alias)
+        raise xpathlite.Error('alias to "%s"' % alias)
 
-    language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
-    country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
-    script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
-    variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
+    def code(tag):
+        return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
 
-    return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code)
+    return _generateLocaleInfo(path, code('language'), code('script'),
+                               code('territory'), code('variant'))
 
 def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
     if not path.endswith(".xml"):
@@ -137,25 +154,22 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
     # ### actually there is only one locale with variant: en_US_POSIX
     #     does anybody care about it at all?
     if variant_code:
-        raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
+        raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
 
     language_id = enumdata.languageCodeToId(language_code)
     if language_id <= 0:
-        raise xpathlite.Error("unknown language code \"%s\"" % language_code)
-    language = enumdata.language_list[language_id][0]
+        raise xpathlite.Error('unknown language code "%s"' % language_code)
 
     script_id = enumdata.scriptCodeToId(script_code)
     if script_id == -1:
-        raise xpathlite.Error("unknown script code \"%s\"" % script_code)
-    script = enumdata.script_list[script_id][0]
+        raise xpathlite.Error('unknown script code "%s"' % script_code)
 
     # we should handle fully qualified names with the territory
     if not country_code:
         return {}
     country_id = enumdata.countryCodeToId(country_code)
     if country_id <= 0:
-        raise xpathlite.Error("unknown country code \"%s\"" % country_code)
-    country = enumdata.country_list[country_id][0]
+        raise xpathlite.Error('unknown country code "%s"' % country_code)
 
     # So we say we accept only those values that have "contributed" or
     # "approved" resolution. see http://www.unicode.org/cldr/process.html
@@ -163,39 +177,39 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
     # compatibility.
     draft = DraftResolution.contributed
 
-    result = {}
-    result['language'] = language
-    result['script'] = script
-    result['country'] = country
-    result['language_code'] = language_code
-    result['country_code'] = country_code
-    result['script_code'] = script_code
-    result['variant_code'] = variant_code
-    result['language_id'] = language_id
-    result['script_id'] = script_id
-    result['country_id'] = country_id
+    result = dict(
+        language=enumdata.language_list[language_id][0],
+        language_code=language_code, language_id=language_id,
+        script=enumdata.script_list[script_id][0],
+        script_code=script_code, script_id=script_id,
+        country=enumdata.country_list[country_id][0],
+        country_code=country_code, country_id=country_id,
+        variant_code=variant_code)
 
     (dir_name, file_name) = os.path.split(path)
-    supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
-    currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
+    def from_supplement(tag,
+                        path=os.path.join(dir_name, '..', 'supplemental',
+                                          'supplementalData.xml')):
+        return findTagsInFile(path, tag)
+    currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
     result['currencyIsoCode'] = ''
     result['currencyDigits'] = 2
     result['currencyRounding'] = 1
     if currencies:
         for e in currencies:
             if e[0] == 'currency':
-                tender = True
-                t = filter(lambda x: x[0] == 'tender', e[1])
-                if t and t[0][1] == 'false':
-                    tender = False;
-                if tender and not filter(lambda x: x[0] == 'to', e[1]):
-                    result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
+                t = [x[1] == 'false' for x in e[1] if x[0] == 'tender']
+                if t and t[0]:
+                    pass
+                elif not any(x[0] == 'to' for x in e[1]):
+                    result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next()
                     break
         if result['currencyIsoCode']:
-            t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
+            t = from_supplement("currencyData/fractions/info[iso4217=%s]"
+                                % result['currencyIsoCode'])
             if t and t[0][0] == 'info':
-                result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
-                result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
+                result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next()
+                result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next()
     numbering_system = None
     try:
         numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
@@ -226,7 +240,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
     result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
     try:
         numbering_systems = {}
-        for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"):
+        for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+                                              'numberingSystems.xml'),
+                                 'numberingSystems'):
             tmp = {}
             id = ""
             for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
@@ -279,167 +295,70 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
     result['currencyDisplayName'] = ''
     if result['currencyIsoCode']:
         result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
-        display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
-        result['currencyDisplayName'] \
-            = findEntryDef(path, display_name_path) + ";" \
-            + findEntryDef(path, display_name_path + "[count=zero]")  + ";" \
-            + findEntryDef(path, display_name_path + "[count=one]")   + ";" \
-            + findEntryDef(path, display_name_path + "[count=two]")   + ";" \
-            + findEntryDef(path, display_name_path + "[count=few]")   + ";" \
-            + findEntryDef(path, display_name_path + "[count=many]")  + ";" \
-            + findEntryDef(path, display_name_path + "[count=other]") + ";"
-
-    standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
-    result['standaloneLongMonths'] \
-        = findEntry(path, standalone_long_month_path + "[1]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[2]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[3]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[4]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[5]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[6]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[7]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[8]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[9]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[10]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[11]") + ";" \
-        + findEntry(path, standalone_long_month_path + "[12]") + ";"
-
-    standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month"
-    result['standaloneShortMonths'] \
-        = findEntry(path, standalone_short_month_path + "[1]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[2]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[3]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[4]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[5]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[6]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[7]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[8]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[9]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[10]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[11]") + ";" \
-        + findEntry(path, standalone_short_month_path + "[12]") + ";"
-
-    standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month"
-    result['standaloneNarrowMonths'] \
-        = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \
-        + findEntry(path, standalone_narrow_month_path + "[12]") + ";"
-
-    long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month"
-    result['longMonths'] \
-        = findEntry(path, long_month_path + "[1]") + ";" \
-        + findEntry(path, long_month_path + "[2]") + ";" \
-        + findEntry(path, long_month_path + "[3]") + ";" \
-        + findEntry(path, long_month_path + "[4]") + ";" \
-        + findEntry(path, long_month_path + "[5]") + ";" \
-        + findEntry(path, long_month_path + "[6]") + ";" \
-        + findEntry(path, long_month_path + "[7]") + ";" \
-        + findEntry(path, long_month_path + "[8]") + ";" \
-        + findEntry(path, long_month_path + "[9]") + ";" \
-        + findEntry(path, long_month_path + "[10]") + ";" \
-        + findEntry(path, long_month_path + "[11]") + ";" \
-        + findEntry(path, long_month_path + "[12]") + ";"
-
-    short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month"
-    result['shortMonths'] \
-        = findEntry(path, short_month_path + "[1]") + ";" \
-        + findEntry(path, short_month_path + "[2]") + ";" \
-        + findEntry(path, short_month_path + "[3]") + ";" \
-        + findEntry(path, short_month_path + "[4]") + ";" \
-        + findEntry(path, short_month_path + "[5]") + ";" \
-        + findEntry(path, short_month_path + "[6]") + ";" \
-        + findEntry(path, short_month_path + "[7]") + ";" \
-        + findEntry(path, short_month_path + "[8]") + ";" \
-        + findEntry(path, short_month_path + "[9]") + ";" \
-        + findEntry(path, short_month_path + "[10]") + ";" \
-        + findEntry(path, short_month_path + "[11]") + ";" \
-        + findEntry(path, short_month_path + "[12]") + ";"
-
-    narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month"
-    result['narrowMonths'] \
-        = findEntry(path, narrow_month_path + "[1]") + ";" \
-        + findEntry(path, narrow_month_path + "[2]") + ";" \
-        + findEntry(path, narrow_month_path + "[3]") + ";" \
-        + findEntry(path, narrow_month_path + "[4]") + ";" \
-        + findEntry(path, narrow_month_path + "[5]") + ";" \
-        + findEntry(path, narrow_month_path + "[6]") + ";" \
-        + findEntry(path, narrow_month_path + "[7]") + ";" \
-        + findEntry(path, narrow_month_path + "[8]") + ";" \
-        + findEntry(path, narrow_month_path + "[9]") + ";" \
-        + findEntry(path, narrow_month_path + "[10]") + ";" \
-        + findEntry(path, narrow_month_path + "[11]") + ";" \
-        + findEntry(path, narrow_month_path + "[12]") + ";"
-
-    long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day"
-    result['longDays'] \
-        = findEntry(path, long_day_path + "[sun]") + ";" \
-        + findEntry(path, long_day_path + "[mon]") + ";" \
-        + findEntry(path, long_day_path + "[tue]") + ";" \
-        + findEntry(path, long_day_path + "[wed]") + ";" \
-        + findEntry(path, long_day_path + "[thu]") + ";" \
-        + findEntry(path, long_day_path + "[fri]") + ";" \
-        + findEntry(path, long_day_path + "[sat]") + ";"
-
-    short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day"
-    result['shortDays'] \
-        = findEntry(path, short_day_path + "[sun]") + ";" \
-        + findEntry(path, short_day_path + "[mon]") + ";" \
-        + findEntry(path, short_day_path + "[tue]") + ";" \
-        + findEntry(path, short_day_path + "[wed]") + ";" \
-        + findEntry(path, short_day_path + "[thu]") + ";" \
-        + findEntry(path, short_day_path + "[fri]") + ";" \
-        + findEntry(path, short_day_path + "[sat]") + ";"
-
-    narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day"
-    result['narrowDays'] \
-        = findEntry(path, narrow_day_path + "[sun]") + ";" \
-        + findEntry(path, narrow_day_path + "[mon]") + ";" \
-        + findEntry(path, narrow_day_path + "[tue]") + ";" \
-        + findEntry(path, narrow_day_path + "[wed]") + ";" \
-        + findEntry(path, narrow_day_path + "[thu]") + ";" \
-        + findEntry(path, narrow_day_path + "[fri]") + ";" \
-        + findEntry(path, narrow_day_path + "[sat]") + ";"
-
-    standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day"
-    result['standaloneLongDays'] \
-        = findEntry(path, standalone_long_day_path + "[sun]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[mon]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[tue]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[wed]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[thu]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[fri]") + ";" \
-        + findEntry(path, standalone_long_day_path + "[sat]") + ";"
-
-    standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day"
-    result['standaloneShortDays'] \
-        = findEntry(path, standalone_short_day_path + "[sun]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[mon]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[tue]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[wed]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[thu]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[fri]") + ";" \
-        + findEntry(path, standalone_short_day_path + "[sat]") + ";"
-
-    standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day"
-    result['standaloneNarrowDays'] \
-        = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
-        + findEntry(path, standalone_narrow_day_path + "[sat]") + ";"
-
-    return result
+        result['currencyDisplayName'] = ';'.join(
+            findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
+                         + ']/displayName' + tail)
+            for tail in ['',] + [
+                '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
+                ]) + ';'
+
+    def findUnitDef(path, stem, fallback=''):
+        # The displayName for a quantified unit in en.xml is kByte
+        # instead of kB (etc.), so prefer any unitPattern provided:
+        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
+            try:
+                ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
+            except xpathlite.Error:
+                continue
+
+            # TODO: exploit count-handling, instead of discarding placeholders
+            if ans.startswith('{0}'):
+                ans = ans[3:].lstrip()
+            if ans:
+                return ans
+
+        return findEntryDef(path, stem + 'displayName', fallback)
+
+    # First without quantifier, then quantified each way:
+    result['byte_unit'] = findEntryDef(
+        path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
+        'bytes')
+    stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
+    known = [] # cases where we *do* have a given version:
+    result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
+    # IEC 60027-2
+    # http://physics.nist.gov/cuu/Units/binary.html
+    result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))
+
+    # Used for month and day data:
+    namings = (
+        ('standaloneLong', 'stand-alone', 'wide'),
+        ('standaloneShort', 'stand-alone', 'abbreviated'),
+        ('standaloneNarrow', 'stand-alone', 'narrow'),
+        ('long', 'format', 'wide'),
+        ('short', 'format', 'abbreviated'),
+        ('narrow', 'format', 'narrow'),
+        )
+
+    # Month data:
+    for cal in ('gregorian',): # We shall want to add to this
+        stem = 'dates/calendars/calendar[' + cal + ']/months/'
+        for (key, mode, size) in namings:
+            prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
+            result[key + 'Months'] = ';'.join(
+                findEntry(path, stem + prop + "month[%d]" % i)
+                for i in range(1, 13)) + ';'
+
+    # Day data (for Gregorian, at least):
+    stem = 'dates/calendars/calendar[gregorian]/days/'
+    days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
+    for (key, mode, size) in namings:
+        prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
+        result[key + 'Days'] = ';'.join(
+            findEntry(path, stem + prop + '[' + day + ']')
+            for day in days) + ';'
+
+    return Locale(result)
 
 def addEscapes(s):
     result = ''
@@ -463,94 +382,42 @@ def usage():
 def integrateWeekData(filePath):
     if not filePath.endswith(".xml"):
         return {}
-    monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")
-    tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
-    wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
-    thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
-    friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
-    satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
-    sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")
-
-    monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")
-    tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
-    wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
-    thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
-    friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
-    satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
-    sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")
-
-    monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")
-    tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
-    wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
-    thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
-    friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
-    satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
-    sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")
+
+    def lookup(key):
+        return findEntryInFile(filePath, key, attribute='territories')[0].split()
+    days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
 
     firstDayByCountryCode = {}
-    for countryCode in monFirstDayIn:
-        firstDayByCountryCode[countryCode] = "mon"
-    for countryCode in tueFirstDayIn:
-        firstDayByCountryCode[countryCode] = "tue"
-    for countryCode in wedFirstDayIn:
-        firstDayByCountryCode[countryCode] = "wed"
-    for countryCode in thuFirstDayIn:
-        firstDayByCountryCode[countryCode] = "thu"
-    for countryCode in friFirstDayIn:
-        firstDayByCountryCode[countryCode] = "fri"
-    for countryCode in satFirstDayIn:
-        firstDayByCountryCode[countryCode] = "sat"
-    for countryCode in sunFirstDayIn:
-        firstDayByCountryCode[countryCode] = "sun"
+    for day in days:
+        for countryCode in lookup('weekData/firstDay[day=%s]' % day):
+            firstDayByCountryCode[countryCode] = day
 
     weekendStartByCountryCode = {}
-    for countryCode in monWeekendStart:
-        weekendStartByCountryCode[countryCode] = "mon"
-    for countryCode in tueWeekendStart:
-        weekendStartByCountryCode[countryCode] = "tue"
-    for countryCode in wedWeekendStart:
-        weekendStartByCountryCode[countryCode] = "wed"
-    for countryCode in thuWeekendStart:
-        weekendStartByCountryCode[countryCode] = "thu"
-    for countryCode in friWeekendStart:
-        weekendStartByCountryCode[countryCode] = "fri"
-    for countryCode in satWeekendStart:
-        weekendStartByCountryCode[countryCode] = "sat"
-    for countryCode in sunWeekendStart:
-        weekendStartByCountryCode[countryCode] = "sun"
+    for day in days:
+        for countryCode in lookup('weekData/weekendStart[day=%s]' % day):
+            weekendStartByCountryCode[countryCode] = day
 
     weekendEndByCountryCode = {}
-    for countryCode in monWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "mon"
-    for countryCode in tueWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "tue"
-    for countryCode in wedWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "wed"
-    for countryCode in thuWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "thu"
-    for countryCode in friWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "fri"
-    for countryCode in satWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "sat"
-    for countryCode in sunWeekendEnd:
-        weekendEndByCountryCode[countryCode] = "sun"
-
-    for (key,locale) in locale_database.iteritems():
-        countryCode = locale['country_code']
+    for day in days:
+        for countryCode in lookup('weekData/weekendEnd[day=%s]' % day):
+            weekendEndByCountryCode[countryCode] = day
+
+    for (key, locale) in locale_database.iteritems():
+        countryCode = locale.country_code
         if countryCode in firstDayByCountryCode:
-            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
+            locale.firstDayOfWeek = firstDayByCountryCode[countryCode]
         else:
-            locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]
+            locale.firstDayOfWeek = firstDayByCountryCode["001"]
 
         if countryCode in weekendStartByCountryCode:
-            locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
+            locale.weekendStart = weekendStartByCountryCode[countryCode]
         else:
-            locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]
+            locale.weekendStart = weekendStartByCountryCode["001"]
 
         if countryCode in weekendEndByCountryCode:
-            locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
+            locale.weekendEnd = weekendEndByCountryCode[countryCode]
         else:
-            locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]
+            locale.weekendEnd = weekendEndByCountryCode["001"]
 
 if len(sys.argv) != 2:
     usage()
@@ -566,7 +433,9 @@ locale_database = {}
 
 # see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
 defaultContent_locales = {}
-for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"):
+for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+                                      'supplementalMetadata.xml'),
+                         'metadata/defaultContent'):
     for data in ns[1:][0]:
         if data[0] == u"locales":
             defaultContent_locales = data[1].split()
@@ -579,36 +448,36 @@ for file in defaultContent_locales:
         country_code = items[2]
     else:
         if len(items) != 2:
-            sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+            sys.stderr.write('skipping defaultContent locale "%s" [neither lang_script_country nor lang_country]\n' % file)
             continue
         language_code = items[0]
         script_code = ""
         country_code = items[1]
         if len(country_code) == 4:
-            sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+            sys.stderr.write('skipping defaultContent locale "%s" [long country code]\n' % file)
             continue
     try:
         l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code)
         if not l:
-            sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+            sys.stderr.write('skipping defaultContent locale "%s" [no locale info generated]\n' % file)
             continue
     except xpathlite.Error as e:
-        sys.stderr.write("skipping defaultContent locale \"%s\" (%s)\n" % (file, str(e)))
+        sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e)))
         continue
 
-    locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+    locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
 
 for file in cldr_files:
     try:
         l = generateLocaleInfo(cldr_dir + "/" + file)
         if not l:
-            sys.stderr.write("skipping file \"" + file + "\"\n")
+            sys.stderr.write('skipping file "%s" [no locale info generated]\n' % file)
             continue
     except xpathlite.Error as e:
-        sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
+        sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e)))
         continue
 
-    locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+    locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
 
 integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
 locale_keys = locale_database.keys()
@@ -665,7 +534,7 @@ def _parseLocale(l):
     if language_code != "und":
         language_id = enumdata.languageCodeToId(language_code)
         if language_id == -1:
-            raise xpathlite.Error("unknown language code \"%s\"" % language_code)
+            raise xpathlite.Error('unknown language code "%s"' % language_code)
         language = enumdata.language_list[language_id][0]
 
     if len(items) > 1:
@@ -676,14 +545,14 @@ def _parseLocale(l):
         if len(script_code) == 4:
             script_id = enumdata.scriptCodeToId(script_code)
             if script_id == -1:
-                raise xpathlite.Error("unknown script code \"%s\"" % script_code)
+                raise xpathlite.Error('unknown script code "%s"' % script_code)
             script = enumdata.script_list[script_id][0]
         else:
             country_code = script_code
         if country_code:
             country_id = enumdata.countryCodeToId(country_code)
             if country_id == -1:
-                raise xpathlite.Error("unknown country code \"%s\"" % country_code)
+                raise xpathlite.Error('unknown country code "%s"' % country_code)
             country = enumdata.country_list[country_id][0]
 
     return (language, script, country)
@@ -697,12 +566,12 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
     try:
         (from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
     except xpathlite.Error as e:
-        sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+        sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
         continue
     try:
         (to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
     except xpathlite.Error as e:
-        sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+        sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
         continue
     # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
     if to_country == "AnyCountry" and from_country != to_country:
@@ -725,115 +594,10 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
 print "    </likelySubtags>"
 
 print "    <localeList>"
-print \
-"        <locale>\n\
-            <language>C</language>\n\
-            <languageEndonym></languageEndonym>\n\
-            <script>AnyScript</script>\n\
-            <country>AnyCountry</country>\n\
-            <countryEndonym></countryEndonym>\n\
-            <decimal>46</decimal>\n\
-            <group>44</group>\n\
-            <list>59</list>\n\
-            <percent>37</percent>\n\
-            <zero>48</zero>\n\
-            <minus>45</minus>\n\
-            <plus>43</plus>\n\
-            <exp>101</exp>\n\
-            <quotationStart>\"</quotationStart>\n\
-            <quotationEnd>\"</quotationEnd>\n\
-            <alternateQuotationStart>\'</alternateQuotationStart>\n\
-            <alternateQuotationEnd>\'</alternateQuotationEnd>\n\
-            <listPatternPartStart>%1, %2</listPatternPartStart>\n\
-            <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
-            <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
-            <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
-            <am>AM</am>\n\
-            <pm>PM</pm>\n\
-            <firstDayOfWeek>mon</firstDayOfWeek>\n\
-            <weekendStart>sat</weekendStart>\n\
-            <weekendEnd>sun</weekendEnd>\n\
-            <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
-            <shortDateFormat>d MMM yyyy</shortDateFormat>\n\
-            <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
-            <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\
-            <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\
-            <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\
-            <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\
-            <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\
-            <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\
-            <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\
-            <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\
-            <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\
-            <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\
-            <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
-            <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
-            <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
-            <currencyIsoCode></currencyIsoCode>\n\
-            <currencySymbol></currencySymbol>\n\
-            <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
-            <currencyDigits>2</currencyDigits>\n\
-            <currencyRounding>1</currencyRounding>\n\
-            <currencyFormat>%1%2</currencyFormat>\n\
-            <currencyNegativeFormat></currencyNegativeFormat>\n\
-        </locale>"
 
+Locale.C().toXml()  # C locale goes first, as the hard-coded XML it replaces did
 for key in locale_keys:
-    l = locale_database[key]
-
-    print "        <locale>"
-    print "            <language>" + l['language']        + "</language>"
-    print "            <languageEndonym>" + escape(l['language_endonym']).encode('utf-8') + "</languageEndonym>"
-    print "            <script>" + l['script']        + "</script>"
-    print "            <country>"  + l['country']         + "</country>"
-    print "            <countryEndonym>"  + escape(l['country_endonym']).encode('utf-8') + "</countryEndonym>"
-    print "            <languagecode>" + l['language_code']        + "</languagecode>"
-    print "            <scriptcode>" + l['script_code']        + "</scriptcode>"
-    print "            <countrycode>"  + l['country_code']         + "</countrycode>"
-    print "            <decimal>"  + ordStr(l['decimal']) + "</decimal>"
-    print "            <group>"    + ordStr(l['group'])   + "</group>"
-    print "            <list>"     + fixOrdStrList(l['list'])    + "</list>"
-    print "            <percent>"  + fixOrdStrPercent(l['percent']) + "</percent>"
-    print "            <zero>"     + ordStr(l['zero'])    + "</zero>"
-    print "            <minus>"    + fixOrdStrMinus(l['minus'])   + "</minus>"
-    print "            <plus>"     + fixOrdStrPlus(l['plus'])   + "</plus>"
-    print "            <exp>"      + fixOrdStrExp(l['exp'])     + "</exp>"
-    print "            <quotationStart>" + escape(l['quotationStart']).encode('utf-8') + "</quotationStart>"
-    print "            <quotationEnd>" + escape(l['quotationEnd']).encode('utf-8')   + "</quotationEnd>"
-    print "            <alternateQuotationStart>" + escape(l['alternateQuotationStart']).encode('utf-8') + "</alternateQuotationStart>"
-    print "            <alternateQuotationEnd>" + escape(l['alternateQuotationEnd']).encode('utf-8')   + "</alternateQuotationEnd>"
-    print "            <listPatternPartStart>" + escape(l['listPatternPartStart']).encode('utf-8')   + "</listPatternPartStart>"
-    print "            <listPatternPartMiddle>" + escape(l['listPatternPartMiddle']).encode('utf-8')   + "</listPatternPartMiddle>"
-    print "            <listPatternPartEnd>" + escape(l['listPatternPartEnd']).encode('utf-8')   + "</listPatternPartEnd>"
-    print "            <listPatternPartTwo>" + escape(l['listPatternPartTwo']).encode('utf-8')   + "</listPatternPartTwo>"
-    print "            <am>"       + escape(l['am']).encode('utf-8') + "</am>"
-    print "            <pm>"       + escape(l['pm']).encode('utf-8') + "</pm>"
-    print "            <firstDayOfWeek>"  + escape(l['firstDayOfWeek']).encode('utf-8') + "</firstDayOfWeek>"
-    print "            <weekendStart>"  + escape(l['weekendStart']).encode('utf-8') + "</weekendStart>"
-    print "            <weekendEnd>"  + escape(l['weekendEnd']).encode('utf-8') + "</weekendEnd>"
-    print "            <longDateFormat>"  + escape(l['longDateFormat']).encode('utf-8')  + "</longDateFormat>"
-    print "            <shortDateFormat>" + escape(l['shortDateFormat']).encode('utf-8') + "</shortDateFormat>"
-    print "            <longTimeFormat>"  + escape(l['longTimeFormat']).encode('utf-8')  + "</longTimeFormat>"
-    print "            <shortTimeFormat>" + escape(l['shortTimeFormat']).encode('utf-8') + "</shortTimeFormat>"
-    print "            <standaloneLongMonths>" + escape(l['standaloneLongMonths']).encode('utf-8')      + "</standaloneLongMonths>"
-    print "            <standaloneShortMonths>"+ escape(l['standaloneShortMonths']).encode('utf-8')      + "</standaloneShortMonths>"
-    print "            <standaloneNarrowMonths>"+ escape(l['standaloneNarrowMonths']).encode('utf-8')      + "</standaloneNarrowMonths>"
-    print "            <longMonths>"      + escape(l['longMonths']).encode('utf-8')      + "</longMonths>"
-    print "            <shortMonths>"     + escape(l['shortMonths']).encode('utf-8')     + "</shortMonths>"
-    print "            <narrowMonths>"     + escape(l['narrowMonths']).encode('utf-8')     + "</narrowMonths>"
-    print "            <longDays>"        + escape(l['longDays']).encode('utf-8')        + "</longDays>"
-    print "            <shortDays>"       + escape(l['shortDays']).encode('utf-8')       + "</shortDays>"
-    print "            <narrowDays>"       + escape(l['narrowDays']).encode('utf-8')       + "</narrowDays>"
-    print "            <standaloneLongDays>" + escape(l['standaloneLongDays']).encode('utf-8')        + "</standaloneLongDays>"
-    print "            <standaloneShortDays>" + escape(l['standaloneShortDays']).encode('utf-8')       + "</standaloneShortDays>"
-    print "            <standaloneNarrowDays>" + escape(l['standaloneNarrowDays']).encode('utf-8')       + "</standaloneNarrowDays>"
-    print "            <currencyIsoCode>" + escape(l['currencyIsoCode']).encode('utf-8') + "</currencyIsoCode>"
-    print "            <currencySymbol>" + escape(l['currencySymbol']).encode('utf-8') + "</currencySymbol>"
-    print "            <currencyDisplayName>" + escape(l['currencyDisplayName']).encode('utf-8') + "</currencyDisplayName>"
-    print "            <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
-    print "            <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
-    print "            <currencyFormat>" + escape(l['currencyFormat']).encode('utf-8') + "</currencyFormat>"
-    print "            <currencyNegativeFormat>" + escape(l['currencyNegativeFormat']).encode('utf-8') + "</currencyNegativeFormat>"
-    print "        </locale>"
+    locale_database[key].toXml()
+
 print "    </localeList>"
 print "</localeDatabase>"