Rename util/locale_database/ to include the e that was missing

It was misnamed local_database, quite missing the point of its name. Change-Id: I73a4fdf24f53daac12304de1f443636d89afacb2 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
author: Edward Welbourne <edward.welbourne@qt.io> 2019-05-16 14:31:54 +0200
committer: Edward Welbourne <edward.welbourne@qt.io> 2019-05-20 20:42:10 +0200
commit: 248b6756da0d31c58672c0e356c3ec16e9088234 (patch)
tree: 4869c1ca3d1415a9b259f4afbc61a702fee6812b /util/locale_database
parent: cf909f0ef609c4581ebbe2f81c7ae0c5e43d653f (diff)
15 files changed, 4137 insertions, 0 deletions
diff --git a/util/locale_database/README b/util/locale_database/README
new file mode 100644
index 0000000000..8654968d66
--- /dev/null
+++ b/util/locale_database/README
@@ -0,0 +1,5 @@
+locale_database is used to generate qlocale data from CLDR.
+
+CLDR is the Common Locale Data Repository, a database for localized
+data (like date formats, country names etc).  It is provided by the
+Unicode consortium.
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
new file mode 100755
index 0000000000..4ce0a6e3b1
--- /dev/null
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -0,0 +1,663 @@
+#!/usr/bin/env python2
+#############################################################################
+##
+## Copyright (C) 2017 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Convert CLDR data to qLocaleXML
+
+The CLDR data can be downloaded from CLDR_, which has a sub-directory
+for each version; you need the ``core.zip`` file for your version of
+choice (typically the latest).  This script has had updates to cope up
+to v35; for later versions, we may need adaptations.  Unpack the
+downloaded ``core.zip`` and check it has a common/main/ sub-directory:
+pass the path of that sub-directory to this script as its single
+command-line argument.  Save its standard output (but not error) to a
+file for later processing by ``./qlocalexml2cpp.py``
+
+When you update the CLDR data, be sure to also update
+src/corelib/tools/qt_attribution.json's entry for unicode-cldr.  Check
+this script's output for unknown language, country or script messages;
+if any can be resolved, use their entry in common/main/en.xml to
+append new entries to enumdata.py's lists and update documentation in
+src/corelib/tools/qlocale.qdoc, adding the new entries in alphabetic
+order.
+
+.. _CLDR: ftp://unicode.org/Public/cldr/
+"""
+
+import os
+import sys
+import re
+import textwrap
+
+import enumdata
+import xpathlite
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
+from dateconverter import convert_date
+from localexml import Locale
+
+# Local alias for xpathlite._findEntryInFile; below it is called with an
+# attribute= keyword and the result is indexed with [0].
+findEntryInFile = xpathlite._findEntryInFile
+def wrappedwarn(prefix, tokens):
+    """Write a warning to stderr, wrapped to a width of 80 columns.
+
+    The text is prefix followed by the tokens joined with ', ';
+    continuation lines are indented by one space and a newline is
+    appended.  Returns whatever sys.stderr.write() returns."""
+    return sys.stderr.write(
+        '\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
+                                subsequent_indent=' ', width=80)) + '\n')
+
+def parse_number_format(patterns, data):
+    """Convert a CLDR currency number pattern to %-placeholder form.
+
+    patterns is a single string, split on ';' into sub-patterns; data
+    is a mapping from which the 'minus' and 'plus' entries are used to
+    replace '-' and '+'.  Returns a list with one converted string per
+    sub-pattern, in which the digits are represented by %1 and the
+    currency sign (U+00A4) by %2."""
+    # this is a very limited parsing of the number format for currency only.
+    def skip_repeating_pattern(x):
+        # Treat '0' as '#', drop ',' and '.', then collapse each run of
+        # consecutive '#' to a single '#'; other characters are kept.
+        p = x.replace('0', '#').replace(',', '').replace('.', '')
+        seen = False
+        result = ''
+        for c in p:
+            if c == '#':
+                if seen:
+                    continue
+                seen = True
+            else:
+                seen = False
+            result = result + c
+        return result
+    patterns = patterns.split(';')
+    result = []
+    for pattern in patterns:
+        pattern = skip_repeating_pattern(pattern)
+        pattern = pattern.replace('#', "%1")
+        # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
+        # there can be doubled or tripled currency sign, however none of the
+        # locales use that.
+        pattern = pattern.replace(u'\xa4', "%2")
+        # Turn each doubled quote into one quote and drop lone quotes; "###"
+        # is a temporary stand-in so the doubled ones survive the removal:
+        pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'")
+        pattern = pattern.replace('-', data['minus'])
+        pattern = pattern.replace('+', data['plus'])
+        result.append(pattern)
+    return result
+
+def parse_list_pattern_part_format(pattern):
+    """Return pattern with {0}, {1} and {2} replaced by %1, %2 and %3."""
+    # This is a very limited parsing of the format for list pattern part only.
+    return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
+
+def unit_quantifiers(find, path, stem, suffix, known,
+                     # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
+                     # 1000^7 < zebi = 2^{70}, the next quantifiers up:
+                     si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
+    """Work out the unit quantifiers.
+
+    Unfortunately, the CLDR data only go up to terabytes and we want
+    all the way to exabytes; but we can recognize the SI quantifiers
+    as prefixes, strip and identify the tail as the localized
+    translation for 'B' (e.g. French has 'octet' for 'byte' and uses
+    ko, Mo, Go, To from which we can extrapolate Po, Eo).
+
+    Should be called first for the SI quantifiers, with suffix = 'B',
+    then for the IEC ones, with suffix = 'iB'; the list known
+    (initially empty before first call) is used to let the second call
+    know what the first learned about the localized unit.
+
+    This is a generator, yielding one string per entry in
+    si_quantifiers.  find is called as find(path, xpath) by the first
+    call and as find(path, xpath, fallback) by the second; stem is a
+    %-format into which the quantifier text is substituted to make
+    each xpath.  Both calls modify known in place.
+    """
+    if suffix == 'B': # first call, known = []
+        tail = suffix
+        for q in si_quantifiers:
+            it = find(path, stem % q)
+            # kB for kilobyte, in contrast with KiB for IEC:
+            q = q[0] if q == 'kilo' else q[0].upper()
+            if not it:
+                # Nothing found: synthesize from the prefix letter and the
+                # tail currently in use.
+                it = q + tail
+            elif it.startswith(q):
+                # What follows the prefix letter is taken as the localized
+                # unit; it only becomes the tail for later fallbacks if it
+                # agrees with everything learned before it.
+                rest = it[1:]
+                tail = rest if all(rest == k for k in known) else suffix
+                known.append(rest)
+            # (A found value not starting with q is yielded as it is,
+            # without learning anything from it.)
+            yield it
+    else: # second call, re-using first's known
+        assert suffix == 'iB'
+        if known:
+            # Use the localized unit only if all learned values agree:
+            byte = known.pop()
+            if all(byte == k for k in known):
+                suffix = 'i' + byte
+        for q in si_quantifiers:
+            # Only the first two letters of each SI name go into stem:
+            yield find(path, stem % q[:2],
+                       # Those don't (yet, v31) exist in CLDR, so we always fall back to:
+                       q[0].upper() + suffix)
+
+def generateLocaleInfo(path):
+    """Build locale information for the CLDR file at path.
+
+    Returns an empty dict if path does not end in ".xml".  Raises
+    xpathlite.Error if findAlias() reports the file to be an alias.
+    Otherwise reads the type attributes of the file's identity
+    language, script, territory and variant entries and returns what
+    _generateLocaleInfo() makes of them."""
+    if not path.endswith(".xml"):
+        return {}
+
+    # skip legacy/compatibility ones
+    alias = findAlias(path)
+    if alias:
+        raise xpathlite.Error('alias to "%s"' % alias)
+
+    def code(tag):
+        # The type attribute of identity/<tag>, from this file:
+        return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
+
+    return _generateLocaleInfo(path, code('language'), code('script'),
+                               code('territory'), code('variant'))
+
+def getNumberSystems(cache={}):
+    """Cached look-up of number system information.
+
+    Pass no arguments: the mutable default of the cache parameter is
+    deliberately shared between calls, to serve as the cache.  Returns
+    a mapping from number system names to, for each system, a mapping
+    built from its attributes (e.g. u'digits', u'type' and u'id').
+    A system whose first digit has a code-point above 0xffff is left
+    out, with a message on stderr.  Reads the module-level cldr_dir,
+    so can only be called after that has been set."""
+    if not cache:
+        for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+                                              'numberingSystems.xml'),
+                                 'numberingSystems'):
+            # ns has form: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
+            entry = dict(ns[1])
+            name = entry[u'id']
+            if u'digits' in entry and ord(entry[u'digits'][0]) > 0xffff:
+                # FIXME, QTBUG-69324: make this redundant:
+                # omit number system if zero doesn't fit in single-char16 UTF-16 :-(
+                sys.stderr.write('skipping number system "%s" [can\'t represent its zero, U+%X]\n'
+                                 % (name, ord(entry[u'digits'][0])))
+            else:
+                cache[name] = entry
+    return cache
+
+def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
+    """Build the Locale object for one CLDR locale file.
+
+    Returns an empty dict (no locale) when path does not end in
+    ".xml", when language_code is 'root' or when country_code is
+    empty.  Raises xpathlite.Error if a variant is given or if the
+    language, script or country code is rejected by enumdata; the
+    look-ups below that are not wrapped in a try may also let an
+    xpathlite.Error escape.  Otherwise returns Locale(result), where
+    result maps field names to values gathered from path and from
+    supplementalData.xml in the sibling ../supplemental/ directory."""
+    if not path.endswith(".xml"):
+        return {}
+
+    if language_code == 'root':
+        # just skip it
+        return {}
+
+    # we do not support variants
+    # ### actually there is only one locale with variant: en_US_POSIX
+    #     does anybody care about it at all?
+    if variant_code:
+        raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
+
+    language_id = enumdata.languageCodeToId(language_code)
+    if language_id <= 0:
+        raise xpathlite.Error('unknown language code "%s"' % language_code)
+
+    script_id = enumdata.scriptCodeToId(script_code)
+    if script_id == -1:
+        raise xpathlite.Error('unknown script code "%s"' % script_code)
+
+    # we should handle fully qualified names with the territory
+    if not country_code:
+        return {}
+    country_id = enumdata.countryCodeToId(country_code)
+    if country_id <= 0:
+        raise xpathlite.Error('unknown country code "%s"' % country_code)
+
+    # So we say we accept only those values that have "contributed" or
+    # "approved" resolution. see http://www.unicode.org/cldr/process.html
+    # But we only respect the resolution for new data, for backward
+    # compatibility.
+    draft = DraftResolution.contributed
+
+    result = dict(
+        language=enumdata.language_list[language_id][0],
+        language_code=language_code, language_id=language_id,
+        script=enumdata.script_list[script_id][0],
+        script_code=script_code, script_id=script_id,
+        country=enumdata.country_list[country_id][0],
+        country_code=country_code, country_id=country_id,
+        variant_code=variant_code)
+
+    (dir_name, file_name) = os.path.split(path)
+    def from_supplement(tag,
+                        path=os.path.join(dir_name, '..', 'supplemental',
+                                          'supplementalData.xml')):
+        return findTagsInFile(path, tag)
+    currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
+    # Defaults, used when no currency data is found for this country:
+    result['currencyIsoCode'] = ''
+    result['currencyDigits'] = 2
+    result['currencyRounding'] = 1
+    if currencies:
+        # Use the first currency entry that is not marked tender="false"
+        # and has no 'to' attribute:
+        for e in currencies:
+            if e[0] == 'currency':
+                t = [x[1] == 'false' for x in e[1] if x[0] == 'tender']
+                if t and t[0]:
+                    pass
+                elif not any(x[0] == 'to' for x in e[1]):
+                    result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next()
+                    break
+        if result['currencyIsoCode']:
+            t = from_supplement("currencyData/fractions/info[iso4217=%s]"
+                                % result['currencyIsoCode'])
+            if t and t[0][0] == 'info':
+                result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next()
+                result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next()
+    # Stays None if the look-up of a default numbering system fails:
+    numbering_system = None
+    try:
+        numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
+    except xpathlite.Error:
+        pass
+    def findEntryDef(path, xpath, value=''):
+        # As findEntry(), but returns value instead of raising xpathlite.Error.
+        try:
+            return findEntry(path, xpath)
+        except xpathlite.Error:
+            return value
+    def get_number_in_system(path, xpath, numbering_system):
+        # Look xpath up for the given numbering system, if any, trying two
+        # ways to select it; failing those, look up the plain xpath (whose
+        # xpathlite.Error, if any, is passed on to the caller).
+        if numbering_system:
+            try:
+                return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
+            except xpathlite.Error:
+                # in CLDR 1.9 number system was refactored for numbers (but not for currency)
+                # so if previous findEntry doesn't work we should try this:
+                try:
+                    return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
+                except xpathlite.Error:
+                    # fallback to default
+                    pass
+        return findEntry(path, xpath)
+
+    result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system)
+    result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system)
+    result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system)
+    result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
+    # Prefer the first of the numbering system's digits as zero; if that
+    # fails for any reason, say so and look up nativeZeroDigit instead:
+    try:
+        result['zero'] = getNumberSystems()[numbering_system][u"digits"][0]
+    except Exception as e:
+        sys.stderr.write("Native zero detection problem: %s\n" % repr(e))
+        result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system)
+    result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system)
+    result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system)
+    result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower()
+    result['quotationStart'] = findEntry(path, "delimiters/quotationStart")
+    result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd")
+    result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart")
+    result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd")
+    result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]"))
+    result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]"))
+    result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]"))
+    result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]"))
+    result['am'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft)
+    result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft)
+    result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern"))
+    result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern"))
+    result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern"))
+    result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern"))
+
+    # Try progressively less specific keys for the language's own name,
+    # stopping at the first that yields a non-empty value:
+    endonym = None
+    if country_code and script_code:
+        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code))
+    if not endonym and script_code:
+        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code))
+    if not endonym and country_code:
+        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code))
+    if not endonym:
+        endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code))
+    result['language_endonym'] = endonym
+    result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code))
+
+    # The pattern may hold a second, ';'-separated, form: it is used as
+    # the negative format when present.
+    currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system)
+    currency_format = parse_number_format(currency_format, result)
+    result['currencyFormat'] = currency_format[0]
+    result['currencyNegativeFormat'] = ''
+    if len(currency_format) > 1:
+        result['currencyNegativeFormat'] = currency_format[1]
+
+    result['currencySymbol'] = ''
+    result['currencyDisplayName'] = ''
+    if result['currencyIsoCode']:
+        result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
+        # Seven ';'-terminated fields: the displayName with no count, then
+        # one for each listed count (empty where the look-up fails):
+        result['currencyDisplayName'] = ';'.join(
+            findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
+                         + ']/displayName' + tail)
+            for tail in ['',] + [
+                '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
+                ]) + ';'
+
+    def findUnitDef(path, stem, fallback=''):
+        # Returns the first non-empty unitPattern under stem (minus any
+        # leading '{0}' and the space after it), trying the counts in the
+        # order given; failing that, stem's displayName, else fallback.
+        # The displayName for a quantified unit in en.xml is kByte
+        # instead of kB (etc.), so prefer any unitPattern provided:
+        for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
+            try:
+                ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
+            except xpathlite.Error:
+                continue
+
+            # TODO: exploit count-handling, instead of discarding placeholders
+            if ans.startswith('{0}'):
+                ans = ans[3:].lstrip()
+            if ans:
+                return ans
+
+        return findEntryDef(path, stem + 'displayName', fallback)
+
+    # First without quantifier, then quantified each way:
+    result['byte_unit'] = findEntryDef(
+        path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
+        'bytes')
+    stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
+    known = [] # cases where we *do* have a given version:
+    result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
+    # IEC 60027-2
+    # http://physics.nist.gov/cuu/Units/binary.html
+    # (stem % '%sbi' leaves a %s in place, before 'bibyte', for the prefix.)
+    result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))
+
+    # Used for month and day data:
+    # (each entry is: prefix of the result key, CLDR context, CLDR width)
+    namings = (
+        ('standaloneLong', 'stand-alone', 'wide'),
+        ('standaloneShort', 'stand-alone', 'abbreviated'),
+        ('standaloneNarrow', 'stand-alone', 'narrow'),
+        ('long', 'format', 'wide'),
+        ('short', 'format', 'abbreviated'),
+        ('narrow', 'format', 'narrow'),
+        )
+
+    # Month data:
+    for cal in ('gregorian',): # We shall want to add to this
+        stem = 'dates/calendars/calendar[' + cal + ']/months/'
+        for (key, mode, size) in namings:
+            prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
+            # Twelve month names, each followed by ';':
+            result[key + 'Months'] = ';'.join(
+                findEntry(path, stem + prop + "month[%d]" % i)
+                for i in range(1, 13)) + ';'
+
+    # Day data (for Gregorian, at least):
+    stem = 'dates/calendars/calendar[gregorian]/days/'
+    days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
+    for (key, mode, size) in namings:
+        prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
+        # Seven day names, Sunday first, each followed by ';':
+        result[key + 'Days'] = ';'.join(
+            findEntry(path, stem + prop + '[' + day + ']')
+            for day in days) + ';'
+
+    return Locale(result)
+
+def addEscapes(s):
+    """Return s with each non-ASCII character replaced by a \\x escape.
+
+    Characters whose ord() is below 128 are copied unchanged; any other
+    is replaced by a backslash, an x and its ord() in lower-case hex
+    (at least two digits)."""
+    result = ''
+    for c in s:
+        n = ord(c)
+        if n < 128:
+            result += c
+        else:
+            result += "\\x"
+            result += "%02x" % (n)
+    return result
+
+def unicodeStr(s):
+    """Return s, UTF-8 encoded, as a <size> element followed by a <data> one.
+
+    The size is the length of the UTF-8 encoding of s; the data is that
+    encoding passed through addEscapes()."""
+    utf8 = s.encode('utf-8')
+    return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>"
+
+def usage():
+    print "Usage: cldr2qlocalexml.py <path-to-cldr-main>"
+    sys.exit()
+
+def integrateWeekData(filePath):
+    """Set week data on every locale in the global locale_database.
+
+    Reads, from the file at filePath, the territories attributes of the
+    weekData firstDay, weekendStart and weekendEnd entries for each day
+    of the week, then sets firstDayOfWeek, weekendStart and weekendEnd
+    on each locale, by its country_code.  A locale whose country code
+    is not listed gets the value recorded for territory "001".
+
+    Returns an empty dict, having done nothing, if filePath does not
+    end in ".xml"; otherwise returns None."""
+    if not filePath.endswith(".xml"):
+        return {}
+
+    def lookup(key):
+        # The territories attribute is a space-separated list of codes:
+        return findEntryInFile(filePath, key, attribute='territories')[0].split()
+    days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
+
+    # Each of these maps a territory code to a day; if a code is listed
+    # for several days, the one latest in days wins.
+    firstDayByCountryCode = {}
+    for day in days:
+        for countryCode in lookup('weekData/firstDay[day=%s]' % day):
+            firstDayByCountryCode[countryCode] = day
+
+    weekendStartByCountryCode = {}
+    for day in days:
+        for countryCode in lookup('weekData/weekendStart[day=%s]' % day):
+            weekendStartByCountryCode[countryCode] = day
+
+    weekendEndByCountryCode = {}
+    for day in days:
+        for countryCode in lookup('weekData/weekendEnd[day=%s]' % day):
+            weekendEndByCountryCode[countryCode] = day
+
+    for (key, locale) in locale_database.iteritems():
+        countryCode = locale.country_code
+        if countryCode in firstDayByCountryCode:
+            locale.firstDayOfWeek = firstDayByCountryCode[countryCode]
+        else:
+            locale.firstDayOfWeek = firstDayByCountryCode["001"]
+
+        if countryCode in weekendStartByCountryCode:
+            locale.weekendStart = weekendStartByCountryCode[countryCode]
+        else:
+            locale.weekendStart = weekendStartByCountryCode["001"]
+
+        if countryCode in weekendEndByCountryCode:
+            locale.weekendEnd = weekendEndByCountryCode[countryCode]
+        else:
+            locale.weekendEnd = weekendEndByCountryCode["001"]
+
+def splitLocale(name):
+    """Split name into (language, script, territory) triple as generator.
+
+    Ignores any trailing fields (with a warning), leaves script (a capitalised
+    four-letter token) or territory (either a number or an all-uppercase token)
+    empty if unspecified, returns a single-entry generator if name is a single
+    tag (i.e. contains no underscores).  Always yields 1 or 3 values, never 2."""
+    tags = iter(name.split('_'))
+    yield tags.next() # Language
+    # If there is no second tag, this raises StopIteration which, under the
+    # python2 this script runs with, just ends the generator after one value:
+    tag = tags.next()
+
+    # Script is always four letters, always capitalised:
+    if len(tag) == 4 and tag[0].isupper() and tag[1:].islower():
+        yield tag
+        try:
+            tag = tags.next()
+        except StopIteration:
+            tag = ''
+    else:
+        yield ''
+
+    # Territory is upper-case or numeric:
+    # (The test parses as (tag and tag.isupper()) or tag.isdigit(), which
+    # gives the same answer as the other grouping since ''.isdigit() is false.)
+    if tag and tag.isupper() or tag.isdigit():
+        yield tag
+        tag = ''
+    else:
+        yield ''
+
+    # If nothing is left, StopIteration will avoid the warning:
+    tag = (tag if tag else tags.next(),)
+    sys.stderr.write('Ignoring unparsed cruft %s in %s\n' % ('_'.join(tag + tuple(tags)), name))
+
+# Main program: takes exactly one argument, which must be a directory.
+if len(sys.argv) != 2:
+    usage()
+
+cldr_dir = sys.argv[1]
+
+if not os.path.isdir(cldr_dir):
+    usage()
+
+cldr_files = os.listdir(cldr_dir)
+
+# Maps (language_id, script_id, country_id, variant_code) to its locale:
+locale_database = {}
+
+# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
+defaultContent_locales = []
+for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+                                      'supplementalMetadata.xml'),
+                         'metadata/defaultContent'):
+    # ns[1:][0] is just ns[1], the list of (attribute, value) pairs:
+    for data in ns[1:][0]:
+        if data[0] == u"locales":
+            defaultContent_locales += data[1].split()
+
+# First pass: the defaultContent locales, each read from the file named
+# after it, using the codes parsed from its name.
+skips = []
+for file in defaultContent_locales:
+    try:
+        # splitLocale() yields one value, not three, for a single-tag name:
+        language_code, script_code, country_code = splitLocale(file)
+    except ValueError:
+        sys.stderr.write('skipping defaultContent locale "' + file + '" [neither two nor three tags]\n')
+        continue
+
+    if not (script_code or country_code):
+        sys.stderr.write('skipping defaultContent locale "' + file + '" [second tag is neither script nor territory]\n')
+        continue
+
+    try:
+        l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code)
+        if not l:
+            skips.append(file)
+            continue
+    except xpathlite.Error as e:
+        sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e)))
+        continue
+
+    locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
+
+if skips:
+    wrappedwarn('skipping defaultContent locales [no locale info generated]: ', skips)
+    skips = []
+
+# Second pass: every file in the directory, using the codes given in the
+# file itself; a locale with the same key as one from the first pass
+# replaces it.
+for file in cldr_files:
+    try:
+        l = generateLocaleInfo(cldr_dir + "/" + file)
+        if not l:
+            skips.append(file)
+            continue
+    except xpathlite.Error as e:
+        sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e)))
+        continue
+
+    locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
+
+if skips:
+    wrappedwarn('skipping files [no locale info generated]: ', skips)
+integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
+locale_keys = locale_database.keys()
+locale_keys.sort()
+
+cldr_version = 'unknown'
+ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r")
+for line in ldml:
+    if 'version cldrVersion CDATA #FIXED' in line:
+        cldr_version = line.split('"')[1]
+
+# Start of output: the version, then the complete language, script and
+# country lists from enumdata, each entry giving its name, id and code.
+print "<localeDatabase>"
+print "    <version>" + cldr_version + "</version>"
+print "    <languageList>"
+for id in enumdata.language_list:
+    l = enumdata.language_list[id]
+    print "        <language>"
+    print "            <name>" + l[0] + "</name>"
+    print "            <id>" + str(id) + "</id>"
+    print "            <code>" + l[1] + "</code>"
+    print "        </language>"
+print "    </languageList>"
+
+print "    <scriptList>"
+for id in enumdata.script_list:
+    l = enumdata.script_list[id]
+    print "        <script>"
+    print "            <name>" + l[0] + "</name>"
+    print "            <id>" + str(id) + "</id>"
+    print "            <code>" + l[1] + "</code>"
+    print "        </script>"
+print "    </scriptList>"
+
+print "    <countryList>"
+for id in enumdata.country_list:
+    l = enumdata.country_list[id]
+    print "        <country>"
+    print "            <name>" + l[0] + "</name>"
+    print "            <id>" + str(id) + "</id>"
+    print "            <code>" + l[1] + "</code>"
+    print "        </country>"
+print "    </countryList>"
+
+def _parseLocale(l):
+    """Convert a locale tag to a (language, script, country) triple of names.
+
+    The tag l is split by splitLocale(); each code it gives is mapped
+    to the matching name in enumdata.  A part that is not specified is
+    reported as "AnyLanguage", "AnyScript" or "AnyCountry"; so is the
+    language if its code is "und" and other tags follow it.
+
+    Raises xpathlite.Error if l is exactly "und" or if enumdata returns
+    -1 for any of the codes."""
+    language = "AnyLanguage"
+    script = "AnyScript"
+    country = "AnyCountry"
+
+    if l == "und":
+        raise xpathlite.Error("we are treating unknown locale like C")
+
+    parsed = splitLocale(l)
+    language_code = parsed.next()
+    script_code = country_code = ''
+    # For a single-tag name nothing remains to unpack, so both stay empty:
+    try:
+        script_code, country_code = parsed
+    except ValueError:
+        pass
+
+    if language_code != "und":
+        language_id = enumdata.languageCodeToId(language_code)
+        if language_id == -1:
+            raise xpathlite.Error('unknown language code "%s"' % language_code)
+        language = enumdata.language_list[language_id][0]
+
+    if script_code:
+        script_id = enumdata.scriptCodeToId(script_code)
+        if script_id == -1:
+            raise xpathlite.Error('unknown script code "%s"' % script_code)
+        script = enumdata.script_list[script_id][0]
+
+    if country_code:
+        country_id = enumdata.countryCodeToId(country_code)
+        if country_id == -1:
+            raise xpathlite.Error('unknown country code "%s"' % country_code)
+        country = enumdata.country_list[country_id][0]
+
+    return (language, script, country)
+
+# Output the likely sub-tag mappings, then the locales themselves.
+skips = []
+print "    <likelySubtags>"
+for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"):
+    tmp = {}
+    for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]]
+        tmp[data[0]] = data[1]
+
+    try:
+        from_language, from_script, from_country = _parseLocale(tmp[u"from"])
+        to_language, to_script, to_country = _parseLocale(tmp[u"to"])
+    except xpathlite.Error as e:
+        # Where 'to' merely extends a 'from' that is an unknown language
+        # code, collect it for one combined warning after the loop; report
+        # any other failure individually.
+        if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
+            skips.append(tmp[u'to'])
+        else:
+            sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
+        continue
+    # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
+    if to_country == "AnyCountry" and from_country != to_country:
+        to_country = from_country
+    if to_script == "AnyScript" and from_script != to_script:
+        to_script = from_script
+
+    print "        <likelySubtag>"
+    print "            <from>"
+    print "                <language>" + from_language + "</language>"
+    print "                <script>" + from_script + "</script>"
+    print "                <country>" + from_country + "</country>"
+    print "            </from>"
+    print "            <to>"
+    print "                <language>" + to_language + "</language>"
+    print "                <script>" + to_script + "</script>"
+    print "                <country>" + to_country + "</country>"
+    print "            </to>"
+    print "        </likelySubtag>"
+print "    </likelySubtags>"
+if skips:
+    wrappedwarn('skipping likelySubtags (for unknown language codes): ', skips)
+print "    <localeList>"
+
+# The C locale comes first, then the CLDR-derived ones in sorted key order:
+Locale.C().toXml()
+for key in locale_keys:
+    locale_database[key].toXml()
+
+print "    </localeList>"
+print "</localeDatabase>"
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
new file mode 100755
index 0000000000..256839317c
--- /dev/null
+++ b/util/locale_database/cldr2qtimezone.py
@@ -0,0 +1,431 @@
+#!/usr/bin/env python2
+#############################################################################
+##
+## Copyright (C) 2016 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Parse CLDR data for QTimeZone use with MS-Windows
+
+Script to parse the CLDR supplemental/windowsZones.xml file and encode
+for use in QTimeZone.  See ``./cldr2qlocalexml.py`` for where to get
+the CLDR data.  Pass its common/ directory as first parameter to this
+script and the qtbase root directory as second parameter.  It shall
+update qtbase's src/corelib/tools/qtimezoneprivate_data_p.h ready for
+use.
+
+The XML structure is as follows:
+
+ <supplementalData>
+     <version number="$Revision: 7825 $"/>
+     <generation date="$Date: 2012-10-10 14:45:31 -0700 (Wed, 10 Oct 2012) $"/>
+     <windowsZones>
+         <mapTimezones otherVersion="7dc0101" typeVersion="2012f">
+             <!-- (UTC-08:00) Pacific Time (US & Canada) -->
+             <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
+             <mapZone other="Pacific Standard Time" territory="CA"  type="America/Vancouver America/Dawson America/Whitehorse"/>
+             <mapZone other="Pacific Standard Time" territory="MX"  type="America/Tijuana"/>
+             <mapZone other="Pacific Standard Time" territory="US"  type="America/Los_Angeles"/>
+             <mapZone other="Pacific Standard Time" territory="ZZ"  type="PST8PDT"/>
+       </mapTimezones>
+     </windowsZones>
+ </supplementalData>
+"""
+
+import os
+import sys
+import datetime
+import tempfile
+import enumdata
+import xpathlite
+from  xpathlite import DraftResolution
+import re
+import qlocalexml2cpp
+
+findAlias = xpathlite.findAlias
+findEntry = xpathlite.findEntry
+findEntryInFile = xpathlite._findEntryInFile
+findTagsInFile = xpathlite.findTagsInFile
+unicode2hex = qlocalexml2cpp.unicode2hex
+wrap_list = qlocalexml2cpp.wrap_list
+
+class ByteArrayData:  # Packs NUL-terminated strings into one flat list and remembers where each one starts
+    def __init__(self):
+        self.data = []  # concatenated unicode2hex() output of every distinct string appended so far
+        self.hash = {}  # NUL-terminated string -> index in self.data at which its entry starts
+    def append(self, s):  # Store s (unless already stored) and return the start index of its entry
+        s = s + '\0'  # entries are stored with a trailing NUL
+        if s in self.hash:
+            return self.hash[s]  # already stored: reuse the existing entry rather than duplicating it
+
+        lst = unicode2hex(s)  # helper from qlocalexml2cpp; presumably a list of per-character values - confirm
+        index = len(self.data)  # the new entry starts at the current end of the data
+        if index > 65535:  # start indices must fit in uint16 (see the error message below)
+            print "\n\n\n#error Data index is too big!"
+            sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index)
+            sys.exit(1)
+        self.hash[s] = index
+        self.data += lst
+        return index
+
+# List of currently known Windows IDs.  If the script fails on a missing ID, please add it here.
+# Not public so may be safely changed.
+# Windows Key : [ Windows Id, Offset Seconds ]
+windowsIdList = {
+    1 : [ u'Afghanistan Standard Time',        16200  ],
+    2 : [ u'Alaskan Standard Time',           -32400  ],
+    3 : [ u'Arab Standard Time',               10800  ],
+    4 : [ u'Arabian Standard Time',            14400  ],
+    5 : [ u'Arabic Standard Time',             10800  ],
+    6 : [ u'Argentina Standard Time',         -10800  ],
+    7 : [ u'Atlantic Standard Time',          -14400  ],
+    8 : [ u'AUS Central Standard Time',        34200  ],
+    9 : [ u'AUS Eastern Standard Time',        36000  ],
+   10 : [ u'Azerbaijan Standard Time',         14400  ],
+   11 : [ u'Azores Standard Time',             -3600  ],
+   12 : [ u'Bahia Standard Time',             -10800  ],
+   13 : [ u'Bangladesh Standard Time',         21600  ],
+   14 : [ u'Belarus Standard Time',            10800  ],
+   15 : [ u'Canada Central Standard Time',    -21600  ],
+   16 : [ u'Cape Verde Standard Time',         -3600  ],
+   17 : [ u'Caucasus Standard Time',           14400  ],
+   18 : [ u'Cen. Australia Standard Time',     34200  ],
+   19 : [ u'Central America Standard Time',   -21600  ],
+   20 : [ u'Central Asia Standard Time',       21600  ],
+   21 : [ u'Central Brazilian Standard Time', -14400  ],
+   22 : [ u'Central Europe Standard Time',      3600  ],
+   23 : [ u'Central European Standard Time',    3600  ],
+   24 : [ u'Central Pacific Standard Time',    39600  ],
+   25 : [ u'Central Standard Time (Mexico)',  -21600  ],
+   26 : [ u'Central Standard Time',           -21600  ],
+   27 : [ u'China Standard Time',              28800  ],
+   28 : [ u'Dateline Standard Time',          -43200  ],
+   29 : [ u'E. Africa Standard Time',          10800  ],
+   30 : [ u'E. Australia Standard Time',       36000  ],
+   31 : [ u'E. South America Standard Time',  -10800  ],
+   32 : [ u'Eastern Standard Time',           -18000  ],
+   33 : [ u'Eastern Standard Time (Mexico)',  -18000  ],
+   34 : [ u'Egypt Standard Time',               7200  ],
+   35 : [ u'Ekaterinburg Standard Time',       18000  ],
+   36 : [ u'Fiji Standard Time',               43200  ],
+   37 : [ u'FLE Standard Time',                 7200  ],
+   38 : [ u'Georgian Standard Time',           14400  ],
+   39 : [ u'GMT Standard Time',                    0  ],
+   40 : [ u'Greenland Standard Time',         -10800  ],
+   41 : [ u'Greenwich Standard Time',              0  ],
+   42 : [ u'GTB Standard Time',                 7200  ],
+   43 : [ u'Hawaiian Standard Time',          -36000  ],
+   44 : [ u'India Standard Time',              19800  ],
+   45 : [ u'Iran Standard Time',               12600  ],
+   46 : [ u'Israel Standard Time',              7200  ],
+   47 : [ u'Jordan Standard Time',              7200  ],
+   48 : [ u'Kaliningrad Standard Time',         7200  ],
+   49 : [ u'Korea Standard Time',              32400  ],
+   50 : [ u'Libya Standard Time',               7200  ],
+   51 : [ u'Line Islands Standard Time',       50400  ],
+   52 : [ u'Magadan Standard Time',            36000  ],
+   53 : [ u'Mauritius Standard Time',          14400  ],
+   54 : [ u'Middle East Standard Time',         7200  ],
+   55 : [ u'Montevideo Standard Time',        -10800  ],
+   56 : [ u'Morocco Standard Time',                0  ],
+   57 : [ u'Mountain Standard Time (Mexico)', -25200  ],
+   58 : [ u'Mountain Standard Time',          -25200  ],
+   59 : [ u'Myanmar Standard Time',            23400  ],
+   60 : [ u'N. Central Asia Standard Time',    21600  ],
+   61 : [ u'Namibia Standard Time',             3600  ],
+   62 : [ u'Nepal Standard Time',              20700  ],
+   63 : [ u'New Zealand Standard Time',        43200  ],
+   64 : [ u'Newfoundland Standard Time',      -12600  ],
+   65 : [ u'North Asia East Standard Time',    28800  ],
+   66 : [ u'North Asia Standard Time',         25200  ],
+   67 : [ u'Pacific SA Standard Time',        -10800  ],
+   68 : [ u'E. Europe Standard Time',           7200  ],
+   69 : [ u'Pacific Standard Time',           -28800  ],
+   70 : [ u'Pakistan Standard Time',           18000  ],
+   71 : [ u'Paraguay Standard Time',          -14400  ],
+   72 : [ u'Romance Standard Time',             3600  ],
+   73 : [ u'Russia Time Zone 3',               14400  ],
+   74 : [ u'Russia Time Zone 10',              39600  ],
+   75 : [ u'Russia Time Zone 11',              43200  ],
+   76 : [ u'Russian Standard Time',            10800  ],
+   77 : [ u'SA Eastern Standard Time',        -10800  ],
+   78 : [ u'SA Pacific Standard Time',        -18000  ],
+   79 : [ u'SA Western Standard Time',        -14400  ],
+   80 : [ u'Samoa Standard Time',              46800  ],
+   81 : [ u'SE Asia Standard Time',            25200  ],
+   82 : [ u'Singapore Standard Time',          28800  ],
+   83 : [ u'South Africa Standard Time',        7200  ],
+   84 : [ u'Sri Lanka Standard Time',          19800  ],
+   85 : [ u'Syria Standard Time',               7200  ],
+   86 : [ u'Taipei Standard Time',             28800  ],
+   87 : [ u'Tasmania Standard Time',           36000  ],
+   88 : [ u'Tokyo Standard Time',              32400  ],
+   89 : [ u'Tonga Standard Time',              46800  ],
+   90 : [ u'Turkey Standard Time',              7200  ],
+   91 : [ u'Ulaanbaatar Standard Time',        28800  ],
+   92 : [ u'US Eastern Standard Time',        -18000  ],
+   93 : [ u'US Mountain Standard Time',       -25200  ],
+   94 : [ u'UTC-02',                           -7200  ],
+   95 : [ u'UTC-11',                          -39600  ],
+   96 : [ u'UTC',                                  0  ],
+   97 : [ u'UTC+12',                           43200  ],
+   98 : [ u'Venezuela Standard Time',         -16200  ],
+   99 : [ u'Vladivostok Standard Time',        36000  ],
+   100: [ u'W. Australia Standard Time',       28800  ],
+   101: [ u'W. Central Africa Standard Time',   3600  ],
+   102: [ u'W. Europe Standard Time',           3600  ],
+   103: [ u'West Asia Standard Time',          18000  ],
+   104: [ u'West Pacific Standard Time',       36000  ],
+   105: [ u'Yakutsk Standard Time',            32400  ],
+   106: [ u'North Korea Standard Time',        30600  ]
+}
+
+def windowsIdToKey(windowsId):  # Return the windowsIdList key whose Windows ID equals windowsId
+    for windowsKey in windowsIdList:
+        if windowsIdList[windowsKey][0] == windowsId:  # element 0 of each entry is the Windows ID string
+            return windowsKey
+    return 0  # no match; windowsIdList keys start at 1, so 0 signals an unknown ID
+
+# List of standard UTC IDs to use.  Not public so may be safely changed.
+# Do not remove IDs, as they are part of the API/behavior guarantee.
+# Key : [ UTC Id, Offset Seconds ]
+utcIdList = {
+    0 : [ u'UTC',            0  ],  # Goes first so is default
+    1 : [ u'UTC-14:00', -50400  ],
+    2 : [ u'UTC-13:00', -46800  ],
+    3 : [ u'UTC-12:00', -43200  ],
+    4 : [ u'UTC-11:00', -39600  ],
+    5 : [ u'UTC-10:00', -36000  ],
+    6 : [ u'UTC-09:00', -32400  ],
+    7 : [ u'UTC-08:00', -28800  ],
+    8 : [ u'UTC-07:00', -25200  ],
+    9 : [ u'UTC-06:00', -21600  ],
+   10 : [ u'UTC-05:00', -18000  ],
+   11 : [ u'UTC-04:30', -16200  ],
+   12 : [ u'UTC-04:00', -14400  ],
+   13 : [ u'UTC-03:30', -12600  ],
+   14 : [ u'UTC-03:00', -10800  ],
+   15 : [ u'UTC-02:00',  -7200  ],
+   16 : [ u'UTC-01:00',  -3600  ],
+   17 : [ u'UTC-00:00',      0  ],
+   18 : [ u'UTC+00:00',      0  ],
+   19 : [ u'UTC+01:00',   3600  ],
+   20 : [ u'UTC+02:00',   7200  ],
+   21 : [ u'UTC+03:00',  10800  ],
+   22 : [ u'UTC+03:30',  12600  ],
+   23 : [ u'UTC+04:00',  14400  ],
+   24 : [ u'UTC+04:30',  16200  ],
+   25 : [ u'UTC+05:00',  18000  ],
+   26 : [ u'UTC+05:30',  19800  ],
+   27 : [ u'UTC+05:45',  20700  ],
+   28 : [ u'UTC+06:00',  21600  ],
+   29 : [ u'UTC+06:30',  23400  ],
+   30 : [ u'UTC+07:00',  25200  ],
+   31 : [ u'UTC+08:00',  28800  ],
+   32 : [ u'UTC+09:00',  32400  ],
+   33 : [ u'UTC+09:30',  34200  ],
+   34 : [ u'UTC+10:00',  36000  ],
+   35 : [ u'UTC+11:00',  39600  ],
+   36 : [ u'UTC+12:00',  43200  ],
+   37 : [ u'UTC+13:00',  46800  ],
+   38 : [ u'UTC+14:00',  50400  ],
+   39 : [ u'UTC+08:30',  30600  ]
+}
+
+def usage():  # Report the expected command line on stderr and abort with a failure status
+    sys.stderr.write("Usage: cldr2qtimezone.py <path to cldr core/common> <path to qtbase>\n")
+    sys.exit(1)  # non-zero: usage() is only called when the arguments or input paths are unusable
+
+if len(sys.argv) != 3:
+    usage()
+
+cldrPath = sys.argv[1]
+qtPath = sys.argv[2]
+
+if not os.path.isdir(cldrPath) or not os.path.isdir(qtPath):
+    usage()
+
+windowsZonesPath = cldrPath + "/supplemental/windowsZones.xml"
+tempFileDir = qtPath
+dataFilePath = qtPath + "/src/corelib/tools/qtimezoneprivate_data_p.h"
+
+if not os.path.isfile(windowsZonesPath):
+    usage()
+
+if not os.path.isfile(dataFilePath):
+    usage()
+
+cldr_version = 'unknown'  # kept if ldml.dtd has no cldrVersion line
+with open(cldrPath + "/dtd/ldml.dtd", "r") as ldml:  # 'with' closes the file once it has been scanned
+    for line in ldml:
+        if 'version cldrVersion CDATA #FIXED' in line:
+            cldr_version = line.split('"')[1]  # text between the first pair of double quotes on that line
+
+# [[u'version', [(u'number', u'$Revision: 7825 $')]]]
+versionNumber = findTagsInFile(windowsZonesPath, "version")[0][1][0][1]
+
+mapTimezones = findTagsInFile(windowsZonesPath, "windowsZones/mapTimezones")
+
+defaultDict = {}
+windowsIdDict = {}
+
+if mapTimezones:
+    for mapZone in mapTimezones:
+        # [u'mapZone', [(u'territory', u'MH'), (u'other', u'UTC+12'), (u'type', u'Pacific/Majuro Pacific/Kwajalein')]]
+        if mapZone[0] == u'mapZone':
+            data = {}
+            for attribute in mapZone[1]:
+                if attribute[0] == u'other':
+                    data['windowsId'] = attribute[1]
+                if attribute[0] == u'territory':
+                    data['countryCode'] = attribute[1]
+                if attribute[0] == u'type':
+                    data['ianaList'] = attribute[1]
+
+            data['windowsKey'] = windowsIdToKey(data['windowsId'])
+            if data['windowsKey'] <= 0:
+                raise xpathlite.Error("Unknown Windows ID, please add \"%s\"" % data['windowsId'])
+
+            countryId = 0
+            if data['countryCode'] == u'001':
+                defaultDict[data['windowsKey']] = data['ianaList']
+            else:
+                data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
+                if data['countryId'] < 0:
+                    raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
+                data['country'] = enumdata.country_list[data['countryId']][0]
+                windowsIdDict[data['windowsKey'], data['countryId']] = data
+
+print "Input file parsed, now writing data"
+
+GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
+GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n"
+
+# Create a temp file to write the new data into (mkstemp's first positional argument is the name suffix)
+(newTempFile, newTempFilePath) = tempfile.mkstemp("qtimezone_data_p", dir=tempFileDir)
+newTempFile = os.fdopen(newTempFile, "w")  # wrap the descriptor returned by mkstemp in a file object
+
+# Open the old file and copy over the first non-generated section to the new file
+oldDataFile = open(dataFilePath, "r")
+s = oldDataFile.readline()
+while s and s != GENERATED_BLOCK_START:
+    newTempFile.write(s)
+    s = oldDataFile.readline()
+
+# Write out generated block start tag and warning
+newTempFile.write(GENERATED_BLOCK_START)
+newTempFile.write("""
+/*
+    This part of the file was generated on %s from the
+    Common Locale Data Repository v%s supplemental/windowsZones.xml file %s
+
+    http://www.unicode.org/cldr/
+
+    Do not edit this code: run cldr2qtimezone.py on updated (or
+    edited) CLDR data; see qtbase/util/locale_database/.
+*/
+
+""" % (str(datetime.date.today()), cldr_version, versionNumber) )
+
+windowsIdData = ByteArrayData()
+ianaIdData = ByteArrayData()
+
+# Write Windows/IANA table
+newTempFile.write("// Windows ID Key, Country Enum, IANA ID Index\n")
+newTempFile.write("static const QZoneData zoneDataTable[] = {\n")
+for index in windowsIdDict:
+    data = windowsIdDict[index]
+    newTempFile.write("    { %6d,%6d,%6d }, // %s / %s\n"
+                         % (data['windowsKey'],
+                            data['countryId'],
+                            ianaIdData.append(data['ianaList']),
+                            data['windowsId'],
+                            data['country']))
+newTempFile.write("    {      0,     0,     0 } // Trailing zeroes\n")
+newTempFile.write("};\n\n")
+
+print "Done Zone Data"
+
+# Write Windows ID key table
+newTempFile.write("// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n")
+newTempFile.write("static const QWindowsData windowsDataTable[] = {\n")
+for windowsKey in windowsIdList:
+    newTempFile.write("    { %6d,%6d,%6d,%6d }, // %s\n"
+                         % (windowsKey,
+                            windowsIdData.append(windowsIdList[windowsKey][0]),
+                            ianaIdData.append(defaultDict[windowsKey]),
+                            windowsIdList[windowsKey][1],
+                            windowsIdList[windowsKey][0]))
+newTempFile.write("    {      0,     0,     0,     0 } // Trailing zeroes\n")
+newTempFile.write("};\n\n")
+
+print "Done Windows Data Table"
+
+# Write UTC ID key table
+newTempFile.write("// IANA ID Index, UTC Offset\n")
+newTempFile.write("static const QUtcData utcDataTable[] = {\n")
+for index in utcIdList:
+    data = utcIdList[index]
+    newTempFile.write("    { %6d,%6d }, // %s\n"
+                         % (ianaIdData.append(data[0]),
+                            data[1],
+                            data[0]))
+newTempFile.write("    {     0,      0 } // Trailing zeroes\n")
+newTempFile.write("};\n\n")
+
+print "Done UTC Data Table"
+
+# Write out Windows ID's data
+newTempFile.write("static const char windowsIdData[] = {\n")
+newTempFile.write(wrap_list(windowsIdData.data))
+newTempFile.write("\n};\n\n")
+
+# Write out IANA ID's data
+newTempFile.write("static const char ianaIdData[] = {\n")
+newTempFile.write(wrap_list(ianaIdData.data))
+newTempFile.write("\n};\n")
+
+print "Done ID Data Table"
+
+# Write out the end of generated block tag
+newTempFile.write(GENERATED_BLOCK_END)
+s = oldDataFile.readline()
+
+# Skip through the old generated data in the old file
+while s and s != GENERATED_BLOCK_END:
+    s = oldDataFile.readline()
+
+# Now copy the rest of the original file into the new file
+s = oldDataFile.readline()
+while s:
+    newTempFile.write(s)
+    s = oldDataFile.readline()
+
+# Now close the old and new files, delete the old file and rename the new file into its place
+newTempFile.close()
+oldDataFile.close()
+os.remove(dataFilePath)
+os.rename(newTempFilePath, dataFilePath)
+
+print "Data generation completed, please check the new file at " + dataFilePath
diff --git a/util/locale_database/dateconverter.py b/util/locale_database/dateconverter.py
new file mode 100755
index 0000000000..1990fe0c61
--- /dev/null
+++ b/util/locale_database/dateconverter.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+#############################################################################
+##
+## Copyright (C) 2016 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+import re
+
+def _convert_pattern(pattern):  # Translate one date-field pattern (e.g. "EEEE") to the form used for Qt
+    # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
+    qt_regexps = {  # fallback rewrites, applied with re.sub() when there is no exact match below
+        r"yyy{3,}" : "yyyy", # more than three digits hence convert to four-digit year
+        r"L" : "M",          # stand-alone month names. not supported.
+        r"g{1,}": "",        # modified julian day. not supported.
+        r"S{1,}" : "",       # fractional seconds. not supported.
+        r"A{1,}" : ""        # milliseconds in day. not supported.
+    }
+    qt_patterns = {  # exact-match translations; an empty string drops the field from the result
+        "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
+        "y" : "yyyy", # four-digit year without leading zeroes
+        "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
+        "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
+        "MMMMM" : "MMM", # narrow month name.
+        "LLLLL" : "MMM", # stand-alone narrow month name.
+        "l" : "", # special symbol for chinese leap month. not supported.
+        "w" : "", "W" : "", # week of year/month. not supported.
+        "D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
+        "F" : "", # day of week in month. not supported.
+        "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
+        "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
+        "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
+        "a" : "AP", # AM/PM
+        "K" : "h", # Hour 0-11
+        "k" : "H", # Hour 1-24
+        "j" : "", # special reserved symbol.
+        "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
+        "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
+        "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
+        "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t"  # timezone
+    }
+    if pattern in qt_patterns:  # 'in' instead of dict.has_key(), which no longer exists in Python 3
+        return qt_patterns[pattern]
+    for r,v in qt_regexps.items():
+        pattern = re.sub(r, v, pattern)
+    return pattern
+
+def convert_date(input):  # Convert a date format string, one field at a time, via _convert_pattern()
+    result = ""
+    patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"  # letters treated as pattern characters
+    last = ""  # current run of one repeated pattern letter, not yet converted
+    inquote = 0  # number of apostrophes seen so far; an odd count means we are inside quoted text
+    chars_to_strip = " -"  # separators removed next to fields that convert to nothing
+    for c in input:
+        if c == "'":
+            inquote = inquote + 1
+        if inquote % 2 == 0:
+            if c in patterns:
+                if not last:
+                    last = c
+                else:
+                    if c in last:
+                        last += c
+                    else:
+                        # a different pattern letter starts: convert and emit the previous run
+                        converted = _convert_pattern(last)
+                        result += converted
+                        if not converted:
+                            result = result.rstrip(chars_to_strip)
+                        last = c
+                continue
+        if last:
+            # c is quoted or is not a pattern letter: the pending run has ended, so emit it
+            converted = _convert_pattern(last)
+            result += converted
+            if not converted:
+                result = result.rstrip(chars_to_strip)
+            last = ""
+        result += c  # literal characters, including the apostrophes themselves, are copied through
+    if last:  # flush a run that reaches the end of the input
+        converted = _convert_pattern(last)
+        result += converted
+        if not converted:
+            result = result.rstrip(chars_to_strip)
+    return result.lstrip(chars_to_strip)
diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py
new file mode 100644
index 0000000000..26bb74d1fe
--- /dev/null
+++ b/util/locale_database/enumdata.py
@@ -0,0 +1,878 @@
+#!/usr/bin/env python
+#############################################################################
+##
+## Copyright (C) 2016 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+# Each *_list reflects the current values of its enums in qlocale.h;
+# if new xml language files are available in CLDR, these languages and
+# countries need to be *appended* to this list (for compatibility
+# between versions).  Include any spaces present in names (scripts
+# shall squish them out for the enum entries) in *_list, but use the
+# squished forms of names in the *_aliases mappings.
+
+### Qt 6: restore alphabetic order in each list.
+
+language_list = {
+      0: ["AnyLanguage",                 "  "],
+      1: ["C",                           "  "],
+      2: ["Abkhazian",                   "ab"],
+      3: ["Oromo",                       "om"], # macrolanguage
+      4: ["Afar",                        "aa"],
+      5: ["Afrikaans",                   "af"],
+      6: ["Albanian",                    "sq"], # macrolanguage
+      7: ["Amharic",                     "am"],
+      8: ["Arabic",                      "ar"], # macrolanguage
+      9: ["Armenian",                    "hy"],
+     10: ["Assamese",                    "as"],
+     11: ["Aymara",                      "ay"], # macrolanguage
+     12: ["Azerbaijani",                 "az"], # macrolanguage
+     13: ["Bashkir",                     "ba"],
+     14: ["Basque",                      "eu"],
+     15: ["Bengali",                     "bn"],
+     16: ["Dzongkha",                    "dz"],
+     17: ["Bihari",                      "bh"],
+     18: ["Bislama",                     "bi"],
+     19: ["Breton",                      "br"],
+     20: ["Bulgarian",                   "bg"],
+     21: ["Burmese",                     "my"],
+     22: ["Belarusian",                  "be"],
+     23: ["Khmer",                       "km"],
+     24: ["Catalan",                     "ca"],
+     25: ["Chinese",                     "zh"], # macrolanguage
+     26: ["Corsican",                    "co"],
+     27: ["Croatian",                    "hr"],
+     28: ["Czech",                       "cs"],
+     29: ["Danish",                      "da"],
+     30: ["Dutch",                       "nl"],
+     31: ["English",                     "en"],
+     32: ["Esperanto",                   "eo"],
+     33: ["Estonian",                    "et"], # macrolanguage
+     34: ["Faroese",                     "fo"],
+     35: ["Fijian",                      "fj"],
+     36: ["Finnish",                     "fi"],
+     37: ["French",                      "fr"],
+     38: ["Western Frisian",             "fy"],
+     39: ["Gaelic",                      "gd"],
+     40: ["Galician",                    "gl"],
+     41: ["Georgian",                    "ka"],
+     42: ["German",                      "de"],
+     43: ["Greek",                       "el"],
+     44: ["Greenlandic",                 "kl"],
+     45: ["Guarani",                     "gn"], # macrolanguage
+     46: ["Gujarati",                    "gu"],
+     47: ["Hausa",                       "ha"],
+     48: ["Hebrew",                      "he"],
+     49: ["Hindi",                       "hi"],
+     50: ["Hungarian",                   "hu"],
+     51: ["Icelandic",                   "is"],
+     52: ["Indonesian",                  "id"],
+     53: ["Interlingua",                 "ia"],
+     54: ["Interlingue",                 "ie"],
+     55: ["Inuktitut",                   "iu"], # macrolanguage
+     56: ["Inupiak",                     "ik"], # macrolanguage
+     57: ["Irish",                       "ga"],
+     58: ["Italian",                     "it"],
+     59: ["Japanese",                    "ja"],
+     60: ["Javanese",                    "jv"],
+     61: ["Kannada",                     "kn"],
+     62: ["Kashmiri",                    "ks"],
+     63: ["Kazakh",                      "kk"],
+     64: ["Kinyarwanda",                 "rw"],
+     65: ["Kirghiz",                     "ky"],
+     66: ["Korean",                      "ko"],
+     67: ["Kurdish",                     "ku"], # macrolanguage
+     68: ["Rundi",                       "rn"],
+     69: ["Lao",                         "lo"],
+     70: ["Latin",                       "la"],
+     71: ["Latvian",                     "lv"], # macrolanguage
+     72: ["Lingala",                     "ln"],
+     73: ["Lithuanian",                  "lt"],
+     74: ["Macedonian",                  "mk"],
+     75: ["Malagasy",                    "mg"], # macrolanguage
+     76: ["Malay",                       "ms"], # macrolanguage
+     77: ["Malayalam",                   "ml"],
+     78: ["Maltese",                     "mt"],
+     79: ["Maori",                       "mi"],
+     80: ["Marathi",                     "mr"],
+     81: ["Marshallese",                 "mh"],
+     82: ["Mongolian",                   "mn"], # macrolanguage
+     83: ["Nauru",                       "na"],
+     84: ["Nepali",                      "ne"], # macrolanguage
+     85: ["Norwegian Bokmal",            "nb"],
+     86: ["Occitan",                     "oc"],
+     87: ["Oriya",                       "or"], # macrolanguage
+     88: ["Pashto",                      "ps"], # macrolanguage
+     89: ["Persian",                     "fa"], # macrolanguage
+     90: ["Polish",                      "pl"],
+     91: ["Portuguese",                  "pt"],
+     92: ["Punjabi",                     "pa"],
+     93: ["Quechua",                     "qu"], # macrolanguage
+     94: ["Romansh",                     "rm"],
+     95: ["Romanian",                    "ro"],
+     96: ["Russian",                     "ru"],
+     97: ["Samoan",                      "sm"],
+     98: ["Sango",                       "sg"],
+     99: ["Sanskrit",                    "sa"],
+    100: ["Serbian",                     "sr"],
+    101: ["Ossetic",                     "os"],
+    102: ["Southern Sotho",              "st"],
+    103: ["Tswana",                      "tn"],
+    104: ["Shona",                       "sn"],
+    105: ["Sindhi",                      "sd"],
+    106: ["Sinhala",                     "si"],
+    107: ["Swati",                       "ss"],
+    108: ["Slovak",                      "sk"],
+    109: ["Slovenian",                   "sl"],
+    110: ["Somali",                      "so"],
+    111: ["Spanish",                     "es"],
+    112: ["Sundanese",                   "su"],
+    113: ["Swahili",                     "sw"], # macrolanguage
+    114: ["Swedish",                     "sv"],
+    115: ["Sardinian",                   "sc"], # macrolanguage
+    116: ["Tajik",                       "tg"],
+    117: ["Tamil",                       "ta"],
+    118: ["Tatar",                       "tt"],
+    119: ["Telugu",                      "te"],
+    120: ["Thai",                        "th"],
+    121: ["Tibetan",                     "bo"],
+    122: ["Tigrinya",                    "ti"],
+    123: ["Tongan",                      "to"],
+    124: ["Tsonga",                      "ts"],
+    125: ["Turkish",                     "tr"],
+    126: ["Turkmen",                     "tk"],
+    127: ["Tahitian",                    "ty"],
+    128: ["Uighur",                      "ug"],
+    129: ["Ukrainian",                   "uk"],
+    130: ["Urdu",                        "ur"],
+    131: ["Uzbek",                       "uz"], # macrolanguage
+    132: ["Vietnamese",                  "vi"],
+    133: ["Volapuk",                     "vo"],
+    134: ["Welsh",                       "cy"],
+    135: ["Wolof",                       "wo"],
+    136: ["Xhosa",                       "xh"],
+    137: ["Yiddish",                     "yi"], # macrolanguage
+    138: ["Yoruba",                      "yo"],
+    139: ["Zhuang",                      "za"], # macrolanguage
+    140: ["Zulu",                        "zu"],
+    141: ["Norwegian Nynorsk",           "nn"],
+    142: ["Bosnian",                     "bs"],
+    143: ["Divehi",                      "dv"],
+    144: ["Manx",                        "gv"],
+    145: ["Cornish",                     "kw"],
+    146: ["Akan",                        "ak"], # macrolanguage
+    147: ["Konkani",                     "kok"],
+    148: ["Ga",                          "gaa"],
+    149: ["Igbo",                        "ig" ],
+    150: ["Kamba",                       "kam"],
+    151: ["Syriac",                      "syr"],
+    152: ["Blin",                        "byn"],
+    153: ["Geez",                        "gez"],
+    154: ["Koro",                        "kfo"],
+    155: ["Sidamo",                      "sid"],
+    156: ["Atsam",                       "cch"],
+    157: ["Tigre",                       "tig"],
+    158: ["Jju",                         "kaj"],
+    159: ["Friulian",                    "fur"],
+    160: ["Venda",                       "ve" ],
+    161: ["Ewe",                         "ee" ],
+    162: ["Walamo",                      "wal"],
+    163: ["Hawaiian",                    "haw"],
+    164: ["Tyap",                        "kcg"],
+    165: ["Nyanja",                      "ny" ],
+    166: ["Filipino",                    "fil"],
+    167: ["Swiss German",                "gsw"],
+    168: ["Sichuan Yi",                  "ii" ],
+    169: ["Kpelle",                      "kpe"],
+    170: ["Low German",                  "nds"],
+    171: ["South Ndebele",               "nr" ],
+    172: ["Northern Sotho",              "nso"],
+    173: ["Northern Sami",               "se" ],
+    174: ["Taroko",                      "trv"],
+    175: ["Gusii",                       "guz"],
+    176: ["Taita",                       "dav"],
+    177: ["Fulah",                       "ff"], # macrolanguage
+    178: ["Kikuyu",                      "ki"],
+    179: ["Samburu",                     "saq"],
+    180: ["Sena",                        "seh"],
+    181: ["North Ndebele",               "nd"],
+    182: ["Rombo",                       "rof"],
+    183: ["Tachelhit",                   "shi"],
+    184: ["Kabyle",                      "kab"],
+    185: ["Nyankole",                    "nyn"],
+    186: ["Bena",                        "bez"],
+    187: ["Vunjo",                       "vun"],
+    188: ["Bambara",                     "bm"],
+    189: ["Embu",                        "ebu"],
+    190: ["Cherokee",                    "chr"],
+    191: ["Morisyen",                    "mfe"],
+    192: ["Makonde",                     "kde"],
+    193: ["Langi",                       "lag"],
+    194: ["Ganda",                       "lg"],
+    195: ["Bemba",                       "bem"],
+    196: ["Kabuverdianu",                "kea"],
+    197: ["Meru",                        "mer"],
+    198: ["Kalenjin",                    "kln"],
+    199: ["Nama",                        "naq"],
+    200: ["Machame",                     "jmc"],
+    201: ["Colognian",                   "ksh"],
+    202: ["Masai",                       "mas"],
+    203: ["Soga",                        "xog"],
+    204: ["Luyia",                       "luy"],
+    205: ["Asu",                         "asa"],
+    206: ["Teso",                        "teo"],
+    207: ["Saho",                        "ssy"],
+    208: ["Koyra Chiini",                "khq"],
+    209: ["Rwa",                         "rwk"],
+    210: ["Luo",                         "luo"],
+    211: ["Chiga",                       "cgg"],
+    212: ["Central Morocco Tamazight",   "tzm"],
+    213: ["Koyraboro Senni",             "ses"],
+    214: ["Shambala",                    "ksb"],
+    215: ["Bodo",                        "brx"],
+    216: ["Avaric",                      "av"],
+    217: ["Chamorro",                    "ch"],
+    218: ["Chechen",                     "ce"],
+    219: ["Church",                      "cu"], # macrolanguage
+    220: ["Chuvash",                     "cv"],
+    221: ["Cree",                        "cr"], # macrolanguage
+    222: ["Haitian",                     "ht"],
+    223: ["Herero",                      "hz"],
+    224: ["Hiri Motu",                   "ho"],
+    225: ["Kanuri",                      "kr"], # macrolanguage
+    226: ["Komi",                        "kv"], # macrolanguage
+    227: ["Kongo",                       "kg"], # macrolanguage
+    228: ["Kwanyama",                    "kj"],
+    229: ["Limburgish",                  "li"],
+    230: ["Luba Katanga",                "lu"],
+    231: ["Luxembourgish",               "lb"],
+    232: ["Navaho",                      "nv"],
+    233: ["Ndonga",                      "ng"],
+    234: ["Ojibwa",                      "oj"], # macrolanguage
+    235: ["Pali",                        "pi"], # macrolanguage
+    236: ["Walloon",                     "wa"],
+    237: ["Aghem",                       "agq"],
+    238: ["Basaa",                       "bas"],
+    239: ["Zarma",                       "dje"],
+    240: ["Duala",                       "dua"],
+    241: ["Jola Fonyi",                  "dyo"],
+    242: ["Ewondo",                      "ewo"],
+    243: ["Bafia",                       "ksf"],
+    244: ["Makhuwa Meetto",              "mgh"],
+    245: ["Mundang",                     "mua"],
+    246: ["Kwasio",                      "nmg"],
+    247: ["Nuer",                        "nus"],
+    248: ["Sakha",                       "sah"],
+    249: ["Sangu",                       "sbp"],
+    250: ["Congo Swahili",               "swc"],
+    251: ["Tasawaq",                     "twq"],
+    252: ["Vai",                         "vai"],
+    253: ["Walser",                      "wae"],
+    254: ["Yangben",                     "yav"],
+    255: ["Avestan",                     "ae"],
+    256: ["Asturian",                    "ast"],
+    257: ["Ngomba",                      "jgo"],
+    258: ["Kako",                        "kkj"],
+    259: ["Meta",                        "mgo"],
+    260: ["Ngiemboon",                   "nnh"],
+    261: ["Aragonese",                   "an"],
+    262: ["Akkadian",                    "akk"],
+    263: ["Ancient Egyptian",            "egy"],
+    264: ["Ancient Greek",               "grc"],
+    265: ["Aramaic",                     "arc"],
+    266: ["Balinese",                    "ban"],
+    267: ["Bamun",                       "bax"],
+    268: ["Batak Toba",                  "bbc"],
+    269: ["Buginese",                    "bug"],
+    270: ["Buhid",                       "bku"],
+    271: ["Carian",                      "xcr"],
+    272: ["Chakma",                      "ccp"],
+    273: ["Classical Mandaic",           "myz"],
+    274: ["Coptic",                      "cop"],
+    275: ["Dogri",                       "doi"], # macrolanguage
+    276: ["Eastern Cham",                "cjm"],
+    277: ["Eastern Kayah",               "eky"],
+    278: ["Etruscan",                    "ett"],
+    279: ["Gothic",                      "got"],
+    280: ["Hanunoo",                     "hnn"],
+    281: ["Ingush",                      "inh"],
+    282: ["Large Flowery Miao",          "hmd"],
+    283: ["Lepcha",                      "lep"],
+    284: ["Limbu",                       "lif"],
+    285: ["Lisu",                        "lis"],
+    286: ["Lu",                          "khb"],
+    287: ["Lycian",                      "xlc"],
+    288: ["Lydian",                      "xld"],
+    289: ["Mandingo",                    "man"], # macrolanguage
+    290: ["Manipuri",                    "mni"],
+    291: ["Meroitic",                    "xmr"],
+    292: ["Northern Thai",               "nod"],
+    293: ["Old Irish",                   "sga"],
+    294: ["Old Norse",                   "non"],
+    295: ["Old Persian",                 "peo"],
+    296: ["Old Turkish",                 "otk"],
+    297: ["Pahlavi",                     "pal"],
+    298: ["Parthian",                    "xpr"],
+    299: ["Phoenician",                  "phn"],
+    300: ["Prakrit Language",            "pra"],
+    301: ["Rejang",                      "rej"],
+    302: ["Sabaean",                     "xsa"],
+    303: ["Samaritan",                   "smp"],
+    304: ["Santali",                     "sat"],
+    305: ["Saurashtra",                  "saz"],
+    306: ["Sora",                        "srb"],
+    307: ["Sylheti",                     "syl"],
+    308: ["Tagbanwa",                    "tbw"],
+    309: ["Tai Dam",                     "blt"],
+    310: ["Tai Nua",                     "tdd"],
+    311: ["Ugaritic",                    "uga"],
+    312: ["Akoose",                      "bss"],
+    313: ["Lakota",                      "lkt"],
+    314: ["Standard Moroccan Tamazight", "zgh"],
+    315: ["Mapuche",                     "arn"],
+    316: ["Central Kurdish",             "ckb"],
+    317: ["Lower Sorbian",               "dsb"],
+    318: ["Upper Sorbian",               "hsb"],
+    319: ["Kenyang",                     "ken"],
+    320: ["Mohawk",                      "moh"],
+    321: ["Nko",                         "nqo"],
+    322: ["Prussian",                    "prg"],
+    323: ["Kiche",                       "quc"],
+    324: ["Southern Sami",               "sma"],
+    325: ["Lule Sami",                   "smj"],
+    326: ["Inari Sami",                  "smn"],
+    327: ["Skolt Sami",                  "sms"],
+    328: ["Warlpiri",                    "wbp"],
+    329: ["Manichaean Middle Persian",   "xmn"],
+    330: ["Mende",                       "men"],
+    331: ["Ancient North Arabian",       "xna"],
+    332: ["Linear A",                    "lab"],
+    333: ["Hmong Njua",                  "hnj"],
+    334: ["Ho",                          "hoc"],
+    335: ["Lezghian",                    "lez"],
+    336: ["Bassa",                       "bsq"],
+    337: ["Mono",                        "mru"],
+    338: ["Tedim Chin",                  "ctd"],
+    339: ["Maithili",                    "mai"],
+    340: ["Ahom",                        "aho"],
+    341: ["American Sign Language",      "ase"],
+    342: ["Ardhamagadhi Prakrit",        "pka"],
+    343: ["Bhojpuri",                    "bho"],
+    344: ["Hieroglyphic Luwian",         "hlu"],
+    345: ["Literary Chinese",            "lzh"],
+    346: ["Mazanderani",                 "mzn"],
+    347: ["Mru",                         "mro"],
+    348: ["Newari",                      "new"],
+    349: ["Northern Luri",               "lrc"],
+    350: ["Palauan",                     "pau"],
+    351: ["Papiamento",                  "pap"],
+    352: ["Saraiki",                     "skr"],
+    353: ["Tokelau",                     "tkl"],
+    354: ["Tok Pisin",                   "tpi"],
+    355: ["Tuvalu",                      "tvl"],
+    356: ["Uncoded Languages",           "mis"],
+    357: ["Cantonese",                   "yue"],
+    358: ["Osage",                       "osa"],
+    359: ["Tangut",                      "txg"],
+    360: ["Ido",                         "io"],
+    361: ["Lojban",                      "jbo"],
+    362: ["Sicilian",                    "scn"],
+    363: ["Southern Kurdish",            "sdh"],
+    364: ["Western Balochi",             "bgn"],
+}
+
+language_aliases = {  # old name -> current name, both written CamelCase without spaces; targets such as 'Romanian', 'Filipino' and 'Akan' match language_list names (presumably emitted as extra enum names for source compatibility - TODO confirm in the generator script)
+    # Legacy - should disappear at some point (keys here are different names folded onto another entry, not mere respellings):
+    'Norwegian': 'NorwegianBokmal',
+    'Moldavian': 'Romanian',
+    'SerboCroatian': 'Serbian',
+    'Tagalog': 'Filipino',
+    'Twi': 'Akan',
+    # Renamings (previous name on the left, name now used in language_list on the right):
+    'Afan': 'Oromo',
+    'Byelorussian': 'Belarusian',
+    'Bhutani': 'Dzongkha',
+    'Cambodian': 'Khmer',
+    'Kurundi': 'Rundi',
+    'RhaetoRomance': 'Romansh',
+    'Chewa': 'Nyanja',
+    'Frisian': 'WesternFrisian',
+    'Uigur': 'Uighur',
+}
+
+country_list = {  # numeric id -> [display name, code]; ids run 0..261 without gaps (presumably the id becomes the generated enum value, so existing ids must not be renumbered - TODO confirm in the generator script)
+      0: ["AnyCountry",                                   "ZZ"],  # "any" placeholder rather than a specific territory, judging by the name
+      1: ["Afghanistan",                                  "AF"],
+      2: ["Albania",                                      "AL"],
+      3: ["Algeria",                                      "DZ"],
+      4: ["American Samoa",                               "AS"],
+      5: ["Andorra",                                      "AD"],
+      6: ["Angola",                                       "AO"],
+      7: ["Anguilla",                                     "AI"],
+      8: ["Antarctica",                                   "AQ"],
+      9: ["Antigua And Barbuda",                          "AG"],
+     10: ["Argentina",                                    "AR"],
+     11: ["Armenia",                                      "AM"],
+     12: ["Aruba",                                        "AW"],
+     13: ["Australia",                                    "AU"],
+     14: ["Austria",                                      "AT"],
+     15: ["Azerbaijan",                                   "AZ"],
+     16: ["Bahamas",                                      "BS"],
+     17: ["Bahrain",                                      "BH"],
+     18: ["Bangladesh",                                   "BD"],
+     19: ["Barbados",                                     "BB"],
+     20: ["Belarus",                                      "BY"],
+     21: ["Belgium",                                      "BE"],
+     22: ["Belize",                                       "BZ"],
+     23: ["Benin",                                        "BJ"],
+     24: ["Bermuda",                                      "BM"],
+     25: ["Bhutan",                                       "BT"],
+     26: ["Bolivia",                                      "BO"],
+     27: ["Bosnia And Herzegowina",                       "BA"],  # NOTE(review): unusual spelling; presumably feeds a generated identifier, so check consumers before "correcting" it
+     28: ["Botswana",                                     "BW"],
+     29: ["Bouvet Island",                                "BV"],
+     30: ["Brazil",                                       "BR"],
+     31: ["British Indian Ocean Territory",               "IO"],
+     32: ["Brunei",                                       "BN"],
+     33: ["Bulgaria",                                     "BG"],
+     34: ["Burkina Faso",                                 "BF"],
+     35: ["Burundi",                                      "BI"],
+     36: ["Cambodia",                                     "KH"],
+     37: ["Cameroon",                                     "CM"],
+     38: ["Canada",                                       "CA"],
+     39: ["Cape Verde",                                   "CV"],
+     40: ["Cayman Islands",                               "KY"],
+     41: ["Central African Republic",                     "CF"],
+     42: ["Chad",                                         "TD"],
+     43: ["Chile",                                        "CL"],
+     44: ["China",                                        "CN"],
+     45: ["Christmas Island",                             "CX"],
+     46: ["Cocos Islands",                                "CC"],
+     47: ["Colombia",                                     "CO"],
+     48: ["Comoros",                                      "KM"],
+     49: ["Congo Kinshasa",                               "CD"],
+     50: ["Congo Brazzaville",                            "CG"],
+     51: ["Cook Islands",                                 "CK"],
+     52: ["Costa Rica",                                   "CR"],
+     53: ["Ivory Coast",                                  "CI"],
+     54: ["Croatia",                                      "HR"],
+     55: ["Cuba",                                         "CU"],
+     56: ["Cyprus",                                       "CY"],
+     57: ["Czech Republic",                               "CZ"],
+     58: ["Denmark",                                      "DK"],
+     59: ["Djibouti",                                     "DJ"],
+     60: ["Dominica",                                     "DM"],
+     61: ["Dominican Republic",                           "DO"],
+     62: ["East Timor",                                   "TL"],
+     63: ["Ecuador",                                      "EC"],
+     64: ["Egypt",                                        "EG"],
+     65: ["El Salvador",                                  "SV"],
+     66: ["Equatorial Guinea",                            "GQ"],
+     67: ["Eritrea",                                      "ER"],
+     68: ["Estonia",                                      "EE"],
+     69: ["Ethiopia",                                     "ET"],
+     70: ["Falkland Islands",                             "FK"],
+     71: ["Faroe Islands",                                "FO"],
+     72: ["Fiji",                                         "FJ"],
+     73: ["Finland",                                      "FI"],
+     74: ["France",                                       "FR"],
+     75: ["Guernsey",                                     "GG"],  # out of alphabetical position; presumably a reused id slot - keep the id as is
+     76: ["French Guiana",                                "GF"],
+     77: ["French Polynesia",                             "PF"],
+     78: ["French Southern Territories",                  "TF"],
+     79: ["Gabon",                                        "GA"],
+     80: ["Gambia",                                       "GM"],
+     81: ["Georgia",                                      "GE"],
+     82: ["Germany",                                      "DE"],
+     83: ["Ghana",                                        "GH"],
+     84: ["Gibraltar",                                    "GI"],
+     85: ["Greece",                                       "GR"],
+     86: ["Greenland",                                    "GL"],
+     87: ["Grenada",                                      "GD"],
+     88: ["Guadeloupe",                                   "GP"],
+     89: ["Guam",                                         "GU"],
+     90: ["Guatemala",                                    "GT"],
+     91: ["Guinea",                                       "GN"],
+     92: ["Guinea Bissau",                                "GW"],
+     93: ["Guyana",                                       "GY"],
+     94: ["Haiti",                                        "HT"],
+     95: ["Heard And McDonald Islands",                   "HM"],
+     96: ["Honduras",                                     "HN"],
+     97: ["Hong Kong",                                    "HK"],
+     98: ["Hungary",                                      "HU"],
+     99: ["Iceland",                                      "IS"],
+    100: ["India",                                        "IN"],
+    101: ["Indonesia",                                    "ID"],
+    102: ["Iran",                                         "IR"],
+    103: ["Iraq",                                         "IQ"],
+    104: ["Ireland",                                      "IE"],
+    105: ["Israel",                                       "IL"],
+    106: ["Italy",                                        "IT"],
+    107: ["Jamaica",                                      "JM"],
+    108: ["Japan",                                        "JP"],
+    109: ["Jordan",                                       "JO"],
+    110: ["Kazakhstan",                                   "KZ"],
+    111: ["Kenya",                                        "KE"],
+    112: ["Kiribati",                                     "KI"],
+    113: ["North Korea",                                  "KP"],
+    114: ["South Korea",                                  "KR"],
+    115: ["Kuwait",                                       "KW"],
+    116: ["Kyrgyzstan",                                   "KG"],
+    117: ["Laos",                                         "LA"],
+    118: ["Latvia",                                       "LV"],
+    119: ["Lebanon",                                      "LB"],
+    120: ["Lesotho",                                      "LS"],
+    121: ["Liberia",                                      "LR"],
+    122: ["Libya",                                        "LY"],
+    123: ["Liechtenstein",                                "LI"],
+    124: ["Lithuania",                                    "LT"],
+    125: ["Luxembourg",                                   "LU"],
+    126: ["Macau",                                        "MO"],
+    127: ["Macedonia",                                    "MK"],
+    128: ["Madagascar",                                   "MG"],
+    129: ["Malawi",                                       "MW"],
+    130: ["Malaysia",                                     "MY"],
+    131: ["Maldives",                                     "MV"],
+    132: ["Mali",                                         "ML"],
+    133: ["Malta",                                        "MT"],
+    134: ["Marshall Islands",                             "MH"],
+    135: ["Martinique",                                   "MQ"],
+    136: ["Mauritania",                                   "MR"],
+    137: ["Mauritius",                                    "MU"],
+    138: ["Mayotte",                                      "YT"],
+    139: ["Mexico",                                       "MX"],
+    140: ["Micronesia",                                   "FM"],
+    141: ["Moldova",                                      "MD"],
+    142: ["Monaco",                                       "MC"],
+    143: ["Mongolia",                                     "MN"],
+    144: ["Montserrat",                                   "MS"],
+    145: ["Morocco",                                      "MA"],
+    146: ["Mozambique",                                   "MZ"],
+    147: ["Myanmar",                                      "MM"],
+    148: ["Namibia",                                      "NA"],
+    149: ["Nauru",                                        "NR"],
+    150: ["Nepal",                                        "NP"],
+    151: ["Netherlands",                                  "NL"],
+    152: ["Cura Sao",                                     "CW"],  # NOTE(review): presumably Curacao written without the cedilla; likely feeds a generated identifier, so check consumers before changing it
+    153: ["New Caledonia",                                "NC"],
+    154: ["New Zealand",                                  "NZ"],
+    155: ["Nicaragua",                                    "NI"],
+    156: ["Niger",                                        "NE"],
+    157: ["Nigeria",                                      "NG"],
+    158: ["Niue",                                         "NU"],
+    159: ["Norfolk Island",                               "NF"],
+    160: ["Northern Mariana Islands",                     "MP"],
+    161: ["Norway",                                       "NO"],
+    162: ["Oman",                                         "OM"],
+    163: ["Pakistan",                                     "PK"],
+    164: ["Palau",                                        "PW"],
+    165: ["Palestinian Territories",                      "PS"],
+    166: ["Panama",                                       "PA"],
+    167: ["Papua New Guinea",                             "PG"],
+    168: ["Paraguay",                                     "PY"],
+    169: ["Peru",                                         "PE"],
+    170: ["Philippines",                                  "PH"],
+    171: ["Pitcairn",                                     "PN"],
+    172: ["Poland",                                       "PL"],
+    173: ["Portugal",                                     "PT"],
+    174: ["Puerto Rico",                                  "PR"],
+    175: ["Qatar",                                        "QA"],
+    176: ["Reunion",                                      "RE"],
+    177: ["Romania",                                      "RO"],
+    178: ["Russia",                                       "RU"],
+    179: ["Rwanda",                                       "RW"],
+    180: ["Saint Kitts And Nevis",                        "KN"],
+    181: ["Saint Lucia",                                  "LC"],
+    182: ["Saint Vincent And The Grenadines",             "VC"],
+    183: ["Samoa",                                        "WS"],
+    184: ["San Marino",                                   "SM"],
+    185: ["Sao Tome And Principe",                        "ST"],
+    186: ["Saudi Arabia",                                 "SA"],
+    187: ["Senegal",                                      "SN"],
+    188: ["Seychelles",                                   "SC"],
+    189: ["Sierra Leone",                                 "SL"],
+    190: ["Singapore",                                    "SG"],
+    191: ["Slovakia",                                     "SK"],
+    192: ["Slovenia",                                     "SI"],
+    193: ["Solomon Islands",                              "SB"],
+    194: ["Somalia",                                      "SO"],
+    195: ["South Africa",                                 "ZA"],
+    196: ["South Georgia And The South Sandwich Islands", "GS"],
+    197: ["Spain",                                        "ES"],
+    198: ["Sri Lanka",                                    "LK"],
+    199: ["Saint Helena",                                 "SH"],
+    200: ["Saint Pierre And Miquelon",                    "PM"],
+    201: ["Sudan",                                        "SD"],
+    202: ["Suriname",                                     "SR"],
+    203: ["Svalbard And Jan Mayen Islands",               "SJ"],
+    204: ["Swaziland",                                    "SZ"],
+    205: ["Sweden",                                       "SE"],
+    206: ["Switzerland",                                  "CH"],
+    207: ["Syria",                                        "SY"],
+    208: ["Taiwan",                                       "TW"],
+    209: ["Tajikistan",                                   "TJ"],
+    210: ["Tanzania",                                     "TZ"],
+    211: ["Thailand",                                     "TH"],
+    212: ["Togo",                                         "TG"],
+    213: ["Tokelau",                                      "TK"],
+    214: ["Tonga",                                        "TO"],
+    215: ["Trinidad And Tobago",                          "TT"],
+    216: ["Tunisia",                                      "TN"],
+    217: ["Turkey",                                       "TR"],
+    218: ["Turkmenistan",                                 "TM"],
+    219: ["Turks And Caicos Islands",                     "TC"],
+    220: ["Tuvalu",                                       "TV"],
+    221: ["Uganda",                                       "UG"],
+    222: ["Ukraine",                                      "UA"],
+    223: ["United Arab Emirates",                         "AE"],
+    224: ["United Kingdom",                               "GB"],
+    225: ["United States",                                "US"],
+    226: ["United States Minor Outlying Islands",         "UM"],
+    227: ["Uruguay",                                      "UY"],
+    228: ["Uzbekistan",                                   "UZ"],
+    229: ["Vanuatu",                                      "VU"],
+    230: ["Vatican City State",                           "VA"],
+    231: ["Venezuela",                                    "VE"],
+    232: ["Vietnam",                                      "VN"],
+    233: ["British Virgin Islands",                       "VG"],
+    234: ["United States Virgin Islands",                 "VI"],
+    235: ["Wallis And Futuna Islands",                    "WF"],
+    236: ["Western Sahara",                               "EH"],
+    237: ["Yemen",                                        "YE"],
+    238: ["Canary Islands",                               "IC"],
+    239: ["Zambia",                                       "ZM"],
+    240: ["Zimbabwe",                                     "ZW"],
+    241: ["Clipperton Island",                            "CP"],
+    242: ["Montenegro",                                   "ME"],
+    243: ["Serbia",                                       "RS"],
+    244: ["Saint Barthelemy",                             "BL"],
+    245: ["Saint Martin",                                 "MF"],
+    246: ["Latin America",                                "419"],  # three-digit code instead of the usual two letters (looks like a UN M.49 region code - confirm)
+    247: ["Ascension Island",                             "AC"],
+    248: ["Aland Islands",                                "AX"],
+    249: ["Diego Garcia",                                 "DG"],
+    250: ["Ceuta And Melilla",                            "EA"],
+    251: ["Isle Of Man",                                  "IM"],
+    252: ["Jersey",                                       "JE"],
+    253: ["Tristan Da Cunha",                             "TA"],
+    254: ["South Sudan",                                  "SS"],
+    255: ["Bonaire",                                      "BQ"],
+    256: ["Sint Maarten",                                 "SX"],
+    257: ["Kosovo",                                       "XK"],
+    258: ["European Union",                               "EU"],
+    259: ["Outlying Oceania",                             "QO"],
+    260: ["World",                                        "001"],  # three-digit code instead of the usual two letters (looks like a UN M.49 region code - confirm)
+    261: ["Europe",                                       "150"],  # three-digit code instead of the usual two letters (looks like a UN M.49 region code - confirm)
+}
+
+country_aliases = {  # old name -> current name, both written CamelCase without spaces (presumably emitted as extra enum names for source compatibility - TODO confirm in the generator script)
+    # Deprecated: "Tokelau" and "Tuvalu" are names in both language_list and country_list; the targets carry a "Country" suffix, presumably added by the generator to disambiguate - confirm there:
+    'Tokelau': 'TokelauCountry',
+    'Tuvalu': 'TuvaluCountry',
+    # Renamings (previous name on the left; the right-hand side is a country_list name with its spaces removed):
+    'DemocraticRepublicOfCongo': 'CongoKinshasa',
+    'PeoplesRepublicOfCongo': 'CongoBrazzaville',
+    'DemocraticRepublicOfKorea': 'NorthKorea',
+    'RepublicOfKorea': 'SouthKorea',
+    'RussianFederation': 'Russia',
+    'SyrianArabRepublic': 'Syria',
+    'LatinAmericaAndTheCaribbean': 'LatinAmerica',
+}
+
+# Table of known scripts: maps each numeric script id to a two-entry list,
+# [name, code].  scriptCodeToId() finds an id by matching the code entry.
+# NOTE(review): the codes look like ISO 15924 four-letter tags - confirm.
+script_list = {
+      0: ["AnyScript",              "Zzzz"],
+      1: ["Arabic",                 "Arab"],
+      2: ["Cyrillic",               "Cyrl"],
+      3: ["Deseret",                "Dsrt"],
+      4: ["Gurmukhi",               "Guru"],
+      5: ["Simplified Han",         "Hans"],
+      6: ["Traditional Han",        "Hant"],
+      7: ["Latin",                  "Latn"],
+      8: ["Mongolian",              "Mong"],
+      9: ["Tifinagh",               "Tfng"],
+     10: ["Armenian",               "Armn"],
+     11: ["Bengali",                "Beng"],
+     12: ["Cherokee",               "Cher"],
+     13: ["Devanagari",             "Deva"],
+     14: ["Ethiopic",               "Ethi"],
+     15: ["Georgian",               "Geor"],
+     16: ["Greek",                  "Grek"],
+     17: ["Gujarati",               "Gujr"],
+     18: ["Hebrew",                 "Hebr"],
+     19: ["Japanese",               "Jpan"],
+     20: ["Khmer",                  "Khmr"],
+     21: ["Kannada",                "Knda"],
+     22: ["Korean",                 "Kore"],
+     23: ["Lao",                    "Laoo"],
+     24: ["Malayalam",              "Mlym"],
+     25: ["Myanmar",                "Mymr"],
+     26: ["Oriya",                  "Orya"],
+     27: ["Tamil",                  "Taml"],
+     28: ["Telugu",                 "Telu"],
+     29: ["Thaana",                 "Thaa"],
+     30: ["Thai",                   "Thai"],
+     31: ["Tibetan",                "Tibt"],
+     32: ["Sinhala",                "Sinh"],
+     33: ["Syriac",                 "Syrc"],
+     34: ["Yi",                     "Yiii"],
+     35: ["Vai",                    "Vaii"],
+     36: ["Avestan",                "Avst"],
+     37: ["Balinese",               "Bali"],
+     38: ["Bamum",                  "Bamu"],
+     39: ["Batak",                  "Batk"],
+     40: ["Bopomofo",               "Bopo"],
+     41: ["Brahmi",                 "Brah"],
+     42: ["Buginese",               "Bugi"],
+     43: ["Buhid",                  "Buhd"],
+     44: ["Canadian Aboriginal",    "Cans"],
+     45: ["Carian",                 "Cari"],
+     46: ["Chakma",                 "Cakm"],
+     47: ["Cham",                   "Cham"],
+     48: ["Coptic",                 "Copt"],
+     49: ["Cypriot",                "Cprt"],
+     50: ["Egyptian Hieroglyphs",   "Egyp"],
+     51: ["Fraser",                 "Lisu"],
+     52: ["Glagolitic",             "Glag"],
+     53: ["Gothic",                 "Goth"],
+     54: ["Han",                    "Hani"],
+     55: ["Hangul",                 "Hang"],
+     56: ["Hanunoo",                "Hano"],
+     57: ["Imperial Aramaic",       "Armi"],
+     58: ["Inscriptional Pahlavi",  "Phli"],
+     59: ["Inscriptional Parthian", "Prti"],
+     60: ["Javanese",               "Java"],
+     61: ["Kaithi",                 "Kthi"],
+     62: ["Katakana",               "Kana"],
+     63: ["Kayah Li",               "Kali"],
+     64: ["Kharoshthi",             "Khar"],
+     65: ["Lanna",                  "Lana"],
+     66: ["Lepcha",                 "Lepc"],
+     67: ["Limbu",                  "Limb"],
+     68: ["Linear B",               "Linb"],
+     69: ["Lycian",                 "Lyci"],
+     70: ["Lydian",                 "Lydi"],
+     71: ["Mandaean",               "Mand"],
+     72: ["Meitei Mayek",           "Mtei"],
+     73: ["Meroitic",               "Mero"],
+     74: ["Meroitic Cursive",       "Merc"],
+     75: ["Nko",                    "Nkoo"],
+     76: ["New Tai Lue",            "Talu"],
+     77: ["Ogham",                  "Ogam"],
+     78: ["Ol Chiki",               "Olck"],
+     79: ["Old Italic",             "Ital"],
+     80: ["Old Persian",            "Xpeo"],
+     81: ["Old South Arabian",      "Sarb"],
+     82: ["Orkhon",                 "Orkh"],
+     83: ["Osmanya",                "Osma"],
+     84: ["Phags Pa",               "Phag"],
+     85: ["Phoenician",             "Phnx"],
+     86: ["Pollard Phonetic",       "Plrd"],
+     87: ["Rejang",                 "Rjng"],
+     88: ["Runic",                  "Runr"],
+     89: ["Samaritan",              "Samr"],
+     90: ["Saurashtra",             "Saur"],
+     91: ["Sharada",                "Shrd"],
+     92: ["Shavian",                "Shaw"],
+     93: ["Sora Sompeng",           "Sora"],
+     94: ["Cuneiform",              "Xsux"],
+     95: ["Sundanese",              "Sund"],
+     96: ["Syloti Nagri",           "Sylo"],
+     97: ["Tagalog",                "Tglg"],
+     98: ["Tagbanwa",               "Tagb"],
+     99: ["Tai Le",                 "Tale"],
+    100: ["Tai Viet",               "Tavt"],
+    101: ["Takri",                  "Takr"],
+    102: ["Ugaritic",               "Ugar"],
+    103: ["Braille",                "Brai"],
+    104: ["Hiragana",               "Hira"],
+    105: ["Caucasian Albanian",     "Aghb"],
+    106: ["Bassa Vah",              "Bass"],
+    107: ["Duployan",               "Dupl"],
+    108: ["Elbasan",                "Elba"],
+    109: ["Grantha",                "Gran"],
+    110: ["Pahawh Hmong",           "Hmng"],
+    111: ["Khojki",                 "Khoj"],
+    112: ["Linear A",               "Lina"],
+    113: ["Mahajani",               "Mahj"],
+    114: ["Manichaean",             "Mani"],
+    115: ["Mende Kikakui",          "Mend"],
+    116: ["Modi",                   "Modi"],
+    117: ["Mro",                    "Mroo"],
+    118: ["Old North Arabian",      "Narb"],
+    119: ["Nabataean",              "Nbat"],
+    120: ["Palmyrene",              "Palm"],
+    121: ["Pau Cin Hau",            "Pauc"],
+    122: ["Old Permic",             "Perm"],
+    123: ["Psalter Pahlavi",        "Phlp"],
+    124: ["Siddham",                "Sidd"],
+    125: ["Khudawadi",              "Sind"],
+    126: ["Tirhuta",                "Tirh"],
+    127: ["Varang Kshiti",          "Wara"],
+    128: ["Ahom",                   "Ahom"],
+    129: ["Anatolian Hieroglyphs",  "Hluw"],
+    130: ["Hatran",                 "Hatr"],
+    131: ["Multani",                "Mult"],
+    132: ["Old Hungarian",          "Hung"],
+    133: ["Sign Writing",           "Sgnw"],
+    134: ["Adlam",                  "Adlm"],
+    135: ["Bhaiksuki",              "Bhks"],
+    136: ["Marchen",                "Marc"],
+    137: ["Newa",                   "Newa"],
+    138: ["Osage",                  "Osge"],
+    139: ["Tangut",                 "Tang"],
+    140: ["Han with Bopomofo",      "Hanb"],
+    141: ["Jamo",                   "Jamo"],
+}
+
+# Maps an alternative script name (key) to another script name (value).
+# Imported by qlocalexml2cpp.py.
+# NOTE(review): presumably the keys are old enum names kept for
+# compatibility and the values their current names - confirm where it is used.
+script_aliases = {
+    # Renamings:
+    'SimplifiedChineseScript': 'SimplifiedHanScript',
+    'TraditionalChineseScript': 'TraditionalHanScript',
+}
+
+def countryCodeToId(code):
+    """Return the id in country_list whose entry has the given code.
+
+    A false code (empty or None) gives 0; a code matching no entry
+    gives -1."""
+    if code:
+        for ident, entry in country_list.items():
+            if entry[1] == code:
+                return ident
+        return -1
+    return 0
+
+def languageCodeToId(code):
+    """Return the id in language_list whose entry has the given code.
+
+    A false code (empty or None) gives 0; a code matching no entry
+    gives -1."""
+    if code:
+        for ident, entry in language_list.items():
+            if entry[1] == code:
+                return ident
+        return -1
+    return 0
+
+def scriptCodeToId(code):
+    """Return the id in script_list whose entry has the given code.
+
+    A false code (empty or None) gives 0; a code matching no entry
+    gives -1."""
+    if code:
+        for ident, entry in script_list.items():
+            if entry[1] == code:
+                return ident
+        return -1
+    return 0
diff --git a/util/locale_database/formattags.txt b/util/locale_database/formattags.txt
new file mode 100644
index 0000000000..5138c37a81
--- /dev/null
+++ b/util/locale_database/formattags.txt
@@ -0,0 +1,23 @@
+d
+dd
+ddd
+dddd
+M
+MM
+MMM
+MMMM
+yy
+yyyy
+h the hour without a leading zero (0 to 23 or 1 to 12 if AM/PM display)
+hh the hour with a leading zero (00 to 23 or 01 to 12 if AM/PM display)
+H the hour without a leading zero (0 to 23, even with AM/PM display)
+HH the hour with a leading zero (00 to 23, even with AM/PM display)
+m
+mm
+s
+ss
+z the milliseconds without leading zeroes (0 to 999)
+zzz the milliseconds with leading zeroes (000 to 999)
+AP or A interpret as an AM/PM time. AP must be either "AM" or "PM"
+ap or a Interpret as an AM/PM time. ap must be either "am" or "pm"
+t time zone
diff --git a/util/locale_database/localexml.py b/util/locale_database/localexml.py
new file mode 100644
index 0000000000..e95b3aebcc
--- /dev/null
+++ b/util/locale_database/localexml.py
@@ -0,0 +1,263 @@
+#############################################################################
+##
+## Copyright (C) 2017 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Shared serialization-scanning code for QLocaleXML format.
+
+The Locale class is written by cldr2qlocalexml.py and read by qlocalexml2cpp.py
+"""
+from xml.sax.saxutils import escape
+
+import xpathlite
+
+# Tools used by Locale:
+def camel(seq):
+    """Yield the words of seq, capitalizing every word but the first.
+
+    seq must be an iterator of strings.  Its first word is yielded
+    unchanged; each later one is yielded after .capitalize().  An
+    exhausted iterator yields nothing."""
+    # Use the next() builtin, not the Python-2-only .next() method, and
+    # catch exhaustion explicitly rather than letting StopIteration
+    # leak out of the generator.
+    try:
+        first = next(seq)
+    except StopIteration:
+        return
+    yield first
+    for word in seq:
+        yield word.capitalize()
+
+def camelCase(words):
+    """Join an iterable of words into a single camelCase string."""
+    pieces = camel(iter(words))
+    return ''.join(pieces)
+
+def addEscapes(s):
+    """Return s with each non-ASCII character replaced by a \\x hex escape."""
+    return ''.join(c if ord(c) < 128 else '\\x%02x' % ord(c) for c in s)
+
+def ordStr(c):
+    """Return the code-point number of the single character c, as a string.
+
+    Raises xpathlite.Error if c is not exactly one character long."""
+    if len(c) == 1:
+        return str(ord(c))
+    # addEscapes() keeps the message ASCII-only, whatever c contains.
+    raise xpathlite.Error('Unable to handle value "%s"' % addEscapes(c))
+
+# QLocale returns a single character, rather than a string, for
+# QLocale::exponential() and others; so when the value is not exactly
+# one character long we fall back to the given default.
+def fixOrdStr(c, d):
+    """Return, as a string, the code-point number of c if it is a
+    single character, else that of the default d."""
+    char = d if len(c) != 1 else c
+    return str(ord(char))
+
+def startCount(c, text): # strspn
+    """First index in text where it doesn't have a character in c
+
+    Returns len(text) if every character of text is in c.  The caller
+    must ensure text is non-empty and starts with a character in c."""
+    assert text and text[0] in c
+    # next() with a default replaces the Python-2-only .next() method
+    # and the StopIteration handler it needed.
+    return next((j for j, d in enumerate(text) if d not in c), len(text))
+
+def convertFormat(format):
+    """Convert date/time format-specifier from CLDR to Qt
+
+    Match up (as best we can) the differences between:
+    * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+    * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
+
+    Text between single quotes is copied verbatim, quotes included;
+    any character not recognised as a field is copied unchanged.
+    """
+    result = ""
+    i = 0
+    while i < len(format):
+        if format[i] == "'":
+            result += "'"
+            i += 1
+            while i < len(format) and format[i] != "'":
+                result += format[i]
+                i += 1
+            if i < len(format):
+                result += "'"
+                i += 1
+        else:
+            s = format[i:]
+            if s.startswith('E'): # week-day
+                n = startCount('E', s)
+                # CLDR: E, EE and EEE are all the abbreviated name.
+                if n <= 3:
+                    result += 'ddd'
+                elif n == 4:
+                    result += 'dddd'
+                else: # 5: narrow, 6 short; but should be name, not number :-(
+                    result += 'd' if n < 6 else 'dd'
+                i += n
+            elif s[0] in 'ab': # am/pm
+                # 'b' should distinguish noon/midnight, too :-(
+                result += "AP"
+                i += startCount('ab', s)
+            elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
+                result += 'z'
+                i += startCount('S', s)
+            elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
+                result += 't'
+                i += startCount('V', s)
+            elif s[0] in 'zv': # zone
+                # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
+                # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
+                result += "t"
+                i += startCount('zv', s)
+            else:
+                result += format[i]
+                i += 1
+
+    return result
+
+class Locale:
+    """The data of one locale, as exchanged in QLocaleXML form.
+
+    An instance is a plain bag of attributes, set from the mapping and
+    keywords given to the constructor.  fromXmlData() builds one from
+    the texts of XML elements, toXml() prints one as a <locale>
+    element and C() returns the one describing the C locale."""
+    # Tool used during class body (see del below), not method:
+    def propsMonthDay(lengths=('long', 'short', 'narrow'), scale=('months', 'days')):
+        for L in lengths:
+            for S in scale:
+                yield camelCase((L, S))
+                yield camelCase(('standalone', L, S))
+
+    # Expected to be numbers, read with int():
+    __asint = ("decimal", "group", "zero",
+               "list", "percent", "minus", "plus", "exp",
+               "currencyDigits", "currencyRounding")
+    # Single character; use the code-point number for each:
+    __asord = ("quotationStart", "quotationEnd",
+               "alternateQuotationStart", "alternateQuotationEnd")
+    # Convert day-name to Qt day-of-week number:
+    __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
+    # Convert from CLDR format-strings to QDateTimeParser ones:
+    __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
+    # Just use the raw text:
+    __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
+               "listPatternPartStart", "listPatternPartMiddle",
+               "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+               'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
+               "currencyIsoCode", "currencySymbol", "currencyDisplayName",
+               "currencyFormat", "currencyNegativeFormat"
+               ) + tuple(propsMonthDay())
+    del propsMonthDay
+
+    # Day-of-Week numbering used by Qt:
+    __qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
+
+    @classmethod
+    def fromXmlData(cls, lookup):
+        """Constructor from the contents of XML elements.
+
+        Single parameter, lookup, is called with the names of XML
+        elements that should contain the relevant data, within a CLDR
+        locale element (within a localeList element); these names are
+        used for the attributes of the object constructed.  Attribute
+        values are obtained by suitably digesting the returned element
+        texts.\n"""
+        data = {}
+        for k in cls.__asint:
+            # The 'list' element is stored as attribute listDelim:
+            data['listDelim' if k == 'list' else k] = int(lookup(k))
+
+        for k in cls.__asord:
+            value = lookup(k)
+            assert len(value) == 1, \
+                (k, value, 'value should be exactly one character')
+            data[k] = ord(value)
+
+        for k in cls.__asdow:
+            data[k] = cls.__qDoW[lookup(k)]
+
+        for k in cls.__asfmt:
+            data[k] = convertFormat(lookup(k))
+
+        for k in cls.__astxt:
+            data[k] = lookup(k)
+
+        return cls(data)
+
+    def toXml(self, indent='        ', tab='    '):
+        """Print this locale, as a <locale> XML element, on standard output.
+
+        The opening and closing <locale> tags are prefixed with indent;
+        each child element is prefixed with indent + tab."""
+        print indent + '<locale>'
+        inner = indent + tab
+        get = lambda k: getattr(self, k)
+        for key in ('language', 'script', 'country'):
+            print inner + "<%s>" % key + get(key) + "</%s>" % key
+            print inner + "<%scode>" % key + get(key + '_code') + "</%scode>" % key
+
+        # Single characters are written as their code-point numbers:
+        for key in ('decimal', 'group', 'zero'):
+            print inner + "<%s>" % key + ordStr(get(key)) + "</%s>" % key
+        # For these, a value that isn't a single character is replaced
+        # by the given default (see fixOrdStr()):
+        for key, std in (('list', ';'), ('percent', '%'),
+                         ('minus', '-'), ('plus', '+'), ('exp', 'e')):
+            print inner + "<%s>" % key + fixOrdStr(get(key), std) + "</%s>" % key
+
+        for key in ('language_endonym', 'country_endonym',
+                    'quotationStart', 'quotationEnd',
+                    'alternateQuotationStart', 'alternateQuotationEnd',
+                    'listPatternPartStart', 'listPatternPartMiddle',
+                    'listPatternPartEnd', 'listPatternPartTwo',
+                    'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
+                    'am', 'pm', 'firstDayOfWeek',
+                    'weekendStart', 'weekendEnd',
+                    'longDateFormat', 'shortDateFormat',
+                    'longTimeFormat', 'shortTimeFormat',
+                    'standaloneLongMonths', 'standaloneShortMonths',
+                    'standaloneNarrowMonths',
+                    'longMonths', 'shortMonths', 'narrowMonths',
+                    'longDays', 'shortDays', 'narrowDays',
+                    'standaloneLongDays', 'standaloneShortDays', 'standaloneNarrowDays',
+                    'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
+                    'currencyFormat', 'currencyNegativeFormat'):
+            # The *_endonym attributes are written as camelCase elements:
+            ent = camelCase(key.split('_')) if key.endswith('_endonym') else key
+            print inner + "<%s>%s</%s>" % (ent, escape(get(key)).encode('utf-8'), ent)
+
+        for key in ('currencyDigits', 'currencyRounding'):
+            print inner + "<%s>%d</%s>" % (key, get(key), key)
+
+        print indent + "</locale>"
+
+    def __init__(self, data=None, **kw):
+        """Set attributes from data (a mapping, if given) and keywords.
+
+        Keywords are applied after data, so override it on any clash."""
+        if data: self.__dict__.update(data)
+        if kw: self.__dict__.update(kw)
+
+    @classmethod
+    def C(cls,
+          # Empty entries at end to ensure final separator when join()ed:
+          months = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
+                    'August', 'September', 'October', 'November', 'December', ''),
+          days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
+                  'Thursday', 'Friday', 'Saturday', ''),
+          quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
+        """Returns an object representing the C locale."""
+        return cls(language='C', language_code='0', language_endonym='',
+                   script='AnyScript', script_code='0',
+                   country='AnyCountry', country_code='0', country_endonym='',
+                   decimal='.', group=',', list=';', percent='%',
+                   zero='0', minus='-', plus='+', exp='e',
+                   quotationStart='"', quotationEnd='"',
+                   alternateQuotationStart='\'', alternateQuotationEnd='\'',
+                   listPatternPartStart='%1, %2',
+                   listPatternPartMiddle='%1, %2',
+                   listPatternPartEnd='%1, %2',
+                   listPatternPartTwo='%1, %2',
+                   byte_unit='bytes',
+                   byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
+                   byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
+                   am='AM', pm='PM', firstDayOfWeek='mon',
+                   weekendStart='sat', weekendEnd='sun',
+                   longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
+                   longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
+                   longMonths=';'.join(months),
+                   shortMonths=';'.join(m[:3] for m in months),
+                   narrowMonths='1;2;3;4;5;6;7;8;9;10;11;12;',
+                   standaloneLongMonths=';'.join(months),
+                   standaloneShortMonths=';'.join(m[:3] for m in months),
+                   standaloneNarrowMonths=';'.join(m[:1] for m in months),
+                   longDays=';'.join(days),
+                   shortDays=';'.join(d[:3] for d in days),
+                   narrowDays='7;1;2;3;4;5;6;',
+                   standaloneLongDays=';'.join(days),
+                   standaloneShortDays=';'.join(d[:3] for d in days),
+                   standaloneNarrowDays=';'.join(d[:1] for d in days),
+                   currencyIsoCode='', currencySymbol='',
+                   currencyDisplayName=';' * 7,
+                   currencyDigits=2, currencyRounding=1,
+                   currencyFormat='%1%2', currencyNegativeFormat='')
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
new file mode 100755
index 0000000000..2dad2dd57a
--- /dev/null
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -0,0 +1,834 @@
+#!/usr/bin/env python2
+#############################################################################
+##
+## Copyright (C) 2017 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Script to generate C++ code from CLDR data in qLocaleXML form
+
+See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
+Pass the output file from that as first parameter to this script; pass
+the root of the qtbase check-out as second parameter.
+"""
+
+import os
+import sys
+import tempfile
+import datetime
+import xml.dom.minidom
+from enumdata import language_aliases, country_aliases, script_aliases
+
+from localexml import Locale
+
+class Error(Exception):
+    """Exception raised when this script cannot make sense of its data.
+
+    The message is available as the msg attribute and via str()."""
+    def __init__(self, msg):
+        # Derive from Exception (and initialise it) so that instances
+        # are proper exceptions, not instances of an old-style class.
+        Exception.__init__(self, msg)
+        self.msg = msg
+    def __str__(self):
+        return self.msg
+
+def wrap_list(lst):
+    """Join the strings in lst with ", ", twenty to a line.
+
+    Lines are separated by ",\\n"; an empty lst gives an empty string."""
+    rows = (lst[start:start + 20] for start in range(0, len(lst), 20))
+    return ",\n".join(", ".join(row) for row in rows)
+
+def isNodeNamed(elt, name, TYPE=xml.dom.minidom.Node.ELEMENT_NODE):
+    """True if elt is a node of the given TYPE (by default an element)
+    whose nodeName is name."""
+    if elt.nodeType != TYPE:
+        return False
+    return elt.nodeName == name
+
+def firstChildElt(parent, name):
+    """Return the first child element of parent that is called name.
+
+    Raises Error if parent has no such child."""
+    node = parent.firstChild
+    while node and not isNodeNamed(node, name):
+        node = node.nextSibling
+    if node:
+        return node
+
+    raise Error('No %s child found' % name)
+
+def eachEltInGroup(parent, group, key):
+    """Yield each element called key within parent's first group child.
+
+    Yields nothing if parent has no child element called group."""
+    try:
+        container = firstChildElt(parent, group)
+    except Error:
+        return
+
+    node = container.firstChild
+    while node:
+        if isNodeNamed(node, key):
+            yield node
+        node = node.nextSibling
+
+def eltWords(elt):
+    """Yield the value of each text node directly under elt, in order."""
+    node = elt.firstChild
+    while node:
+        is_text = node.nodeType == elt.TEXT_NODE
+        if is_text:
+            yield node.nodeValue
+        node = node.nextSibling
+
+def firstChildText(elt, key):
+    """Return the text of elt's first child element called key.
+
+    The child's text nodes are joined with single spaces."""
+    child = firstChildElt(elt, key)
+    return ' '.join(eltWords(child))
+
+def loadMap(doc, category):
+    """Read the <categoryList> of doc into a dictionary.
+
+    Each <category> element in the list contributes one entry, mapping
+    its integer id to a (name, code) pair of texts."""
+    table = {}
+    for element in eachEltInGroup(doc.documentElement,
+                                  category + 'List', category):
+        ident = int(firstChildText(element, 'id'))
+        name = firstChildText(element, 'name')
+        code = firstChildText(element, 'code')
+        table[ident] = (name, code)
+    return table
+
+def loadLikelySubtagsMap(doc):
+    """Read the <likelySubtags> of doc into a dictionary.
+
+    Keys are the positions of the <likelySubtag> elements; each value
+    is a dictionary mapping 'from' and 'to' to a (language, script,
+    country) triple of texts."""
+    def triplet(element, keys=('language', 'script', 'country')):
+        return tuple(firstChildText(element, key) for key in keys)
+
+    result = {}
+    entries = eachEltInGroup(doc.documentElement,
+                             'likelySubtags', 'likelySubtag')
+    for i, elt in enumerate(entries):
+        source = triplet(firstChildElt(elt, "from"))
+        target = triplet(firstChildElt(elt, "to"))
+        result[i] = {'from': source, 'to': target}
+    return result
+
+def fixedScriptName(name, dupes):
+    """Return the identifier to use for the script called name.
+
+    Spaces are dropped, each word's first letter is upper-cased and
+    "Script" is appended unless already the ending.  If the result is
+    in dupes, an error is reported and the script exits."""
+    # Don't .capitalize() as some names are already camel-case (see enumdata.py):
+    words = [word[0].upper() + word[1:] for word in name.split()]
+    name = ''.join(words)
+    if not name.endswith("Script"):
+        name += "Script"
+    if name in dupes:
+        sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name)
+        sys.exit(1)
+    return name
+
+def fixedCountryName(name, dupes):
+    """Return name without its spaces, with "Country" appended if
+    name is in dupes."""
+    compact = name.replace(" ", "")
+    return compact + "Country" if name in dupes else compact
+
+def fixedLanguageName(name, dupes):
+    """Return name without its spaces, with "Language" appended if
+    name is in dupes."""
+    compact = name.replace(" ", "")
+    return compact + "Language" if name in dupes else compact
+
+def findDupes(country_map, language_map):
+    """Return the set of names used in both maps.
+
+    Each map's values are sequences whose first entry is a name."""
+    # Only the values are needed; .values() also avoids the
+    # Python-2-only .iteritems().
+    country_set = set(v[0] for v in country_map.values())
+    language_set = set(v[0] for v in language_map.values())
+    return country_set & language_set
+
+def languageNameToId(name, language_map):
+    """Return the key of language_map whose entry starts with name,
+    or -1 if there is none."""
+    for ident, entry in language_map.items():
+        if entry[0] == name:
+            return ident
+    return -1
+
+def scriptNameToId(name, script_map):
+    """Return the key of script_map whose entry starts with name,
+    or -1 if there is none."""
+    for ident, entry in script_map.items():
+        if entry[0] == name:
+            return ident
+    return -1
+
+def countryNameToId(name, country_map):
+    """Return the key of country_map whose entry starts with name,
+    or -1 if there is none."""
+    for ident, entry in country_map.items():
+        if entry[0] == name:
+            return ident
+    return -1
+
+def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
+    """Read the <localeList> of doc into a dictionary of Locale objects.
+
+    Each <locale> element becomes a Locale, stored under the key
+    (language_id, script_id, country_id).  The ids are found by looking
+    the locale's names up in the given maps; a name that is not found
+    gets id -1 and a warning on stderr.  When a locale's script id is
+    0, likely_subtags_map is consulted for a script to use instead."""
+    result = {}
+
+    for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
+        locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k))
+        language_id = languageNameToId(locale.language, language_map)
+        if language_id == -1:
+            sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
+        script_id = scriptNameToId(locale.script, script_map)
+        if script_id == -1:
+            sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script)
+        country_id = countryNameToId(locale.country, country_map)
+        if country_id == -1:
+            sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
+
+        if language_id != 1: # C
+            if country_id == 0:
+                sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)
+
+            if script_id == 0:
+                # find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
+                for key in likely_subtags_map.keys():
+                    tmp = likely_subtags_map[key]
+                    if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
+                        locale.script = tmp["to"][1]
+                        script_id = scriptNameToId(locale.script, script_map)
+                        break
+            # Only reached with script_id still 0 if the search above
+            # found no usable entry:
+            if script_id == 0 and country_id != 0:
+                # try with no country
+                for key in likely_subtags_map.keys():
+                    tmp = likely_subtags_map[key]
+                    if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
+                        locale.script = tmp["to"][1]
+                        script_id = scriptNameToId(locale.script, script_map)
+                        break
+
+        # A later locale with the same id triple replaces an earlier one:
+        result[(language_id, script_id, country_id)] = locale
+
+    return result
+
+def compareLocaleKeys(key1, key2):
+    """Comparison function for (language, script, country) id triples.
+
+    Returns 0 for equal keys, else a negative or positive number
+    according as key1 sorts before or after key2.  Keys sort by
+    language id; within a language, a locale whose country is the
+    default for its (language, script) is put first, then keys sort by
+    script id and finally by country id.
+
+    Reads the function attributes compareLocaleKeys.locale_map (key to
+    Locale) and compareLocaleKeys.default_map ((language, script) names
+    to default country name), which must be set before this is called.
+    NOTE(review): presumably main() sets them - that code is not in view."""
+    if key1 == key2:
+        return 0
+
+    if key1[0] == key2[0]:
+        l1 = compareLocaleKeys.locale_map[key1]
+        l2 = compareLocaleKeys.locale_map[key2]
+
+        # Default country for l1's language and script, if known:
+        if (l1.language, l1.script) in compareLocaleKeys.default_map.keys():
+            default = compareLocaleKeys.default_map[(l1.language, l1.script)]
+            if l1.country == default:
+                return -1
+            if l2.country == default:
+                return 1
+
+        # With differing scripts, l2's script may have its own default:
+        if key1[1] != key2[1]:
+            if (l2.language, l2.script) in compareLocaleKeys.default_map.keys():
+                default = compareLocaleKeys.default_map[(l2.language, l2.script)]
+                if l2.country == default:
+                    return 1
+                if l1.country == default:
+                    return -1
+
+        if key1[1] != key2[1]:
+            return key1[1] - key2[1]
+    else:
+        return key1[0] - key2[0]
+
+    # Same language and script ids: order by country id.
+    return key1[2] - key2[2]
+
+
+def languageCount(language_id, locale_map):
+    """Return how many keys of locale_map have language_id as their
+    first entry."""
+    return sum(1 for key in locale_map.keys() if key[0] == language_id)
+
+def unicode2hex(s):
+    """Return a list of hex() strings, one per UTF-16 code unit of s.
+
+    A character whose code-point exceeds 0xFFFF contributes two
+    entries, the high then the low half of its surrogate pair."""
+    codes = []
+    for ch in s:
+        point = ord(ch)
+        if point <= 0xFFFF:
+            codes.append(hex(point))
+            continue
+        # make a surrogate pair
+        # copied from qchar.h
+        codes.append(hex((point >> 10) + 0xd7c0))
+        codes.append(hex(point % 0x400 + 0xdc00))
+    return codes
+
+class StringDataToken:
+    """An (index, length) pair; str() gives them as " index,length ".
+
+    Construction raises Error if either number exceeds 0xFFFF."""
+    def __init__(self, index, length):
+        if not (index <= 0xFFFF and length <= 0xFFFF):
+            raise Error("Position exceeds ushort range: %d,%d " % (index, length))
+        self.index, self.length = index, length
+    def __str__(self):
+        return " %d,%d " % (self.index, self.length)
+
+class StringData:
+    """Accumulates strings into one table of hex code-unit strings.
+
+    data is the list of code units (as produced by unicode2hex()) of
+    all strings added so far; hash maps each added string to the
+    StringDataToken saying where in data it lies."""
+    def __init__(self, name):
+        self.data = []
+        self.hash = {}
+        self.name = name
+    def append(self, s):
+        """Add s to the table, returning its StringDataToken.
+
+        A string added before is not stored again; its earlier token is
+        returned.  If the index or size is too big, an error is reported
+        on stdout and stderr and the script exits with status 1."""
+        if s in self.hash:
+            return self.hash[s]
+
+        lst = unicode2hex(s)
+        # The new string starts where the existing data ends:
+        index = len(self.data)
+        if index > 65535:
+            print "\n\n\n#error Data index is too big!"
+            sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index)
+            sys.exit(1)
+        size = len(lst)
+        if size >= 65535:
+            print "\n\n\n#error Data is too big!"
+            sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size)
+            sys.exit(1)
+        token = None
+        # NOTE(review): the checks above already reject any value that
+        # StringDataToken would refuse, so this handler looks unreachable.
+        try:
+            token = StringDataToken(index, size)
+        except Error as e:
+            sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s))
+            sys.exit(1)
+        self.hash[s] = token
+        self.data += lst
+        return token
+
+def escapedString(s):
+    """Return s as C++ string literals, one per line of the result.
+
+    Each double-quote in s is backslash-escaped and each non-ASCII
+    character is written as a \\x hex escape.  A new literal is begun
+    whenever the current one exceeds 80 characters; the last literal
+    ends with an explicit \\0.  The literals are joined by newlines."""
+    # A C++ hex escape extends over every hex digit that follows it, so
+    # any such character (0-9 as well as a-f, A-F) coming right after
+    # an escape must itself be escaped.
+    hexdigits = '0123456789abcdefABCDEF'
+    lines = []
+    line = ""
+    need_escape = False
+    for c in s.replace('"', '\\"'):
+        if ord(c) < 128 and not (need_escape and c in hexdigits):
+            line += c
+            need_escape = False
+        else:
+            line += "\\x%02x" % ord(c)
+            need_escape = True
+        if len(line) > 80:
+            lines.append('"' + line + '"')
+            line = ""
+    lines.append('"' + line + '\\0"')
+    return "\n".join(lines)
+
+def printEscapedString(s):
+    """Print escapedString(s) on standard output."""
+    print escapedString(s)
+
+def currencyIsoCodeData(s):
+    """Return a brace-enclosed, comma-separated list of the code-point
+    numbers of the characters of s, or "{0,0,0}" if s is empty."""
+    if not s:
+        return "{0,0,0}"
+    codes = [str(ord(x)) for x in s]
+    return '{' + ",".join(codes) + '}'
+
+def usage():
+    """Print a summary of the command-line, then exit with status 1."""
+    print "Usage: qlocalexml2cpp.py <path-to-locale.xml> <path-to-qtbase-src-tree>"
+    sys.exit(1)
+
+GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
+GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n"
+
+def main():
+    if len(sys.argv) != 3:
+        usage()
+
+    localexml = sys.argv[1]
+    qtsrcdir = sys.argv[2]
+
+    if not (os.path.isdir(qtsrcdir)
+            and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'tools', leaf))
+                    for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
+        usage()
+
+    (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir)
+    data_temp_file = os.fdopen(data_temp_file, "w")
+    qlocaledata_file = open(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h", "r")
+    s = qlocaledata_file.readline()
+    while s and s != GENERATED_BLOCK_START:
+        data_temp_file.write(s)
+        s = qlocaledata_file.readline()
+    data_temp_file.write(GENERATED_BLOCK_START)
+
+    doc = xml.dom.minidom.parse(localexml)
+    language_map = loadMap(doc, 'language')
+    script_map = loadMap(doc, 'script')
+    country_map = loadMap(doc, 'country')
+    likely_subtags_map = loadLikelySubtagsMap(doc)
+    default_map = {}
+    for key in likely_subtags_map.keys():
+        tmp = likely_subtags_map[key]
+        if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry":
+            default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2]
+    locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
+    dupes = findDupes(language_map, country_map)
+
+    cldr_version = firstChildText(doc.documentElement, "version")
+
+    data_temp_file.write("""
+/*
+    This part of the file was generated on %s from the
+    Common Locale Data Repository v%s
+
+    http://www.unicode.org/cldr/
+
+    Do not edit this section: instead regenerate it using
+    cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
+    edited) CLDR data; see qtbase/util/locale_database/.
+*/
+
+""" % (str(datetime.date.today()), cldr_version) )
+
+    # Likely subtags map
+    data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
+    index = 0
+    for key in likely_subtags_map.keys():
+        tmp = likely_subtags_map[key]
+        from_language = languageNameToId(tmp["from"][0], language_map)
+        from_script = scriptNameToId(tmp["from"][1], script_map)
+        from_country = countryNameToId(tmp["from"][2], country_map)
+        to_language = languageNameToId(tmp["to"][0], language_map)
+        to_script = scriptNameToId(tmp["to"][1], script_map)
+        to_country = countryNameToId(tmp["to"][2], country_map)
+
+        cmnt_from = ""
+        if from_language != 0:
+            cmnt_from = cmnt_from + language_map[from_language][1]
+        else:
+            cmnt_from = cmnt_from + "und"
+        if from_script != 0:
+            if cmnt_from:
+                cmnt_from = cmnt_from + "_"
+            cmnt_from = cmnt_from + script_map[from_script][1]
+        if from_country != 0:
+            if cmnt_from:
+                cmnt_from = cmnt_from + "_"
+            cmnt_from = cmnt_from + country_map[from_country][1]
+        cmnt_to = ""
+        if to_language != 0:
+            cmnt_to = cmnt_to + language_map[to_language][1]
+        else:
+            cmnt_to = cmnt_to + "und"
+        if to_script != 0:
+            if cmnt_to:
+                cmnt_to = cmnt_to + "_"
+            cmnt_to = cmnt_to + script_map[to_script][1]
+        if to_country != 0:
+            if cmnt_to:
+                cmnt_to = cmnt_to + "_"
+            cmnt_to = cmnt_to + country_map[to_country][1]
+
+        data_temp_file.write("    ")
+        data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country))
+        index += 1
+        if index != len(likely_subtags_map):
+            data_temp_file.write(",")
+        else:
+            data_temp_file.write(" ")
+        data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to))
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
+
+    # Locale index
+    data_temp_file.write("static const quint16 locale_index[] = {\n")
+    index = 0
+    for key in language_map.keys():
+        i = 0
+        count = languageCount(key, locale_map)
+        if count > 0:
+            i = index
+            index += count
+        data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0]))
+    data_temp_file.write("     0 // trailing 0\n")
+    data_temp_file.write("};\n\n")
+
+    list_pattern_part_data = StringData('list_pattern_part_data')
+    date_format_data = StringData('date_format_data')
+    time_format_data = StringData('time_format_data')
+    months_data = StringData('months_data')
+    days_data = StringData('days_data')
+    am_data = StringData('am_data')
+    pm_data = StringData('pm_data')
+    byte_unit_data = StringData('byte_unit_data')
+    currency_symbol_data = StringData('currency_symbol_data')
+    currency_display_name_data = StringData('currency_display_name_data')
+    currency_format_data = StringData('currency_format_data')
+    endonyms_data = StringData('endonyms_data')
+
+    # Locale data
+    data_temp_file.write("static const QLocaleData locale_data[] = {\n")
+    # Table headings: keep each label centred in its field, matching line_format:
+    data_temp_file.write('   // '
+                         # Width 6 + comma:
+                         + ' lang  ' # IDs
+                         + 'script '
+                         + '  terr '
+                         + '  dec  ' # Numeric punctuation:
+                         + ' group '
+                         + ' list  ' # List delimiter
+                         + ' prcnt ' # Arithmetic symbols:
+                         + '  zero '
+                         + ' minus '
+                         + ' plus  '
+                         + '  exp  '
+                         # Width 8 + comma - to make space for these wide labels !
+                         + ' quotOpn ' # Quotation marks
+                         + ' quotEnd '
+                         + 'altQtOpn '
+                         + 'altQtEnd '
+                         # Width 11 + comma:
+                         + '  lpStart   ' # List pattern
+                         + '   lpMid    '
+                         + '   lpEnd    '
+                         + '   lpTwo    '
+                         + '   sDtFmt   ' # Date format
+                         + '   lDtFmt   '
+                         + '   sTmFmt   ' # Time format
+                         + '   lTmFmt   '
+                         + '  ssMonth   ' # Months
+                         + '  slMonth   '
+                         + '  snMonth   '
+                         + '   sMonth   '
+                         + '   lMonth   '
+                         + '   nMonth   '
+                         + '   ssDays   ' # Days
+                         + '   slDays   '
+                         + '   snDays   '
+                         + '    sDays   '
+                         + '    lDays   '
+                         + '    nDays   '
+                         + '     am     ' # am/pm indicators
+                         + '     pm     '
+                         # Width 8 + comma
+                         + '  byte   '
+                         + ' siQuant '
+                         + 'iecQuant '
+                         # Width 8+4 + comma
+                         + '   currISO   '
+                         # Width 11 + comma:
+                         + '  currSym   ' # Currency formatting:
+                         + ' currDsply  '
+                         + '  currFmt   '
+                         + ' currFmtNeg '
+                         + '  endoLang  ' # Name of language in itself, and of country:
+                         + '  endoCntry '
+                         # Width 6 + comma:
+                         + 'curDgt ' # Currency number representation:
+                         + 'curRnd '
+                         + 'dow1st ' # First day of week
+                         + ' wknd+ ' # Week-end start/end days:
+                         + ' wknd-'
+                         # No trailing space on last entry (be sure to
+                         # pad before adding anything after it).
+                         + '\n')
+
+    locale_keys = locale_map.keys()
+    compareLocaleKeys.default_map = default_map
+    compareLocaleKeys.locale_map = locale_map
+    locale_keys.sort(compareLocaleKeys)
+
+    line_format = ('    { '
+                   # Locale-identifier:
+                   + '%6d,' * 3
+                   # Numeric formats, list delimiter:
+                   + '%6d,' * 8
+                   # Quotation marks:
+                   + '%8d,' * 4
+                   # List patterns, date/time formats, month/day names, am/pm:
+                   + '%11s,' * 22
+                   # SI/IEC byte-unit abbreviations:
+                   + '%8s,' * 3
+                   # Currency ISO code:
+                   + ' %10s, '
+                   # Currency and endonyms
+                   + '%11s,' * 6
+                   # Currency formatting:
+                   + '%6d,%6d'
+                   # Day of week and week-end:
+                   + ',%6d' * 3
+                   + ' }')
+    for key in locale_keys:
+        l = locale_map[key]
+        data_temp_file.write(line_format
+                    % (key[0], key[1], key[2],
+                        l.decimal,
+                        l.group,
+                        l.listDelim,
+                        l.percent,
+                        l.zero,
+                        l.minus,
+                        l.plus,
+                        l.exp,
+                        l.quotationStart,
+                        l.quotationEnd,
+                        l.alternateQuotationStart,
+                        l.alternateQuotationEnd,
+                        list_pattern_part_data.append(l.listPatternPartStart),
+                        list_pattern_part_data.append(l.listPatternPartMiddle),
+                        list_pattern_part_data.append(l.listPatternPartEnd),
+                        list_pattern_part_data.append(l.listPatternPartTwo),
+                        date_format_data.append(l.shortDateFormat),
+                        date_format_data.append(l.longDateFormat),
+                        time_format_data.append(l.shortTimeFormat),
+                        time_format_data.append(l.longTimeFormat),
+                        months_data.append(l.standaloneShortMonths),
+                        months_data.append(l.standaloneLongMonths),
+                        months_data.append(l.standaloneNarrowMonths),
+                        months_data.append(l.shortMonths),
+                        months_data.append(l.longMonths),
+                        months_data.append(l.narrowMonths),
+                        days_data.append(l.standaloneShortDays),
+                        days_data.append(l.standaloneLongDays),
+                        days_data.append(l.standaloneNarrowDays),
+                        days_data.append(l.shortDays),
+                        days_data.append(l.longDays),
+                        days_data.append(l.narrowDays),
+                        am_data.append(l.am),
+                        pm_data.append(l.pm),
+                        byte_unit_data.append(l.byte_unit),
+                        byte_unit_data.append(l.byte_si_quantified),
+                        byte_unit_data.append(l.byte_iec_quantified),
+                        currencyIsoCodeData(l.currencyIsoCode),
+                        currency_symbol_data.append(l.currencySymbol),
+                        currency_display_name_data.append(l.currencyDisplayName),
+                        currency_format_data.append(l.currencyFormat),
+                        currency_format_data.append(l.currencyNegativeFormat),
+                        endonyms_data.append(l.languageEndonym),
+                        endonyms_data.append(l.countryEndonym),
+                        l.currencyDigits,
+                        l.currencyRounding,
+                        l.firstDayOfWeek,
+                        l.weekendStart,
+                        l.weekendEnd)
+                             + ", // %s/%s/%s\n" % (l.language, l.script, l.country))
+    data_temp_file.write(line_format # All zeros, matching the format:
+                         % ( (0,) * (3 + 8 + 4) + ("0,0",) * (22 + 3)
+                             + (currencyIsoCodeData(0),)
+                             + ("0,0",) * 6 + (0,) * (2 + 3))
+                         + " // trailing 0s\n")
+    data_temp_file.write("};\n")
+
+    # StringData tables:
+    for data in (list_pattern_part_data, date_format_data,
+                 time_format_data, months_data, days_data,
+                 byte_unit_data, am_data, pm_data, currency_symbol_data,
+                 currency_display_name_data, currency_format_data,
+                 endonyms_data):
+        data_temp_file.write("\nstatic const ushort %s[] = {\n" % data.name)
+        data_temp_file.write(wrap_list(data.data))
+        data_temp_file.write("\n};\n")
+
+    data_temp_file.write("\n")
+
+    # Language name list
+    data_temp_file.write("static const char language_name_list[] =\n")
+    data_temp_file.write('"Default\\0"\n')
+    for key in language_map.keys():
+        if key == 0:
+            continue
+        data_temp_file.write('"' + language_map[key][0] + '\\0"\n')
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+
+    # Language name index
+    data_temp_file.write("static const quint16 language_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyLanguage\n")
+    index = 8
+    for key in language_map.keys():
+        if key == 0:
+            continue
+        language = language_map[key][0]
+        data_temp_file.write("%6d, // %s\n" % (index, language))
+        index += len(language) + 1
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
+
+    # Script name list
+    data_temp_file.write("static const char script_name_list[] =\n")
+    data_temp_file.write('"Default\\0"\n')
+    for key in script_map.keys():
+        if key == 0:
+            continue
+        data_temp_file.write('"' + script_map[key][0] + '\\0"\n')
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+
+    # Script name index
+    data_temp_file.write("static const quint16 script_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyScript\n")
+    index = 8
+    for key in script_map.keys():
+        if key == 0:
+            continue
+        script = script_map[key][0]
+        data_temp_file.write("%6d, // %s\n" % (index, script))
+        index += len(script) + 1
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
+
+    # Country name list
+    data_temp_file.write("static const char country_name_list[] =\n")
+    data_temp_file.write('"Default\\0"\n')
+    for key in country_map.keys():
+        if key == 0:
+            continue
+        data_temp_file.write('"' + country_map[key][0] + '\\0"\n')
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+
+    # Country name index
+    data_temp_file.write("static const quint16 country_name_index[] = {\n")
+    data_temp_file.write("     0, // AnyCountry\n")
+    index = 8
+    for key in country_map.keys():
+        if key == 0:
+            continue
+        country = country_map[key][0]
+        data_temp_file.write("%6d, // %s\n" % (index, country))
+        index += len(country) + 1
+    data_temp_file.write("};\n")
+
+    data_temp_file.write("\n")
+
+    # Language code list
+    data_temp_file.write("static const unsigned char language_code_list[] =\n")
+    for key in language_map.keys():
+        code = language_map[key][1]
+        if len(code) == 2:
+            code += r"\0"
+        data_temp_file.write('"%2s" // %s\n' % (code, language_map[key][0]))
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+
+    # Script code list
+    data_temp_file.write("static const unsigned char script_code_list[] =\n")
+    for key in script_map.keys():
+        code = script_map[key][1]
+        for i in range(4 - len(code)):
+            code += "\\0"
+        data_temp_file.write('"%2s" // %s\n' % (code, script_map[key][0]))
+    data_temp_file.write(";\n")
+
+    # Country code list
+    data_temp_file.write("static const unsigned char country_code_list[] =\n")
+    for key in country_map.keys():
+        code = country_map[key][1]
+        if len(code) == 2:
+            code += "\\0"
+        data_temp_file.write('"%2s" // %s\n' % (code, country_map[key][0]))
+    data_temp_file.write(";\n")
+
+    data_temp_file.write("\n")
+    data_temp_file.write(GENERATED_BLOCK_END)
+    s = qlocaledata_file.readline()
+    # skip until end of the old block
+    while s and s != GENERATED_BLOCK_END:
+        s = qlocaledata_file.readline()
+
+    s = qlocaledata_file.readline()
+    while s:
+        data_temp_file.write(s)
+        s = qlocaledata_file.readline()
+    data_temp_file.close()
+    qlocaledata_file.close()
+
+    os.remove(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h")
+    os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale_data_p.h")
+
+    # qlocale.h
+
+    (qlocaleh_temp_file, qlocaleh_temp_file_path) = tempfile.mkstemp("qlocale.h", dir=qtsrcdir)
+    qlocaleh_temp_file = os.fdopen(qlocaleh_temp_file, "w")
+    qlocaleh_file = open(qtsrcdir + "/src/corelib/tools/qlocale.h", "r")
+    s = qlocaleh_file.readline()
+    while s and s != GENERATED_BLOCK_START:
+        qlocaleh_temp_file.write(s)
+        s = qlocaleh_file.readline()
+    qlocaleh_temp_file.write(GENERATED_BLOCK_START)
+    qlocaleh_temp_file.write("// see qlocale_data_p.h for more info on generated data\n")
+
+    # Language enum
+    qlocaleh_temp_file.write("    enum Language {\n")
+    language = None
+    for key, value in language_map.items():
+        language = fixedLanguageName(value[0], dupes)
+        qlocaleh_temp_file.write("        " + language + " = " + str(key) + ",\n")
+
+    qlocaleh_temp_file.write("\n        " +
+                             ",\n        ".join('%s = %s' % pair
+                                                for pair in sorted(language_aliases.items())) +
+                             ",\n")
+    qlocaleh_temp_file.write("\n")
+    qlocaleh_temp_file.write("        LastLanguage = " + language + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    qlocaleh_temp_file.write("\n")
+
+    # Script enum
+    qlocaleh_temp_file.write("    enum Script {\n")
+    script = None
+    for key, value in script_map.items():
+        script = fixedScriptName(value[0], dupes)
+        qlocaleh_temp_file.write("        " + script + " = " + str(key) + ",\n")
+    qlocaleh_temp_file.write("\n        " +
+                             ",\n        ".join('%s = %s' % pair
+                                                for pair in sorted(script_aliases.items())) +
+                             ",\n")
+    qlocaleh_temp_file.write("\n")
+    qlocaleh_temp_file.write("        LastScript = " + script + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    # Country enum
+    qlocaleh_temp_file.write("    enum Country {\n")
+    country = None
+    for key, value in country_map.items():
+        country = fixedCountryName(value[0], dupes)
+        qlocaleh_temp_file.write("        " + country + " = " + str(key) + ",\n")
+    qlocaleh_temp_file.write("\n        " +
+                             ",\n        ".join('%s = %s' % pair
+                                                for pair in sorted(country_aliases.items())) +
+                             ",\n")
+    qlocaleh_temp_file.write("\n")
+    qlocaleh_temp_file.write("        LastCountry = " + country + "\n")
+    qlocaleh_temp_file.write("    };\n")
+
+    qlocaleh_temp_file.write(GENERATED_BLOCK_END)
+    s = qlocaleh_file.readline()
+    # skip until end of the old block
+    while s and s != GENERATED_BLOCK_END:
+        s = qlocaleh_file.readline()
+
+    s = qlocaleh_file.readline()
+    while s:
+        qlocaleh_temp_file.write(s)
+        s = qlocaleh_file.readline()
+    qlocaleh_temp_file.close()
+    qlocaleh_file.close()
+
+    os.remove(qtsrcdir + "/src/corelib/tools/qlocale.h")
+    os.rename(qlocaleh_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.h")
+
+    # qlocale.qdoc
+
+    (qlocaleqdoc_temp_file, qlocaleqdoc_temp_file_path) = tempfile.mkstemp("qlocale.qdoc", dir=qtsrcdir)
+    qlocaleqdoc_temp_file = os.fdopen(qlocaleqdoc_temp_file, "w")
+    qlocaleqdoc_file = open(qtsrcdir + "/src/corelib/tools/qlocale.qdoc", "r")
+    s = qlocaleqdoc_file.readline()
+    DOCSTRING = "    QLocale's data is based on Common Locale Data Repository "
+    while s:
+        if DOCSTRING in s:
+            qlocaleqdoc_temp_file.write(DOCSTRING + "v" + cldr_version + ".\n")
+        else:
+            qlocaleqdoc_temp_file.write(s)
+        s = qlocaleqdoc_file.readline()
+    qlocaleqdoc_temp_file.close()
+    qlocaleqdoc_file.close()
+
+    os.remove(qtsrcdir + "/src/corelib/tools/qlocale.qdoc")
+    os.rename(qlocaleqdoc_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.qdoc")
+
+if __name__ == "__main__":  # script entry point: call main() only when run directly, not on import
+    main()
diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp
new file mode 100644
index 0000000000..d380d01e09
--- /dev/null
+++ b/util/locale_database/testlocales/localemodel.cpp
@@ -0,0 +1,449 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "localemodel.h"
+
+#include <QLocale>
+#include <QDate>
+#include <qdebug.h>
+
+static const int g_model_cols = 6; // LocaleModel::data() rejects column indexes >= this; only the first g_model_cols - 1 columns read m_data_list
+
+struct LocaleListItem // one language/country id pair; LocaleModel::data() builds a QLocale from it
+{
+    int language; // numeric id, cast to QLocale::Language in LocaleModel::data()
+    int country;  // numeric id, cast to QLocale::Country in LocaleModel::data()
+};
+
+const LocaleListItem g_locale_list[] = { // { language, country } ids; model row r (r >= 2) shows entry r - 2. NOTE(review): ids are hard-coded - confirm they still match the QLocale enums
+    {      1,     0 }, // C/AnyCountry
+    {      3,    69 }, // Afan/Ethiopia
+    {      3,   111 }, // Afan/Kenya
+    {      4,    59 }, // Afar/Djibouti
+    {      4,    67 }, // Afar/Eritrea
+    {      4,    69 }, // Afar/Ethiopia
+    {      5,   195 }, // Afrikaans/SouthAfrica
+    {      5,   148 }, // Afrikaans/Namibia
+    {      6,     2 }, // Albanian/Albania
+    {      7,    69 }, // Amharic/Ethiopia
+    {      8,   186 }, // Arabic/SaudiArabia
+    {      8,     3 }, // Arabic/Algeria
+    {      8,    17 }, // Arabic/Bahrain
+    {      8,    64 }, // Arabic/Egypt
+    {      8,   103 }, // Arabic/Iraq
+    {      8,   109 }, // Arabic/Jordan
+    {      8,   115 }, // Arabic/Kuwait
+    {      8,   119 }, // Arabic/Lebanon
+    {      8,   122 }, // Arabic/LibyanArabJamahiriya
+    {      8,   145 }, // Arabic/Morocco
+    {      8,   162 }, // Arabic/Oman
+    {      8,   175 }, // Arabic/Qatar
+    {      8,   201 }, // Arabic/Sudan
+    {      8,   207 }, // Arabic/SyrianArabRepublic
+    {      8,   216 }, // Arabic/Tunisia
+    {      8,   223 }, // Arabic/UnitedArabEmirates
+    {      8,   237 }, // Arabic/Yemen
+    {      9,    11 }, // Armenian/Armenia
+    {     10,   100 }, // Assamese/India
+    {     12,    15 }, // Azerbaijani/Azerbaijan
+    {     14,   197 }, // Basque/Spain
+    {     15,    18 }, // Bengali/Bangladesh
+    {     15,   100 }, // Bengali/India
+    {     16,    25 }, // Bhutani/Bhutan
+    {     20,    33 }, // Bulgarian/Bulgaria
+    {     22,    20 }, // Byelorussian/Belarus
+    {     23,    36 }, // Cambodian/Cambodia
+    {     24,   197 }, // Catalan/Spain
+    {     25,    44 }, // Chinese/China
+    {     25,    97 }, // Chinese/HongKong
+    {     25,   126 }, // Chinese/Macau
+    {     25,   190 }, // Chinese/Singapore
+    {     25,   208 }, // Chinese/Taiwan
+    {     27,    54 }, // Croatian/Croatia
+    {     28,    57 }, // Czech/CzechRepublic
+    {     29,    58 }, // Danish/Denmark
+    {     30,   151 }, // Dutch/Netherlands
+    {     30,    21 }, // Dutch/Belgium
+    {     31,   225 }, // English/UnitedStates
+    {     31,     4 }, // English/AmericanSamoa
+    {     31,    13 }, // English/Australia
+    {     31,    21 }, // English/Belgium
+    {     31,    22 }, // English/Belize
+    {     31,    28 }, // English/Botswana
+    {     31,    38 }, // English/Canada
+    {     31,    89 }, // English/Guam
+    {     31,    97 }, // English/HongKong
+    {     31,   100 }, // English/India
+    {     31,   104 }, // English/Ireland
+    {     31,   107 }, // English/Jamaica
+    {     31,   133 }, // English/Malta
+    {     31,   134 }, // English/MarshallIslands
+    {     31,   148 }, // English/Namibia
+    {     31,   154 }, // English/NewZealand
+    {     31,   160 }, // English/NorthernMarianaIslands
+    {     31,   163 }, // English/Pakistan
+    {     31,   170 }, // English/Philippines
+    {     31,   190 }, // English/Singapore
+    {     31,   195 }, // English/SouthAfrica
+    {     31,   215 }, // English/TrinidadAndTobago
+    {     31,   224 }, // English/UnitedKingdom
+    {     31,   226 }, // English/UnitedStatesMinorOutlyingIslands
+    {     31,   234 }, // English/USVirginIslands
+    {     31,   240 }, // English/Zimbabwe
+    {     33,    68 }, // Estonian/Estonia
+    {     34,    71 }, // Faroese/FaroeIslands
+    {     36,    73 }, // Finnish/Finland
+    {     37,    74 }, // French/France
+    {     37,    21 }, // French/Belgium
+    {     37,    38 }, // French/Canada
+    {     37,   125 }, // French/Luxembourg
+    {     37,   142 }, // French/Monaco
+    {     37,   206 }, // French/Switzerland
+    {     40,   197 }, // Galician/Spain
+    {     41,    81 }, // Georgian/Georgia
+    {     42,    82 }, // German/Germany
+    {     42,    14 }, // German/Austria
+    {     42,    21 }, // German/Belgium
+    {     42,   123 }, // German/Liechtenstein
+    {     42,   125 }, // German/Luxembourg
+    {     42,   206 }, // German/Switzerland
+    {     43,    85 }, // Greek/Greece
+    {     43,    56 }, // Greek/Cyprus
+    {     44,    86 }, // Greenlandic/Greenland
+    {     46,   100 }, // Gujarati/India
+    {     47,    83 }, // Hausa/Ghana
+    {     47,   156 }, // Hausa/Niger
+    {     47,   157 }, // Hausa/Nigeria
+    {     48,   105 }, // Hebrew/Israel
+    {     49,   100 }, // Hindi/India
+    {     50,    98 }, // Hungarian/Hungary
+    {     51,    99 }, // Icelandic/Iceland
+    {     52,   101 }, // Indonesian/Indonesia
+    {     57,   104 }, // Irish/Ireland
+    {     58,   106 }, // Italian/Italy
+    {     58,   206 }, // Italian/Switzerland
+    {     59,   108 }, // Japanese/Japan
+    {     61,   100 }, // Kannada/India
+    {     63,   110 }, // Kazakh/Kazakhstan
+    {     64,   179 }, // Kinyarwanda/Rwanda
+    {     65,   116 }, // Kirghiz/Kyrgyzstan
+    {     66,   114 }, // Korean/RepublicOfKorea
+    {     67,   102 }, // Kurdish/Iran
+    {     67,   103 }, // Kurdish/Iraq
+    {     67,   207 }, // Kurdish/SyrianArabRepublic
+    {     67,   217 }, // Kurdish/Turkey
+    {     69,   117 }, // Laothian/Lao
+    {     71,   118 }, // Latvian/Latvia
+    {     72,    49 }, // Lingala/DemocraticRepublicOfCongo
+    {     72,    50 }, // Lingala/PeoplesRepublicOfCongo
+    {     73,   124 }, // Lithuanian/Lithuania
+    {     74,   127 }, // Macedonian/Macedonia
+    {     76,   130 }, // Malay/Malaysia
+    {     76,    32 }, // Malay/BruneiDarussalam
+    {     77,   100 }, // Malayalam/India
+    {     78,   133 }, // Maltese/Malta
+    {     80,   100 }, // Marathi/India
+    {     82,   143 }, // Mongolian/Mongolia
+    {     84,   150 }, // Nepali/Nepal
+    {     85,   161 }, // Norwegian/Norway
+    {     87,   100 }, // Oriya/India
+    {     88,     1 }, // Pashto/Afghanistan
+    {     89,   102 }, // Persian/Iran
+    {     89,     1 }, // Persian/Afghanistan
+    {     90,   172 }, // Polish/Poland
+    {     91,   173 }, // Portuguese/Portugal
+    {     91,    30 }, // Portuguese/Brazil
+    {     92,   100 }, // Punjabi/India
+    {     92,   163 }, // Punjabi/Pakistan
+    {     95,   177 }, // Romanian/Romania
+    {     96,   178 }, // Russian/RussianFederation
+    {     96,   222 }, // Russian/Ukraine
+    {     99,   100 }, // Sanskrit/India
+    {    100,   241 }, // Serbian/SerbiaAndMontenegro
+    {    100,    27 }, // Serbian/BosniaAndHerzegowina
+    {    100,   238 }, // Serbian/Yugoslavia
+    {    101,   241 }, // SerboCroatian/SerbiaAndMontenegro
+    {    101,    27 }, // SerboCroatian/BosniaAndHerzegowina
+    {    101,   238 }, // SerboCroatian/Yugoslavia
+    {    102,   195 }, // Sesotho/SouthAfrica
+    {    103,   195 }, // Setswana/SouthAfrica
+    {    107,   195 }, // Siswati/SouthAfrica
+    {    108,   191 }, // Slovak/Slovakia
+    {    109,   192 }, // Slovenian/Slovenia
+    {    110,   194 }, // Somali/Somalia
+    {    110,    59 }, // Somali/Djibouti
+    {    110,    69 }, // Somali/Ethiopia
+    {    110,   111 }, // Somali/Kenya
+    {    111,   197 }, // Spanish/Spain
+    {    111,    10 }, // Spanish/Argentina
+    {    111,    26 }, // Spanish/Bolivia
+    {    111,    43 }, // Spanish/Chile
+    {    111,    47 }, // Spanish/Colombia
+    {    111,    52 }, // Spanish/CostaRica
+    {    111,    61 }, // Spanish/DominicanRepublic
+    {    111,    63 }, // Spanish/Ecuador
+    {    111,    65 }, // Spanish/ElSalvador
+    {    111,    90 }, // Spanish/Guatemala
+    {    111,    96 }, // Spanish/Honduras
+    {    111,   139 }, // Spanish/Mexico
+    {    111,   155 }, // Spanish/Nicaragua
+    {    111,   166 }, // Spanish/Panama
+    {    111,   168 }, // Spanish/Paraguay
+    {    111,   169 }, // Spanish/Peru
+    {    111,   174 }, // Spanish/PuertoRico
+    {    111,   225 }, // Spanish/UnitedStates
+    {    111,   227 }, // Spanish/Uruguay
+    {    111,   231 }, // Spanish/Venezuela
+    {    113,   111 }, // Swahili/Kenya
+    {    113,   210 }, // Swahili/Tanzania
+    {    114,   205 }, // Swedish/Sweden
+    {    114,    73 }, // Swedish/Finland
+    {    116,   209 }, // Tajik/Tajikistan
+    {    117,   100 }, // Tamil/India
+    {    118,   178 }, // Tatar/RussianFederation
+    {    119,   100 }, // Telugu/India
+    {    120,   211 }, // Thai/Thailand
+    {    122,    67 }, // Tigrinya/Eritrea
+    {    122,    69 }, // Tigrinya/Ethiopia
+    {    124,   195 }, // Tsonga/SouthAfrica
+    {    125,   217 }, // Turkish/Turkey
+    {    129,   222 }, // Ukrainian/Ukraine
+    {    130,   100 }, // Urdu/India
+    {    130,   163 }, // Urdu/Pakistan
+    {    131,   228 }, // Uzbek/Uzbekistan
+    {    131,     1 }, // Uzbek/Afghanistan
+    {    132,   232 }, // Vietnamese/VietNam
+    {    134,   224 }, // Welsh/UnitedKingdom
+    {    136,   195 }, // Xhosa/SouthAfrica
+    {    138,   157 }, // Yoruba/Nigeria
+    {    140,   195 }, // Zulu/SouthAfrica
+    {    141,   161 }, // Nynorsk/Norway
+    {    142,    27 }, // Bosnian/BosniaAndHerzegowina
+    {    143,   131 }, // Divehi/Maldives
+    {    144,   224 }, // Manx/UnitedKingdom
+    {    145,   224 }, // Cornish/UnitedKingdom
+    {    146,    83 }, // Akan/Ghana
+    {    147,   100 }, // Konkani/India
+    {    148,    83 }, // Ga/Ghana
+    {    149,   157 }, // Igbo/Nigeria
+    {    150,   111 }, // Kamba/Kenya
+    {    151,   207 }, // Syriac/SyrianArabRepublic
+    {    152,    67 }, // Blin/Eritrea
+    {    153,    67 }, // Geez/Eritrea
+    {    153,    69 }, // Geez/Ethiopia
+    {    154,   157 }, // Koro/Nigeria
+    {    155,    69 }, // Sidamo/Ethiopia
+    {    156,   157 }, // Atsam/Nigeria
+    {    157,    67 }, // Tigre/Eritrea
+    {    158,   157 }, // Jju/Nigeria
+    {    159,   106 }, // Friulian/Italy
+    {    160,   195 }, // Venda/SouthAfrica
+    {    161,    83 }, // Ewe/Ghana
+    {    161,   212 }, // Ewe/Togo
+    {    163,   225 }, // Hawaiian/UnitedStates
+    {    164,   157 }, // Tyap/Nigeria
+    {    165,   129 }, // Chewa/Malawi
+};
+static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]); // number of entries in g_locale_list
+
+LocaleModel::LocaleModel(QObject *parent) // seeds one sample value per data column; data() reads m_data_list by column index
+    : QAbstractItemModel(parent)
+{
+    m_data_list.append(1234.5678); // column 0: sample number, read back with toDouble()
+    m_data_list.append(QDate::currentDate()); // column 1: date, shown with QLocale::LongFormat
+    m_data_list.append(QDate::currentDate()); // column 2: date, shown with QLocale::ShortFormat
+    m_data_list.append(QTime::currentTime()); // column 3: time, shown with QLocale::LongFormat
+    m_data_list.append(QTime::currentTime()); // column 4: time, shown with QLocale::ShortFormat
+}
+
+QVariant LocaleModel::data(const QModelIndex &index, int role) const // Cell content: row 0 is the raw sample value, every other row shows it formatted by a locale.
+{
+    if (!index.isValid()
+        || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole // && binds tighter than ||: only these three roles are served
+        || index.column() >= g_model_cols
+        || index.row() >= g_locale_list_count + 2) // two extra rows precede the g_locale_list entries
+        return QVariant();
+
+    QVariant data;
+    if (index.column() < g_model_cols - 1) // the last column has no entry in m_data_list
+        data = m_data_list.at(index.column());
+
+    if (index.row() == 0) { // input row: hand back the typed sample value itself
+        if (role == Qt::ToolTipRole)
+            return QVariant();
+        switch (index.column()) {
+            case 0:
+                return data.toDouble();
+            case 1:
+                return data.toDate();
+            case 2:
+                return data.toDate();
+            case 3:
+                return data.toTime();
+            case 4:
+                return data.toTime();
+            case 5:
+                return QVariant();
+            default:
+                break;
+        }
+    } else {
+        QLocale locale;
+        if (index.row() == 1) { // row 1 uses the system locale
+            locale = QLocale::system();
+        } else {
+            LocaleListItem item = g_locale_list[index.row() - 2]; // rows from 2 on map onto g_locale_list
+            locale = QLocale((QLocale::Language)item.language, (QLocale::Country)item.country);
+        }
+
+        switch (index.column()) { // tooltips show the locale's format string where one exists
+            case 0:
+                if (role == Qt::ToolTipRole)
+                    return QVariant();
+                return locale.toString(data.toDouble());
+            case 1:
+                if (role == Qt::ToolTipRole)
+                    return locale.dateFormat(QLocale::LongFormat);
+                return locale.toString(data.toDate(), QLocale::LongFormat);
+            case 2:
+                if (role == Qt::ToolTipRole)
+                    return locale.dateFormat(QLocale::ShortFormat);
+                return locale.toString(data.toDate(), QLocale::ShortFormat);
+            case 3:
+                if (role == Qt::ToolTipRole)
+                    return locale.timeFormat(QLocale::LongFormat);
+                return locale.toString(data.toTime(), QLocale::LongFormat);
+            case 4:
+                if (role == Qt::ToolTipRole)
+                    return locale.timeFormat(QLocale::ShortFormat);
+                return locale.toString(data.toTime(), QLocale::ShortFormat);
+            case 5:
+                if (role == Qt::ToolTipRole)
+                    return QVariant();
+                return locale.name();
+            default:
+                break;
+        }
+    }
+
+    return QVariant(); // reached only for a column not handled by the switches above
+}
+
+QVariant LocaleModel::headerData(int section, Qt::Orientation orientation, int role) const // Column titles horizontally; "Input", "System" or "Language/Country" vertically.
+{
+    if (role != Qt::DisplayRole)
+        return QVariant();
+
+    if (orientation == Qt::Horizontal) {
+        switch (section) {
+            case 0:
+                return QLatin1String("Double");
+            case 1:
+                return QLatin1String("Long Date");
+            case 2:
+                return QLatin1String("Short Date");
+            case 3:
+                return QLatin1String("Long Time");
+            case 4:
+                return QLatin1String("Short Time");
+            case 5:
+                return QLatin1String("Name");
+            default:
+                break;
+        }
+    } else {
+        if (section >= g_locale_list_count + 2) // past the last row
+            return QVariant();
+        if (section == 0) {
+            return QLatin1String("Input");
+        } else if (section == 1) {
+            return QLatin1String("System");
+        } else {
+            LocaleListItem item = g_locale_list[section - 2]; // rows from 2 on map onto g_locale_list
+            return QLocale::languageToString((QLocale::Language)item.language)
+                    + QLatin1Char('/')
+                    + QLocale::countryToString((QLocale::Country)item.country);
+        }
+    }
+
+    return QVariant(); // horizontal section without a title
+}
+
+QModelIndex LocaleModel::index(int row, int column, // Creates an index for a top-level cell; invalid for a valid parent or an out-of-range position.
+                    const QModelIndex &parent) const
+{
+    if (parent.isValid() // no item has children
+        || row >= g_locale_list_count + 2
+        || column >= g_model_cols)
+        return QModelIndex();
+
+    return createIndex(row, column);
+}
+
+QModelIndex LocaleModel::parent(const QModelIndex&) const // Always invalid: every item is top-level.
+{
+    return QModelIndex();
+}
+
+int LocaleModel::columnCount(const QModelIndex&) const // Returns g_model_cols regardless of the parent passed in.
+{
+    return g_model_cols;
+}
+
+int LocaleModel::rowCount(const QModelIndex &parent) const // Rows under the root: one per g_locale_list entry plus two; 0 under any valid parent.
+{
+    if (parent.isValid())
+        return 0;
+    return g_locale_list_count + 2; // the two extra rows are headed "Input" and "System" by headerData()
+}
+
+Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const // Row 0 is editable except in its last column, which gets no flags; other rows keep the base flags.
+{
+    if (!index.isValid())
+        return 0;
+    if (index.row() == 0 && index.column() == g_model_cols - 1) // last column of the input row has no sample value
+        return 0;
+    if (index.row() == 0)
+        return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
+    return QAbstractItemModel::flags(index);
+}
+
+bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int role) // Stores an edited sample value from row 0; returns false when the edit is rejected.
+{
+    if (!index.isValid()
+        || index.row() != 0 // only the input row holds editable data
+        || index.column() >= g_model_cols - 1 // the last column has no entry in m_data_list
+        || role != Qt::EditRole
+        || m_data_list.at(index.column()).type() != value.type()) // the replacement must keep the column's QVariant type
+        return false;
+
+    m_data_list[index.column()] = value;
+    emit dataChanged(index, // every row of this column is derived from the new value, the edited cell included
+            createIndex(g_locale_list_count + 1, index.column())); // last row is rowCount() - 1, i.e. g_locale_list_count + 1
+
+    return true;
+}
diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h
new file mode 100644
index 0000000000..b24fc5f4c6
--- /dev/null
+++ b/util/locale_database/testlocales/localemodel.h
@@ -0,0 +1,56 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef LOCALEMODEL_H
+#define LOCALEMODEL_H
+
+#include <QAbstractItemModel>
+#include <QList>
+#include <QVariant>
+
+class LocaleModel : public QAbstractItemModel // Flat table model with one row per locale and one column per formatted sample value.
+{
+    Q_OBJECT
+public:
+    LocaleModel(QObject *parent = 0);
+
+    virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
+    virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
+    virtual QModelIndex index(int row, int column,
+                                const QModelIndex &parent = QModelIndex()) const;
+    virtual QModelIndex parent(const QModelIndex &index) const;
+    virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
+    virtual QVariant headerData(int section, Qt::Orientation orientation,
+                                int role = Qt::DisplayRole ) const;
+    virtual Qt::ItemFlags flags(const QModelIndex &index) const;
+    virtual bool setData(const QModelIndex &index, const QVariant &value,
+                            int role = Qt::EditRole);
+private:
+    QList<QVariant> m_data_list; // sample values, indexed by column
+};
+
+#endif // LOCALEMODEL_H
diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp
new file mode 100644
index 0000000000..3ff7f73a98
--- /dev/null
+++ b/util/locale_database/testlocales/localewidget.cpp
@@ -0,0 +1,76 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include <QTableView>
+#include <QVBoxLayout>
+#include <QItemDelegate>
+#include <QItemEditorFactory>
+#include <QDoubleSpinBox>
+
+#include "localewidget.h"
+#include "localemodel.h"
+
+class DoubleEditorCreator : public QItemEditorCreatorBase // Creates the spin-box editor used for double values.
+{
+public:
+    QWidget *createWidget(QWidget *parent) const { // returns a new QDoubleSpinBox child of parent
+        QDoubleSpinBox *w = new QDoubleSpinBox(parent);
+        w->setDecimals(4); // four decimals, matching the 1234.5678 sample value
+        w->setRange(-10000.0, 10000.0);
+        return w;
+    }
+    virtual QByteArray valuePropertyName() const { // name of the editor property that carries the edited value
+        return QByteArray("value");
+    }
+};
+
+class EditorFactory : public QItemEditorFactory // Item editor factory that edits double values with DoubleEditorCreator's spin box.
+{
+public:
+    EditorFactory() {
+        // The factory takes ownership of registered creators and deletes them, so pass a heap object, never a static.
+        registerEditor(QVariant::Double, new DoubleEditorCreator);
+    }
+};
+
+LocaleWidget::LocaleWidget(QWidget *parent) // Builds a margin-less QTableView over a LocaleModel, with a custom editor for doubles.
+    : QWidget(parent)
+{
+    m_model = new LocaleModel(this);
+    m_view = new QTableView(this);
+
+    QItemDelegate *delegate = new QItemDelegate(m_view); // the view's default delegate is a QStyledItemDelegate, so casting it to QItemDelegate would yield null
+    static EditorFactory editor_factory; // one factory shared by all LocaleWidget instances
+    delegate->setItemEditorFactory(&editor_factory);
+    m_view->setItemDelegate(delegate); // the view does not take ownership; the delegate is parented to m_view instead
+
+    m_view->setModel(m_model);
+
+    QVBoxLayout *layout = new QVBoxLayout(this);
+    layout->setContentsMargins(0, 0, 0, 0); // replaces the obsolete QLayout::setMargin(0)
+    layout->addWidget(m_view);
+}
diff --git a/util/locale_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h
new file mode 100644
index 0000000000..896a6e5229
--- /dev/null
+++ b/util/locale_database/testlocales/localewidget.h
@@ -0,0 +1,46 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef LOCALEWIDGET_H
+#define LOCALEWIDGET_H
+
+#include <QWidget>
+
+class LocaleModel;
+class QTableView;
+
+class LocaleWidget : public QWidget // Widget showing a LocaleModel in a table view.
+{
+    Q_OBJECT
+public:
+    LocaleWidget(QWidget *parent = 0);
+private:
+    LocaleModel *m_model; // created in the constructor with this widget as parent
+    QTableView *m_view; // created in the constructor with this widget as parent
+};
+
+#endif // LOCALEWIDGET_H
diff --git a/util/locale_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp
new file mode 100644
index 0000000000..0c3c45f989
--- /dev/null
+++ b/util/locale_database/testlocales/main.cpp
@@ -0,0 +1,38 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include <QApplication>
+
+#include "localewidget.h"
+
+int main(int argc, char *argv[]) // Shows a LocaleWidget and runs the event loop; returns the loop's exit code.
+{
+    QApplication app(argc, argv);
+    LocaleWidget wgt;
+    wgt.show();
+    return app.exec();
+}
diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro
new file mode 100644
index 0000000000..a9a6247f96
--- /dev/null
+++ b/util/locale_database/testlocales/testlocales.pro
@@ -0,0 +1,4 @@
+TARGET = testlocales
+CONFIG += debug
+SOURCES += localemodel.cpp  localewidget.cpp  main.cpp
+HEADERS += localemodel.h  localewidget.h
+\ No newline at end of file
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py
new file mode 100644
index 0000000000..218135d7a7
--- /dev/null
+++ b/util/locale_database/xpathlite.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python
+#############################################################################
+##
+## Copyright (C) 2016 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+import sys
+import os
+import xml.dom.minidom
+
+class DraftResolution:  # Wraps a draft-status name so that statuses can be compared as integers.
+    # See http://www.unicode.org/cldr/process.html for description
+    unconfirmed = 'unconfirmed'
+    provisional = 'provisional'
+    contributed = 'contributed'
+    approved = 'approved'
+    _values = { unconfirmed : 1, provisional : 2, contributed : 3, approved : 4 }  # rank per status name; approved ranks highest
+    def __init__(self, resolution):  # resolution: a status name; it is looked up in _values by toInt()
+        self.resolution = resolution
+    def toInt(self):  # rank of the stored status; raises KeyError for a name missing from _values
+        return DraftResolution._values[self.resolution]
+
+class Error:
+    def __init__(self, msg):
+        self.msg = msg
+    def __str__(self):
+        return self.msg
+
+doc_cache = {}
+def parseDoc(file):
+    if not doc_cache.has_key(file):
+        doc_cache[file] = xml.dom.minidom.parse(file)
+    return doc_cache[file]
+
+def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
+    for node in parent.childNodes:
+        if node.nodeType != node.ELEMENT_NODE:
+            continue
+        if node.nodeName != tag_name:
+            continue
+        if arg_value:
+            if not node.attributes.has_key(arg_name):
+                continue
+            if node.attributes[arg_name].nodeValue != arg_value:
+                continue
+        if draft:
+            if not node.attributes.has_key('draft'):
+                # if draft is not specified then it's approved
+                return node
+            value = node.attributes['draft'].nodeValue
+            value = DraftResolution(value).toInt()
+            exemplar = DraftResolution(draft).toInt()
+            if exemplar > value:
+                continue
+        return node
+    return False
+
+def findTagsInFile(file, path):  # Follow `path` from the document element of `file`; return None if a step is missing, else a list of [nodeName, attribute items].
+    doc = parseDoc(file)
+
+    elt = doc.documentElement
+    tag_spec_list = path.split("/")  # each step is "tag", "tag[value]" or "tag[name=value]"
+    last_entry = None  # NOTE(review): not read anywhere in this function
+    for i in range(len(tag_spec_list)):
+        tag_spec = tag_spec_list[i]
+        tag_name = tag_spec
+        arg_name = 'type'  # attribute matched when the brackets hold only a value
+        arg_value = ''
+        left_bracket = tag_spec.find('[')
+        if left_bracket != -1:
+            tag_name = tag_spec[:left_bracket]
+            arg_value = tag_spec[left_bracket+1:-1].split("=")  # drop the closing bracket, then split "name=value"
+            if len(arg_value) == 2:
+                arg_name = arg_value[0]
+                arg_value = arg_value[1]
+            else:
+                arg_value = arg_value[0]
+        elt = findChild(elt, tag_name, arg_name, arg_value)
+        if not elt:
+            return None
+    ret = []
+    if elt.childNodes:  # report each child that carries attributes
+        for node in elt.childNodes:
+            if node.attributes:
+                element = [node.nodeName, None]
+                element[1] = node.attributes.items()
+                ret.append(element)
+    else:  # no children: report the element itself if it has attributes
+        if elt.attributes:
+            element = [elt.nodeName, None]
+            element[1] = elt.attributes.items()
+            ret.append(element)
+    return ret
+
+def _findEntryInFile(file, path, draft=None, attribute=None):
+    doc = parseDoc(file)
+
+    elt = doc.documentElement
+    tag_spec_list = path.split("/")
+    last_entry = None
+    for i in range(len(tag_spec_list)):
+        tag_spec = tag_spec_list[i]
+        tag_name = tag_spec
+        arg_name = 'type'
+        arg_value = ''
+        left_bracket = tag_spec.find('[')
+        if left_bracket != -1:
+            tag_name = tag_spec[:left_bracket]
+            arg_value = tag_spec[left_bracket+1:-1].split("=")
+            if len(arg_value) == 2:
+                arg_name = arg_value[0].replace("@", "").replace("'", "")
+                arg_value = arg_value[1]
+            else:
+                arg_value = arg_value[0]
+        alias = findChild(elt, 'alias')
+        if alias and alias.attributes['source'].nodeValue == 'locale':
+            path = alias.attributes['path'].nodeValue
+            aliaspath = tag_spec_list[:i] + path.split("/")
+            def resolve(x, y):
+                if y == '..':
+                    return x[:-1]
+                return x + [y]
+            # resolve all dot-dot parts of the path
+            aliaspath = reduce(resolve, aliaspath, [])
+            # remove attribute specification that our xpathlite doesnt support
+            aliaspath = map(lambda x: x.replace("@type=", "").replace("'", ""), aliaspath)
+            # append the remaining path
+            aliaspath = aliaspath + tag_spec_list[i:]
+            aliaspath = "/".join(aliaspath)
+            # "locale" aliases are special - we need to start lookup from scratch
+            return (None, aliaspath)
+        elt = findChild(elt, tag_name, arg_name, arg_value, draft)
+        if not elt:
+            return ("", None)
+    if attribute is not None:
+        if elt.attributes.has_key(attribute):
+            return (elt.attributes[attribute].nodeValue, None)
+        return (None, None)
+    try:
+        return (elt.firstChild.nodeValue, None)
+    except:
+        pass
+    return (None, None)
+
+def findAlias(file):
+    doc = parseDoc(file)
+
+    alias_elt = findChild(doc.documentElement, "alias")
+    if not alias_elt:
+        return False
+    if not alias_elt.attributes.has_key('source'):
+        return False
+    return alias_elt.attributes['source'].nodeValue
+
+lookup_chain_cache = {}
+parent_locales = {}
+def _fixedLookupChain(dirname, name):
+    if lookup_chain_cache.has_key(name):
+        return lookup_chain_cache[name]
+
+    # see http://www.unicode.org/reports/tr35/#Parent_Locales
+    if not parent_locales:
+        for ns in findTagsInFile(dirname + "/../supplemental/supplementalData.xml", "parentLocales"):
+            tmp = {}
+            parent_locale = ""
+            for data in ns[1:][0]: # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]]
+                tmp[data[0]] = data[1]
+                if data[0] == u"parent":
+                    parent_locale = data[1]
+            parent_locales[parent_locale] = tmp[u"locales"].split(" ")
+
+    items = name.split("_")
+    # split locale name into items and iterate through them from back to front
+    # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az]
+    items = list(reversed(map(lambda x: "_".join(items[:x+1]), range(len(items)))))
+
+    for i in range(len(items)):
+        item = items[i]
+        for parent_locale in parent_locales.keys():
+            for locale in parent_locales[parent_locale]:
+                if item == locale:
+                    if parent_locale == u"root":
+                        items = items[:i+1]
+                    else:
+                        items = items[:i+1] + _fixedLookupChain(dirname, parent_locale)
+                    lookup_chain_cache[name] = items
+                    return items
+
+    lookup_chain_cache[name] = items
+    return items
+
+def _findEntry(base, path, draft=None, attribute=None):  # Search the lookup chain of locale `base` for `path`; return the first non-empty result, else None.
+    if base.endswith(".xml"):
+        base = base[:-4]
+    (dirname, filename) = os.path.split(base)  # filename is the locale name
+
+    items = _fixedLookupChain(dirname, filename)
+    for item in items:
+        file = dirname + "/" + item + ".xml"
+        if os.path.isfile(file):  # chain entries without a file are skipped
+            alias = findAlias(file)
+            if alias:
+                # if alias is found we should follow it and stop processing current file
+                # see http://www.unicode.org/reports/tr35/#Common_Elements
+                aliasfile = os.path.dirname(file) + "/" + alias + ".xml"
+                if not os.path.isfile(aliasfile):
+                    raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias))
+                # found an alias, recurse into parsing it
+                result = _findEntry(aliasfile, path, draft, attribute)
+                return result
+            (result, aliaspath) = _findEntryInFile(file, path, draft, attribute)
+            if aliaspath:
+                # start lookup again because of the alias source="locale"
+                return _findEntry(base, aliaspath, draft, attribute)
+            if result:  # empty or missing values fall through to the next file in the chain
+                return result
+    return None
+
+def findEntry(base, path, draft=None, attribute=None):  # Resolve `path` for locale `base`, falling back to root.xml; raises Error when the key cannot be found.
+    file = base  # NOTE(review): `file` is assigned here and below but not read later in this function
+    if base.endswith(".xml"):
+        file = base
+        base = base[:-4]
+    else:
+        file = base + ".xml"
+    (dirname, filename) = os.path.split(base)  # filename is the locale name, used in the error message
+
+    result = None
+    while path:
+        result = _findEntry(base, path, draft, attribute)
+        if result:
+            return result
+        (result, aliaspath) = _findEntryInFile(dirname + "/root.xml", path, draft, attribute)  # last resort: root.xml in the same directory
+        if result:
+            return result
+        if not aliaspath:
+            raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path))
+        path = aliaspath  # root.xml redirected the lookup; retry with the aliased path
+
+    return result
+
author	Edward Welbourne <edward.welbourne@qt.io>	2019-05-16 14:31:54 +0200
committer	Edward Welbourne <edward.welbourne@qt.io>	2019-05-20 20:42:10 +0200
commit	248b6756da0d31c58672c0e356c3ec16e9088234 (patch)
tree	4869c1ca3d1415a9b259f4afbc61a702fee6812b /util/locale_database
parent	cf909f0ef609c4581ebbe2f81c7ae0c5e43d653f (diff)