summary refs log tree commit diff stats
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rwxr-xr-x util/edid/qedidvendortable.py 123
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 638
-rwxr-xr-x [-rw-r--r--] util/local_database/cldr2qtimezone.py 74
-rw-r--r-- util/local_database/enumdata.py 2
-rw-r--r-- util/local_database/localexml.py 239
-rwxr-xr-x util/local_database/qlocalexml2cpp.py 542
6 files changed, 819 insertions, 799 deletions
diff --git a/util/edid/qedidvendortable.py b/util/edid/qedidvendortable.py
new file mode 100755
index 0000000000..6d30f3a60d
--- /dev/null
+++ b/util/edid/qedidvendortable.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+#############################################################################
+##
+## Copyright (C) 2017 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the plugins of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+import urllib.request
+
+url = 'https://git.fedorahosted.org/cgit/hwdata.git/plain/pnp.ids'
+
+copyright = """/****************************************************************************
+**
+** Copyright (C) 2017 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the plugins of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+"""
+
+notice = """/*
+ * This lookup table was generated from {}
+ *
+ * Do not change directly this file, instead edit the
+ * qtbase/util/edid/qedidvendortable.py script and regenerate this file.
+ */""".format(url)
+
+header = """
+#ifndef QEDIDVENDORTABLE_P_H
+#define QEDIDVENDORTABLE_P_H
+
+QT_BEGIN_NAMESPACE
+
+typedef struct VendorTable {
+ const char id[4];
+ const char name[%d];
+} VendorTable;
+
+static const struct VendorTable q_edidVendorTable[] = {"""
+
+footer = """};
+
+QT_END_NAMESPACE
+
+#endif // QEDIDVENDORTABLE_P_H"""
+
+vendors = {}
+
+max_vendor_length = 0
+
+response = urllib.request.urlopen(url)
+data = response.read().decode('utf-8')
+for line in data.split('\n'):
+ l = line.split()
+ if line.startswith('#'):
+ continue
+ elif len(l) == 0:
+ continue
+ else:
+ pnp_id = l[0].upper()
+ vendors[pnp_id] = ' '.join(l[1:])
+ if len(vendors[pnp_id]) > max_vendor_length:
+ max_vendor_length = len(vendors[pnp_id])
+
+print(copyright)
+print(notice)
+print(header % (max_vendor_length + 1))
+for pnp_id in vendors.keys():
+ print(' { "%s", "%s" },' % (pnp_id, vendors[pnp_id]))
+print(footer)
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 41cfafab0d..58ea21edab 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
#############################################################################
##
-## Copyright (C) 2016 The Qt Company Ltd.
+## Copyright (C) 2017 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -26,20 +26,31 @@
## $QT_END_LICENSE$
##
#############################################################################
+"""Convert CLDR data to qLocaleXML
+
+The CLDR data can be downloaded from CLDR_, which has a sub-directory
+for each version; you need the ``core.zip`` file for your version of
+choice (typically the latest). This script has had updates to cope up
+to v29; for later versions, we may need adaptations. Unpack the
+downloaded ``core.zip`` and check it has a common/main/ sub-directory:
+pass the path of that sub-directory to this script as its single
+command-line argument. Save its standard output (but not error) to a
+file for later processing by ``./qlocalexml2cpp.py``
+
+.. _CLDR: ftp://unicode.org/Public/cldr/
+"""
import os
import sys
+import re
+
import enumdata
import xpathlite
-from xpathlite import DraftResolution
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
from dateconverter import convert_date
-from xml.sax.saxutils import escape, unescape
-import re
+from localexml import Locale
-findAlias = xpathlite.findAlias
-findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
-findTagsInFile = xpathlite.findTagsInFile
def parse_number_format(patterns, data):
# this is a very limited parsing of the number format for currency only.
@@ -72,42 +83,49 @@ def parse_number_format(patterns, data):
return result
def parse_list_pattern_part_format(pattern):
- # this is a very limited parsing of the format for list pattern part only.
- result = ""
- result = pattern.replace("{0}", "%1")
- result = result.replace("{1}", "%2")
- result = result.replace("{2}", "%3")
- return result
-
-def ordStr(c):
- if len(c) == 1:
- return str(ord(c))
- raise xpathlite.Error("Unable to handle value \"%s\"" % addEscapes(c))
- return "##########"
-
-# the following functions are supposed to fix the problem with QLocale
-# returning a character instead of strings for QLocale::exponential()
-# and others. So we fallback to default values in these cases.
-def fixOrdStrMinus(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('-'))
-def fixOrdStrPlus(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('+'))
-def fixOrdStrExp(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('e'))
-def fixOrdStrPercent(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord('%'))
-def fixOrdStrList(c):
- if len(c) == 1:
- return str(ord(c))
- return str(ord(';'))
+ # This is a very limited parsing of the format for list pattern part only.
+ return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
+
+def unit_quantifiers(find, path, stem, suffix, known,
+ # Stop at exa/exbi: 16 exbi = 2^{64} < zetta =
+ # 1000^7 < zebi = 2^{70}, the next quantifiers up:
+ si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')):
+ """Work out the unit quantifiers.
+
+ Unfortunately, the CLDR data only go up to terabytes and we want
+ all the way to exabytes; but we can recognize the SI quantifiers
+ as prefixes, strip and identify the tail as the localized
+ translation for 'B' (e.g. French has 'octet' for 'byte' and uses
+ ko, Mo, Go, To from which we can extrapolate Po, Eo).
+
+ Should be called first for the SI quantifiers, with suffix = 'B',
+ then for the IEC ones, with suffix = 'iB'; the list known
+ (initially empty before first call) is used to let the second call
+ know what the first learned about the localized unit.
+ """
+ if suffix == 'B': # first call, known = []
+ tail = suffix
+ for q in si_quantifiers:
+ it = find(path, stem % q)
+ # kB for kilobyte, in contrast with KiB for IEC:
+ q = q[0] if q == 'kilo' else q[0].upper()
+ if not it:
+ it = q + tail
+ elif it.startswith(q):
+ rest = it[1:]
+ tail = rest if all(rest == k for k in known) else suffix
+ known.append(rest)
+ yield it
+ else: # second call, re-using first's known
+ assert suffix == 'iB'
+ if known:
+ byte = known.pop()
+ if all(byte == k for k in known):
+ suffix = 'i' + byte
+ for q in si_quantifiers:
+ yield find(path, stem % q[:2],
+ # Those don't (yet, v31) exist in CLDR, so we always fall back to:
+ q[0].upper() + suffix)
def generateLocaleInfo(path):
if not path.endswith(".xml"):
@@ -116,14 +134,13 @@ def generateLocaleInfo(path):
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
- raise xpathlite.Error("alias to \"%s\"" % alias)
+ raise xpathlite.Error('alias to "%s"' % alias)
- language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
- country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
- script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
- variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
+ def code(tag):
+ return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
- return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code)
+ return _generateLocaleInfo(path, code('language'), code('script'),
+ code('territory'), code('variant'))
def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
if not path.endswith(".xml"):
@@ -137,25 +154,22 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
- raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
+ raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
- raise xpathlite.Error("unknown language code \"%s\"" % language_code)
- language = enumdata.language_list[language_id][0]
+ raise xpathlite.Error('unknown language code "%s"' % language_code)
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error("unknown script code \"%s\"" % script_code)
- script = enumdata.script_list[script_id][0]
+ raise xpathlite.Error('unknown script code "%s"' % script_code)
# we should handle fully qualified names with the territory
if not country_code:
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
- raise xpathlite.Error("unknown country code \"%s\"" % country_code)
- country = enumdata.country_list[country_id][0]
+ raise xpathlite.Error('unknown country code "%s"' % country_code)
# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
@@ -163,39 +177,39 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# compatibility.
draft = DraftResolution.contributed
- result = {}
- result['language'] = language
- result['script'] = script
- result['country'] = country
- result['language_code'] = language_code
- result['country_code'] = country_code
- result['script_code'] = script_code
- result['variant_code'] = variant_code
- result['language_id'] = language_id
- result['script_id'] = script_id
- result['country_id'] = country_id
+ result = dict(
+ language=enumdata.language_list[language_id][0],
+ language_code=language_code, language_id=language_id,
+ script=enumdata.script_list[script_id][0],
+ script_code=script_code, script_id=script_id,
+ country=enumdata.country_list[country_id][0],
+ country_code=country_code, country_id=country_id,
+ variant_code=variant_code)
(dir_name, file_name) = os.path.split(path)
- supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
- currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
+ def from_supplement(tag,
+ path=os.path.join(dir_name, '..', 'supplemental',
+ 'supplementalData.xml')):
+ return findTagsInFile(path, tag)
+ currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code)
result['currencyIsoCode'] = ''
result['currencyDigits'] = 2
result['currencyRounding'] = 1
if currencies:
for e in currencies:
if e[0] == 'currency':
- tender = True
- t = filter(lambda x: x[0] == 'tender', e[1])
- if t and t[0][1] == 'false':
- tender = False;
- if tender and not filter(lambda x: x[0] == 'to', e[1]):
- result['currencyIsoCode'] = filter(lambda x: x[0] == 'iso4217', e[1])[0][1]
+ t = [x[1] == 'false' for x in e[1] if x[0] == 'tender']
+ if t and t[0]:
+ pass
+ elif not any(x[0] == 'to' for x in e[1]):
+ result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next()
break
if result['currencyIsoCode']:
- t = findTagsInFile(supplementalPath, "currencyData/fractions/info[iso4217=%s]"%result['currencyIsoCode']);
+ t = from_supplement("currencyData/fractions/info[iso4217=%s]"
+ % result['currencyIsoCode'])
if t and t[0][0] == 'info':
- result['currencyDigits'] = int(filter(lambda x: x[0] == 'digits', t[0][1])[0][1])
- result['currencyRounding'] = int(filter(lambda x: x[0] == 'rounding', t[0][1])[0][1])
+ result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next()
+ result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next()
numbering_system = None
try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
@@ -226,7 +240,9 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system)
try:
numbering_systems = {}
- for ns in findTagsInFile(cldr_dir + "/../supplemental/numberingSystems.xml", "numberingSystems"):
+ for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+ 'numberingSystems.xml'),
+ 'numberingSystems'):
tmp = {}
id = ""
for data in ns[1:][0]: # ns looks like this: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]]
@@ -279,167 +295,70 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
result['currencyDisplayName'] = ''
if result['currencyIsoCode']:
result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode'])
- display_name_path = "numbers/currencies/currency[%s]/displayName" % result['currencyIsoCode']
- result['currencyDisplayName'] \
- = findEntryDef(path, display_name_path) + ";" \
- + findEntryDef(path, display_name_path + "[count=zero]") + ";" \
- + findEntryDef(path, display_name_path + "[count=one]") + ";" \
- + findEntryDef(path, display_name_path + "[count=two]") + ";" \
- + findEntryDef(path, display_name_path + "[count=few]") + ";" \
- + findEntryDef(path, display_name_path + "[count=many]") + ";" \
- + findEntryDef(path, display_name_path + "[count=other]") + ";"
-
- standalone_long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[wide]/month"
- result['standaloneLongMonths'] \
- = findEntry(path, standalone_long_month_path + "[1]") + ";" \
- + findEntry(path, standalone_long_month_path + "[2]") + ";" \
- + findEntry(path, standalone_long_month_path + "[3]") + ";" \
- + findEntry(path, standalone_long_month_path + "[4]") + ";" \
- + findEntry(path, standalone_long_month_path + "[5]") + ";" \
- + findEntry(path, standalone_long_month_path + "[6]") + ";" \
- + findEntry(path, standalone_long_month_path + "[7]") + ";" \
- + findEntry(path, standalone_long_month_path + "[8]") + ";" \
- + findEntry(path, standalone_long_month_path + "[9]") + ";" \
- + findEntry(path, standalone_long_month_path + "[10]") + ";" \
- + findEntry(path, standalone_long_month_path + "[11]") + ";" \
- + findEntry(path, standalone_long_month_path + "[12]") + ";"
-
- standalone_short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[abbreviated]/month"
- result['standaloneShortMonths'] \
- = findEntry(path, standalone_short_month_path + "[1]") + ";" \
- + findEntry(path, standalone_short_month_path + "[2]") + ";" \
- + findEntry(path, standalone_short_month_path + "[3]") + ";" \
- + findEntry(path, standalone_short_month_path + "[4]") + ";" \
- + findEntry(path, standalone_short_month_path + "[5]") + ";" \
- + findEntry(path, standalone_short_month_path + "[6]") + ";" \
- + findEntry(path, standalone_short_month_path + "[7]") + ";" \
- + findEntry(path, standalone_short_month_path + "[8]") + ";" \
- + findEntry(path, standalone_short_month_path + "[9]") + ";" \
- + findEntry(path, standalone_short_month_path + "[10]") + ";" \
- + findEntry(path, standalone_short_month_path + "[11]") + ";" \
- + findEntry(path, standalone_short_month_path + "[12]") + ";"
-
- standalone_narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[stand-alone]/monthWidth[narrow]/month"
- result['standaloneNarrowMonths'] \
- = findEntry(path, standalone_narrow_month_path + "[1]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[2]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[3]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[4]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[5]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[6]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[7]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[8]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[9]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[10]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[11]") + ";" \
- + findEntry(path, standalone_narrow_month_path + "[12]") + ";"
-
- long_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[wide]/month"
- result['longMonths'] \
- = findEntry(path, long_month_path + "[1]") + ";" \
- + findEntry(path, long_month_path + "[2]") + ";" \
- + findEntry(path, long_month_path + "[3]") + ";" \
- + findEntry(path, long_month_path + "[4]") + ";" \
- + findEntry(path, long_month_path + "[5]") + ";" \
- + findEntry(path, long_month_path + "[6]") + ";" \
- + findEntry(path, long_month_path + "[7]") + ";" \
- + findEntry(path, long_month_path + "[8]") + ";" \
- + findEntry(path, long_month_path + "[9]") + ";" \
- + findEntry(path, long_month_path + "[10]") + ";" \
- + findEntry(path, long_month_path + "[11]") + ";" \
- + findEntry(path, long_month_path + "[12]") + ";"
-
- short_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[abbreviated]/month"
- result['shortMonths'] \
- = findEntry(path, short_month_path + "[1]") + ";" \
- + findEntry(path, short_month_path + "[2]") + ";" \
- + findEntry(path, short_month_path + "[3]") + ";" \
- + findEntry(path, short_month_path + "[4]") + ";" \
- + findEntry(path, short_month_path + "[5]") + ";" \
- + findEntry(path, short_month_path + "[6]") + ";" \
- + findEntry(path, short_month_path + "[7]") + ";" \
- + findEntry(path, short_month_path + "[8]") + ";" \
- + findEntry(path, short_month_path + "[9]") + ";" \
- + findEntry(path, short_month_path + "[10]") + ";" \
- + findEntry(path, short_month_path + "[11]") + ";" \
- + findEntry(path, short_month_path + "[12]") + ";"
-
- narrow_month_path = "dates/calendars/calendar[gregorian]/months/monthContext[format]/monthWidth[narrow]/month"
- result['narrowMonths'] \
- = findEntry(path, narrow_month_path + "[1]") + ";" \
- + findEntry(path, narrow_month_path + "[2]") + ";" \
- + findEntry(path, narrow_month_path + "[3]") + ";" \
- + findEntry(path, narrow_month_path + "[4]") + ";" \
- + findEntry(path, narrow_month_path + "[5]") + ";" \
- + findEntry(path, narrow_month_path + "[6]") + ";" \
- + findEntry(path, narrow_month_path + "[7]") + ";" \
- + findEntry(path, narrow_month_path + "[8]") + ";" \
- + findEntry(path, narrow_month_path + "[9]") + ";" \
- + findEntry(path, narrow_month_path + "[10]") + ";" \
- + findEntry(path, narrow_month_path + "[11]") + ";" \
- + findEntry(path, narrow_month_path + "[12]") + ";"
-
- long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[wide]/day"
- result['longDays'] \
- = findEntry(path, long_day_path + "[sun]") + ";" \
- + findEntry(path, long_day_path + "[mon]") + ";" \
- + findEntry(path, long_day_path + "[tue]") + ";" \
- + findEntry(path, long_day_path + "[wed]") + ";" \
- + findEntry(path, long_day_path + "[thu]") + ";" \
- + findEntry(path, long_day_path + "[fri]") + ";" \
- + findEntry(path, long_day_path + "[sat]") + ";"
-
- short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[abbreviated]/day"
- result['shortDays'] \
- = findEntry(path, short_day_path + "[sun]") + ";" \
- + findEntry(path, short_day_path + "[mon]") + ";" \
- + findEntry(path, short_day_path + "[tue]") + ";" \
- + findEntry(path, short_day_path + "[wed]") + ";" \
- + findEntry(path, short_day_path + "[thu]") + ";" \
- + findEntry(path, short_day_path + "[fri]") + ";" \
- + findEntry(path, short_day_path + "[sat]") + ";"
-
- narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[format]/dayWidth[narrow]/day"
- result['narrowDays'] \
- = findEntry(path, narrow_day_path + "[sun]") + ";" \
- + findEntry(path, narrow_day_path + "[mon]") + ";" \
- + findEntry(path, narrow_day_path + "[tue]") + ";" \
- + findEntry(path, narrow_day_path + "[wed]") + ";" \
- + findEntry(path, narrow_day_path + "[thu]") + ";" \
- + findEntry(path, narrow_day_path + "[fri]") + ";" \
- + findEntry(path, narrow_day_path + "[sat]") + ";"
-
- standalone_long_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[wide]/day"
- result['standaloneLongDays'] \
- = findEntry(path, standalone_long_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_long_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_long_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_long_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_long_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_long_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_long_day_path + "[sat]") + ";"
-
- standalone_short_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[abbreviated]/day"
- result['standaloneShortDays'] \
- = findEntry(path, standalone_short_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_short_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_short_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_short_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_short_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_short_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_short_day_path + "[sat]") + ";"
-
- standalone_narrow_day_path = "dates/calendars/calendar[gregorian]/days/dayContext[stand-alone]/dayWidth[narrow]/day"
- result['standaloneNarrowDays'] \
- = findEntry(path, standalone_narrow_day_path + "[sun]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[mon]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[tue]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[wed]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[thu]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[fri]") + ";" \
- + findEntry(path, standalone_narrow_day_path + "[sat]") + ";"
-
- return result
+ result['currencyDisplayName'] = ';'.join(
+ findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode']
+ + ']/displayName' + tail)
+ for tail in ['',] + [
+ '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other')
+ ]) + ';'
+
+ def findUnitDef(path, stem, fallback=''):
+ # The displayName for a quantified unit in en.xml is kByte
+ # instead of kB (etc.), so prefer any unitPattern provided:
+ for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
+ try:
+ ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
+ except xpathlite.Error:
+ continue
+
+ # TODO: exploit count-handling, instead of discarding placeholders
+ if ans.startswith('{0}'):
+ ans = ans[3:].lstrip()
+ if ans:
+ return ans
+
+ return findEntryDef(path, stem + 'displayName', fallback)
+
+ # First without quantifier, then quantified each way:
+ result['byte_unit'] = findEntryDef(
+ path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName',
+ 'bytes')
+ stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/'
+ known = [] # cases where we *do* have a given version:
+ result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known))
+ # IEC 60027-2
+ # http://physics.nist.gov/cuu/Units/binary.html
+ result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known))
+
+ # Used for month and day data:
+ namings = (
+ ('standaloneLong', 'stand-alone', 'wide'),
+ ('standaloneShort', 'stand-alone', 'abbreviated'),
+ ('standaloneNarrow', 'stand-alone', 'narrow'),
+ ('long', 'format', 'wide'),
+ ('short', 'format', 'abbreviated'),
+ ('narrow', 'format', 'narrow'),
+ )
+
+ # Month data:
+ for cal in ('gregorian',): # We shall want to add to this
+ stem = 'dates/calendars/calendar[' + cal + ']/months/'
+ for (key, mode, size) in namings:
+ prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
+ result[key + 'Months'] = ';'.join(
+ findEntry(path, stem + prop + "month[%d]" % i)
+ for i in range(1, 13)) + ';'
+
+ # Day data (for Gregorian, at least):
+ stem = 'dates/calendars/calendar[gregorian]/days/'
+ days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
+ for (key, mode, size) in namings:
+ prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
+ result[key + 'Days'] = ';'.join(
+ findEntry(path, stem + prop + '[' + day + ']')
+ for day in days) + ';'
+
+ return Locale(result)
def addEscapes(s):
result = ''
@@ -463,94 +382,42 @@ def usage():
def integrateWeekData(filePath):
if not filePath.endswith(".xml"):
return {}
- monFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=mon]", attribute="territories")[0].split(" ")
- tueFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=tue]", attribute="territories")[0].split(" ")
- wedFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=wed]", attribute="territories")[0].split(" ")
- thuFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=thu]", attribute="territories")[0].split(" ")
- friFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=fri]", attribute="territories")[0].split(" ")
- satFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sat]", attribute="territories")[0].split(" ")
- sunFirstDayIn = findEntryInFile(filePath, "weekData/firstDay[day=sun]", attribute="territories")[0].split(" ")
-
- monWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=mon]", attribute="territories")[0].split(" ")
- tueWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=tue]", attribute="territories")[0].split(" ")
- wedWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=wed]", attribute="territories")[0].split(" ")
- thuWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=thu]", attribute="territories")[0].split(" ")
- friWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=fri]", attribute="territories")[0].split(" ")
- satWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sat]", attribute="territories")[0].split(" ")
- sunWeekendStart = findEntryInFile(filePath, "weekData/weekendStart[day=sun]", attribute="territories")[0].split(" ")
-
- monWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=mon]", attribute="territories")[0].split(" ")
- tueWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=tue]", attribute="territories")[0].split(" ")
- wedWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=wed]", attribute="territories")[0].split(" ")
- thuWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=thu]", attribute="territories")[0].split(" ")
- friWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=fri]", attribute="territories")[0].split(" ")
- satWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sat]", attribute="territories")[0].split(" ")
- sunWeekendEnd = findEntryInFile(filePath, "weekData/weekendEnd[day=sun]", attribute="territories")[0].split(" ")
+
+ def lookup(key):
+ return findEntryInFile(filePath, key, attribute='territories')[0].split()
+ days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
firstDayByCountryCode = {}
- for countryCode in monFirstDayIn:
- firstDayByCountryCode[countryCode] = "mon"
- for countryCode in tueFirstDayIn:
- firstDayByCountryCode[countryCode] = "tue"
- for countryCode in wedFirstDayIn:
- firstDayByCountryCode[countryCode] = "wed"
- for countryCode in thuFirstDayIn:
- firstDayByCountryCode[countryCode] = "thu"
- for countryCode in friFirstDayIn:
- firstDayByCountryCode[countryCode] = "fri"
- for countryCode in satFirstDayIn:
- firstDayByCountryCode[countryCode] = "sat"
- for countryCode in sunFirstDayIn:
- firstDayByCountryCode[countryCode] = "sun"
+ for day in days:
+ for countryCode in lookup('weekData/firstDay[day=%s]' % day):
+ firstDayByCountryCode[countryCode] = day
weekendStartByCountryCode = {}
- for countryCode in monWeekendStart:
- weekendStartByCountryCode[countryCode] = "mon"
- for countryCode in tueWeekendStart:
- weekendStartByCountryCode[countryCode] = "tue"
- for countryCode in wedWeekendStart:
- weekendStartByCountryCode[countryCode] = "wed"
- for countryCode in thuWeekendStart:
- weekendStartByCountryCode[countryCode] = "thu"
- for countryCode in friWeekendStart:
- weekendStartByCountryCode[countryCode] = "fri"
- for countryCode in satWeekendStart:
- weekendStartByCountryCode[countryCode] = "sat"
- for countryCode in sunWeekendStart:
- weekendStartByCountryCode[countryCode] = "sun"
+ for day in days:
+ for countryCode in lookup('weekData/weekendStart[day=%s]' % day):
+ weekendStartByCountryCode[countryCode] = day
weekendEndByCountryCode = {}
- for countryCode in monWeekendEnd:
- weekendEndByCountryCode[countryCode] = "mon"
- for countryCode in tueWeekendEnd:
- weekendEndByCountryCode[countryCode] = "tue"
- for countryCode in wedWeekendEnd:
- weekendEndByCountryCode[countryCode] = "wed"
- for countryCode in thuWeekendEnd:
- weekendEndByCountryCode[countryCode] = "thu"
- for countryCode in friWeekendEnd:
- weekendEndByCountryCode[countryCode] = "fri"
- for countryCode in satWeekendEnd:
- weekendEndByCountryCode[countryCode] = "sat"
- for countryCode in sunWeekendEnd:
- weekendEndByCountryCode[countryCode] = "sun"
-
- for (key,locale) in locale_database.iteritems():
- countryCode = locale['country_code']
+ for day in days:
+ for countryCode in lookup('weekData/weekendEnd[day=%s]' % day):
+ weekendEndByCountryCode[countryCode] = day
+
+ for (key, locale) in locale_database.iteritems():
+ countryCode = locale.country_code
if countryCode in firstDayByCountryCode:
- locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode[countryCode]
+ locale.firstDayOfWeek = firstDayByCountryCode[countryCode]
else:
- locale_database[key]['firstDayOfWeek'] = firstDayByCountryCode["001"]
+ locale.firstDayOfWeek = firstDayByCountryCode["001"]
if countryCode in weekendStartByCountryCode:
- locale_database[key]['weekendStart'] = weekendStartByCountryCode[countryCode]
+ locale.weekendStart = weekendStartByCountryCode[countryCode]
else:
- locale_database[key]['weekendStart'] = weekendStartByCountryCode["001"]
+ locale.weekendStart = weekendStartByCountryCode["001"]
if countryCode in weekendEndByCountryCode:
- locale_database[key]['weekendEnd'] = weekendEndByCountryCode[countryCode]
+ locale.weekendEnd = weekendEndByCountryCode[countryCode]
else:
- locale_database[key]['weekendEnd'] = weekendEndByCountryCode["001"]
+ locale.weekendEnd = weekendEndByCountryCode["001"]
if len(sys.argv) != 2:
usage()
@@ -566,7 +433,9 @@ locale_database = {}
# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
defaultContent_locales = {}
-for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"):
+for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental',
+ 'supplementalMetadata.xml'),
+ 'metadata/defaultContent'):
for data in ns[1:][0]:
if data[0] == u"locales":
defaultContent_locales = data[1].split()
@@ -579,36 +448,36 @@ for file in defaultContent_locales:
country_code = items[2]
else:
if len(items) != 2:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [neither lang_script_country nor lang_country]\n')
continue
language_code = items[0]
script_code = ""
country_code = items[1]
if len(country_code) == 4:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [long country code]\n')
continue
try:
l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code)
if not l:
- sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ sys.stderr.write('skipping defaultContent locale "' + file + '" [no locale info generated]\n')
continue
except xpathlite.Error as e:
- sys.stderr.write("skipping defaultContent locale \"%s\" (%s)\n" % (file, str(e)))
+ sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e)))
continue
- locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+ locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
for file in cldr_files:
try:
l = generateLocaleInfo(cldr_dir + "/" + file)
if not l:
- sys.stderr.write("skipping file \"" + file + "\"\n")
+ sys.stderr.write('skipping file "' + file + '" [no locale info generated]\n')
continue
except xpathlite.Error as e:
- sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
+ sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e)))
continue
- locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+ locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml")
locale_keys = locale_database.keys()
@@ -665,7 +534,7 @@ def _parseLocale(l):
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
- raise xpathlite.Error("unknown language code \"%s\"" % language_code)
+ raise xpathlite.Error('unknown language code "%s"' % language_code)
language = enumdata.language_list[language_id][0]
if len(items) > 1:
@@ -676,14 +545,14 @@ def _parseLocale(l):
if len(script_code) == 4:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error("unknown script code \"%s\"" % script_code)
+ raise xpathlite.Error('unknown script code "%s"' % script_code)
script = enumdata.script_list[script_id][0]
else:
country_code = script_code
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
- raise xpathlite.Error("unknown country code \"%s\"" % country_code)
+ raise xpathlite.Error('unknown country code "%s"' % country_code)
country = enumdata.country_list[country_id][0]
return (language, script, country)
@@ -697,12 +566,12 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
try:
(from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
except xpathlite.Error as e:
- sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+ sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
continue
try:
(to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
except xpathlite.Error as e:
- sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
+ sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:
@@ -725,115 +594,10 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
print " </likelySubtags>"
print " <localeList>"
-print \
-" <locale>\n\
- <language>C</language>\n\
- <languageEndonym></languageEndonym>\n\
- <script>AnyScript</script>\n\
- <country>AnyCountry</country>\n\
- <countryEndonym></countryEndonym>\n\
- <decimal>46</decimal>\n\
- <group>44</group>\n\
- <list>59</list>\n\
- <percent>37</percent>\n\
- <zero>48</zero>\n\
- <minus>45</minus>\n\
- <plus>43</plus>\n\
- <exp>101</exp>\n\
- <quotationStart>\"</quotationStart>\n\
- <quotationEnd>\"</quotationEnd>\n\
- <alternateQuotationStart>\'</alternateQuotationStart>\n\
- <alternateQuotationEnd>\'</alternateQuotationEnd>\n\
- <listPatternPartStart>%1, %2</listPatternPartStart>\n\
- <listPatternPartMiddle>%1, %2</listPatternPartMiddle>\n\
- <listPatternPartEnd>%1, %2</listPatternPartEnd>\n\
- <listPatternPartTwo>%1, %2</listPatternPartTwo>\n\
- <am>AM</am>\n\
- <pm>PM</pm>\n\
- <firstDayOfWeek>mon</firstDayOfWeek>\n\
- <weekendStart>sat</weekendStart>\n\
- <weekendEnd>sun</weekendEnd>\n\
- <longDateFormat>EEEE, d MMMM yyyy</longDateFormat>\n\
- <shortDateFormat>d MMM yyyy</shortDateFormat>\n\
- <longTimeFormat>HH:mm:ss z</longTimeFormat>\n\
- <shortTimeFormat>HH:mm:ss</shortTimeFormat>\n\
- <standaloneLongMonths>January;February;March;April;May;June;July;August;September;October;November;December;</standaloneLongMonths>\n\
- <standaloneShortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</standaloneShortMonths>\n\
- <standaloneNarrowMonths>J;F;M;A;M;J;J;A;S;O;N;D;</standaloneNarrowMonths>\n\
- <longMonths>January;February;March;April;May;June;July;August;September;October;November;December;</longMonths>\n\
- <shortMonths>Jan;Feb;Mar;Apr;May;Jun;Jul;Aug;Sep;Oct;Nov;Dec;</shortMonths>\n\
- <narrowMonths>1;2;3;4;5;6;7;8;9;10;11;12;</narrowMonths>\n\
- <longDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</longDays>\n\
- <shortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</shortDays>\n\
- <narrowDays>7;1;2;3;4;5;6;</narrowDays>\n\
- <standaloneLongDays>Sunday;Monday;Tuesday;Wednesday;Thursday;Friday;Saturday;</standaloneLongDays>\n\
- <standaloneShortDays>Sun;Mon;Tue;Wed;Thu;Fri;Sat;</standaloneShortDays>\n\
- <standaloneNarrowDays>S;M;T;W;T;F;S;</standaloneNarrowDays>\n\
- <currencyIsoCode></currencyIsoCode>\n\
- <currencySymbol></currencySymbol>\n\
- <currencyDisplayName>;;;;;;;</currencyDisplayName>\n\
- <currencyDigits>2</currencyDigits>\n\
- <currencyRounding>1</currencyRounding>\n\
- <currencyFormat>%1%2</currencyFormat>\n\
- <currencyNegativeFormat></currencyNegativeFormat>\n\
- </locale>"
+Locale.C().toXml()
for key in locale_keys:
- l = locale_database[key]
-
- print " <locale>"
- print " <language>" + l['language'] + "</language>"
- print " <languageEndonym>" + escape(l['language_endonym']).encode('utf-8') + "</languageEndonym>"
- print " <script>" + l['script'] + "</script>"
- print " <country>" + l['country'] + "</country>"
- print " <countryEndonym>" + escape(l['country_endonym']).encode('utf-8') + "</countryEndonym>"
- print " <languagecode>" + l['language_code'] + "</languagecode>"
- print " <scriptcode>" + l['script_code'] + "</scriptcode>"
- print " <countrycode>" + l['country_code'] + "</countrycode>"
- print " <decimal>" + ordStr(l['decimal']) + "</decimal>"
- print " <group>" + ordStr(l['group']) + "</group>"
- print " <list>" + fixOrdStrList(l['list']) + "</list>"
- print " <percent>" + fixOrdStrPercent(l['percent']) + "</percent>"
- print " <zero>" + ordStr(l['zero']) + "</zero>"
- print " <minus>" + fixOrdStrMinus(l['minus']) + "</minus>"
- print " <plus>" + fixOrdStrPlus(l['plus']) + "</plus>"
- print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>"
- print " <quotationStart>" + escape(l['quotationStart']).encode('utf-8') + "</quotationStart>"
- print " <quotationEnd>" + escape(l['quotationEnd']).encode('utf-8') + "</quotationEnd>"
- print " <alternateQuotationStart>" + escape(l['alternateQuotationStart']).encode('utf-8') + "</alternateQuotationStart>"
- print " <alternateQuotationEnd>" + escape(l['alternateQuotationEnd']).encode('utf-8') + "</alternateQuotationEnd>"
- print " <listPatternPartStart>" + escape(l['listPatternPartStart']).encode('utf-8') + "</listPatternPartStart>"
- print " <listPatternPartMiddle>" + escape(l['listPatternPartMiddle']).encode('utf-8') + "</listPatternPartMiddle>"
- print " <listPatternPartEnd>" + escape(l['listPatternPartEnd']).encode('utf-8') + "</listPatternPartEnd>"
- print " <listPatternPartTwo>" + escape(l['listPatternPartTwo']).encode('utf-8') + "</listPatternPartTwo>"
- print " <am>" + escape(l['am']).encode('utf-8') + "</am>"
- print " <pm>" + escape(l['pm']).encode('utf-8') + "</pm>"
- print " <firstDayOfWeek>" + escape(l['firstDayOfWeek']).encode('utf-8') + "</firstDayOfWeek>"
- print " <weekendStart>" + escape(l['weekendStart']).encode('utf-8') + "</weekendStart>"
- print " <weekendEnd>" + escape(l['weekendEnd']).encode('utf-8') + "</weekendEnd>"
- print " <longDateFormat>" + escape(l['longDateFormat']).encode('utf-8') + "</longDateFormat>"
- print " <shortDateFormat>" + escape(l['shortDateFormat']).encode('utf-8') + "</shortDateFormat>"
- print " <longTimeFormat>" + escape(l['longTimeFormat']).encode('utf-8') + "</longTimeFormat>"
- print " <shortTimeFormat>" + escape(l['shortTimeFormat']).encode('utf-8') + "</shortTimeFormat>"
- print " <standaloneLongMonths>" + escape(l['standaloneLongMonths']).encode('utf-8') + "</standaloneLongMonths>"
- print " <standaloneShortMonths>"+ escape(l['standaloneShortMonths']).encode('utf-8') + "</standaloneShortMonths>"
- print " <standaloneNarrowMonths>"+ escape(l['standaloneNarrowMonths']).encode('utf-8') + "</standaloneNarrowMonths>"
- print " <longMonths>" + escape(l['longMonths']).encode('utf-8') + "</longMonths>"
- print " <shortMonths>" + escape(l['shortMonths']).encode('utf-8') + "</shortMonths>"
- print " <narrowMonths>" + escape(l['narrowMonths']).encode('utf-8') + "</narrowMonths>"
- print " <longDays>" + escape(l['longDays']).encode('utf-8') + "</longDays>"
- print " <shortDays>" + escape(l['shortDays']).encode('utf-8') + "</shortDays>"
- print " <narrowDays>" + escape(l['narrowDays']).encode('utf-8') + "</narrowDays>"
- print " <standaloneLongDays>" + escape(l['standaloneLongDays']).encode('utf-8') + "</standaloneLongDays>"
- print " <standaloneShortDays>" + escape(l['standaloneShortDays']).encode('utf-8') + "</standaloneShortDays>"
- print " <standaloneNarrowDays>" + escape(l['standaloneNarrowDays']).encode('utf-8') + "</standaloneNarrowDays>"
- print " <currencyIsoCode>" + escape(l['currencyIsoCode']).encode('utf-8') + "</currencyIsoCode>"
- print " <currencySymbol>" + escape(l['currencySymbol']).encode('utf-8') + "</currencySymbol>"
- print " <currencyDisplayName>" + escape(l['currencyDisplayName']).encode('utf-8') + "</currencyDisplayName>"
- print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
- print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
- print " <currencyFormat>" + escape(l['currencyFormat']).encode('utf-8') + "</currencyFormat>"
- print " <currencyNegativeFormat>" + escape(l['currencyNegativeFormat']).encode('utf-8') + "</currencyNegativeFormat>"
- print " </locale>"
+ locale_database[key].toXml()
+
print " </localeList>"
print "</localeDatabase>"
diff --git a/util/local_database/cldr2qtimezone.py b/util/local_database/cldr2qtimezone.py
index 502ab92fd5..7c10b1dfd2 100644..100755
--- a/util/local_database/cldr2qtimezone.py
+++ b/util/local_database/cldr2qtimezone.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
#############################################################################
##
## Copyright (C) 2016 The Qt Company Ltd.
@@ -26,25 +26,32 @@
## $QT_END_LICENSE$
##
#############################################################################
-
-
-# Script to parse the CLDR supplemental/windowsZones.xml file and encode for use in QTimeZone
-# XML structure is as follows:
-#
-# <supplementalData>
-# <version number="$Revision: 7825 $"/>
-# <generation date="$Date: 2012-10-10 14:45:31 -0700 (Wed, 10 Oct 2012) $"/>
-# <windowsZones>
-# <mapTimezones otherVersion="7dc0101" typeVersion="2012f">
-# <!-- (UTC-08:00) Pacific Time (US & Canada) -->
-# <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
-# <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
-# <mapZone other="Pacific Standard Time" territory="MX" type="America/Tijuana"/>
-# <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles"/>
-# <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
-# </mapTimezones>
-# </windowsZones>
-# </supplementalData>
+"""Parse CLDR data for QTimeZone use with MS-Windows
+
+Script to parse the CLDR supplemental/windowsZones.xml file and encode
+for use in QTimeZone. See ``./cldr2qlocalexml.py`` for where to get
+the CLDR data. Pass its common/ directory as first parameter to this
+script and the qtbase root directory as second parameter. It shall
+update qtbase's src/corelib/tools/qtimezoneprivate_data_p.h ready for
+use.
+
+The XML structure is as follows:
+
+ <supplementalData>
+ <version number="$Revision: 7825 $"/>
+ <generation date="$Date: 2012-10-10 14:45:31 -0700 (Wed, 10 Oct 2012) $"/>
+ <windowsZones>
+ <mapTimezones otherVersion="7dc0101" typeVersion="2012f">
+ <!-- (UTC-08:00) Pacific Time (US & Canada) -->
+ <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
+ <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
+ <mapZone other="Pacific Standard Time" territory="MX" type="America/Tijuana"/>
+ <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles"/>
+ <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
+ </mapTimezones>
+ </windowsZones>
+ </supplementalData>
+"""
import os
import sys
@@ -328,15 +335,18 @@ while s and s != GENERATED_BLOCK_START:
# Write out generated block start tag and warning
newTempFile.write(GENERATED_BLOCK_START)
-newTempFile.write("\n\
-/*\n\
- This part of the file was generated on %s from the\n\
- Common Locale Data Repository v%s supplemental/windowsZones.xml file %s\n\
-\n\
- http://www.unicode.org/cldr/\n\
-\n\
- Do not change this data, only generate it using cldr2qtimezone.py.\n\
-*/\n\n" % (str(datetime.date.today()), cldr_version, versionNumber) )
+newTempFile.write("""
+/*
+ This part of the file was generated on %s from the
+ Common Locale Data Repository v%s supplemental/windowsZones.xml file %s
+
+ http://www.unicode.org/cldr/
+
+ Do not edit this code: run cldr2qtimezone.py on updated (or
+ edited) CLDR data; see qtbase/util/local_database/.
+*/
+
+""" % (str(datetime.date.today()), cldr_version, versionNumber) )
windowsIdData = ByteArrayData()
ianaIdData = ByteArrayData()
@@ -346,7 +356,7 @@ newTempFile.write("// Windows ID Key, Country Enum, IANA ID Index\n")
newTempFile.write("static const QZoneData zoneDataTable[] = {\n")
for index in windowsIdDict:
data = windowsIdDict[index]
- newTempFile.write(" { %6d,%6d,%6d }, // %s / %s\n" \
+ newTempFile.write(" { %6d,%6d,%6d }, // %s / %s\n"
% (data['windowsKey'],
data['countryId'],
ianaIdData.append(data['ianaList']),
@@ -361,7 +371,7 @@ print "Done Zone Data"
newTempFile.write("// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n")
newTempFile.write("static const QWindowsData windowsDataTable[] = {\n")
for windowsKey in windowsIdList:
- newTempFile.write(" { %6d,%6d,%6d,%6d }, // %s\n" \
+ newTempFile.write(" { %6d,%6d,%6d,%6d }, // %s\n"
% (windowsKey,
windowsIdData.append(windowsIdList[windowsKey][0]),
ianaIdData.append(defaultDict[windowsKey]),
@@ -377,7 +387,7 @@ newTempFile.write("// IANA ID Index, UTC Offset\n")
newTempFile.write("static const QUtcData utcDataTable[] = {\n")
for index in utcIdList:
data = utcIdList[index]
- newTempFile.write(" { %6d,%6d }, // %s\n" \
+ newTempFile.write(" { %6d,%6d }, // %s\n"
% (ianaIdData.append(data[0]),
data[1],
data[0]))
diff --git a/util/local_database/enumdata.py b/util/local_database/enumdata.py
index aa2adfa52f..2d16e5851d 100644
--- a/util/local_database/enumdata.py
+++ b/util/local_database/enumdata.py
@@ -798,7 +798,7 @@ script_list = {
137 : [ "Newa", "Newa" ],
138 : [ "Osage", "Osge" ],
139 : [ "Tangut", "Tang" ],
- 140 : [ "Han With Bopomofo", "Hanb" ],
+ 140 : [ "Han with Bopomofo", "Hanb" ],
141 : [ "Jamo", "Jamo" ]
}
diff --git a/util/local_database/localexml.py b/util/local_database/localexml.py
new file mode 100644
index 0000000000..a47fa6a5ff
--- /dev/null
+++ b/util/local_database/localexml.py
@@ -0,0 +1,239 @@
+#############################################################################
+##
+## Copyright (C) 2017 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Shared serialization-scanning code for QLocaleXML format.
+
+The Locale class is written by cldr2qlocalexml.py and read by qlocalexml2cpp.py
+"""
+from xml.sax.saxutils import escape
+
+import xpathlite
+
+# Tools used by Locale:
+def camel(seq):
+ yield seq.next()
+ for word in seq:
+ yield word.capitalize()
+
+def camelCase(words):
+ return ''.join(camel(iter(words)))
+
+def ordStr(c):
+    """Decimal code-point of a one-character string; xpathlite.Error otherwise."""
+    if len(c) == 1: return str(ord(c))
+    raise xpathlite.Error('Unable to handle value %r' % (c,)) # repr: ASCII-safe
+
+# Work-around for QLocale returning a single character, rather than a
+# string, for QLocale::exponential() and others: when the value is not
+# exactly one character, fall back to the supplied default, d.
+def fixOrdStr(c, d):
+ return str(ord(c if len(c) == 1 else d))
+
+def convertFormat(format):
+ result = ""
+ i = 0
+ while i < len(format):
+ if format[i] == "'":
+ result += "'"
+ i += 1
+ while i < len(format) and format[i] != "'":
+ result += format[i]
+ i += 1
+ if i < len(format):
+ result += "'"
+ i += 1
+ else:
+ s = format[i:]
+ if s.startswith("EEEE"):
+ result += "dddd"
+ i += 4
+ elif s.startswith("EEE"):
+ result += "ddd"
+ i += 3
+ elif s.startswith("a"):
+ result += "AP"
+ i += 1
+ elif s.startswith("z"):
+ result += "t"
+ i += 1
+ elif s.startswith("v"):
+ i += 1
+ else:
+ result += format[i]
+ i += 1
+
+ return result
+
+class Locale:
+ # Helper used only while the class body executes (see del below); not a method:
+ def propsMonthDay(lengths=('long', 'short', 'narrow'), scale=('months', 'days')):
+ for L in lengths:
+ for S in scale:
+ yield camelCase((L, S))
+ yield camelCase(('standalone', L, S))
+
+ # Expected to be numbers, read with int():
+ __asint = ("decimal", "group", "zero",
+ "list", "percent", "minus", "plus", "exp",
+ "currencyDigits", "currencyRounding")
+ # Single character; use the code-point number for each:
+ __asord = ("quotationStart", "quotationEnd",
+ "alternateQuotationStart", "alternateQuotationEnd")
+ # Convert day-name to Qt day-of-week number:
+ __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
+ # Convert from CLDR format-strings to QDateTimeParser ones:
+ __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
+ # Just use the raw text:
+ __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
+ "listPatternPartStart", "listPatternPartMiddle",
+ "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+ 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
+ "currencyIsoCode", "currencySymbol", "currencyDisplayName",
+ "currencyFormat", "currencyNegativeFormat"
+ ) + tuple(propsMonthDay())
+ del propsMonthDay
+
+ # Day-of-Week numbering used by Qt:
+ __qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
+
+ @classmethod
+ def fromXmlData(cls, lookup):
+ """Constructor from the contents of XML elements.
+
+ Single parameter, lookup, is called with the names of XML
+ elements that should contain the relevant data, within a CLDR
+ locale element (within a localeList element); these names are
+ used for the attributes of the object constructed. Attribute
+ values are obtained by suitably digesting the returned element
+ texts.\n"""
+ data = {}
+ for k in cls.__asint:
+ data['listDelim' if k == 'list' else k] = int(lookup(k))
+
+ for k in cls.__asord:
+ value = lookup(k)
+ assert len(value) == 1, \
+ (k, value, 'value should be exactly one character')
+ data[k] = ord(value)
+
+ for k in cls.__asdow:
+ data[k] = cls.__qDoW[lookup(k)]
+
+ for k in cls.__asfmt:
+ data[k] = convertFormat(lookup(k))
+
+ for k in cls.__astxt:
+ data[k] = lookup(k)
+
+ return cls(data)
+
+ def toXml(self, indent=' ', tab=' '):
+ print indent + '<locale>'
+ inner = indent + tab
+ get = lambda k: getattr(self, k)
+ for key in ('language', 'script', 'country'):
+ print inner + "<%s>" % key + get(key) + "</%s>" % key
+ print inner + "<%scode>" % key + get(key + '_code') + "</%scode>" % key
+
+ for key in ('decimal', 'group', 'zero'):
+ print inner + "<%s>" % key + ordStr(get(key)) + "</%s>" % key
+ for key, std in (('list', ';'), ('percent', '%'),
+ ('minus', '-'), ('plus', '+'), ('exp', 'e')):
+ print inner + "<%s>" % key + fixOrdStr(get(key), std) + "</%s>" % key
+
+ for key in ('language_endonym', 'country_endonym',
+ 'quotationStart', 'quotationEnd',
+ 'alternateQuotationStart', 'alternateQuotationEnd',
+ 'listPatternPartStart', 'listPatternPartMiddle',
+ 'listPatternPartEnd', 'listPatternPartTwo',
+ 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
+ 'am', 'pm', 'firstDayOfWeek',
+ 'weekendStart', 'weekendEnd',
+ 'longDateFormat', 'shortDateFormat',
+ 'longTimeFormat', 'shortTimeFormat',
+ 'standaloneLongMonths', 'standaloneShortMonths',
+ 'standaloneNarrowMonths',
+ 'longMonths', 'shortMonths', 'narrowMonths',
+ 'longDays', 'shortDays', 'narrowDays',
+ 'standaloneLongDays', 'standaloneShortDays', 'standaloneNarrowDays',
+ 'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
+ 'currencyFormat', 'currencyNegativeFormat'):
+ ent = camelCase(key.split('_')) if key.endswith('_endonym') else key
+ print inner + "<%s>%s</%s>" % (ent, escape(get(key)).encode('utf-8'), ent)
+
+ for key in ('currencyDigits', 'currencyRounding'):
+ print inner + "<%s>%d</%s>" % (key, get(key), key)
+
+ print indent + "</locale>"
+
+ def __init__(self, data=None, **kw):
+ if data: self.__dict__.update(data)
+ if kw: self.__dict__.update(kw)
+
+ @classmethod
+ def C(cls,
+ # Empty entries at end to ensure final separator when join()ed:
+ months = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
+ 'August', 'September', 'October', 'November', 'December', ''),
+ days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
+ 'Thursday', 'Friday', 'Saturday', ''),
+ quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
+ """Returns an object representing the C locale."""
+ return cls(language='C', language_code='0', language_endonym='',
+ script='AnyScript', script_code='0',
+ country='AnyCountry', country_code='0', country_endonym='',
+ decimal='.', group=',', list=';', percent='%',
+ zero='0', minus='-', plus='+', exp='e',
+ quotationStart='"', quotationEnd='"',
+ alternateQuotationStart='\'', alternateQuotationEnd='\'',
+ listPatternPartStart='%1, %2',
+ listPatternPartMiddle='%1, %2',
+ listPatternPartEnd='%1, %2',
+ listPatternPartTwo='%1, %2',
+ byte_unit='bytes',
+ byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
+ byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
+ am='AM', pm='PM', firstDayOfWeek='mon',
+ weekendStart='sat', weekendEnd='sun',
+ longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
+ longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
+ longMonths=';'.join(months),
+ shortMonths=';'.join(m[:3] for m in months),
+ narrowMonths='1;2;3;4;5;6;7;8;9;10;11;12;',
+ standaloneLongMonths=';'.join(months),
+ standaloneShortMonths=';'.join(m[:3] for m in months),
+ standaloneNarrowMonths=';'.join(m[:1] for m in months),
+ longDays=';'.join(days),
+ shortDays=';'.join(d[:3] for d in days),
+ narrowDays='7;1;2;3;4;5;6;',
+ standaloneLongDays=';'.join(days),
+ standaloneShortDays=';'.join(d[:3] for d in days),
+ standaloneNarrowDays=';'.join(d[:1] for d in days),
+ currencyIsoCode='', currencySymbol='',
+ currencyDisplayName=';' * 7,
+ currencyDigits=2, currencyRounding=1,
+ currencyFormat='%1%2', currencyNegativeFormat='')
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index 9fd88c46fe..0f10f8ce2d 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/env python2
#############################################################################
##
-## Copyright (C) 2016 The Qt Company Ltd.
+## Copyright (C) 2017 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -26,6 +26,12 @@
## $QT_END_LICENSE$
##
#############################################################################
+"""Script to generate C++ code from CLDR data in qLocaleXML form
+
+See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
+Pass the output file from that as first parameter to this script; pass
+the root of the qtbase check-out as second parameter.
+"""
import os
import sys
@@ -33,6 +39,8 @@ import tempfile
import datetime
import xml.dom.minidom
+from localexml import Locale
+
class Error:
def __init__(self, msg):
self.msg = msg
@@ -41,113 +49,68 @@ class Error:
def wrap_list(lst):
def split(lst, size):
- for i in range(len(lst)/size+1):
- yield lst[i*size:(i+1)*size]
- return ",\n".join(map(lambda x: ", ".join(x), split(lst, 20)))
+ while lst:
+ head, lst = lst[:size], lst[size:]
+ yield head
+ return ",\n".join(", ".join(x) for x in split(lst, 20))
+
+def isNodeNamed(elt, name, TYPE=xml.dom.minidom.Node.ELEMENT_NODE):
+ return elt.nodeType == TYPE and elt.nodeName == name
def firstChildElt(parent, name):
child = parent.firstChild
while child:
- if child.nodeType == parent.ELEMENT_NODE \
- and (not name or child.nodeName == name):
+ if isNodeNamed(child, name):
return child
child = child.nextSibling
- return False
-
-def nextSiblingElt(sibling, name):
- sib = sibling.nextSibling
- while sib:
- if sib.nodeType == sibling.ELEMENT_NODE \
- and (not name or sib.nodeName == name):
- return sib
- sib = sib.nextSibling
- return False
-
-def eltText(elt):
- result = ""
- child = elt.firstChild
- while child:
- if child.nodeType == elt.TEXT_NODE:
- if result:
- result += " "
- result += child.nodeValue
- child = child.nextSibling
- return result
-def loadLanguageMap(doc):
- result = {}
+ raise Error('No %s child found' % name)
- language_list_elt = firstChildElt(doc.documentElement, "languageList")
- language_elt = firstChildElt(language_list_elt, "language")
- while language_elt:
- language_id = int(eltText(firstChildElt(language_elt, "id")))
- language_name = eltText(firstChildElt(language_elt, "name"))
- language_code = eltText(firstChildElt(language_elt, "code"))
- result[language_id] = (language_name, language_code)
- language_elt = nextSiblingElt(language_elt, "language")
+def eachEltInGroup(parent, group, key):
+ try:
+ element = firstChildElt(parent, group).firstChild
+ except Error:
+ element = None
- return result
-
-def loadScriptMap(doc):
- result = {}
+ while element:
+ if isNodeNamed(element, key):
+ yield element
+ element = element.nextSibling
- script_list_elt = firstChildElt(doc.documentElement, "scriptList")
- script_elt = firstChildElt(script_list_elt, "script")
- while script_elt:
- script_id = int(eltText(firstChildElt(script_elt, "id")))
- script_name = eltText(firstChildElt(script_elt, "name"))
- script_code = eltText(firstChildElt(script_elt, "code"))
- result[script_id] = (script_name, script_code)
- script_elt = nextSiblingElt(script_elt, "script")
-
- return result
-
-def loadCountryMap(doc):
- result = {}
+def eltWords(elt):
+ child = elt.firstChild
+ while child:
+ if child.nodeType == elt.TEXT_NODE:
+ yield child.nodeValue
+ child = child.nextSibling
- country_list_elt = firstChildElt(doc.documentElement, "countryList")
- country_elt = firstChildElt(country_list_elt, "country")
- while country_elt:
- country_id = int(eltText(firstChildElt(country_elt, "id")))
- country_name = eltText(firstChildElt(country_elt, "name"))
- country_code = eltText(firstChildElt(country_elt, "code"))
- result[country_id] = (country_name, country_code)
- country_elt = nextSiblingElt(country_elt, "country")
+def firstChildText(elt, key):
+ return ' '.join(eltWords(firstChildElt(elt, key)))
- return result
+def loadMap(doc, category):
+ return dict((int(firstChildText(element, 'id')),
+ (firstChildText(element, 'name'),
+ firstChildText(element, 'code')))
+ for element in eachEltInGroup(doc.documentElement,
+ category + 'List', category))
def loadLikelySubtagsMap(doc):
- result = {}
+ def triplet(element, keys=('language', 'script', 'country')):
+ return tuple(firstChildText(element, key) for key in keys)
- i = 0
- list_elt = firstChildElt(doc.documentElement, "likelySubtags")
- elt = firstChildElt(list_elt, "likelySubtag")
- while elt:
- elt_from = firstChildElt(elt, "from")
- from_language = eltText(firstChildElt(elt_from, "language"));
- from_script = eltText(firstChildElt(elt_from, "script"));
- from_country = eltText(firstChildElt(elt_from, "country"));
-
- elt_to = firstChildElt(elt, "to")
- to_language = eltText(firstChildElt(elt_to, "language"));
- to_script = eltText(firstChildElt(elt_to, "script"));
- to_country = eltText(firstChildElt(elt_to, "country"));
-
- tmp = {}
- tmp["from"] = (from_language, from_script, from_country)
- tmp["to"] = (to_language, to_script, to_country)
- result[i] = tmp;
- i += 1
- elt = nextSiblingElt(elt, "likelySubtag");
- return result
+ return dict((i, {'from': triplet(firstChildElt(elt, "from")),
+ 'to': triplet(firstChildElt(elt, "to"))})
+ for i, elt in enumerate(eachEltInGroup(doc.documentElement,
+ 'likelySubtags', 'likelySubtag')))
def fixedScriptName(name, dupes):
- name = name.replace(" ", "")
+ # Don't .capitalize() as some names are already camel-case (see enumdata.py):
+ name = ''.join(word[0].upper() + word[1:] for word in name.split())
if name[-6:] != "Script":
- name = name + "Script";
+ name = name + "Script"
if name in dupes:
sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name)
- sys.exit(1);
+ sys.exit(1)
return name
def fixedCountryName(name, dupes):
@@ -161,8 +124,8 @@ def fixedLanguageName(name, dupes):
return name.replace(" ", "")
def findDupes(country_map, language_map):
- country_set = set([ v[0] for a, v in country_map.iteritems() ])
- language_set = set([ v[0] for a, v in language_map.iteritems() ])
+ country_set = set(v[0] for a, v in country_map.iteritems())
+ language_set = set(v[0] for a, v in language_map.iteritems())
return country_set & language_set
def languageNameToId(name, language_map):
@@ -183,108 +146,11 @@ def countryNameToId(name, country_map):
return key
return -1
-def convertFormat(format):
- result = ""
- i = 0
- while i < len(format):
- if format[i] == "'":
- result += "'"
- i += 1
- while i < len(format) and format[i] != "'":
- result += format[i]
- i += 1
- if i < len(format):
- result += "'"
- i += 1
- else:
- s = format[i:]
- if s.startswith("EEEE"):
- result += "dddd"
- i += 4
- elif s.startswith("EEE"):
- result += "ddd"
- i += 3
- elif s.startswith("a"):
- result += "AP"
- i += 1
- elif s.startswith("z"):
- result += "t"
- i += 1
- elif s.startswith("v"):
- i += 1
- else:
- result += format[i]
- i += 1
-
- return result
-
-def convertToQtDayOfWeek(firstDay):
- qtDayOfWeek = {"mon":1, "tue":2, "wed":3, "thu":4, "fri":5, "sat":6, "sun":7}
- return qtDayOfWeek[firstDay]
-
-def assertSingleChar(string):
- assert len(string) == 1, "This string is not allowed to be longer than 1 character"
- return string
-
-class Locale:
- def __init__(self, elt):
- self.language = eltText(firstChildElt(elt, "language"))
- self.languageEndonym = eltText(firstChildElt(elt, "languageEndonym"))
- self.script = eltText(firstChildElt(elt, "script"))
- self.country = eltText(firstChildElt(elt, "country"))
- self.countryEndonym = eltText(firstChildElt(elt, "countryEndonym"))
- self.decimal = int(eltText(firstChildElt(elt, "decimal")))
- self.group = int(eltText(firstChildElt(elt, "group")))
- self.listDelim = int(eltText(firstChildElt(elt, "list")))
- self.percent = int(eltText(firstChildElt(elt, "percent")))
- self.zero = int(eltText(firstChildElt(elt, "zero")))
- self.minus = int(eltText(firstChildElt(elt, "minus")))
- self.plus = int(eltText(firstChildElt(elt, "plus")))
- self.exp = int(eltText(firstChildElt(elt, "exp")))
- self.quotationStart = ord(assertSingleChar(eltText(firstChildElt(elt, "quotationStart"))))
- self.quotationEnd = ord(assertSingleChar(eltText(firstChildElt(elt, "quotationEnd"))))
- self.alternateQuotationStart = ord(assertSingleChar(eltText(firstChildElt(elt, "alternateQuotationStart"))))
- self.alternateQuotationEnd = ord(assertSingleChar(eltText(firstChildElt(elt, "alternateQuotationEnd"))))
- self.listPatternPartStart = eltText(firstChildElt(elt, "listPatternPartStart"))
- self.listPatternPartMiddle = eltText(firstChildElt(elt, "listPatternPartMiddle"))
- self.listPatternPartEnd = eltText(firstChildElt(elt, "listPatternPartEnd"))
- self.listPatternPartTwo = eltText(firstChildElt(elt, "listPatternPartTwo"))
- self.am = eltText(firstChildElt(elt, "am"))
- self.pm = eltText(firstChildElt(elt, "pm"))
- self.firstDayOfWeek = convertToQtDayOfWeek(eltText(firstChildElt(elt, "firstDayOfWeek")))
- self.weekendStart = convertToQtDayOfWeek(eltText(firstChildElt(elt, "weekendStart")))
- self.weekendEnd = convertToQtDayOfWeek(eltText(firstChildElt(elt, "weekendEnd")))
- self.longDateFormat = convertFormat(eltText(firstChildElt(elt, "longDateFormat")))
- self.shortDateFormat = convertFormat(eltText(firstChildElt(elt, "shortDateFormat")))
- self.longTimeFormat = convertFormat(eltText(firstChildElt(elt, "longTimeFormat")))
- self.shortTimeFormat = convertFormat(eltText(firstChildElt(elt, "shortTimeFormat")))
- self.standaloneLongMonths = eltText(firstChildElt(elt, "standaloneLongMonths"))
- self.standaloneShortMonths = eltText(firstChildElt(elt, "standaloneShortMonths"))
- self.standaloneNarrowMonths = eltText(firstChildElt(elt, "standaloneNarrowMonths"))
- self.longMonths = eltText(firstChildElt(elt, "longMonths"))
- self.shortMonths = eltText(firstChildElt(elt, "shortMonths"))
- self.narrowMonths = eltText(firstChildElt(elt, "narrowMonths"))
- self.standaloneLongDays = eltText(firstChildElt(elt, "standaloneLongDays"))
- self.standaloneShortDays = eltText(firstChildElt(elt, "standaloneShortDays"))
- self.standaloneNarrowDays = eltText(firstChildElt(elt, "standaloneNarrowDays"))
- self.longDays = eltText(firstChildElt(elt, "longDays"))
- self.shortDays = eltText(firstChildElt(elt, "shortDays"))
- self.narrowDays = eltText(firstChildElt(elt, "narrowDays"))
- self.currencyIsoCode = eltText(firstChildElt(elt, "currencyIsoCode"))
- self.currencySymbol = eltText(firstChildElt(elt, "currencySymbol"))
- self.currencyDisplayName = eltText(firstChildElt(elt, "currencyDisplayName"))
- self.currencyDigits = int(eltText(firstChildElt(elt, "currencyDigits")))
- self.currencyRounding = int(eltText(firstChildElt(elt, "currencyRounding")))
- self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
- self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
-
def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
result = {}
- locale_list_elt = firstChildElt(doc.documentElement, "localeList")
- locale_elt = firstChildElt(locale_list_elt, "locale")
- while locale_elt:
- locale = Locale(locale_elt)
+ for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
+ locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k))
language_id = languageNameToId(locale.language, language_map)
if language_id == -1:
sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
@@ -318,8 +184,6 @@ def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map
result[(language_id, script_id, country_id)] = locale
- locale_elt = nextSiblingElt(locale_elt, "locale")
-
return result
def compareLocaleKeys(key1, key2):
@@ -385,9 +249,10 @@ class StringDataToken:
return " %d,%d " % (self.index, self.length)
class StringData:
- def __init__(self):
+ def __init__(self, name):
self.data = []
self.hash = {}
+ self.name = name
def append(self, s):
if s in self.hash:
return self.hash[s]
@@ -436,25 +301,24 @@ def escapedString(s):
line += "\\x%02x" % (ord(c))
need_escape = True
if len(line) > 80:
- result = result + "\n" + "\"" + line + "\""
+ result = result + "\n" + '"' + line + '"'
line = ""
line += "\\0"
- result = result + "\n" + "\"" + line + "\""
+ result = result + "\n" + '"' + line + '"'
if result[0] == "\n":
result = result[1:]
return result
def printEscapedString(s):
- print escapedString(s);
-
+ print escapedString(s)
def currencyIsoCodeData(s):
if s:
- return ",".join(map(lambda x: str(ord(x)), s))
- return "0,0,0"
+ return '{' + ",".join(str(ord(x)) for x in s) + '}'
+ return "{0,0,0}"
def usage():
- print "Usage: qlocalexml2cpp.py <path-to-locale.xml> <path-to-qt-src-tree>"
+ print "Usage: qlocalexml2cpp.py <path-to-locale.xml> <path-to-qtbase-src-tree>"
sys.exit(1)
GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
@@ -467,13 +331,9 @@ def main():
localexml = sys.argv[1]
qtsrcdir = sys.argv[2]
- if not os.path.exists(qtsrcdir) or not os.path.exists(qtsrcdir):
- usage()
- if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h"):
- usage()
- if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.h"):
- usage()
- if not os.path.isfile(qtsrcdir + "/src/corelib/tools/qlocale.qdoc"):
+ if not (os.path.isdir(qtsrcdir)
+ and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'tools', leaf))
+ for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
usage()
(data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir)
@@ -486,9 +346,9 @@ def main():
data_temp_file.write(GENERATED_BLOCK_START)
doc = xml.dom.minidom.parse(localexml)
- language_map = loadLanguageMap(doc)
- script_map = loadScriptMap(doc)
- country_map = loadCountryMap(doc)
+ language_map = loadMap(doc, 'language')
+ script_map = loadMap(doc, 'script')
+ country_map = loadMap(doc, 'country')
likely_subtags_map = loadLikelySubtagsMap(doc)
default_map = {}
for key in likely_subtags_map.keys():
@@ -498,19 +358,21 @@ def main():
locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
dupes = findDupes(language_map, country_map)
- cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
+ cldr_version = firstChildText(doc.documentElement, "version")
- data_temp_file.write("\n\
-/*\n\
- This part of the file was generated on %s from the\n\
- Common Locale Data Repository v%s\n\
-\n\
- http://www.unicode.org/cldr/\n\
-\n\
- Do not change it, instead edit CLDR data and regenerate this file using\n\
- cldr2qlocalexml.py and qlocalexml2cpp.py.\n\
-*/\n\n\n\
-" % (str(datetime.date.today()), cldr_version) )
+ data_temp_file.write("""
+/*
+ This part of the file was generated on %s from the
+ Common Locale Data Repository v%s
+
+ http://www.unicode.org/cldr/
+
+ Do not edit this section: instead regenerate it using
+ cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
+ edited) CLDR data; see qtbase/util/local_database/.
+*/
+
+""" % (str(datetime.date.today()), cldr_version) )
# Likely subtags map
data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
@@ -574,34 +436,116 @@ def main():
index += count
data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0]))
data_temp_file.write(" 0 // trailing 0\n")
- data_temp_file.write("};\n")
-
- data_temp_file.write("\n")
-
- list_pattern_part_data = StringData()
- date_format_data = StringData()
- time_format_data = StringData()
- months_data = StringData()
- days_data = StringData()
- am_data = StringData()
- pm_data = StringData()
- currency_symbol_data = StringData()
- currency_display_name_data = StringData()
- currency_format_data = StringData()
- endonyms_data = StringData()
+ data_temp_file.write("};\n\n")
+
+ list_pattern_part_data = StringData('list_pattern_part_data')
+ date_format_data = StringData('date_format_data')
+ time_format_data = StringData('time_format_data')
+ months_data = StringData('months_data')
+ days_data = StringData('days_data')
+ am_data = StringData('am_data')
+ pm_data = StringData('pm_data')
+ byte_unit_data = StringData('byte_unit_data')
+ currency_symbol_data = StringData('currency_symbol_data')
+ currency_display_name_data = StringData('currency_display_name_data')
+ currency_format_data = StringData('currency_format_data')
+ endonyms_data = StringData('endonyms_data')
# Locale data
data_temp_file.write("static const QLocaleData locale_data[] = {\n")
- data_temp_file.write("// lang script terr dec group list prcnt zero minus plus exp quotStart quotEnd altQuotStart altQuotEnd lpStart lpMid lpEnd lpTwo sDtFmt lDtFmt sTmFmt lTmFmt ssMonth slMonth sMonth lMonth sDays lDays am,len pm,len\n")
+ # Table headings: keep each label centred in its field, matching line_format:
+ data_temp_file.write(' // '
+ # Width 6 + comma:
+ + ' lang ' # IDs
+ + 'script '
+ + ' terr '
+ + ' dec ' # Numeric punctuation:
+ + ' group '
+ + ' list ' # List delimiter
+ + ' prcnt ' # Arithmetic symbols:
+ + ' zero '
+ + ' minus '
+ + ' plus '
+ + ' exp '
+ # Width 8 + comma - to make space for these wide labels !
+ + ' quotOpn ' # Quotation marks
+ + ' quotEnd '
+ + 'altQtOpn '
+ + 'altQtEnd '
+ # Width 11 + comma:
+ + ' lpStart ' # List pattern
+ + ' lpMid '
+ + ' lpEnd '
+ + ' lpTwo '
+ + ' sDtFmt ' # Date format
+ + ' lDtFmt '
+ + ' sTmFmt ' # Time format
+ + ' lTmFmt '
+ + ' ssMonth ' # Months
+ + ' slMonth '
+ + ' snMonth '
+ + ' sMonth '
+ + ' lMonth '
+ + ' nMonth '
+ + ' ssDays ' # Days
+ + ' slDays '
+ + ' snDays '
+ + ' sDays '
+ + ' lDays '
+ + ' nDays '
+ + ' am ' # am/pm indicators
+ + ' pm '
+ # Width 8 + comma
+ + ' byte '
+ + ' siQuant '
+ + 'iecQuant '
+ # Width 8+4 + comma
+ + ' currISO '
+ # Width 11 + comma:
+ + ' currSym ' # Currency formatting:
+ + ' currDsply '
+ + ' currFmt '
+ + ' currFmtNeg '
+ + ' endoLang ' # Name of language in itself, and of country:
+ + ' endoCntry '
+ # Width 6 + comma:
+ + 'curDgt ' # Currency number representation:
+ + 'curRnd '
+ + 'dow1st ' # First day of week
+ + ' wknd+ ' # Week-end start/end days:
+ + ' wknd-'
+ # No trailing space on last entry (be sure to
+ # pad before adding anything after it).
+ + '\n')
locale_keys = locale_map.keys()
compareLocaleKeys.default_map = default_map
compareLocaleKeys.locale_map = locale_map
locale_keys.sort(compareLocaleKeys)
+ line_format = (' { '
+ # Locale-identifier:
+ + '%6d,' * 3
+ # Numeric formats, list delimiter:
+ + '%6d,' * 8
+ # Quotation marks:
+ + '%8d,' * 4
+ # List patterns, date/time formats, month/day names, am/pm:
+ + '%11s,' * 22
+ # SI/IEC byte-unit abbreviations:
+ + '%8s,' * 3
+ # Currency ISO code:
+ + ' %10s, '
+ # Currency and endonyms
+ + '%11s,' * 6
+ # Currency formatting:
+ + '%6d,%6d'
+ # Day of week and week-end:
+ + ',%6d' * 3
+ + ' }')
for key in locale_keys:
l = locale_map[key]
- data_temp_file.write(" { %6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%6d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s, {%s}, %s,%s,%s,%s,%s,%s,%6d,%6d,%6d,%6d,%6d }, // %s/%s/%s\n" \
+ data_temp_file.write(line_format
% (key[0], key[1], key[2],
l.decimal,
l.group,
@@ -637,6 +581,9 @@ def main():
days_data.append(l.narrowDays),
am_data.append(l.am),
pm_data.append(l.pm),
+ byte_unit_data.append(l.byte_unit),
+ byte_unit_data.append(l.byte_si_quantified),
+ byte_unit_data.append(l.byte_iec_quantified),
currencyIsoCodeData(l.currencyIsoCode),
currency_symbol_data.append(l.currencySymbol),
currency_display_name_data.append(l.currencyDisplayName),
@@ -648,97 +595,34 @@ def main():
l.currencyRounding,
l.firstDayOfWeek,
l.weekendStart,
- l.weekendEnd,
- l.language,
- l.script,
- l.country))
- data_temp_file.write(" { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, {0,0,0}, 0,0, 0,0, 0,0, 0,0, 0, 0, 0, 0, 0, 0,0, 0,0 } // trailing 0s\n")
+ l.weekendEnd)
+ + ", // %s/%s/%s\n" % (l.language, l.script, l.country))
+ data_temp_file.write(line_format # All zeros, matching the format:
+ % ( (0,) * (3 + 8 + 4) + ("0,0",) * (22 + 3)
+ + (currencyIsoCodeData(0),)
+ + ("0,0",) * 6 + (0,) * (2 + 3))
+ + " // trailing 0s\n")
data_temp_file.write("};\n")
- data_temp_file.write("\n")
-
- # List patterns data
- data_temp_file.write("static const ushort list_pattern_part_data[] = {\n")
- data_temp_file.write(wrap_list(list_pattern_part_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Date format data
- data_temp_file.write("static const ushort date_format_data[] = {\n")
- data_temp_file.write(wrap_list(date_format_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Time format data
- data_temp_file.write("static const ushort time_format_data[] = {\n")
- data_temp_file.write(wrap_list(time_format_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Months data
- data_temp_file.write("static const ushort months_data[] = {\n")
- data_temp_file.write(wrap_list(months_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Days data
- data_temp_file.write("static const ushort days_data[] = {\n")
- data_temp_file.write(wrap_list(days_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # AM data
- data_temp_file.write("static const ushort am_data[] = {\n")
- data_temp_file.write(wrap_list(am_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # PM data
- data_temp_file.write("static const ushort pm_data[] = {\n")
- data_temp_file.write(wrap_list(pm_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Currency symbol data
- data_temp_file.write("static const ushort currency_symbol_data[] = {\n")
- data_temp_file.write(wrap_list(currency_symbol_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Currency display name data
- data_temp_file.write("static const ushort currency_display_name_data[] = {\n")
- data_temp_file.write(wrap_list(currency_display_name_data.data))
- data_temp_file.write("\n};\n")
-
- data_temp_file.write("\n")
-
- # Currency format data
- data_temp_file.write("static const ushort currency_format_data[] = {\n")
- data_temp_file.write(wrap_list(currency_format_data.data))
- data_temp_file.write("\n};\n")
-
- # Endonyms data
- data_temp_file.write("static const ushort endonyms_data[] = {\n")
- data_temp_file.write(wrap_list(endonyms_data.data))
- data_temp_file.write("\n};\n")
+ # StringData tables:
+ for data in (list_pattern_part_data, date_format_data,
+ time_format_data, months_data, days_data,
+ byte_unit_data, am_data, pm_data, currency_symbol_data,
+ currency_display_name_data, currency_format_data,
+ endonyms_data):
+ data_temp_file.write("\nstatic const ushort %s[] = {\n" % data.name)
+ data_temp_file.write(wrap_list(data.data))
+ data_temp_file.write("\n};\n")
data_temp_file.write("\n")
# Language name list
data_temp_file.write("static const char language_name_list[] =\n")
- data_temp_file.write("\"Default\\0\"\n")
+ data_temp_file.write('"Default\\0"\n')
for key in language_map.keys():
if key == 0:
continue
- data_temp_file.write("\"" + language_map[key][0] + "\\0\"\n")
+ data_temp_file.write('"' + language_map[key][0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -759,11 +643,11 @@ def main():
# Script name list
data_temp_file.write("static const char script_name_list[] =\n")
- data_temp_file.write("\"Default\\0\"\n")
+ data_temp_file.write('"Default\\0"\n')
for key in script_map.keys():
if key == 0:
continue
- data_temp_file.write("\"" + script_map[key][0] + "\\0\"\n")
+ data_temp_file.write('"' + script_map[key][0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -784,11 +668,11 @@ def main():
# Country name list
data_temp_file.write("static const char country_name_list[] =\n")
- data_temp_file.write("\"Default\\0\"\n")
+ data_temp_file.write('"Default\\0"\n')
for key in country_map.keys():
if key == 0:
continue
- data_temp_file.write("\"" + country_map[key][0] + "\\0\"\n")
+ data_temp_file.write('"' + country_map[key][0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -813,7 +697,7 @@ def main():
code = language_map[key][1]
if len(code) == 2:
code += r"\0"
- data_temp_file.write("\"%2s\" // %s\n" % (code, language_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, language_map[key][0]))
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -824,7 +708,7 @@ def main():
code = script_map[key][1]
for i in range(4 - len(code)):
code += "\\0"
- data_temp_file.write("\"%2s\" // %s\n" % (code, script_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, script_map[key][0]))
data_temp_file.write(";\n")
# Country code list
@@ -833,13 +717,13 @@ def main():
code = country_map[key][1]
if len(code) == 2:
code += "\\0"
- data_temp_file.write("\"%2s\" // %s\n" % (code, country_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, country_map[key][0]))
data_temp_file.write(";\n")
data_temp_file.write("\n")
data_temp_file.write(GENERATED_BLOCK_END)
s = qlocaledata_file.readline()
- # skip until end of the block
+ # skip until end of the old block
while s and s != GENERATED_BLOCK_END:
s = qlocaledata_file.readline()
@@ -931,7 +815,7 @@ def main():
qlocaleh_temp_file.write(GENERATED_BLOCK_END)
s = qlocaleh_file.readline()
- # skip until end of the block
+ # skip until end of the old block
while s and s != GENERATED_BLOCK_END:
s = qlocaleh_file.readline()