summaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
Diffstat (limited to 'util')
-rw-r--r--util/aglfn/main.cpp5
-rwxr-xr-xutil/cmake/pro2cmake.py2
-rwxr-xr-xutil/edid/qedidvendortable.py4
-rw-r--r--util/gradientgen/gradientgen.cpp4
-rw-r--r--util/locale_database/README12
-rw-r--r--util/locale_database/cldr.py227
-rwxr-xr-xutil/locale_database/cldr2qlocalexml.py25
-rwxr-xr-xutil/locale_database/cldr2qtimezone.py175
-rw-r--r--util/locale_database/ldml.py1
-rw-r--r--util/locale_database/localetools.py16
-rw-r--r--util/locale_database/qlocalexml.py282
-rw-r--r--util/locale_database/qlocalexml.rnc22
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py200
-rw-r--r--util/locale_database/zonedata.py94
-rwxr-xr-xutil/update_public_suffix_list.sh2
15 files changed, 619 insertions, 452 deletions
diff --git a/util/aglfn/main.cpp b/util/aglfn/main.cpp
index afcf8c2b2c..6c8dd78828 100644
--- a/util/aglfn/main.cpp
+++ b/util/aglfn/main.cpp
@@ -1,5 +1,7 @@
+// REUSE-IgnoreStart
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+// REUSE-IgnoreEnd
#include <qbytearray.h>
#include <qlist.h>
@@ -120,11 +122,12 @@ static QByteArray createGlyphList()
int main(int, char **)
{
readGlyphList();
-
+// REUSE-IgnoreStart
QByteArray header =
"// Copyright (C) 2016 The Qt Company Ltd.\n"
"// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only\n"
"\n";
+// REUSE-IgnoreEnd
QByteArray note =
"/* This file is autogenerated from the Adobe Glyph List database" +
diff --git a/util/cmake/pro2cmake.py b/util/cmake/pro2cmake.py
index 0ef35410ce..bc43f3ec28 100755
--- a/util/cmake/pro2cmake.py
+++ b/util/cmake/pro2cmake.py
@@ -4666,7 +4666,7 @@ def create_top_level_cmake_conf():
conf_file_name = ".cmake.conf"
try:
with open(conf_file_name, "x") as file:
- file.write('set(QT_REPO_MODULE_VERSION "6.8.0")\n')
+ file.write('set(QT_REPO_MODULE_VERSION "6.9.0")\n')
except FileExistsError:
pass
diff --git a/util/edid/qedidvendortable.py b/util/edid/qedidvendortable.py
index e1c5750695..0991d75db7 100755
--- a/util/edid/qedidvendortable.py
+++ b/util/edid/qedidvendortable.py
@@ -8,12 +8,12 @@ import urllib.request
# 'https://git.fedorahosted.org/cgit/hwdata.git/plain/pnp.ids'
# which is discontinued. For now there seems to be a fork at:
url = 'https://github.com/vcrhonek/hwdata/raw/master/pnp.ids'
-
+# REUSE-IgnoreStart
copyright = """
// Copyright (C) 2017 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
"""
-
+# REUSE-IgnoreEnd
notice = """/*
* This lookup table was generated from {}
*
diff --git a/util/gradientgen/gradientgen.cpp b/util/gradientgen/gradientgen.cpp
index a4c5531cc5..23b20d0505 100644
--- a/util/gradientgen/gradientgen.cpp
+++ b/util/gradientgen/gradientgen.cpp
@@ -17,13 +17,13 @@
#include <QColor>
using namespace std;
-
+// REUSE-IgnoreStart
static const char LICENSE_HEADER[] =
R"(
// Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
)";
-
+// REUSE-IgnoreEnd
class Printer {
Q_DISABLE_COPY_MOVE(Printer)
public:
diff --git a/util/locale_database/README b/util/locale_database/README
index b910d36f2a..460f51993a 100644
--- a/util/locale_database/README
+++ b/util/locale_database/README
@@ -5,10 +5,8 @@ data (like date formats, country names etc). It is provided by the
Unicode consortium.
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
-update the locale data tables (principally text/qlocale_data_p.h and
-time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
-and how to update the data it provides. You shall definitely need to
-pass --no-verify or -n to git commit for these changes.
-
-See cldr2qtimezone.py on how to update tables of Windows-specific
-names for zones and UTC-offset zone names.
+update the locale data tables (principally text/qlocale_data_p.h,
+time/q*calendar_data_p.h and time/qtimezone*_data_p.h under
+src/corelib/). See enumdata.py and zonedata.py for when and how to
+update the data they provide. You shall definitely need to pass
+--no-verify or -n to git commit for these changes.
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 9e0bae9667..75d687dd11 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -74,8 +74,79 @@ class CldrReader (object):
# more out.
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
+    def zoneData(self):
+        """Locale-independent timezone data.
+
+        Returns a triple (alias, defaults, winIds) in which:
+          * alias is a mapping from aliases for IANA zone IDs, that
+            have the form of IANA IDs, to actual current IANA IDs; in
+            particular, this maps each CLDR zone ID to its
+            corresponding IANA ID.
+          * defaults maps each Windows name for a zone to the IANA ID
+            to use for it by default (when no territory is specified,
+            or when no entry in winIds matches the given Windows name
+            and territory).
+          * winIds is a mapping {(winId, land): ianaList} from Windows
+            name and territory code to the space-joined list of IANA
+            IDs associated with the Windows name in the given
+            territory.
+
+        Along the way, this reports on: any Windows IDs given in
+        zonedata.windowsIdList that are no longer covered by the CLDR
+        data; any (Windows ID, territory) pair that CLDR lists more
+        than once, whose lists of IANA IDs are merged into one entry;
+        and any territories found in CLDR timezone data that are not
+        mentioned in enumdata.territory_map, whose entries are
+        skipped."""
+        # NOTE(review): cldr2qlocalexml.py passes bare write callables
+        # to CldrReader; confirm that self.whitter and self.grumble do
+        # have the .write attribute used below.
+        alias, ignored = self.root.bcp47Aliases()
+        defaults, winIds = self.root.readWindowsTimeZones(alias)
+
+        from zonedata import windowsIdList
+        winUnused = set(n for n, o in windowsIdList).difference(
+            set(defaults).union(w for w, t, ids in winIds))
+        if winUnused:
+            # Sorted, so that the report is the same from run to run:
+            joined = "\n\t".join(sorted(winUnused))
+            self.whitter.write(
+                f'No Windows ID in\n\t{joined}\nis still in use.\n'
+                'They could be removed at the next major version.\n')
+
+        # Check for duplicate entries in winIds: group the IANA lists
+        # by (Windows ID, territory); any key with several lists is a
+        # duplicate.
+        grouped = {}
+        for w, t, ids in winIds:
+            grouped.setdefault((w, t), []).append(ids)
+        winDup = {key: seq for key, seq in grouped.items() if len(seq) > 1}
+        if winDup:
+            joined = '\n\t'.join(f'{t}, {w}: {", ".join(ids)}'
+                                 for (w, t), ids in winDup.items())
+            self.whitter.write(
+                f'Duplicated (territory, Windows ID) entries:\n\t{joined}\n')
+            winIds = [trip for trip in winIds if trip[:2] not in winDup]
+            for (w, t), seq in winDup.items():
+                # Merge every list given for this pair (the first
+                # included), keeping each IANA ID once, in the order
+                # in which it was first seen:
+                ianaList = []
+                for ids in seq:
+                    for iana in ids.split():
+                        if iana not in ianaList:
+                            ianaList.append(iana)
+                winIds.append((w, t, ' '.join(ianaList)))
+
+        from enumdata import territory_map
+        unLand = set(t for w, t, ids in winIds).difference(
+            v[1] for v in territory_map.values())
+        if unLand:
+            self.grumble.write(
+                'Unknown territory codes in timezone data: '
+                f'{", ".join(sorted(unLand))}\n'
+                'Skipping Windows zone mappings for these territories\n')
+            winIds = [(w, t, ids) for w, t, ids in winIds if t not in unLand]
+
+        # Convert list of triples to mapping:
+        winIds = {(w, t): ids for w, t, ids in winIds}
+        return alias, defaults, winIds
+
def readLocales(self, calendars = ('gregorian',)):
- return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
+ return {(k.language_id, k.script_id, k.territory_id, k.variant_id): k
for k in self.__allLocales(calendars)}
def __allLocales(self, calendars):
@@ -193,7 +264,7 @@ class CldrReader (object):
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
territory = names[2], territory_code = territory, territory_id = ids[2],
- variant_code = variant)
+ variant_code = variant, variant_id = ids[3])
firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
@@ -405,69 +476,115 @@ enumdata.py (keeping the old name as an alias):
+ '\n')
grumble('\n')
- def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
+    def bcp47Aliases(self):
+        """Reads CLDR's aliasing among time-zone IDs.
+
+        CLDR's standard name for a timezone, its CLDR ID, has the
+        form of an IANA ID. CLDR IDs stay fixed, while IANA may
+        rename a zone, as when Asia/Calcutta became Asia/Kolkata. A
+        zone newly added to CLDR gets the IANA ID current at the time
+        as its CLDR ID and keeps it even if IANA later supersedes it,
+        so a mapping from CLDR IDs to current IANA IDs is needed.
+
+        This is read from common/bcp47/timezone.xml, whose
+        keyword/key/type elements have attributes:
+
+          name -- zone code (ignored)
+          description -- long name for exemplar location, including
+                         territory
+
+        and some of:
+
+          deprecated -- the entry is ignored if this is present (it
+                        has no alias)
+          preferred -- only present if deprecated
+          since -- version at which this entry was added (ignored)
+          alias -- space-joined sequence of IANA-form IDs, of which
+                   the first is the CLDR ID
+          iana -- if present, repeats whichever alias entry is the
+                  modern IANA ID
+
+        Returns a pair (alias, naming) of mappings: alias maps each
+        IANA-format ID to the ID IANA currently uses for its zone;
+        naming maps each such current IANA ID to the description
+        given in the keyword/key/type entry it shares with its
+        aliases."""
+        # The file has the same form as the supplements:
+        supplement = Supplement(Node(self.__xml('common/bcp47/timezone.xml')))
+
+        # Should a mapping back to CLDR ID ever be needed, the values
+        # of described could become (description, space-joined-list).
+        toIana, described = {}, {} # { alias: iana }, { iana: description }
+        for item, attrs in supplement.find('keyword/key/type', exclude=('deprecated',)):
+            assert 'description' in attrs, item
+            assert 'alias' in attrs, item
+            cldrIds = attrs['alias'].split()
+            assert all(name not in toIana for name in cldrIds), item
+            # The first alias is the CLDR ID; the iana attribute, when
+            # present, says which name IANA now uses instead.
+            canonical = attrs.get('iana', cldrIds[0])
+            description = attrs['description']
+            for name in cldrIds:
+                toIana[name] = canonical
+            assert canonical not in described
+            described[canonical] = description
+
+        return toIana, described
+
+ def readWindowsTimeZones(self, alias):
"""Digest CLDR's MS-Win time-zone name mapping.
- MS-Win have their own eccentric names for time-zones. CLDR
- helpfully provides a translation to more orthodox names.
-
- Single argument, lookup, is a mapping from known MS-Win names
- for locales to a unique integer index (starting at 1).
-
- The XML structure we read has the form:
-
- <supplementalData>
- <windowsZones>
- <mapTimezones otherVersion="..." typeVersion="...">
- <!-- (UTC-08:00) Pacific Time (US & Canada) -->
- <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
- <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
- <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/>
- <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
- </mapTimezones>
- </windowsZones>
- </supplementalData>
-"""
+ Single argument, alias, should be the first part of the pair
+ returned by a call to bcp47Aliases(); it shall be used to
+ transform CLDR IDs into IANA IDs.
+
+ MS-Win have their own eccentric names for time-zones. CLDR
+ helpfully provides a translation to more orthodox names,
+ albeit these are CLDR IDs - see bcp47Aliases() - rather than
+ (up to date) IANA IDs. The windowsZones.xml supplement has
+ supplementalData/windowsZones/mapTimezones/mapZone nodes with
+ attributes
+
+ territory -- ISO code
+ type -- space-joined sequence of CLDR IDs of zones
+ other -- Windows name of these zones in the given territory
+
+ When 'territory' is '001', type is always just a single CLDR
+ zone ID. This is the default zone for the given Windows name.
+
+ For each mapZone node, its type is split on spacing and
+ cleaned up as follows. Those entries that are keys of alias
+ are mapped thereby to their canonical IANA IDs; all others are
+ presumed to be canonical IANA IDs and left unchanged. Any
+ later duplicates of earlier entries are omitted. The result
+ list of IANA IDs is joined with single spaces between to give
+ a string s.
+
+ Returns a twople (defaults, windows) in which defaults is a
+ mapping, from Windows ID to IANA ID (derived from the mapZone
+ nodes with territory='001'), and windows is a list of triples
+ (Windows ID, territory code, IANA ID list) in which the first
+ two entries are the 'other' and 'territory' fields of a
+ mapZone element and the last is s, its cleaned-up list of IANA
+ IDs."""
+
+ defaults, windows = {}, []
zones = self.supplement('windowsZones.xml')
- enum = self.__enumMap('territory')
- badZones, unLands, defaults, windows = set(), set(), {}, {}
-
for name, attrs in zones.find('windowsZones/mapTimezones'):
if name != 'mapZone':
continue
- wid, code = attrs['other'], attrs['territory']
- data = dict(windowsId = wid,
- territoryCode = code,
- ianaList = ' '.join(attrs['type'].split()))
-
- try:
- key = lookup[wid]
- except KeyError:
- badZones.add(wid)
- key = 0
- data['windowsKey'] = key
+ wid, code, ianas = attrs['other'], attrs['territory'], []
+ for cldr in attrs['type'].split():
+ iana = alias.get(cldr, cldr)
+ if iana not in ianas:
+ ianas.append(iana)
if code == '001':
- defaults[key] = data['ianaList']
+ assert len(ianas) == 1, (wid, *ianas)
+ defaults[wid] = ianas[0]
else:
- try:
- cid, name = enum[code]
- except KeyError:
- unLands.append(code)
- continue
- data.update(territoryId = cid, territory = name)
- windows[key, cid] = data
-
- if unLands:
- raise Error('Unknown territory codes, please add to enumdata.py: '
- + ', '.join(sorted(unLands)))
-
- if badZones:
- raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
- + ', '.join(sorted(badZones)))
+ windows.append((wid, code, ' '.join(ianas)))
- return self.cldrVersion, defaults, windows
+ return defaults, windows
@property
def cldrVersion(self):
@@ -548,6 +665,8 @@ enumdata.py (keeping the old name as an alias):
source = self.__supplementalData
for elt in source.findNodes('currencyData/region'):
iso, digits, rounding = '', 2, 1
+ # TODO: fractions/info[iso4217=DEFAULT] has rounding=0 - why do we differ ?
+ # Also: some fractions/info have cashDigits and cashRounding - should we use them ?
try:
territory = elt.dom.attributes['iso3166'].nodeValue
except KeyError:
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index d3aa88ec38..f13a77c601 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -27,10 +27,8 @@ append new entries to enumdata.py's lists and update documentation in
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
order.
-While updating the locale data, check also for updates to MS-Win's
-time zone names; see cldr2qtimezone.py for details.
-
-All the scripts mentioned support --help to tell you how to use them.
+Both of the scripts mentioned support --help to tell you how to use
+them.
.. _CLDR: https://unicode.org/Public/cldr/
.. _github: https://github.com/unicode-org/cldr
@@ -64,7 +62,10 @@ def main(argv, out, err):
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
-
+ parser.add_argument('-v', '--verbose', help='more verbose output',
+ action='count', default=0)
+ parser.add_argument('-q', '--quiet', help='less output',
+ dest='verbose', action='store_const', const=-1)
args = parser.parse_args(argv[1:])
root = Path(args.cldr_path)
@@ -85,14 +86,18 @@ def main(argv, out, err):
except IOError as e:
parser.error(f'Failed to open "{xml}" to write output to it')
- # TODO - command line options to tune choice of grumble and whitter:
- reader = CldrReader(root, err.write, err.write)
- writer = QLocaleXmlWriter(emit.write)
+ reader = CldrReader(root,
+ (lambda *x: None) if args.verbose < 0 else
+ # Use stderr for logging if stdout is where our XML is going:
+ err.write if out is emit else out.write,
+ err.write)
+ writer = QLocaleXmlWriter(reader.root.cldrVersion, emit.write)
- writer.version(reader.root.cldrVersion)
writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
- writer.locales(reader.readLocales(args.calendars), args.calendars)
+ writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
+ en_US = tuple(id for id, name in reader.root.codesToIdName('en', '', 'US'))
+ writer.locales(reader.readLocales(args.calendars), args.calendars, en_US)
writer.close(err.write)
return 0
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
deleted file mode 100755
index 485177c0bd..0000000000
--- a/util/locale_database/cldr2qtimezone.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (C) 2021 The Qt Company Ltd.
-# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-"""Parse CLDR data for QTimeZone use with MS-Windows
-
-Script to parse the CLDR common/supplemental/windowsZones.xml file and
-prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
-where to get the CLDR data. Pass its root directory as first parameter
-to this script. You can optionally pass the qtbase root directory as
-second parameter; it defaults to the root of the checkout containing
-this script. This script updates qtbase's
-src/corelib/time/qtimezoneprivate_data_p.h with the new data.
-"""
-
-import datetime
-from pathlib import Path
-import textwrap
-import argparse
-
-from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
-from cldr import CldrAccess
-# This script shall report any updates zonedata may need.
-from zonedata import windowsIdList, utcIdList
-
-class ByteArrayData:
- def __init__(self):
- self.data = []
- self.hash = {}
-
- def append(self, s):
- s = s + '\0'
- if s in self.hash:
- return self.hash[s]
-
- lst = unicode2hex(s)
- index = len(self.data)
- if index > 0xffff:
- raise Error(f'Index ({index}) outside the uint16 range !')
- self.hash[s] = index
- self.data += lst
- return index
-
- def write(self, out, name):
- out(f'\nstatic constexpr char {name}[] = {{\n')
- out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
- # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
- out('\n};\n')
-
-class ZoneIdWriter (SourceFileEditor):
- # All the output goes into namespace QtTimeZoneCldr.
- def write(self, version, defaults, windowsIds):
- self.__writeWarning(version)
- windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
- windows.write(self.writer.write, 'windowsIdData')
- iana.write(self.writer.write, 'ianaIdData')
-
- def __writeWarning(self, version):
- self.writer.write(f"""
-/*
- This part of the file was generated on {datetime.date.today()} from the
- Common Locale Data Repository v{version} file supplemental/windowsZones.xml
-
- http://www.unicode.org/cldr/
-
- Do not edit this code: run cldr2qtimezone.py on updated (or
- edited) CLDR data; see qtbase/util/locale_database/.
-*/
-
-""")
-
- @staticmethod
- def __writeTables(out, defaults, windowsIds):
- windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()
-
- # Write Windows/IANA table
- out('// Windows ID Key, Territory Enum, IANA ID Index\n')
- out('static constexpr ZoneData zoneDataTable[] = {\n')
- # Sorted by (Windows ID Key, territory enum)
- for index, data in sorted(windowsIds.items()):
- out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
- data['windowsKey'], data['territoryId'],
- ianaIdData.append(data['ianaList']),
- data['windowsId'], data['territory']))
- out('};\n\n')
-
- # Write Windows ID key table
- out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
- out('static constexpr WindowsData windowsDataTable[] = {\n')
- # Sorted by Windows ID key; sorting case-insensitively by
- # Windows ID must give the same order.
- winIdNames = [x.lower() for x, y in windowsIdList]
- assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \
- [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y]
- for index, pair in enumerate(windowsIdList, 1):
- out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
- index,
- windowsIdData.append(pair[0]),
- ianaIdData.append(defaults[index]),
- pair[1], pair[0]))
- out('};\n\n')
-
- offsetMap = {}
- for pair in utcIdList:
- offsetMap[pair[1]] = offsetMap.get(pair[1], ()) + (pair[0],)
- # Write UTC ID key table
- out('// IANA ID Index, UTC Offset\n')
- out('static constexpr UtcData utcDataTable[] = {\n')
- for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
- names = offsetMap[offset];
- out(' {{ {:6d},{:6d} }}, // {}\n'.format(
- ianaIdData.append(' '.join(names)), offset, names[0]))
- out('};\n')
-
- return windowsIdData, ianaIdData
-
-
-def main(out, err):
- """Parses CLDR's data and updates Qt's representation of it.
-
- Takes sys.stdout, sys.stderr (or equivalents) as
- arguments. Expects two command-line options: the root of the
- unpacked CLDR data-file tree and the root of the qtbase module's
- checkout. Updates QTimeZone's private data about Windows time-zone
- IDs."""
- parser = argparse.ArgumentParser(
- description="Update Qt's CLDR-derived timezone data.")
- parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
- parser.add_argument('qtbase_path',
- help='path to the root of the qtbase source tree',
- nargs='?', default=qtbase_root)
-
- args = parser.parse_args()
-
- cldrPath = Path(args.cldr_path)
- qtPath = Path(args.qtbase_path)
-
- if not qtPath.is_dir():
- parser.error(f"No such Qt directory: {qtPath}")
-
- if not cldrPath.is_dir():
- parser.error(f"No such CLDR directory: {cldrPath}")
-
- dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
-
- if not dataFilePath.is_file():
- parser.error(f'No such file: {dataFilePath}')
-
- try:
- version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
- dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
- except IOError as e:
- parser.error(
- f'Failed to open common/supplemental/windowsZones.xml: {e}')
- return 1
- except Error as e:
- err.write('\n'.join(textwrap.wrap(
- f'Failed to read windowsZones.xml: {e}',
- subsequent_indent=' ', width=80)) + '\n')
- return 1
-
- out.write('Input file parsed, now writing data\n')
-
- try:
- with ZoneIdWriter(dataFilePath, qtPath) as writer:
- writer.write(version, defaults, winIds)
- except Exception as e:
- err.write(f'\nError while updating timezone data: {e}\n')
- return 1
-
- out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
- return 0
-
-if __name__ == '__main__':
- import sys
- sys.exit(main(sys.stdout, sys.stderr))
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index b94c242172..219d1f7145 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -358,6 +358,7 @@ class LocaleScanner (object):
def endonyms(self, language, script, territory, variant):
# TODO: take variant into account ?
+ # TODO: QTBUG-47892, support query for all combinations
for seq in ((language, script, territory),
(language, script), (language, territory), (language,)):
if not all(seq):
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index 02ec7cafc7..a7fcd08727 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -26,6 +26,22 @@ class Error (Exception):
def __str__(self):
return self.message
+def qtVersion(root = qtbase_root, pfx = 'set(QT_REPO_MODULE_VERSION '):
+    """Read the Qt version from a checkout's .cmake.conf file.
+
+    First argument, root, is the directory containing .cmake.conf;
+    second, pfx, is the start of the line that sets the version. The
+    version is the quoted text following pfx on the first line that
+    starts with it; this is returned, stripped of surrounding spacing.
+
+    Raises Error if no line starts with pfx, or if what follows pfx is
+    not quoted or lacks its closing quote."""
+    with open(root.joinpath('.cmake.conf')) as fd:
+        for line in fd:
+            if line.startswith(pfx):
+                tail = line[len(pfx):].strip()
+                assert tail, ('No Qt version given', line)
+                if not tail.startswith(('"', "'")):
+                    raise Error(f'Missing quotes on Qt version: {tail}')
+                # Find the matching close-quote; report its absence as
+                # an Error rather than leaking str.index()'s ValueError.
+                cut = tail.find(tail[0], 1)
+                if cut < 0:
+                    raise Error(f'Unterminated quote on Qt version: {tail}')
+                assert cut > 5, ('Truncated Qt version', tail)
+                version = tail[1:cut].strip()
+                assert all(x.isdigit() for x in version.split('.')), version
+                return version
+    raise Error(f'Failed to find {pfx}...) line in {root.joinpath(".cmake.conf")}')
+qtVersion = qtVersion()
+
def unicode2hex(s):
lst = []
for x in s:
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 5cb56c2165..f90684e481 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -21,7 +21,7 @@ package manager lacks the jing package.
from xml.sax.saxutils import escape
-from localetools import Error
+from localetools import Error, qtVersion
# Tools used by Locale:
def camel(seq):
@@ -66,7 +66,13 @@ class QLocaleXmlReader (object):
self.__landByName = {v[1]: (v[0], v[2]) for v in territories}
# Other properties:
self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
- self.cldrVersion = self.__firstChildText(self.root, "version")
+
+ self.cldrVersion = self.root.attributes['versionCldr'].nodeValue
+ self.qtVersion = self.root.attributes['versionQt'].nodeValue
+ assert self.qtVersion == qtVersion, (
+ 'Using QLocaleXml file from incompatible Qt version',
+ self.qtVersion, qtVersion
+ )
def loadLocaleMap(self, calendars, grumble = lambda text: None):
kid = self.__firstChildText
@@ -97,6 +103,21 @@ class QLocaleXmlReader (object):
yield (language, script, territory), locale
+    def aliasToIana(self):
+        """Yield an (alias, iana) pair of texts for each zoneAlias
+        element in the zoneAliases group."""
+        text = self.__firstChildText
+        for node in self.__eachEltInGroup(self.root, 'zoneAliases', 'zoneAlias'):
+            alias, iana = text(node, 'alias'), text(node, 'iana')
+            yield alias, iana
+
+    def msToIana(self):
+        """Yield an (msid, iana) pair of texts for each msZoneIana
+        element in the windowsZone group."""
+        text = self.__firstChildText
+        for node in self.__eachEltInGroup(self.root, 'windowsZone', 'msZoneIana'):
+            msid, iana = text(node, 'msid'), text(node, 'iana')
+            yield msid, iana
+
+    def msLandIanas(self):
+        """Yield an (msid, territorycode, ianaids) triple of texts for
+        each msLandZones element in the windowsZone group."""
+        text = self.__firstChildText
+        for node in self.__eachEltInGroup(self.root, 'windowsZone', 'msLandZones'):
+            msid = text(node, 'msid')
+            land = text(node, 'territorycode')
+            yield msid, land, text(node, 'ianaids')
+
def languageIndices(self, locales):
index = 0
for key, value in self.languages.items():
@@ -195,6 +216,8 @@ class QLocaleXmlReader (object):
child = elt.firstChild
while child:
if child.nodeType == elt.TEXT_NODE:
+ # Note: do not strip(), as some group separators are
+ # non-breaking spaces, that strip() will discard.
yield child.nodeValue
child = child.nextSibling
@@ -234,17 +257,16 @@ class Spacer (object):
First argument, indent, is either None (its default, for
'minifying'), an integer (number of spaces) or the unit of
text that is to be used for each indentation level (e.g. '\t'
- to use tabs). If indent is None, no indentation is added, nor
+ to use tabs). If indent is None, no indentation is added, nor
are line-breaks; otherwise, self(text), for non-empty text,
shall end with a newline and begin with indentation.
Second argument, initial, is the initial indentation; it is
- ignored if indent is None. Indentation increases after each
+ ignored if indent is None. Indentation increases after each
call to self(text) in which text starts with a tag and doesn't
include its end-tag; indentation decreases if text starts with
- an end-tag. The text is not parsed any more carefully than
- just described.
- """
+ an end-tag. The text is not parsed any more carefully than
+ just described."""
if indent is None:
self.__call = lambda x: x
else:
@@ -259,7 +281,7 @@ class Spacer (object):
indent = self.current
if line.startswith('</'):
indent = self.current = indent[:-len(self.__each)]
- elif line.startswith('<') and not line.startswith('<!'):
+ elif line.startswith('<') and line[1:2] not in '!?':
cut = line.find('>')
tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0]
if f'</{tag}>' not in line:
@@ -270,27 +292,35 @@ class Spacer (object):
return self.__call(line)
class QLocaleXmlWriter (object):
- def __init__(self, save = None, space = Spacer(4)):
- """Set up to write digested CLDR data as QLocale XML.
+ """Save the full set of locale data to a QLocaleXML file.
- Arguments are both optional.
+ The output saved by this should conform to qlocalexml.rnc's
+ schema."""
+ def __init__(self, cldrVersion, save = None, space = Spacer('\t')):
+ """Set up to write digested CLDR data as QLocale XML.
- First argument, save, is None (its default) or a callable that
- will write content to where you intend to save it. If None, it
- is replaced with a callable that prints the given content,
- suppressing the newline (but see the following); this is
- equivalent to passing sys.stdout.write.
-
- Second argument, space, is an object to call on each text
- output to prepend indentation and append newlines, or not as
- the case may be. The default is a Spacer(4), which grows
- indent by four spaces after each unmatched new tag and shrinks
- back on a close-tag (its parsing is naive, but adequate to how
- this class uses it), while adding a newline to each line.
- """
+ First argument is the version of CLDR whose data we'll be
+ writing. Other arguments are optional.
+
+ Second argument, save, is None (its default) or a callable that will
+ write content to where you intend to save it. If None, it is replaced
+ with a callable that prints the given content, suppressing the newline
+ (but see the following); this is equivalent to passing
+ sys.stdout.write.
+
+ Third argument, space, is an object to call on each text output to
+ prepend indentation and append newlines, or not as the case may be. The
+ default is a Spacer('\t'), which grows indent by a tab after each
+ unmatched new tag and shrinks back on a close-tag (its parsing is
+ naive, but adequate to how this class uses it), while adding a newline
+ to each line."""
self.__rawOutput = self.__printit if save is None else save
self.__wrap = space
- self.__write('<localeDatabase>')
+ self.__write('<?xml version="1.0" encoding="UTF-8" ?>'
+ # A hint to emacs to make display nicer:
+ '<!--*- tab-width: 4 -*-->')
+ self.__openTag('localeDatabase', versionCldr = cldrVersion,
+ versionQt = qtVersion)
# Output of various sections, in their usual order:
def enumData(self, code2name):
@@ -325,10 +355,48 @@ class QLocaleXmlWriter (object):
self.__closeTag('likelySubtag')
self.__closeTag('likelySubtags')
- def locales(self, locales, calendars):
+    def zoneData(self, alias, defaults, windowsIds):
+        """Write the locale-independent timezone data.
+
+        First argument, alias, maps each IANA-format name to a single
+        IANA ID; each entry whose name differs from its IANA ID is
+        written, in sorted order, as a zoneAlias in the zoneAliases
+        group. Third argument, windowsIds, maps (Windows ID, territory
+        code) pairs to space-joined sequences of IANA IDs, each
+        written as an msLandZones in the windowsZone group. Second
+        argument, defaults, maps each Windows ID to an IANA ID, each
+        written as an msZoneIana, after the msLandZones, in the same
+        group."""
+        self.__openTag('zoneAliases')
+        for name, iana in sorted(alias.items()):
+            # A name that is its own IANA ID needs no alias entry.
+            if name != iana:
+                self.__openTag('zoneAlias')
+                self.inTag('alias', name)
+                self.inTag('iana', iana)
+                self.__closeTag('zoneAlias')
+        self.__closeTag('zoneAliases')
+
+        self.__openTag('windowsZone')
+        for key, ianaids in windowsIds.items():
+            msid, land = key
+            self.__openTag('msLandZones')
+            self.inTag('msid', msid)
+            self.inTag('territorycode', land)
+            self.inTag('ianaids', ianaids)
+            self.__closeTag('msLandZones')
+
+        for msid, iana in defaults.items():
+            self.__openTag('msZoneIana')
+            self.inTag('msid', msid)
+            self.inTag('iana', iana)
+            self.__closeTag('msZoneIana')
+        self.__closeTag('windowsZone')
+
+ def locales(self, locales, calendars, en_US):
+ """Write the data for each locale.
+
+ First argument, locales, is the mapping whose values are the
+ Locale objects, with each key being the matching tuple of
+ numeric IDs for language, script, territory and variant.
+ Second argument is a tuple of calendar names. Third is the
+ tuple of numeric IDs that corresponds to en_US (needed to
+ provide fallbacks for the C locale)."""
+
self.__openTag('localeList')
self.__openTag('locale')
- self.__writeLocale(Locale.C(calendars), calendars)
+ self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()):
self.__openTag('locale')
@@ -336,16 +404,13 @@ class QLocaleXmlWriter (object):
self.__closeTag('locale')
self.__closeTag('localeList')
- def version(self, cldrVersion):
- self.inTag('version', cldrVersion)
-
def inTag(self, tag, text):
self.__write(f'<{tag}>{text}</{tag}>')
def close(self, grumble):
"""Finish writing and grumble about any issues discovered."""
if self.__rawOutput != self.__complain:
- self.__write('</localeDatabase>')
+ self.__closeTag('localeDatabase')
self.__rawOutput = self.__complain
if self.__languages or self.__scripts or self.__territories:
@@ -398,7 +463,10 @@ class QLocaleXmlWriter (object):
self.__scripts.discard(locale.script_code)
self.__territories.discard(locale.territory_code)
- def __openTag(self, tag):
+ def __openTag(self, tag, **attrs):
+ if attrs:
+ text = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
+ tag = f'{tag} {text}'
self.__write(f'<{tag}>')
def __closeTag(self, tag):
self.__write(f'</{tag}>')
@@ -445,7 +513,8 @@ class Locale (object):
"longTimeFormat", "shortTimeFormat",
'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
"currencyIsoCode", "currencySymbol", "currencyDisplayName",
- "currencyFormat", "currencyNegativeFormat")
+ "currencyFormat", "currencyNegativeFormat",
+ )
# Day-of-Week numbering used by Qt:
__qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
@@ -454,12 +523,15 @@ class Locale (object):
def fromXmlData(cls, lookup, calendars=('gregorian',)):
"""Constructor from the contents of XML elements.
- Single parameter, lookup, is called with the names of XML
- elements that should contain the relevant data, within a CLDR
- locale element (within a localeList element); these names are
- used for the attributes of the object constructed. Attribute
- values are obtained by suitably digesting the returned element
- texts.\n"""
+ First parameter, lookup, is called with the names of XML elements that
+ should contain the relevant data, within a QLocaleXML locale element
+ (within a localeList element); these names mostly match the attributes
+ of the object constructed. Its return must be the full text of the
+ first child DOM node element with the given name. Attribute values are
+ obtained by suitably digesting the returned element texts.
+
+ Optional second parameter, calendars, is a sequence of calendars for
+ which data is to be retrieved."""
data = {}
for k in cls.__asint:
data[k] = int(lookup(k))
@@ -510,7 +582,7 @@ class Locale (object):
'longDateFormat', 'shortDateFormat',
'longTimeFormat', 'shortTimeFormat',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
- 'currencyFormat', 'currencyNegativeFormat'
+ 'currencyFormat', 'currencyNegativeFormat',
) + tuple(self.propsMonthDay('days')) + tuple(
'_'.join((k, cal))
for k in self.propsMonthDay('months')
@@ -521,97 +593,49 @@ class Locale (object):
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))
- # Tools used by __monthNames:
- def fullName(i, name): return name
- def firstThree(i, name): return name[:3]
- def initial(i, name): return name[:1]
- def number(i, name): return str(i + 1)
- def islamicShort(i, name):
- if not name: return name
- if name == 'Shawwal': return 'Shaw.'
- words = name.split()
- if words[0].startswith('Dhu'):
- words[0] = words[0][:7] + '.'
- elif len(words[0]) > 3:
- words[0] = words[0][:3] + '.'
- return ' '.join(words)
- @staticmethod
- def __monthNames(calendars,
- known={ # Map calendar to (names, extractors...):
- # TODO: do we even need these ? CLDR's root.xml seems to
- # have them, complete with yeartype="leap" handling for
- # Hebrew's extra.
- 'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
- 'August', 'September', 'October', 'November', 'December'),
- # Extractor pairs, (plain, standalone)
- (fullName, fullName), # long
- (firstThree, firstThree), # short
- (number, initial)), # narrow
- 'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
- 'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
- (fullName, fullName),
- (firstThree, firstThree),
- (number, initial)),
- 'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
- 'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
- 'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
- (fullName, fullName),
- (islamicShort, islamicShort),
- (number, number)),
- 'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
- 'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
- (fullName, fullName),
- (fullName, fullName),
- (number, number)),
- },
- sizes=('long', 'short', 'narrow')):
- for cal in calendars:
- try:
- data = known[cal]
- except KeyError as e: # Need to add an entry to known, above.
- e.args += ('Unsupported calendar:', cal)
- raise
- names, get = data[0], data[1:]
- for n, size in enumerate(sizes):
- yield ('_'.join((camelCase((size, 'months')), cal)),
- ';'.join(get[n][0](i, x) for i, x in enumerate(names)))
- yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
- ';'.join(get[n][1](i, x) for i, x in enumerate(names)))
- del fullName, firstThree, initial, number, islamicShort
-
@classmethod
- def C(cls, calendars=('gregorian',),
- days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
- 'Thursday', 'Friday', 'Saturday'),
- quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
- """Returns an object representing the C locale."""
- return cls(cls.__monthNames(calendars),
- language='C', language_code='0', languageEndonym='',
- script='AnyScript', script_code='0',
- territory='AnyTerritory', territory_code='0', territoryEndonym='',
- groupSizes=(3, 3, 1),
- decimal='.', group=',', list=';', percent='%',
- zero='0', minus='-', plus='+', exp='e',
+ def C(cls, en_US):
+ """Returns an object representing the C locale.
+
+ Required argument, en_US, is the corresponding object for the
+ en_US locale (or the en_US_POSIX one if we ever support
+ variants). The C locale inherits from this, overriding what it
+ may need to."""
+ base = en_US.__dict__.copy()
+ # Soroush's original contribution shortened Jalali month names
+ # - contrary to CLDR, which doesn't abbreviate these in
+ # root.xml or en.xml, although some locales do, e.g. fr_CA.
+ # For compatibility with that, truncate the C locale's short Persian month names:
+ for k in ('shortMonths_persian', 'standaloneShortMonths_persian'):
+ base[k] = ';'.join(x[:3] for x in base[k].split(';'))
+
+ return cls(base,
+ language='C', language_code='',
+ language_id=0, languageEndonym='',
+ script='AnyScript', script_code='', script_id=0,
+ territory='AnyTerritory', territory_code='',
+ territory_id=0, territoryEndonym='',
+ variant='', variant_code='', variant_id=0,
+ # CLDR has non-ASCII versions of these:
quotationStart='"', quotationEnd='"',
- alternateQuotationStart='\'', alternateQuotationEnd='\'',
- listPatternPartStart='%1, %2',
- listPatternPartMiddle='%1, %2',
- listPatternPartEnd='%1, %2',
- listPatternPartTwo='%1, %2',
- byte_unit='bytes',
- byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
- byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
- am='AM', pm='PM', firstDayOfWeek='mon',
- weekendStart='sat', weekendEnd='sun',
+ alternateQuotationStart="'", alternateQuotationEnd="'",
+ # CLDR gives 'dddd, MMMM d, yyyy', 'M/d/yy', 'h:mm:ss Ap tttt',
+ # 'h:mm Ap' with non-breaking space before Ap.
longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
- longDays=';'.join(days),
- shortDays=';'.join(d[:3] for d in days),
- narrowDays='7;1;2;3;4;5;6',
- standaloneLongDays=';'.join(days),
- standaloneShortDays=';'.join(d[:3] for d in days),
- standaloneNarrowDays=';'.join(d[:1] for d in days),
- currencyIsoCode='', currencySymbol='',
- currencyDisplayName='',
+ # CLDR has US-$ and US-style formats:
+ currencyIsoCode='', currencySymbol='', currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
- currencyFormat='%1%2', currencyNegativeFormat='')
+ currencyFormat='%1%2', currencyNegativeFormat='',
+ # We may want to fall back to CLDR for some of these:
+ firstDayOfWeek='mon', # CLDR has 'sun'
+ exp='e', # CLDR has 'E'
+ listPatternPartEnd='%1, %2', # CLDR has '%1, and %2'
+ listPatternPartTwo='%1, %2', # CLDR has '%1 and %2'
+ narrowDays='7;1;2;3;4;5;6', # CLDR has letters
+ narrowMonths_gregorian='1;2;3;4;5;6;7;8;9;10;11;12', # CLDR has letters
+ standaloneNarrowMonths_persian='F;O;K;T;M;S;M;A;A;D;B;E', # CLDR has digits
+ # Keep these explicit, despite matching CLDR:
+ decimal='.', group=',', percent='%',
+ zero='0', minus='-', plus='+',
+ am='AM', pm='PM', weekendStart='sat', weekendEnd='sun')
diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc
index 818aa8f9c3..0a2aa28f6b 100644
--- a/util/locale_database/qlocalexml.rnc
+++ b/util/locale_database/qlocalexml.rnc
@@ -11,11 +11,14 @@
# package manager lacks the jing package.
start = element localeDatabase {
- element version { text },
+ attribute versionCldr { text },
+ attribute versionQt { text },
element languageList { Language+ },
element scriptList { Script+ },
element territoryList { Territory+ },
element likelySubtags { LikelySubtag+ },
+ element zoneAliases { ZoneAlias+ },
+ element windowsZone { MsLandZones+, MsZoneIana+ },
element localeList { Locale+ }
}
@@ -39,6 +42,23 @@ LocaleTriplet = (
element territory { text }
)
+# TODO: xsd patterns for IANA IDs and space-joined lists of them
+ZoneAlias = element zoneAlias {
+ element alias { text },
+ element iana { text }
+}
+
+MsLandZones = element msLandZones {
+ element msid { text },
+ element territorycode { text },
+ element ianaids { text }
+}
+
+MsZoneIana = element msZoneIana {
+ element msid { text },
+ element iana { text }
+}
+
WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index b20e4fd155..dd01589672 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -22,6 +22,23 @@ from typing import Optional
from qlocalexml import QLocaleXmlReader
from localetools import *
from iso639_3 import LanguageCodeData
+from zonedata import utcIdList, windowsIdList
+
+
+# Sanity check the zone data:
+
+# Offsets from the Windows table, in minutes (only those that are whole minutes):
+winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0)
+# The UTC±HH:mm forms of the non-zero offsets:
+winUtc = set(f'UTC-{h:02}:{m:02}'
+ for h, m in (divmod(-o, 60) for o in winOff if o < 0)
+ ).union(f'UTC+{h:02}:{m:02}'
+ for h, m in (divmod(o, 60) for o in winOff if o > 0))
+# All such offsets should be represented by entries in utcIdList:
+newUtc = winUtc.difference(utcIdList)
+assert not newUtc, (
+ 'Please add missing UTC-offset zones to zonedata.utcIdList', newUtc)
+
class LocaleKeySorter:
"""Sort-ordering representation of a locale key.
@@ -47,39 +64,64 @@ class LocaleKeySorter:
# TODO: should we compare territory before or after script ?
return (key[0], self.foreign(key)) + key[1:]
-class StringDataToken:
- def __init__(self, index, length, bits):
+class ByteArrayData:
+ # Only for use with ASCII data, e.g. IANA IDs.
+ def __init__(self):
+ self.data, self.hash = [], {}
+
+ def append(self, s):
+ assert s.isascii(), s
+ s += '\0'
+ if s in self.hash:
+ return self.hash[s]
+
+ index = len(self.data)
if index > 0xffff:
- raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
- if length >= (1 << bits):
- raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')
+ raise Error(f'Index ({index}) outside the uint16 range !')
+ self.hash[s] = index
+ self.data += unicode2hex(s)
+ return index
+
+ def write(self, out, name):
+ out(f'\nstatic constexpr char {name}[] = {{\n')
+ out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
+ # All data is ASCII, so only two-digit hex is ever needed.
+ out('\n};\n')
+
+class StringDataToken:
+ def __init__(self, index, length, lenbits, indbits):
+ if index >= (1 << indbits):
+ raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!')
+ if length >= (1 << lenbits):
+ raise ValueError(f'Data size ({length}) exceeds the {lenbits}-bit range!')
self.index = index
self.length = length
class StringData:
- def __init__(self, name):
+ def __init__(self, name, lenbits = 8, indbits = 16):
self.data = []
self.hash = {}
self.name = name
self.text = '' # Used in quick-search for matches in data
+ self.__bits = lenbits, indbits
- def append(self, s, bits = 8):
+ def append(self, s):
try:
token = self.hash[s]
except KeyError:
- token = self.__store(s, bits)
+ token = self.__store(s)
self.hash[s] = token
return token
- def __store(self, s, bits):
+ def __store(self, s):
"""Add string s to known data.
Seeks to avoid duplication, where possible.
For example, short-forms may be prefixes of long-forms.
"""
if not s:
- return StringDataToken(0, 0, bits)
+ return StringDataToken(0, 0, *self.__bits)
ucs2 = unicode2hex(s)
try:
index = self.text.index(s) - 1
@@ -97,14 +139,16 @@ class StringData:
assert index >= 0
try:
- return StringDataToken(index, len(ucs2), bits)
+ return StringDataToken(index, len(ucs2), *self.__bits)
except ValueError as e:
e.args += (self.name, s)
raise
def write(self, fd):
- if len(self.data) > 0xffff:
- raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
+ indbits = self.__bits[1]
+ if len(self.data) >= (1 << indbits):
+ raise ValueError(f'Data is too big ({len(self.data)}) '
+ f'for {indbits}-bit index to its end!',
self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
@@ -136,6 +180,92 @@ class LocaleSourceEditor (SourceFileEditor):
""")
+class TimeZoneDataWriter (LocaleSourceEditor):
+ def __init__(self, path: Path, temp: Path, version: str):
+ super().__init__(path, temp, version)
+ self.__ianaTable = ByteArrayData() # Single IANA IDs
+ self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs
+ self.__windowsTable = ByteArrayData() # Windows names for zones
+ self.__windowsList = sorted(windowsIdList,
+ key=lambda p: p[0].lower())
+ self.windowsKey = {name: (key, off) for key, (name, off)
+ in enumerate(self.__windowsList, 1)}
+
+ def utcTable(self):
+ offsetMap, out = {}, self.writer.write
+ for name in utcIdList:
+ offset = self.__offsetOf(name)
+ offsetMap[offset] = offsetMap.get(offset, ()) + (name,)
+
+ # Write UTC ID key table
+ out('// IANA ID Index, UTC Offset\n')
+ out('static constexpr UtcData utcDataTable[] = {\n')
+ for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
+ names = offsetMap[offset];
+ joined = self.__ianaListTable.append(' '.join(names))
+ out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n')
+ out('};\n')
+
+ def aliasToIana(self, pairs):
+ out, store = self.writer.write, self.__ianaTable.append
+
+ out('// Alias ID Index, Alias ID Index\n')
+ out('static constexpr AliasData aliasMappingTable[] = {\n')
+ for name, iana in pairs: # They're ready-sorted
+ assert name != iana, (alias, iana) # Filtered out in QLocaleXmlWriter
+ out(f' {{ {store(name):6d},{store(iana):6d} }},'
+ f' // {name} -> {iana}\n')
+ out('};\n\n')
+
+ def msToIana(self, pairs):
+ out, winStore = self.writer.write, self.__windowsTable.append
+ ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable
+ alias = dict(pairs) # {MS name: IANA ID}
+
+ out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
+ out('static constexpr WindowsData windowsDataTable[] = {\n')
+ # Sorted by Windows ID key:
+
+ for index, (name, offset) in enumerate(self.__windowsList, 1):
+ out(f' {{ {index:6d},{winStore(name):6d},'
+ f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n')
+ out('};\n\n')
+
+ def msLandIanas(self, triples): # (MS name, territory code, IANA list)
+ out, store = self.writer.write, self.__ianaListTable.append
+ from enumdata import territory_map
+ landKey = {code: (i, name) for i, (name, code) in territory_map.items()}
+ seq = sorted((self.windowsKey[name][0], landKey[land][0], name, landKey[land][1], ianas)
+ for name, land, ianas in triples)
+
+ out('// Windows ID Key, Territory Enum, IANA ID Index\n')
+ out('static constexpr ZoneData zoneDataTable[] = {\n')
+ # Sorted by (Windows ID Key, territory enum)
+ for winId, landId, name, land, ianas in seq:
+ out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},'
+ f' // {name} / {land}\n')
+ out('};\n\n')
+
+ def writeTables(self):
+ self.__windowsTable.write(self.writer.write, 'windowsIdData')
+ # TODO: these are misnamed, entries in the first are lists,
+ # those in the next are single IANA IDs
+ self.__ianaListTable.write(self.writer.write, 'ianaIdData')
+ self.__ianaTable.write(self.writer.write, 'aliasIdData')
+
+ # Implementation details:
+ @staticmethod
+ def __offsetOf(utcName):
+ "Maps a UTC±HH:mm name to its offset in seconds"
+ assert utcName.startswith('UTC')
+ if len(utcName) == 3:
+ return 0
+ assert utcName[3] in '+-', utcName
+ sign = -1 if utcName[3] == '-' else 1
+ assert len(utcName) == 9 and utcName[6] == ':', utcName
+ hour, mins = int(utcName[4:6]), int(utcName[-2:])
+ return sign * (hour * 60 + mins) * 60
+
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
# First sort likely, so that we can use binary search in C++
@@ -414,7 +544,7 @@ class CalendarDataWriter (LocaleSourceEditor):
+ ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6)
+ ' }},').format
def write(self, calendar, locales, names):
- months_data = StringData('months_data')
+ months_data = StringData('months_data', 16)
self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
self.writer.write(
@@ -438,11 +568,10 @@ class CalendarDataWriter (LocaleSourceEditor):
# Sequence of StringDataToken:
try:
# Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
- ranges = (tuple(months_data.append(m[calendar], 16) for m in
- (locale.standaloneLongMonths, locale.longMonths)) +
- tuple(months_data.append(m[calendar]) for m in
- (locale.standaloneShortMonths, locale.shortMonths,
- locale.standaloneNarrowMonths, locale.narrowMonths)))
+ ranges = tuple(months_data.append(m[calendar]) for m in
+ (locale.standaloneLongMonths, locale.longMonths,
+ locale.standaloneShortMonths, locale.shortMonths,
+ locale.standaloneNarrowMonths, locale.narrowMonths))
except ValueError as e:
e.args += (locale.language, locale.script, locale.territory)
raise
@@ -555,6 +684,10 @@ def main(argv, out, err):
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
+ parser.add_argument('-v', '--verbose', help='more verbose output',
+ action='count', default=0)
+ parser.add_argument('-q', '--quiet', help='less output',
+ dest='verbose', action='store_const', const=-1)
args = parser.parse_args(argv[1:])
qlocalexml = args.input_file
@@ -588,6 +721,8 @@ def main(argv, out, err):
writer.territoryCodes(reader.territories)
except Exception as e:
err.write(f'\nError updating locale data: {e}\n')
+ if args.verbose > 0:
+ raise
return 1
# Generate calendar data
@@ -599,6 +734,9 @@ def main(argv, out, err):
writer.write(calendar, locale_map, locale_keys)
except Exception as e:
err.write(f'\nError updating {calendar} locale data: {e}\n')
+ if args.verbose > 0:
+ raise
+ return 1
# qlocale.h
try:
@@ -609,6 +747,9 @@ def main(argv, out, err):
writer.territories(reader.territories)
except Exception as e:
err.write(f'\nError updating qlocale.h: {e}\n')
+ if args.verbose > 0:
+ raise
+ return 1
# qlocale.qdoc
try:
@@ -621,6 +762,24 @@ def main(argv, out, err):
qdoc.writer.write(line)
except Exception as e:
err.write(f'\nError updating qlocale.h: {e}\n')
+ if args.verbose > 0:
+ raise
+ return 1
+
+ # Locale-independent timezone data
+ try:
+ with TimeZoneDataWriter(qtsrcdir.joinpath(
+ 'src/corelib/time/qtimezoneprivate_data_p.h'),
+ qtsrcdir, reader.cldrVersion) as writer:
+ writer.aliasToIana(reader.aliasToIana())
+ writer.msLandIanas(reader.msLandIanas())
+ writer.msToIana(reader.msToIana())
+ writer.utcTable()
+ writer.writeTables()
+ except Exception as e:
+ err.write(f'\nError updating qtimezoneprivate_data_p.h: {e}\n')
+ if args.verbose > 0:
+ raise
return 1
# ./testlocales/localemodel.cpp
@@ -631,6 +790,9 @@ def main(argv, out, err):
test.localeList(locale_keys)
except Exception as e:
err.write(f'\nError updating localemodel.cpp: {e}\n')
+ if args.verbose > 0:
+ raise
+ return 1
return 0
diff --git a/util/locale_database/zonedata.py b/util/locale_database/zonedata.py
index bf32038801..b73290f330 100644
--- a/util/locale_database/zonedata.py
+++ b/util/locale_database/zonedata.py
@@ -17,17 +17,22 @@ here, research the relevant zone's offset and add a new entry to the
list of twoples, preserving the ordering. Internet search engines and
timeanddate.com can help with researching the offset. Note that some
UTC offset zones (giving only the hour) are present in windowsIdList.
+When adding an entry to windowsIdList, check whether its offset
+corresponds to that of some entry in utcIdList; if not, add such an
+entry.
-The utcIdList is again a list of tuples (name, offset), associating
-various UTC-offset names with their offsets in seconds. Aside from
+The utcIdList is a simple list of various UTC-offset names. Aside from
'UTC' itself, shared with windowsIdList, these include minutes in
-their offsets even when they are whole hour offsets. It is not clear
-where this particular list of offsets came from, but entries should
-not be removed as they make up the available zones of the UTC
-back-end. (That recognizes other offset zones, and its is-available
-check will accept them, but it leaves them out of its list. There are,
-after all, thousands of possible offset zones, but relatively few are
-widely used.)
+their offsets even when they are whole hour offsets. The list contains
+the UTC-equivalents of all offsets seen in the windowsIdList, plus the
+whole hours out to ±14 hours, the two verbose forms of UTC±00:00 and
+any legacy entries from past Windows zone offsets. Entries should not
+be removed, even if the relevant Windows ID becomes obsolete or
+switches to a different offset, as they make up the available zones of
+the UTC back-end. (That recognizes other offset zones, and its
+is-available check will accept them, but it leaves them out of its
+list. There are, after all, thousands of possible offset zones, but
+relatively few are widely used.)
Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a
way to indicate that a date-time has been converted to UTC but its use
@@ -44,48 +49,37 @@ backwards compatibility.
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
# IDs for the same offset shall be space-joined; list the preferred ID first.
-# ( UTC Id, Offset Seconds )
utcIdList = (
- ('UTC-14:00', -50400),
- ('UTC-13:00', -46800),
- ('UTC-12:00', -43200),
- ('UTC-11:00', -39600),
- ('UTC-10:00', -36000),
- ('UTC-09:00', -32400),
- ('UTC-08:00', -28800),
- ('UTC-07:00', -25200),
- ('UTC-06:00', -21600),
- ('UTC-05:00', -18000),
- ('UTC-04:30', -16200),
- ('UTC-04:00', -14400),
- ('UTC-03:30', -12600),
- ('UTC-03:00', -10800),
- ('UTC-02:00', -7200),
- ('UTC-01:00', -3600),
- ('UTC', 0), # Goes first (among zero-offset) to be default
- ('UTC+00:00', 0),
- ('UTC-00:00', 0), # Should recognize, but avoid using (see Note above).
- ('UTC+01:00', 3600),
- ('UTC+02:00', 7200),
- ('UTC+03:00', 10800),
- ('UTC+03:30', 12600),
- ('UTC+04:00', 14400),
- ('UTC+04:30', 16200),
- ('UTC+05:00', 18000),
- ('UTC+05:30', 19800),
- ('UTC+05:45', 20700),
- ('UTC+06:00', 21600),
- ('UTC+06:30', 23400),
- ('UTC+07:00', 25200),
- ('UTC+08:00', 28800),
- ('UTC+08:30', 30600),
- ('UTC+09:00', 32400),
- ('UTC+09:30', 34200),
- ('UTC+10:00', 36000),
- ('UTC+11:00', 39600),
- ('UTC+12:00', 43200),
- ('UTC+13:00', 46800),
- ('UTC+14:00', 50400),
+ 'UTC-14:00',
+ 'UTC-13:00',
+ 'UTC-12:00',
+ 'UTC-11:00',
+ 'UTC-10:00', 'UTC-09:30',
+ 'UTC-09:00',
+ 'UTC-08:00',
+ 'UTC-07:00',
+ 'UTC-06:00',
+ 'UTC-05:00', 'UTC-04:30',
+ 'UTC-04:00', 'UTC-03:30',
+ 'UTC-03:00',
+ 'UTC-02:00',
+ 'UTC-01:00',
+ # UTC goes first (among zero-offset) to be default:
+ 'UTC', 'UTC+00:00', 'UTC-00:00',
+ 'UTC+01:00',
+ 'UTC+02:00',
+ 'UTC+03:00', 'UTC+03:30',
+ 'UTC+04:00', 'UTC+04:30',
+ 'UTC+05:00', 'UTC+05:30', 'UTC+05:45',
+ 'UTC+06:00', 'UTC+06:30',
+ 'UTC+07:00',
+ 'UTC+08:00', 'UTC+08:30', 'UTC+08:45',
+ 'UTC+09:00', 'UTC+09:30',
+ 'UTC+10:00', 'UTC+10:30',
+ 'UTC+11:00',
+ 'UTC+12:00', 'UTC+12:45',
+ 'UTC+13:00',
+ 'UTC+14:00',
)
# ( Windows Id, Offset Seconds )
diff --git a/util/update_public_suffix_list.sh b/util/update_public_suffix_list.sh
index d284d03df3..1e9793a1a8 100755
--- a/util/update_public_suffix_list.sh
+++ b/util/update_public_suffix_list.sh
@@ -2,7 +2,7 @@
# Copyright (C) 2023 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-PICK_TO_BRANCHES="6.7 6.6 6.5 6.2 5.15"
+PICK_TO_BRANCHES="6.8 6.7 6.5 6.2 5.15"
#UPSTREAM=github.com:publicsuffix/list.git # use this if you have a github account
UPSTREAM=https://github.com/publicsuffix/list.git # and this if you don't