diff options
Diffstat (limited to 'util/locale_database/cldr2qtimezone.py')
-rwxr-xr-x | util/locale_database/cldr2qtimezone.py | 106 |
1 file changed, 55 insertions, 51 deletions
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index d0d48df6fe..27987d5a58 100755 --- a/util/locale_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -1,39 +1,15 @@ #!/usr/bin/env python3 -############################################################################# -## -## Copyright (C) 2021 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Parse CLDR data for QTimeZone use with MS-Windows Script to parse the CLDR common/supplemental/windowsZones.xml file and -encode for use in QTimeZone. See ``./cldr2qlocalexml.py`` for where -to get the CLDR data. 
Pass its root directory as first parameter to -this script and the qtbase root directory as second parameter. It -shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready -for use. +prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for +where to get the CLDR data. Pass its root directory as first parameter +to this script. You can optionally pass the qtbase root directory as +second parameter; it defaults to the root of the checkout containing +this script. This script updates qtbase's +src/corelib/time/qtimezoneprivate_data_p.h with the new data. """ import datetime @@ -41,7 +17,7 @@ from pathlib import Path import textwrap import argparse -from localetools import unicode2hex, wrap_list, Error, SourceFileEditor +from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root from cldr import CldrAccess ### Data that may need updates in response to new entries in the CLDR file ### @@ -84,10 +60,10 @@ windowsIdList = ( ('Central Europe Standard Time', 3600), ('Central European Standard Time', 3600), ('Central Pacific Standard Time', 39600), - ('Central Standard Time (Mexico)', -21600), ('Central Standard Time', -21600), - ('China Standard Time', 28800), + ('Central Standard Time (Mexico)', -21600), ('Chatham Islands Standard Time', 45900), + ('China Standard Time', 28800), ('Cuba Standard Time', -18000), ('Dateline Standard Time', -43200), ('E. Africa Standard Time', 10800), @@ -124,8 +100,8 @@ windowsIdList = ( ('Middle East Standard Time', 7200), ('Montevideo Standard Time', -10800), ('Morocco Standard Time', 0), - ('Mountain Standard Time (Mexico)', -25200), ('Mountain Standard Time', -25200), + ('Mountain Standard Time (Mexico)', -25200), ('Myanmar Standard Time', 23400), ('N. Central Asia Standard Time', 21600), ('Namibia Standard Time', 3600), @@ -144,9 +120,9 @@ windowsIdList = ( ('Paraguay Standard Time', -14400), ('Qyzylorda Standard Time', 18000), # a.k.a. 
Kyzylorda, in Kazakhstan ('Romance Standard Time', 3600), - ('Russia Time Zone 3', 14400), ('Russia Time Zone 10', 39600), ('Russia Time Zone 11', 43200), + ('Russia Time Zone 3', 14400), ('Russian Standard Time', 10800), ('SA Eastern Standard Time', -10800), ('SA Pacific Standard Time', -18000), @@ -159,6 +135,7 @@ windowsIdList = ( ('SE Asia Standard Time', 25200), ('Singapore Standard Time', 28800), ('South Africa Standard Time', 7200), + ('South Sudan Standard Time', 7200), ('Sri Lanka Standard Time', 19800), ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00 ('Syria Standard Time', 7200), @@ -174,13 +151,14 @@ windowsIdList = ( ('Ulaanbaatar Standard Time', 28800), ('US Eastern Standard Time', -18000), ('US Mountain Standard Time', -25200), - ('UTC-11', -39600), - ('UTC-09', -32400), - ('UTC-08', -28800), - ('UTC-02', -7200), ('UTC', 0), + # Lexical order: '+' < '-' ('UTC+12', 43200), ('UTC+13', 46800), + ('UTC-02', -7200), + ('UTC-08', -28800), + ('UTC-09', -32400), + ('UTC-11', -39600), ('Venezuela Standard Time', -16200), ('Vladivostok Standard Time', 36000), ('Volgograd Standard Time', 14400), @@ -197,9 +175,9 @@ windowsIdList = ( # List of standard UTC IDs to use. Not public so may be safely changed. # Do not remove IDs, as each entry is part of the API/behavior guarantee. +# IDs for the same offset shall be space-joined; list the preferred ID first. # ( UTC Id, Offset Seconds ) utcIdList = ( - ('UTC', 0), # Goes first so is default ('UTC-14:00', -50400), ('UTC-13:00', -46800), ('UTC-12:00', -43200), @@ -216,8 +194,9 @@ utcIdList = ( ('UTC-03:00', -10800), ('UTC-02:00', -7200), ('UTC-01:00', -3600), - ('UTC-00:00', 0), + ('UTC', 0), # Goes first (among zero-offset) to be default ('UTC+00:00', 0), + ('UTC-00:00', 0), # Should recognize, but avoid using (see Note below). 
('UTC+01:00', 3600), ('UTC+02:00', 7200), ('UTC+03:00', 10800), @@ -243,6 +222,17 @@ utcIdList = ( ### End of data that may need updates in response to CLDR ### +# Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a +# way to indicate that a date-time has been converted to UTC but its +# use should not be understood to say anything about the local time of +# the origin of the message using it. However, ISO 8601 has, since +# 2000, forbidden this as an offset suffix. The more recent compromise +# is to use Z to convey the meaning RFC3339 gave to -00:00. So the use +# of -00:00 as offset suffix should be avoided (and, by extension, +# likewise for UTC-00:00 as a zone ID), but this suffix (and ID) +# should be recognized when consuming data generated by other sources, +# for backwards compatibility. + class ByteArrayData: def __init__(self): self.data = [] @@ -262,11 +252,13 @@ class ByteArrayData: return index def write(self, out, name): - out(f'\nstatic const char {name}[] = {{\n') - out(wrap_list(self.data)) + out(f'\nstatic constexpr char {name}[] = {{\n') + out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') + # Will over-spill 100-col if some 4-digit hex show up, but none do (yet). out('\n};\n') class ZoneIdWriter (SourceFileEditor): + # All the output goes into namespace QtTimeZoneCldr. 
def write(self, version, defaults, windowsIds): self.__writeWarning(version) windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds) @@ -293,7 +285,8 @@ class ZoneIdWriter (SourceFileEditor): # Write Windows/IANA table out('// Windows ID Key, Territory Enum, IANA ID Index\n') - out('static const QZoneData zoneDataTable[] = {\n') + out('static constexpr ZoneData zoneDataTable[] = {\n') + # Sorted by (Windows ID Key, territory enum) for index, data in sorted(windowsIds.items()): out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format( data['windowsKey'], data['territoryId'], @@ -303,7 +296,12 @@ class ZoneIdWriter (SourceFileEditor): # Write Windows ID key table out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n') - out('static const QWindowsData windowsDataTable[] = {\n') + out('static constexpr WindowsData windowsDataTable[] = {\n') + # Sorted by Windows ID key; sorting case-insensitively by + # Windows ID must give the same order. + winIdNames = [x.lower() for x, y in windowsIdList] + assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \ + [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y] for index, pair in enumerate(windowsIdList, 1): out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format( index, @@ -312,12 +310,16 @@ class ZoneIdWriter (SourceFileEditor): pair[1], pair[0])) out('};\n\n') + offsetMap = {} + for pair in utcIdList: + offsetMap[pair[1]] = offsetMap.get(pair[1], ()) + (pair[0],) # Write UTC ID key table out('// IANA ID Index, UTC Offset\n') - out('static const QUtcData utcDataTable[] = {\n') - for pair in utcIdList: + out('static constexpr UtcData utcDataTable[] = {\n') + for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop. 
+ names = offsetMap[offset]; out(' {{ {:6d},{:6d} }}, // {}\n'.format( - ianaIdData.append(pair[0]), pair[1], pair[0])) + ianaIdData.append(' '.join(names)), offset, names[0])) out('};\n') return windowsIdData, ianaIdData @@ -334,7 +336,9 @@ def main(out, err): parser = argparse.ArgumentParser( description="Update Qt's CLDR-derived timezone data.") parser.add_argument('cldr_path', help='path to the root of the CLDR tree') - parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree') + parser.add_argument('qtbase_path', + help='path to the root of the qtbase source tree', + nargs='?', default=qtbase_root) args = parser.parse_args() |