#!/usr/bin/env python3
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parse CLDR data for QTimeZone use with MS-Windows

Script to parse the CLDR common/supplemental/windowsZones.xml file and
prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
where to get the CLDR data. Pass its root directory as first parameter
to this script. You can optionally pass the qtbase root directory as
second parameter; it defaults to the root of the checkout containing
this script. This script updates qtbase's
src/corelib/time/qtimezoneprivate_data_p.h with the new data.
"""

import datetime
from pathlib import Path
import textwrap
import argparse

from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
from cldr import CldrAccess

### Data that may need updates in response to new entries in the CLDR file ###

# This script shall report the updates you need to make, if any arise.
# However, you may need to research the relevant zone's standard offset.

# List of currently known Windows IDs.
# If this script reports missing IDs, please add them here.
# Look up the offset using (google and) timeanddate.com.
# Not public so may safely be changed.
# Please keep in alphabetic order by ID, ignoring case: the table writer
# checks that the lower-cased IDs are in sorted order.
# The 1-based position of an entry in this list is used as its Windows
# ID key, both when reading the CLDR data and in the generated tables.
# ( Windows Id, Offset Seconds )
windowsIdList = (
    ('Afghanistan Standard Time', 16200),
    ('Alaskan Standard Time', -32400),
    ('Aleutian Standard Time', -36000),
    ('Altai Standard Time', 25200),
    ('Arab Standard Time', 10800),
    ('Arabian Standard Time', 14400),
    ('Arabic Standard Time', 10800),
    ('Argentina Standard Time', -10800),
    ('Astrakhan Standard Time', 14400),
    ('Atlantic Standard Time', -14400),
    ('AUS Central Standard Time', 34200),
    ('Aus Central W. Standard Time', 31500),
    ('AUS Eastern Standard Time', 36000),
    ('Azerbaijan Standard Time', 14400),
    ('Azores Standard Time', -3600),
    ('Bahia Standard Time', -10800),
    ('Bangladesh Standard Time', 21600),
    ('Belarus Standard Time', 10800),
    ('Bougainville Standard Time', 39600),
    ('Canada Central Standard Time', -21600),
    ('Cape Verde Standard Time', -3600),
    ('Caucasus Standard Time', 14400),
    ('Cen. Australia Standard Time', 34200),
    ('Central America Standard Time', -21600),
    ('Central Asia Standard Time', 21600),
    ('Central Brazilian Standard Time', -14400),
    ('Central Europe Standard Time', 3600),
    ('Central European Standard Time', 3600),
    ('Central Pacific Standard Time', 39600),
    ('Central Standard Time', -21600),
    ('Central Standard Time (Mexico)', -21600),
    ('Chatham Islands Standard Time', 45900),
    ('China Standard Time', 28800),
    ('Cuba Standard Time', -18000),
    ('Dateline Standard Time', -43200),
    ('E. Africa Standard Time', 10800),
    ('E. Australia Standard Time', 36000),
    ('E. Europe Standard Time', 7200),
    ('E. South America Standard Time', -10800),
    ('Easter Island Standard Time', -21600),
    ('Eastern Standard Time', -18000),
    ('Eastern Standard Time (Mexico)', -18000),
    ('Egypt Standard Time', 7200),
    ('Ekaterinburg Standard Time', 18000),
    ('Fiji Standard Time', 43200),
    ('FLE Standard Time', 7200),
    ('Georgian Standard Time', 14400),
    ('GMT Standard Time', 0),
    ('Greenland Standard Time', -10800),
    ('Greenwich Standard Time', 0),
    ('GTB Standard Time', 7200),
    ('Haiti Standard Time', -18000),
    ('Hawaiian Standard Time', -36000),
    ('India Standard Time', 19800),
    ('Iran Standard Time', 12600),
    ('Israel Standard Time', 7200),
    ('Jordan Standard Time', 7200),
    ('Kaliningrad Standard Time', 7200),
    ('Korea Standard Time', 32400),
    ('Libya Standard Time', 7200),
    ('Line Islands Standard Time', 50400),
    ('Lord Howe Standard Time', 37800),
    ('Magadan Standard Time', 36000),
    ('Magallanes Standard Time', -10800), # permanent DST
    ('Marquesas Standard Time', -34200),
    ('Mauritius Standard Time', 14400),
    ('Middle East Standard Time', 7200),
    ('Montevideo Standard Time', -10800),
    ('Morocco Standard Time', 0),
    ('Mountain Standard Time', -25200),
    ('Mountain Standard Time (Mexico)', -25200),
    ('Myanmar Standard Time', 23400),
    ('N. Central Asia Standard Time', 21600),
    ('Namibia Standard Time', 3600),
    ('Nepal Standard Time', 20700),
    ('New Zealand Standard Time', 43200),
    ('Newfoundland Standard Time', -12600),
    ('Norfolk Standard Time', 39600),
    ('North Asia East Standard Time', 28800),
    ('North Asia Standard Time', 25200),
    ('North Korea Standard Time', 30600),
    ('Omsk Standard Time', 21600),
    ('Pacific SA Standard Time', -10800),
    ('Pacific Standard Time', -28800),
    ('Pacific Standard Time (Mexico)', -28800),
    ('Pakistan Standard Time', 18000),
    ('Paraguay Standard Time', -14400),
    ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
    ('Romance Standard Time', 3600),
    ('Russia Time Zone 10', 39600),
    ('Russia Time Zone 11', 43200),
    ('Russia Time Zone 3', 14400),
    ('Russian Standard Time', 10800),
    ('SA Eastern Standard Time', -10800),
    ('SA Pacific Standard Time', -18000),
    ('SA Western Standard Time', -14400),
    ('Saint Pierre Standard Time', -10800), # New France
    ('Sakhalin Standard Time', 39600),
    ('Samoa Standard Time', 46800),
    ('Sao Tome Standard Time', 0),
    ('Saratov Standard Time', 14400),
    ('SE Asia Standard Time', 25200),
    ('Singapore Standard Time', 28800),
    ('South Africa Standard Time', 7200),
    ('South Sudan Standard Time', 7200),
    ('Sri Lanka Standard Time', 19800),
    ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
    ('Syria Standard Time', 7200),
    ('Taipei Standard Time', 28800),
    ('Tasmania Standard Time', 36000),
    ('Tocantins Standard Time', -10800),
    ('Tokyo Standard Time', 32400),
    ('Tomsk Standard Time', 25200),
    ('Tonga Standard Time', 46800),
    ('Transbaikal Standard Time', 32400), # Yakutsk
    ('Turkey Standard Time', 7200),
    ('Turks And Caicos Standard Time', -14400),
    ('Ulaanbaatar Standard Time', 28800),
    ('US Eastern Standard Time', -18000),
    ('US Mountain Standard Time', -25200),
    ('UTC', 0),
    # Lexical order: '+' < '-'
    ('UTC+12', 43200),
    ('UTC+13', 46800),
    ('UTC-02', -7200),
    ('UTC-08', -28800),
    ('UTC-09', -32400),
    ('UTC-11', -39600),
    ('Venezuela Standard Time', -16200),
    ('Vladivostok Standard Time', 36000),
    ('Volgograd Standard Time', 14400),
    ('W. Australia Standard Time', 28800),
    ('W. Central Africa Standard Time', 3600),
    ('W. Europe Standard Time', 3600),
    ('W. Mongolia Standard Time', 25200), # Hovd
    ('West Asia Standard Time', 18000),
    ('West Bank Standard Time', 7200),
    ('West Pacific Standard Time', 36000),
    ('Yakutsk Standard Time', 32400),
    ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
)

# List of standard UTC IDs to use.  Not public so may be safely changed.
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
# IDs that share an offset are space-joined, in the order listed here,
# into a single entry of the generated table; list the preferred ID first.
# ( UTC Id, Offset Seconds )
utcIdList = (
    ('UTC-14:00', -50400),
    ('UTC-13:00', -46800),
    ('UTC-12:00', -43200),
    ('UTC-11:00', -39600),
    ('UTC-10:00', -36000),
    ('UTC-09:00', -32400),
    ('UTC-08:00', -28800),
    ('UTC-07:00', -25200),
    ('UTC-06:00', -21600),
    ('UTC-05:00', -18000),
    ('UTC-04:30', -16200),
    ('UTC-04:00', -14400),
    ('UTC-03:30', -12600),
    ('UTC-03:00', -10800),
    ('UTC-02:00', -7200),
    ('UTC-01:00', -3600),
    ('UTC', 0), # Goes first (among zero-offset) to be default
    ('UTC+00:00', 0),
    ('UTC-00:00', 0), # Should recognize, but avoid using (see Note below).
    ('UTC+01:00', 3600),
    ('UTC+02:00', 7200),
    ('UTC+03:00', 10800),
    ('UTC+03:30', 12600),
    ('UTC+04:00', 14400),
    ('UTC+04:30', 16200),
    ('UTC+05:00', 18000),
    ('UTC+05:30', 19800),
    ('UTC+05:45', 20700),
    ('UTC+06:00', 21600),
    ('UTC+06:30', 23400),
    ('UTC+07:00', 25200),
    ('UTC+08:00', 28800),
    ('UTC+08:30', 30600),
    ('UTC+09:00', 32400),
    ('UTC+09:30', 34200),
    ('UTC+10:00', 36000),
    ('UTC+11:00', 39600),
    ('UTC+12:00', 43200),
    ('UTC+13:00', 46800),
    ('UTC+14:00', 50400),
)

### End of data that may need updates in response to CLDR ###

# Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a
# way to indicate that a date-time has been converted to UTC but its
# use should not be understood to say anything about the local time of
# the origin of the message using it. However, ISO 8601 has, since
# 2000, forbidden this as an offset suffix. The more recent compromise
# is to use Z to convey the meaning RFC3339 gave to -00:00. So the use
# of -00:00 as offset suffix should be avoided (and, by extension,
# likewise for UTC-00:00 as a zone ID), but this suffix (and ID)
# should be recognized when consuming data generated by other sources,
# for backwards compatibility.

class ByteArrayData:
    """Pool of NUL-terminated strings emitted as a single C char array.

    Each distinct string is stored once; append() returns the offset at
    which the string starts, so callers can refer to it by index.
    """
    def __init__(self):
        self.data = [] # Accumulated array entries, as given by unicode2hex().
        self.hash = {} # Maps each stored (NUL-terminated) string to its offset.

    def append(self, s):
        """Store string s (if not already present) and return its offset.

        A NUL terminator is added to s before it is stored or looked up.
        Raises Error if the offset at which a new string would start
        exceeds the uint16 range.
        """
        s = s + '\0'
        if s in self.hash:
            return self.hash[s]

        lst = unicode2hex(s)
        index = len(self.data)
        if index > 0xffff:
            raise Error(f'Index ({index}) outside the uint16 range !')
        self.hash[s] = index
        self.data += lst
        return index

    def write(self, out, name):
        """Emit the accumulated data as a C array called name.

        out is a callable taking each chunk of text to be written.
        """
        out(f'\nstatic constexpr char {name}[] = {{\n')
        out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
        out('\n};\n')


class ZoneIdWriter (SourceFileEditor):
    """Writes the generated time-zone ID tables into the edited source file.

    Output goes through self.writer.write, which is not set up in this
    class (presumably SourceFileEditor provides it; verify in localetools).
    """
    # All the output goes into namespace QtTimeZoneCldr.
    def write(self, version, defaults, windowsIds):
        """Write the warning comment, the three tables and both string pools.

        version is the CLDR version to name in the warning comment;
        defaults maps each Windows ID key to the IANA ID text of its
        default zone; windowsIds maps to dicts with the keys
        'windowsKey', 'territoryId', 'ianaList', 'windowsId' and
        'territory'.
        """
        self.__writeWarning(version)
        windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
        windows.write(self.writer.write, 'windowsIdData')
        iana.write(self.writer.write, 'ianaIdData')

    def __writeWarning(self, version):
        """Write the 'generated code, do not edit' comment, with date and version."""
        self.writer.write(f"""
/*
    This part of the file was generated on {datetime.date.today()} from the
    Common Locale Data Repository v{version} file supplemental/windowsZones.xml

    http://www.unicode.org/cldr/

    Do not edit this code: run cldr2qtimezone.py on updated (or edited) CLDR
    data; see qtbase/util/locale_database/.
*/

""")

    @staticmethod
    def __writeTables(out, defaults, windowsIds):
        """Write zoneDataTable, windowsDataTable and utcDataTable via out.

        Returns a pair (windowsIdData, ianaIdData) of ByteArrayData
        objects holding the strings the tables refer to by index; the
        caller is responsible for writing those out.

        Raises Error if windowsIdList is not sorted case-insensitively
        by Windows ID.
        """
        windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()

        # Write Windows/IANA table
        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
        out('static constexpr ZoneData zoneDataTable[] = {\n')
        # Sorted by (Windows ID Key, territory enum)
        # Only the ordering of the keys matters here; the key is not output.
        for _, data in sorted(windowsIds.items()):
            out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
                    data['windowsKey'], data['territoryId'],
                    ianaIdData.append(data['ianaList']),
                    data['windowsId'], data['territory']))
        out('};\n\n')

        # Write Windows ID key table
        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static constexpr WindowsData windowsDataTable[] = {\n')
        # Sorted by Windows ID key; sorting case-insensitively by
        # Windows ID must give the same order.
        winIdNames = [name.lower() for name, _ in windowsIdList]
        misplaced = [(got, want)
                     for got, want in zip(winIdNames, sorted(winIdNames))
                     if got != want]
        # Raise rather than assert, so that the check survives python -O.
        if misplaced:
            raise Error('windowsIdList is not sorted case-insensitively '
                        f'by Windows ID: {misplaced}')

        for index, pair in enumerate(windowsIdList, 1):
            out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
                    index,
                    windowsIdData.append(pair[0]),
                    ianaIdData.append(defaults[index]),
                    pair[1], pair[0]))
        out('};\n\n')

        # Group the UTC IDs by offset, preserving their order in utcIdList.
        offsetMap = {}
        for name, offset in utcIdList:
            offsetMap.setdefault(offset, []).append(name)

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static constexpr UtcData utcDataTable[] = {\n')
        for offset in sorted(offsetMap): # Sort so C++ can binary-chop.
            names = offsetMap[offset]
            out(' {{ {:6d},{:6d} }}, // {}\n'.format(
                    ianaIdData.append(' '.join(names)), offset, names[0]))
        out('};\n')

        return windowsIdData, ianaIdData


def main(out, err):
    """Parses CLDR's data and updates Qt's representation of it.

    Takes sys.stdout, sys.stderr (or equivalents) as arguments. Expects
    one or two command-line arguments: the root of the unpacked CLDR
    data-file tree and, optionally, the root of the qtbase module's
    checkout (which defaults to qtbase_root). Updates QTimeZone's
    private data about Windows time-zone IDs.

    Returns 0 on success, 1 if the CLDR data could not be read or the
    data file could not be updated. Problems with the command-line, or
    with opening the CLDR file, are reported via the argument parser's
    error(), which exits the process instead of returning."""
    parser = argparse.ArgumentParser(
        description="Update Qt's CLDR-derived timezone data.")
    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
    parser.add_argument('qtbase_path',
                        help='path to the root of the qtbase source tree',
                        nargs='?', default=qtbase_root)

    args = parser.parse_args()

    cldrPath = Path(args.cldr_path)
    qtPath = Path(args.qtbase_path)

    if not qtPath.is_dir():
        parser.error(f"No such Qt directory: {qtPath}")

    if not cldrPath.is_dir():
        parser.error(f"No such CLDR directory: {cldrPath}")

    dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
    if not dataFilePath.is_file():
        parser.error(f'No such file: {dataFilePath}')

    # Map each Windows ID to its key: its 1-based position in windowsIdList.
    windowsKeys = {name: key for key, (name, _) in enumerate(windowsIdList, 1)}
    try:
        version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
            windowsKeys)
    except IOError as e:
        # parser.error() exits the process, so nothing follows it here.
        parser.error(
            f'Failed to open common/supplemental/windowsZones.xml: {e}')
    except Error as e:
        err.write('\n'.join(textwrap.wrap(
                    f'Failed to read windowsZones.xml: {e}',
                    subsequent_indent=' ', width=80)) + '\n')
        return 1

    out.write('Input file parsed, now writing data\n')

    try:
        with ZoneIdWriter(dataFilePath, qtPath) as writer:
            writer.write(version, defaults, winIds)
    except Exception as e:
        # Top-level boundary: report any failure and signal it by exit status.
        err.write(f'\nError while updating timezone data: {e}\n')
        return 1

    out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.stdout, sys.stderr))