From 5b1c33cc7834b0811784980f4b1ab9d31863fbe8 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Wed, 19 Feb 2020 16:10:45 +0100 Subject: Rework cldr2qtimezone.py into more maintainable form Broke out the updating of a source file to a ZoneIdWriter helper class, which enables tidying away the temporary file if we fail. Collected up the rest of the script into a main() that's now called from a __name__ == '__main__' block. Rationalized the imports. Eliminated an inefficient lookup function by constructing a suitable dict() before entering the loop that needed it. Separated the "data you might need to update" tables from the code that does the work, to make it easier for those adding support for new zones to see what they're doing. Removed the spurious $Revision$ from the output and reworded the preamble of the generated file. (It would seem CLDR no longer uses an RCS-based version-control system.) Generated output is otherwise unchanged. Task-number: QTBUG-81344 Change-Id: I7d9de8357ebcb599d154de9f862e25f7ade00390 Reviewed-by: Lars Knoll Reviewed-by: Cristian Maureira-Fredes --- src/corelib/time/qtimezoneprivate_data_p.h | 4 +- util/locale_database/cldr2qtimezone.py | 325 ++++++++++++++--------------- 2 files changed, 162 insertions(+), 167 deletions(-) diff --git a/src/corelib/time/qtimezoneprivate_data_p.h b/src/corelib/time/qtimezoneprivate_data_p.h index 822af9c703..6d2bbc83c1 100644 --- a/src/corelib/time/qtimezoneprivate_data_p.h +++ b/src/corelib/time/qtimezoneprivate_data_p.h @@ -115,8 +115,8 @@ struct QUtcData { // GENERATED PART STARTS HERE /* - This part of the file was generated on 2019-10-24 from the - Common Locale Data Repository v36 supplemental/windowsZones.xml file $Revision$ + This part of the file was generated on 2020-02-28 from the + Common Locale Data Repository v36 file supplemental/windowsZones.xml http://www.unicode.org/cldr/ diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index 
7816abc9e1..f2d2003d53 100755 --- a/util/locale_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 ############################################################################# ## -## Copyright (C) 2019 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -35,7 +35,7 @@ script and the qtbase root directory as second parameter. It shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready for use. -The XML structure is as follows: +The XML structure we read has the form: @@ -53,34 +53,18 @@ The XML structure is as follows: """ import os -import sys import re import datetime -import tempfile import enumdata -from localetools import unicode2hex, wrap_list, Error +from localetools import unicode2hex, wrap_list, Error, SourceFileEditor from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \ _findEntryInFile as findEntryInFile -class ByteArrayData: - def __init__(self): - self.data = [] - self.hash = {} - def append(self, s): - s = s + '\0' - if s in self.hash: - return self.hash[s] +### Data that may need updates in response to new entries in the CLDR file ### - lst = unicode2hex(s) - index = len(self.data) - if index > 65535: - print "\n\n\n#error Data index is too big!" - sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) - sys.exit(1) - self.hash[s] = index - self.data += lst - return index +# This script shall report the update you need, if this arises. +# However, you may need to research the relevant zone's standard offset. # List of currently known Windows IDs. # If this script reports missing IDs, please add them here. 
@@ -227,12 +211,6 @@ windowsIdList = ( (u'Yakutsk Standard Time', 32400), ) -def windowsIdToKey(windowsId): - for index, pair in enumerate(windowsIdList): - if pair[0] == windowsId: - return index + 1 - return 0 - # List of standard UTC IDs to use. Not public so may be safely changed. # Do not remove IDs, as each entry is part of the API/behavior guarantee. # ( UTC Id, Offset Seconds ) @@ -279,42 +257,143 @@ utcIdList = ( (u'UTC+14:00', 50400), ) -def usage(): - print "Usage: cldr2qtimezone.py " - sys.exit() +### End of data that may need updates in response to CLDR ### -if len(sys.argv) != 3: - usage() - -cldrPath = sys.argv[1] -qtPath = sys.argv[2] +class ByteArrayData: + def __init__(self): + self.data = [] + self.hash = {} -if not os.path.isdir(cldrPath) or not os.path.isdir(qtPath): - usage() + def append(self, s): + s = s + '\0' + if s in self.hash: + return self.hash[s] -windowsZonesPath = cldrPath + "/supplemental/windowsZones.xml" -tempFileDir = qtPath -dataFilePath = qtPath + "/src/corelib/time/qtimezoneprivate_data_p.h" + lst = unicode2hex(s) + index = len(self.data) + if index > 0xffff: + raise Error('Index ({}) outside the uint16 range !'.format(index)) + self.hash[s] = index + self.data += lst + return index -if not (os.path.isfile(windowsZonesPath) and os.path.isfile(dataFilePath)): - usage() + def write(self, out, name): + out('\nstatic const char {}[] = {{\n'.format(name)) + out(wrap_list(self.data)) + out('\n};\n') -cldr_version = 'unknown' -ldml = open(cldrPath + "/dtd/ldml.dtd", "r") -for line in ldml: - if 'version cldrVersion CDATA #FIXED' in line: - cldr_version = line.split('"')[1] +class ZoneIdWriter (SourceFileEditor): + def write(self, version, defaults, windowsIds): + self.__writeWarning(version) + windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds) + windows.write(self.writer.write, 'windowsIdData') + iana.write(self.writer.write, 'ianaIdData') -# [[u'version', [(u'number', u'$Revision: 7825 $')]]] -versionNumber 
= findTagsInFile(windowsZonesPath, "version")[0][1][0][1] + def __writeWarning(self, version): + self.writer.write(""" +/* + This part of the file was generated on {} from the + Common Locale Data Repository v{} file supplemental/windowsZones.xml -mapTimezones = findTagsInFile(windowsZonesPath, "windowsZones/mapTimezones") + http://www.unicode.org/cldr/ -defaultDict = {} -windowsIdDict = {} + Do not edit this code: run cldr2qtimezone.py on updated (or + edited) CLDR data; see qtbase/util/locale_database/. +*/ -if mapTimezones: +""".format(str(datetime.date.today()), version)) + + @staticmethod + def __writeTables(out, defaults, windowsIds): + windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData() + + # Write Windows/IANA table + out('// Windows ID Key, Country Enum, IANA ID Index\n') + out('static const QZoneData zoneDataTable[] = {\n') + for index, data in sorted(windowsIds.items()): + out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format( + data['windowsKey'], data['countryId'], + ianaIdData.append(data['ianaList']), + data['windowsId'], data['country'])) + out(' { 0, 0, 0 } // Trailing zeroes\n') + out('};\n\n') + + # Write Windows ID key table + out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n') + out('static const QWindowsData windowsDataTable[] = {\n') + for index, pair in enumerate(windowsIdList, 1): + out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format( + index, + windowsIdData.append(pair[0]), + ianaIdData.append(defaults[index]), + pair[1], pair[0])) + out(' { 0, 0, 0, 0 } // Trailing zeroes\n') + out('};\n\n') + + # Write UTC ID key table + out('// IANA ID Index, UTC Offset\n') + out('static const QUtcData utcDataTable[] = {\n') + for pair in utcIdList: + out(' {{ {:6d},{:6d} }}, // {}\n'.format( + ianaIdData.append(pair[0]), pair[1], pair[0])) + out(' { 0, 0 } // Trailing zeroes\n') + out('};\n') + + return windowsIdData, ianaIdData + +def usage(err, name, message=''): + err.write("""Usage: {} path/to/cldr/core/common 
path/to/qtbase +""".format(name)) # TODO: more interesting message + if message: + err.write('\n' + message + '\n') + +def main(args, out, err): + """Parses CLDR's data and updates Qt's representation of it. + + Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as + arguments. Expects two command-line options: the common/ + subdirectory of the unpacked CLDR data-file tree and the root of + the qtbase module's checkout. Updates QTimeZone's private data + about Windows time-zone IDs.""" + name = args.pop(0) + if len(args) != 2: + usage(err, name, "Expected two arguments") + return 1 + + cldrPath = args.pop(0) + qtPath = args.pop(0) + + if not os.path.isdir(qtPath): + usage(err, name, "No such Qt directory: " + qtPath) + return 1 + if not os.path.isdir(cldrPath): + usage(err, name, "No such CLDR directory: " + cldrPath) + return 1 + + dataFilePath = os.path.join(qtPath, 'src', 'corelib', 'time', 'qtimezoneprivate_data_p.h') + if not os.path.isfile(dataFilePath): + usage(err, name, 'No such file: ' + dataFilePath) + return 1 + + windowsZonesPath = cldrPath + "/supplemental/windowsZones.xml" + if not os.path.isfile(windowsZonesPath): + usage(err, name, 'Failed to find CLDR data file: ' + windowsZonesPath) + return 1 + + cldrVersion = 'unknown' + ldml = open(cldrPath + "/dtd/ldml.dtd", "r") + for line in ldml: + if 'version cldrVersion CDATA #FIXED' in line: + cldrVersion = line.split('"')[1] + + mapTimezones = findTagsInFile(windowsZonesPath, "windowsZones/mapTimezones") + if not mapTimezones: + err.write('Failed to find time-zone data - aborting !\n') + return 1 + + defaultDict, windowsIdDict = {}, {} badZones = set() + winIdToIndex = dict((name, ind + 1) for ind, name in enumerate(x[0] for x in windowsIdList)) for mapZone in mapTimezones: # [u'mapZone', [(u'territory', u'MH'), (u'other', u'UTC+12'), (u'type', u'Pacific/Majuro Pacific/Kwajalein')]] if mapZone[0] == u'mapZone': @@ -327,8 +406,9 @@ if mapTimezones: if attribute[0] == u'type': data['ianaList'] = 
attribute[1] - data['windowsKey'] = windowsIdToKey(data['windowsId']) - if data['windowsKey'] <= 0: + try: + data['windowsKey'] = winIdToIndex[data['windowsId']] + except KeyError: badZones.add(data['windowsId']) countryId = 0 @@ -341,113 +421,28 @@ if mapTimezones: data['country'] = enumdata.country_list[data['countryId']][0] windowsIdDict[data['windowsKey'], data['countryId']] = data if badZones: - sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones)) - + "\nto the windowIdList in cldr2qtimezone.py\n\n") - raise Error('Unknown Windows IDs') - -print "Input file parsed, now writing data" - -GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n" -GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n" - -# Create a temp file to write the new data into -(newTempFile, newTempFilePath) = tempfile.mkstemp("qtimezone_data_p", dir=tempFileDir) -newTempFile = os.fdopen(newTempFile, "w") - -# Open the old file and copy over the first non-generated section to the new file -oldDataFile = open(dataFilePath, "r") -s = oldDataFile.readline() -while s and s != GENERATED_BLOCK_START: - newTempFile.write(s) - s = oldDataFile.readline() - -# Write out generated block start tag and warning -newTempFile.write(GENERATED_BLOCK_START) -newTempFile.write(""" -/* - This part of the file was generated on %s from the - Common Locale Data Repository v%s supplemental/windowsZones.xml file %s - - http://www.unicode.org/cldr/ - - Do not edit this code: run cldr2qtimezone.py on updated (or - edited) CLDR data; see qtbase/util/locale_database/. 
-*/ + err.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones)) + + "\nto the windowsIdList in cldr2qtimezone.py\n\n") + return 1 + + out.write('Input file parsed, now writing data\n') + try: + writer = ZoneIdWriter(dataFilePath, qtPath) + except IOError as e: + err.write('Failed to open files to transcribe: {}'.format(e.message or e.args[1])) + return 1 + + try: + writer.write(cldrVersion, defaultDict, windowsIdDict) + except Error as e: + writer.cleanup() + err.write('\nError in Windows ID data: ' + e.message + '\n') + return 1 + + writer.close() + out.write('Data generation completed, please check the new file at ' + dataFilePath + '\n') + return 0 -""" % (str(datetime.date.today()), cldr_version, versionNumber) ) - -windowsIdData = ByteArrayData() -ianaIdData = ByteArrayData() - -# Write Windows/IANA table -newTempFile.write("// Windows ID Key, Country Enum, IANA ID Index\n") -newTempFile.write("static const QZoneData zoneDataTable[] = {\n") -for index in sorted(windowsIdDict): - data = windowsIdDict[index] - newTempFile.write(" { %6d,%6d,%6d }, // %s / %s\n" - % (data['windowsKey'], - data['countryId'], - ianaIdData.append(data['ianaList']), - data['windowsId'], - data['country'])) -newTempFile.write(" { 0, 0, 0 } // Trailing zeroes\n") -newTempFile.write("};\n\n") - -print "Done Zone Data" - -# Write Windows ID key table -newTempFile.write("// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n") -newTempFile.write("static const QWindowsData windowsDataTable[] = {\n") -for index, pair in enumerate(windowsIdList): - newTempFile.write(" { %6d,%6d,%6d,%6d }, // %s\n" - % (index + 1, windowsIdData.append(pair[0]), - ianaIdData.append(defaultDict[index + 1]), pair[1], pair[0])) -newTempFile.write(" { 0, 0, 0, 0 } // Trailing zeroes\n") -newTempFile.write("};\n\n") - -print "Done Windows Data Table" - -# Write UTC ID key table -newTempFile.write("// IANA ID Index, UTC Offset\n") -newTempFile.write("static const QUtcData 
utcDataTable[] = {\n") -for pair in utcIdList: - newTempFile.write(" { %6d,%6d }, // %s\n" - % (ianaIdData.append(pair[0]), pair[1], pair[0])) -newTempFile.write(" { 0, 0 } // Trailing zeroes\n") -newTempFile.write("};\n\n") - -print "Done UTC Data Table" - -# Write out Windows ID's data -newTempFile.write("static const char windowsIdData[] = {\n") -newTempFile.write(wrap_list(windowsIdData.data)) -newTempFile.write("\n};\n\n") - -# Write out IANA ID's data -newTempFile.write("static const char ianaIdData[] = {\n") -newTempFile.write(wrap_list(ianaIdData.data)) -newTempFile.write("\n};\n") - -print "Done ID Data Table" - -# Write out the end of generated block tag -newTempFile.write(GENERATED_BLOCK_END) -s = oldDataFile.readline() - -# Skip through the old generated data in the old file -while s and s != GENERATED_BLOCK_END: - s = oldDataFile.readline() - -# Now copy the rest of the original file into the new file -s = oldDataFile.readline() -while s: - newTempFile.write(s) - s = oldDataFile.readline() - -# Now close the old and new file, delete the old file and copy the new file in its place -newTempFile.close() -oldDataFile.close() -os.remove(dataFilePath) -os.rename(newTempFilePath, dataFilePath) - -print "Data generation completed, please check the new file at " + dataFilePath +if __name__ == '__main__': + import sys + sys.exit(main(sys.argv, sys.stdout, sys.stderr)) -- cgit v1.2.3