diff options
Diffstat (limited to 'util/locale_database/cldr2qtimezone.py')
-rwxr-xr-x | util/locale_database/cldr2qtimezone.py | 369 |
1 files changed, 152 insertions, 217 deletions
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index 4c3609056d..70b5d1e69e 100755 --- a/util/locale_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 ############################################################################# ## -## Copyright (C) 2019 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -34,59 +34,20 @@ the CLDR data. Pass its common/ directory as first parameter to this script and the qtbase root directory as second parameter. It shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready for use. - -The XML structure is as follows: - - <supplementalData> - <version number="$Revision:...$"/> - <generation date="$Date:...$"/> - <windowsZones> - <mapTimezones otherVersion="..." typeVersion="..."> - <!-- (UTC-08:00) Pacific Time (US & Canada) --> - <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/> - <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/> - <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/> - <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/> - </mapTimezones> - </windowsZones> - </supplementalData> """ import os -import sys -import datetime -import tempfile -import enumdata -import xpathlite -from xpathlite import DraftResolution import re -import qlocalexml2cpp +import datetime +import textwrap -findAlias = xpathlite.findAlias -findEntry = xpathlite.findEntry -findEntryInFile = xpathlite._findEntryInFile -findTagsInFile = xpathlite.findTagsInFile -unicode2hex = qlocalexml2cpp.unicode2hex -wrap_list = qlocalexml2cpp.wrap_list +from localetools import unicode2hex, wrap_list, Error, SourceFileEditor +from cldr import CldrAccess -class 
ByteArrayData: - def __init__(self): - self.data = [] - self.hash = {} - def append(self, s): - s = s + '\0' - if s in self.hash: - return self.hash[s] +### Data that may need updates in response to new entries in the CLDR file ### - lst = unicode2hex(s) - index = len(self.data) - if index > 65535: - print "\n\n\n#error Data index is too big!" - sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) - sys.exit(1) - self.hash[s] = index - self.data += lst - return index +# This script shall report the update you need, if this arises. +# However, you may need to research the relevant zone's standard offset. # List of currently known Windows IDs. # If this script reports missing IDs, please add them here. @@ -233,12 +194,6 @@ windowsIdList = ( (u'Yakutsk Standard Time', 32400), ) -def windowsIdToKey(windowsId): - for index, pair in enumerate(windowsIdList): - if pair[0] == windowsId: - return index + 1 - return 0 - # List of standard UTC IDs to use. Not public so may be safely changed. # Do not remove IDs, as each entry is part of the API/behavior guarantee. 
# ( UTC Id, Offset Seconds ) @@ -285,94 +240,43 @@ utcIdList = ( (u'UTC+14:00', 50400), ) -def usage(): - print "Usage: cldr2qtimezone.py <path to cldr core/common> <path to qtbase>" - sys.exit() - -if len(sys.argv) != 3: - usage() - -cldrPath = sys.argv[1] -qtPath = sys.argv[2] - -if not os.path.isdir(cldrPath) or not os.path.isdir(qtPath): - usage() - -windowsZonesPath = cldrPath + "/supplemental/windowsZones.xml" -tempFileDir = qtPath -dataFilePath = qtPath + "/src/corelib/time/qtimezoneprivate_data_p.h" - -if not (os.path.isfile(windowsZonesPath) and os.path.isfile(dataFilePath)): - usage() - -cldr_version = 'unknown' -ldml = open(cldrPath + "/dtd/ldml.dtd", "r") -for line in ldml: - if 'version cldrVersion CDATA #FIXED' in line: - cldr_version = line.split('"')[1] - -# [[u'version', [(u'number', u'$Revision: 7825 $')]]] -versionNumber = findTagsInFile(windowsZonesPath, "version")[0][1][0][1] - -mapTimezones = findTagsInFile(windowsZonesPath, "windowsZones/mapTimezones") - -defaultDict = {} -windowsIdDict = {} - -if mapTimezones: - badZones = set() - for mapZone in mapTimezones: - # [u'mapZone', [(u'territory', u'MH'), (u'other', u'UTC+12'), (u'type', u'Pacific/Majuro Pacific/Kwajalein')]] - if mapZone[0] == u'mapZone': - data = {} - for attribute in mapZone[1]: - if attribute[0] == u'other': - data['windowsId'] = attribute[1] - if attribute[0] == u'territory': - data['countryCode'] = attribute[1] - if attribute[0] == u'type': - data['ianaList'] = attribute[1] - - data['windowsKey'] = windowsIdToKey(data['windowsId']) - if data['windowsKey'] <= 0: - badZones.add(data['windowsId']) - - countryId = 0 - if data['countryCode'] == u'001': - defaultDict[data['windowsKey']] = data['ianaList'] - else: - data['countryId'] = enumdata.countryCodeToId(data['countryCode']) - if data['countryId'] < 0: - raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode']) - data['country'] = enumdata.country_list[data['countryId']][0] - windowsIdDict[data['windowsKey'], 
### End of data that may need updates in response to CLDR ###

class ByteArrayData:
    """Pool of NUL-terminated strings stored end to end in one flat list.

    Each distinct string is stored once; append() returns the index at
    which the string starts, so that tables can refer to it by index.
    """
    def __init__(self):
        self.data = []  # Flat list of entries, as produced by unicode2hex()
        self.hash = {}  # Maps each stored (NUL-terminated) string to its start index

    def append(self, s):
        """Add s (if not already present) and return its start index.

        A NUL terminator is added to s before it is stored or looked
        up. Raises Error if the start index would not fit in a uint16.
        """
        s = s + '\0'
        if s in self.hash:
            return self.hash[s]

        lst = unicode2hex(s)
        index = len(self.data)
        # Only the start of each string is indexed, so only that needs
        # to fit in the 16-bit index type.
        if index > 0xffff:
            raise Error('Index ({}) outside the uint16 range !'.format(index))
        self.hash[s] = index
        self.data += lst
        return index

    def write(self, out, name):
        """Emit the pool as a C array called name.

        out is a callable taking one string, e.g. a file's write method.
        """
        out('\nstatic const char {}[] = {{\n'.format(name))
        out(wrap_list(self.data))
        out('\n};\n')

class ZoneIdWriter (SourceFileEditor):
    """Writes the generated part of QTimeZone's private data header.

    Output goes through self.writer, which is presumably set up by
    SourceFileEditor (not visible here; confirm in localetools).
    """
    def write(self, version, defaults, windowsIds):
        """Write the warning comment, the three tables and both ID pools.

        version is the CLDR version reported in the warning comment;
        defaults and windowsIds are as described for __writeTables().
        May raise Error if an ID pool outgrows its 16-bit index range.
        """
        self.__writeWarning(version)
        windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
        windows.write(self.writer.write, 'windowsIdData')
        iana.write(self.writer.write, 'ianaIdData')

    def __writeWarning(self, version):
        """Write the comment saying when, and from what, the data was generated."""
        self.writer.write("""
/*
    This part of the file was generated on {} from the
    Common Locale Data Repository v{} file supplemental/windowsZones.xml

    http://www.unicode.org/cldr/

    Do not edit this code: run cldr2qtimezone.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""".format(str(datetime.date.today()), version))

    @staticmethod
    def __writeTables(out, defaults, windowsIds):
        """Write the zone, Windows and UTC tables; return the ID pools.

        out is a callable taking one string. defaults maps each 1-based
        position in windowsIdList to that Windows ID's default IANA
        ID(s). windowsIds is a dict, written out in sorted key order,
        whose values are dicts with keys 'windowsKey', 'countryId',
        'ianaList', 'windowsId' and 'country'.

        Returns a pair (windowsIdData, ianaIdData) of ByteArrayData
        objects holding the strings that the tables index into.
        """
        windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()

        # Write Windows/IANA table
        out('// Windows ID Key, Country Enum, IANA ID Index\n')
        out('static const QZoneData zoneDataTable[] = {\n')
        for key in sorted(windowsIds):
            data = windowsIds[key]
            out('    {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
                    data['windowsKey'], data['countryId'],
                    ianaIdData.append(data['ianaList']),
                    data['windowsId'], data['country']))
        # The terminating row uses the same field widths as the data rows.
        out('    {{ {:6d},{:6d},{:6d} }} // Trailing zeroes\n'.format(0, 0, 0))
        out('};\n\n')

        # Write Windows ID key table
        out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
        out('static const QWindowsData windowsDataTable[] = {\n')
        for index, (windowsId, offset) in enumerate(windowsIdList, 1):
            out('    {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
                    index,
                    windowsIdData.append(windowsId),
                    ianaIdData.append(defaults[index]),
                    offset, windowsId))
        out('    {{ {:6d},{:6d},{:6d},{:6d} }} // Trailing zeroes\n'.format(0, 0, 0, 0))
        out('};\n\n')

        # Write UTC ID key table
        out('// IANA ID Index, UTC Offset\n')
        out('static const QUtcData utcDataTable[] = {\n')
        for utcId, offset in utcIdList:
            out('    {{ {:6d},{:6d} }}, // {}\n'.format(
                    ianaIdData.append(utcId), offset, utcId))
        out('    {{ {:6d},{:6d} }} // Trailing zeroes\n'.format(0, 0))
        out('};\n')

        return windowsIdData, ianaIdData

def usage(err, name, message=''):
    """Write a usage summary to err, followed by message if non-empty.

    name is the program name to show in the summary.
    """
    err.write("""Usage: {} path/to/cldr/core/common path/to/qtbase
""".format(name)) # TODO: more interesting message
    if message:
        err.write('\n' + message + '\n')

def _errorText(e):
    """Return the most useful description available for exception e.

    Prefers a non-empty message attribute, then the second entry in
    e.args (the error text, for a typical IOError), then str(e). Never
    raises merely because one of these is missing.
    """
    message = getattr(e, 'message', '')
    if message:
        return message
    if len(e.args) > 1:
        return e.args[1]
    return str(e)

def main(args, out, err):
    """Parses CLDR's data and updates Qt's representation of it.

    Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
    arguments. Expects two command-line arguments: the root of the
    unpacked CLDR data-file tree and the root of the qtbase module's
    checkout. Updates QTimeZone's private data about Windows time-zone
    IDs. Leaves args unmodified. Returns 0 on success, 1 on failure
    (after reporting the problem to err)."""
    name, params = args[0], args[1:]
    if len(params) != 2:
        usage(err, name, "Expected two arguments")
        return 1

    cldrPath, qtPath = params

    if not os.path.isdir(qtPath):
        usage(err, name, "No such Qt directory: " + qtPath)
        return 1
    if not os.path.isdir(cldrPath):
        usage(err, name, "No such CLDR directory: " + cldrPath)
        return 1

    dataFilePath = os.path.join(qtPath, 'src', 'corelib', 'time', 'qtimezoneprivate_data_p.h')
    if not os.path.isfile(dataFilePath):
        usage(err, name, 'No such file: ' + dataFilePath)
        return 1

    try:
        # Map each known Windows ID to its 1-based position in windowsIdList:
        version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
            dict((winId, ind) for ind, winId in enumerate((x[0] for x in windowsIdList), 1)))
    except IOError as e:
        usage(err, name,
              'Failed to open common/supplemental/windowsZones.xml: ' + _errorText(e))
        return 1
    except Error as e:
        err.write('\n'.join(textwrap.wrap(
                    'Failed to read windowsZones.xml: ' + _errorText(e),
                    subsequent_indent=' ', width=80)) + '\n')
        return 1

    out.write('Input file parsed, now writing data\n')
    try:
        writer = ZoneIdWriter(dataFilePath, qtPath)
    except IOError as e:
        err.write('Failed to open files to transcribe: {}\n'.format(_errorText(e)))
        return 1

    try:
        writer.write(version, defaults, winIds)
    except Error as e:
        writer.cleanup()
        err.write('\nError in Windows ID data: ' + _errorText(e) + '\n')
        return 1
    except Exception:
        # Abandon the partial output the same way as for Error, then
        # let the unexpected failure propagate with its traceback.
        writer.cleanup()
        raise

    writer.close()
    out.write('Data generation completed, please check the new file at ' + dataFilePath + '\n')
    return 0

if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv, sys.stdout, sys.stderr))