diff options
Diffstat (limited to 'util/locale_database/qlocalexml2cpp.py')
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 664 |
1 files changed, 418 insertions, 246 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index 1483545ce7..dd01589672 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -1,112 +1,127 @@ -#!/usr/bin/env python2 -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# -"""Script to generate C++ code from CLDR data in qLocaleXML form - -See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself. -Pass the output file from that as first parameter to this script; pass -the root of the qtbase check-out as second parameter. -""" - -import os -import datetime - -from qlocalexml import QLocaleXmlReader -from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor - -def compareLocaleKeys(key1, key2): - if key1 == key2: - return 0 +#!/usr/bin/env python3 +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 +"""Script to generate C++ code from CLDR data in QLocaleXML form - if key1[0] != key2[0]: # First sort by language: - return key1[0] - key2[0] +See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself. +Pass the output file from that as first parameter to this script; pass the ISO +639-3 data file as second parameter. You can optionally pass the root of the +qtbase check-out as third parameter; it defaults to the root of the qtbase +check-out containing this script. - defaults = compareLocaleKeys.default_map - # maps {(language, script): country} by ID - try: - country = defaults[key1[:2]] - except KeyError: - pass - else: - if key1[2] == country: - return -1 - if key2[2] == country: - return 1 +The ISO 639-3 data file can be downloaded from the SIL website: - if key1[1] == key2[1]: - return key1[2] - key2[2] + https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab +""" - try: - country = defaults[key2[:2]] - except KeyError: - pass - else: - if key2[2] == country: - return 1 - if key1[2] == country: - return -1 +import datetime +import argparse +from pathlib import Path +from typing import Optional - return key1[1] - key2[1] +from qlocalexml import QLocaleXmlReader +from localetools import * +from iso639_3 import LanguageCodeData +from zonedata import utcIdList, windowsIdList + + +# Sanity check the zone data: + +# Offsets of the windows tables, in minutes, where whole numbers: +winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0) +# The UTC±HH:mm forms of the non-zero offsets: +winUtc = set(f'UTC-{h:02}:{m:02}' + for h, m in (divmod(-o, 60) for o in winOff if o < 0) + ).union(f'UTC+{h:02}:{m:02}' + for h, m in (divmod(o, 60) for o in winOff if o > 0)) +# All such offsets should be represented by entries in utcIdList: +newUtc = winUtc.difference(utcIdList) +assert not newUtc, ( + 'Please add missing UTC-offset zones to to zonedata.utcIdList', newUtc) + + +class LocaleKeySorter: + """Sort-ordering representation of a locale key. + + This is for passing to a sorting algorithm as key-function, that + it applies to each entry in the list to decide which belong + earlier. It adds an entry to the (language, script, territory) + triple, just before script, that sorts earlier if the territory is + the default for the given language and script, later otherwise. + """ + + # TODO: study the relationship between this and CLDR's likely + # sub-tags algorithm. Work out how locale sort-order impacts + # QLocale's likely sub-tag matching algorithms. Make sure this is + # sorting in an order compatible with those algorithms. + + def __init__(self, defaults): + self.map = dict(defaults) + def foreign(self, key): + default = self.map.get(key[:2]) + return default is None or default != key[2] + def __call__(self, key): + # TODO: should we compare territory before or after script ? + return (key[0], self.foreign(key)) + key[1:] + +class ByteArrayData: + # Only for use with ASCII data, e.g. IANA IDs. + def __init__(self): + self.data, self.hash = [], {} + + def append(self, s): + assert s.isascii(), s + s += '\0' + if s in self.hash: + return self.hash[s] + + index = len(self.data) + if index > 0xffff: + raise Error(f'Index ({index}) outside the uint16 range !') + self.hash[s] = index + self.data += unicode2hex(s) + return index + def write(self, out, name): + out(f'\nstatic constexpr char {name}[] = {{\n') + out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') + # All data is ASCII, so only two-digit hex is ever needed. + out('\n};\n') class StringDataToken: - def __init__(self, index, length, bits): - if index > 0xffff: - raise ValueError('Start-index ({}) exceeds the uint16 range!'.format(index)) - if length >= (1 << bits): - raise ValueError('Data size ({}) exceeds the {}-bit range!'.format(length, bits)) + def __init__(self, index, length, lenbits, indbits): + if index >= (1 << indbits): + raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!') + if length >= (1 << lenbits): + raise ValueError(f'Data size ({length}) exceeds the {lenbits}-bit range!') self.index = index self.length = length class StringData: - def __init__(self, name): + def __init__(self, name, lenbits = 8, indbits = 16): self.data = [] self.hash = {} self.name = name self.text = '' # Used in quick-search for matches in data + self.__bits = lenbits, indbits - def append(self, s, bits = 8): + def append(self, s): try: token = self.hash[s] except KeyError: - token = self.__store(s, bits) + token = self.__store(s) self.hash[s] = token return token - def __store(self, s, bits): + def __store(self, s): """Add string s to known data. Seeks to avoid duplication, where possible. For example, short-forms may be prefixes of long-forms. """ if not s: - return StringDataToken(0, 0, bits) + return StringDataToken(0, 0, *self.__bits) ucs2 = unicode2hex(s) try: index = self.text.index(s) - 1 @@ -124,18 +139,19 @@ class StringData: assert index >= 0 try: - return StringDataToken(index, len(ucs2), bits) + return StringDataToken(index, len(ucs2), *self.__bits) except ValueError as e: e.args += (self.name, s) raise def write(self, fd): - if len(self.data) > 0xffff: - raise ValueError('Data is too big ({}) for quint16 index to its end!' - .format(len(self.data)), + indbits = self.__bits[1] + if len(self.data) >= (1 << indbits): + raise ValueError(f'Data is too big ({len(self.data)}) ' + f'for {indbits}-bit index to its end!', self.name) - fd.write("\nstatic const char16_t {}[] = {{\n".format(self.name)) - fd.write(wrap_list(self.data)) + fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n") + fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ') fd.write("\n};\n") def currencyIsoCodeData(s): @@ -144,13 +160,16 @@ def currencyIsoCodeData(s): return "{0,0,0}" class LocaleSourceEditor (SourceFileEditor): - __upinit = SourceFileEditor.__init__ - def __init__(self, path, temp, version): - self.__upinit(path, temp) - self.writer.write(""" + def __init__(self, path: Path, temp: Path, version: str): + super().__init__(path, temp) + self.version = version + + def onEnter(self) -> None: + super().onEnter() + self.writer.write(f""" /* - This part of the file was generated on {} from the - Common Locale Data Repository v{} + This part of the file was generated on {datetime.date.today()} from the + Common Locale Data Repository v{self.version} http://www.unicode.org/cldr/ @@ -159,7 +178,93 @@ class LocaleSourceEditor (SourceFileEditor): edited) CLDR data; see qtbase/util/locale_database/. */ -""".format(datetime.date.today(), version)) +""") + +class TimeZoneDataWriter (LocaleSourceEditor): + def __init__(self, path: Path, temp: Path, version: str): + super().__init__(path, temp, version) + self.__ianaTable = ByteArrayData() # Single IANA IDs + self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs + self.__windowsTable = ByteArrayData() # Windows names for zones + self.__windowsList = sorted(windowsIdList, + key=lambda p: p[0].lower()) + self.windowsKey = {name: (key, off) for key, (name, off) + in enumerate(self.__windowsList, 1)} + + def utcTable(self): + offsetMap, out = {}, self.writer.write + for name in utcIdList: + offset = self.__offsetOf(name) + offsetMap[offset] = offsetMap.get(offset, ()) + (name,) + + # Write UTC ID key table + out('// IANA ID Index, UTC Offset\n') + out('static constexpr UtcData utcDataTable[] = {\n') + for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop. + names = offsetMap[offset]; + joined = self.__ianaListTable.append(' '.join(names)) + out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n') + out('};\n') + + def aliasToIana(self, pairs): + out, store = self.writer.write, self.__ianaTable.append + + out('// Alias ID Index, Alias ID Index\n') + out('static constexpr AliasData aliasMappingTable[] = {\n') + for name, iana in pairs: # They're ready-sorted + assert name != iana, (alias, iana) # Filtered out in QLocaleXmlWriter + out(f' {{ {store(name):6d},{store(iana):6d} }},' + f' // {name} -> {iana}\n') + out('};\n\n') + + def msToIana(self, pairs): + out, winStore = self.writer.write, self.__windowsTable.append + ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable + alias = dict(pairs) # {MS name: IANA ID} + + out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n') + out('static constexpr WindowsData windowsDataTable[] = {\n') + # Sorted by Windows ID key: + + for index, (name, offset) in enumerate(self.__windowsList, 1): + out(f' {{ {index:6d},{winStore(name):6d},' + f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n') + out('};\n\n') + + def msLandIanas(self, triples): # (MS name, territory code, IANA list) + out, store = self.writer.write, self.__ianaListTable.append + from enumdata import territory_map + landKey = {code: (i, name) for i, (name, code) in territory_map.items()} + seq = sorted((self.windowsKey[name][0], landKey[land][0], name, landKey[land][1], ianas) + for name, land, ianas in triples) + + out('// Windows ID Key, Territory Enum, IANA ID Index\n') + out('static constexpr ZoneData zoneDataTable[] = {\n') + # Sorted by (Windows ID Key, territory enum) + for winId, landId, name, land, ianas in seq: + out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},' + f' // {name} / {land}\n') + out('};\n\n') + + def writeTables(self): + self.__windowsTable.write(self.writer.write, 'windowsIdData') + # TODO: these are misnamed, entries in the first are lists, + # those in the next are single IANA IDs + self.__ianaListTable.write(self.writer.write, 'ianaIdData') + self.__ianaTable.write(self.writer.write, 'aliasIdData') + + # Implementation details: + @staticmethod + def __offsetOf(utcName): + "Maps a UTC±HH:mm name to its offset in seconds" + assert utcName.startswith('UTC') + if len(utcName) == 3: + return 0 + assert utcName[3] in '+-', utcName + sign = -1 if utcName[3] == '-' else 1 + assert len(utcName) == 9 and utcName[6] == ':', utcName + hour, mins = int(utcName[4:6]), int(utcName[-2:]) + return sign * (hour * 60 + mins) * 60 class LocaleDataWriter (LocaleSourceEditor): def likelySubtags(self, likely): @@ -173,23 +278,22 @@ class LocaleDataWriter (LocaleSourceEditor): def keyLikely(entry): have = entry[1] # Numeric id triple return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script - likely = list(likely) # Turn generator into list so we can sort it - likely.sort(key=keyLikely) + likely = sorted(likely, key=keyLikely) i = 0 - self.writer.write('static const QLocaleId likely_subtags[] = {\n') + self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n') for had, have, got, give in likely: i += 1 self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have)) self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give)) self.writer.write(' ' if i == len(likely) else ',') - self.writer.write(' // {} -> {}\n'.format(had, got)) + self.writer.write(f' // {had} -> {got}\n') self.writer.write('};\n\n') def localeIndex(self, indices): - self.writer.write('static const quint16 locale_index[] = {\n') - for pair in indices: - self.writer.write('{:6d}, // {}\n'.format(*pair)) + self.writer.write('static constexpr quint16 locale_index[] = {\n') + for index, name in indices: + self.writer.write(f'{index:6d}, // {name}\n') self.writer.write(' 0 // trailing 0\n') self.writer.write('};\n\n') @@ -208,7 +312,7 @@ class LocaleDataWriter (LocaleSourceEditor): endonyms_data = StringData('endonyms_data') # Locale data - self.writer.write('static const QLocaleData locale_data[] = {\n') + self.writer.write('static constexpr QLocaleData locale_data[] = {\n') # Table headings: keep each label centred in its field, matching line_format: self.writer.write(' // ' # Width 6 + comma @@ -309,7 +413,7 @@ class LocaleDataWriter (LocaleSourceEditor): locale.minus, locale.plus, locale.exp, locale.quotationStart, locale.quotationEnd, locale.alternateQuotationStart, locale.alternateQuotationEnd)) + - tuple (date_format_data.append(f) for f in # 2 entries: + tuple(date_format_data.append(f) for f in # 2 entries: (locale.longDateFormat, locale.shortDateFormat)) + tuple(time_format_data.append(f) for f in # 2 entries: (locale.longTimeFormat, locale.shortTimeFormat)) + @@ -327,7 +431,7 @@ class LocaleDataWriter (LocaleSourceEditor): currency_format_data.append(locale.currencyFormat), currency_format_data.append(locale.currencyNegativeFormat), endonyms_data.append(locale.languageEndonym), - endonyms_data.append(locale.countryEndonym)) # 6 entries + endonyms_data.append(locale.territoryEndonym)) # 6 entries ) # Total: 37 entries assert len(ranges) == 37 @@ -340,8 +444,7 @@ class LocaleDataWriter (LocaleSourceEditor): locale.currencyRounding, # unused (QTBUG-81343) locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd, locale.groupTop, locale.groupHigher, locale.groupLeast) )) - + ', // {}/{}/{}\n'.format( - locale.language, locale.script, locale.country)) + + f', // {locale.language}/{locale.script}/{locale.territory}\n') self.writer.write(formatLine(*( # All zeros, matching the format: (0,) * 3 + (0,) * 37 * 2 + (currencyIsoCodeData(0),) @@ -359,32 +462,35 @@ class LocaleDataWriter (LocaleSourceEditor): @staticmethod def __writeNameData(out, book, form): - out('static const char {}_name_list[] =\n'.format(form)) + out(f'static constexpr char {form}_name_list[] =\n') out('"Default\\0"\n') for key, value in book.items(): if key == 0: continue - out('"' + value[0] + '\\0"\n') + enum, name = value[0], value[-1] + if names_clash(name, enum): + out(f'"{name}\\0" // {enum}\n') + else: + out(f'"{name}\\0"\n') # Automagically utf-8 encoded out(';\n\n') - out('static const quint16 {}_name_index[] = {{\n'.format(form)) - out(' 0, // Any{}\n'.format(form.capitalize())) + out(f'static constexpr quint16 {form}_name_index[] = {{\n') + out(f' 0, // Any{form.capitalize()}\n') index = 8 for key, value in book.items(): if key == 0: continue - name = value[0] - out('{:6d}, // {}\n'.format(index, name)) - index += len(name) + 1 + out(f'{index:6d}, // {value[0]}\n') + index += len(value[-1].encode('utf-8')) + 1 out('};\n\n') @staticmethod def __writeCodeList(out, book, form, width): - out('static const unsigned char {}_code_list[] =\n'.format(form)) + out(f'static constexpr unsigned char {form}_code_list[] =\n') for key, value in book.items(): code = value[1] code += r'\0' * max(width - len(code), 0) - out('"{}" // {}\n'.format(code, value[0])) + out(f'"{code}" // {value[0]}\n') out(';\n\n') def languageNames(self, languages): @@ -393,20 +499,44 @@ class LocaleDataWriter (LocaleSourceEditor): def scriptNames(self, scripts): self.__writeNameData(self.writer.write, scripts, 'script') - def countryNames(self, countries): - self.__writeNameData(self.writer.write, countries, 'country') + def territoryNames(self, territories): + self.__writeNameData(self.writer.write, territories, 'territory') # TODO: unify these next three into the previous three; kept # separate for now to verify we're not changing data. - def languageCodes(self, languages): - self.__writeCodeList(self.writer.write, languages, 'language', 3) + def languageCodes(self, languages, code_data: LanguageCodeData): + out = self.writer.write + + out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n') + + def q(val: Optional[str], size: int) -> str: + """Quote the value and adjust the result for tabular view.""" + s = '' if val is None else ', '.join(f"'{c}'" for c in val) + return f'{{{s}}}' if size == 0 else f'{{{s}}},'.ljust(size * 5 + 2) + + for key, value in languages.items(): + code = value[1] + if key < 2: + result = code_data.query('und') + else: + result = code_data.query(code) + assert code == result.id() + assert result is not None + + codeString = q(result.part1Code, 2) + codeString += q(result.part2BCode, 3) + codeString += q(result.part2TCode, 3) + codeString += q(result.part3Code, 0) + out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n') + + out('};\n\n') def scriptCodes(self, scripts): self.__writeCodeList(self.writer.write, scripts, 'script', 4) - def countryCodes(self, countries): # TODO: unify with countryNames() - self.__writeCodeList(self.writer.write, countries, 'country', 3) + def territoryCodes(self, territories): # TODO: unify with territoryNames() + self.__writeCodeList(self.writer.write, territories, 'territory', 3) class CalendarDataWriter (LocaleSourceEditor): formatCalendar = ( @@ -414,9 +544,9 @@ class CalendarDataWriter (LocaleSourceEditor): + ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6) + ' }},').format def write(self, calendar, locales, names): - months_data = StringData('months_data') + months_data = StringData('months_data', 16) - self.writer.write('static const QCalendarLocale locale_data[] = {\n') + self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n') self.writer.write( ' //' # IDs, width 7 (6 + comma) @@ -438,13 +568,12 @@ class CalendarDataWriter (LocaleSourceEditor): # Sequence of StringDataToken: try: # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264) - ranges = (tuple(months_data.append(m[calendar], 16) for m in - (locale.standaloneLongMonths, locale.longMonths)) + - tuple(months_data.append(m[calendar]) for m in - (locale.standaloneShortMonths, locale.shortMonths, - locale.standaloneNarrowMonths, locale.narrowMonths))) + ranges = tuple(months_data.append(m[calendar]) for m in + (locale.standaloneLongMonths, locale.longMonths, + locale.standaloneShortMonths, locale.shortMonths, + locale.standaloneNarrowMonths, locale.narrowMonths)) except ValueError as e: - e.args += (locale.language, locale.script, locale.country, stem) + e.args += (locale.language, locale.script, locale.territory) raise self.writer.write( @@ -452,24 +581,41 @@ class CalendarDataWriter (LocaleSourceEditor): key + tuple(r.index for r in ranges) + tuple(r.length for r in ranges) )) - + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country)) + + f'// {locale.language}/{locale.script}/{locale.territory}\n') self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) )) + '// trailing zeros\n') self.writer.write('};\n') months_data.write(self.writer) + +class TestLocaleWriter (LocaleSourceEditor): + def localeList(self, locales): + self.writer.write('const LocaleListItem g_locale_list[] = {\n') + from enumdata import language_map, territory_map + # TODO: update testlocales/ to include script. + # For now, only mention each (lang, land) pair once: + pairs = set((lang, land) for lang, script, land in locales) + for lang, script, land in locales: + if (lang, land) in pairs: + pairs.discard((lang, land)) + langName = language_map[lang][0] + landName = territory_map[land][0] + self.writer.write(f' {{ {lang:6d},{land:6d} }}, // {langName}/{landName}\n') + self.writer.write('};\n\n') + + class LocaleHeaderWriter (SourceFileEditor): - __upinit = SourceFileEditor.__init__ - def __init__(self, path, temp, dupes): - self.__upinit(path, temp) - self.__dupes = dupes + def __init__(self, path, temp, enumify): + super().__init__(path, temp) + self.__enumify = enumify def languages(self, languages): self.__enum('Language', languages, self.__language) self.writer.write('\n') - def countries(self, countries): - self.__enum('Country', countries, self.__country) + def territories(self, territories): + self.writer.write(" // ### Qt 7: Rename to Territory\n") + self.__enum('Country', territories, self.__territory, 'Territory') def scripts(self, scripts): self.__enum('Script', scripts, self.__script) @@ -477,151 +623,177 @@ class LocaleHeaderWriter (SourceFileEditor): # Implementation details from enumdata import (language_aliases as __language, - country_aliases as __country, + territory_aliases as __territory, script_aliases as __script) - def __enum(self, name, book, alias): + def __enum(self, name, book, alias, suffix = None): assert book - out, dupes = self.writer.write, self.__dupes - out(' enum {} : ushort {{\n'.format(name)) - for key, value in book.items(): - member = value[0].replace('-', ' ') - if name == 'Script': - # Don't .capitalize() as some names are already camel-case (see enumdata.py): - member = ''.join(word[0].upper() + word[1:] for word in member.split()) - if not member.endswith('Script'): - member += 'Script' - if member in dupes: - raise Error('The script name "{}" is messy'.format(member)) - else: - member = ''.join(member.split()) - member = member + name if member in dupes else member - out(' {} = {},\n'.format(member, key)) - out('\n ' - + ',\n '.join('{} = {}'.format(*pair) - for pair in sorted(alias.items())) - + ',\n\n Last{} = {}\n }};\n'.format(name, member)) - -def usage(name, err, message = ''): - err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase -""".format(name)) # TODO: elaborate - if message: - err.write('\n' + message + '\n') - -def main(args, out, err): - # TODO: Make calendars a command-line parameter - # map { CLDR name: Qt file name } - calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew', - - name = args.pop(0) - if len(args) != 2: - usage(name, err, 'I expect two arguments') - return 1 + if suffix is None: + suffix = name - qlocalexml = args.pop(0) - qtsrcdir = args.pop(0) + out, enumify = self.writer.write, self.__enumify + out(f' enum {name} : ushort {{\n') + for key, value in book.items(): + member = enumify(value[0], suffix) + out(f' {member} = {key},\n') - if not (os.path.isdir(qtsrcdir) - and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf)) + out('\n ' + + ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items())) + + f',\n\n Last{suffix} = {member}') + + # for "LastCountry = LastTerritory" + # ### Qt 7: Remove + if suffix != name: + out(f',\n Last{name} = Last{suffix}') + + out('\n };\n') + + +def main(argv, out, err): + """Updates QLocale's CLDR data from a QLocaleXML file. + + Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as + arguments. In argv[1:] it expects the QLocaleXML file as first + parameter and the ISO 639-3 data table as second + parameter. Accepts the root of the qtbase checkout as third + parameter (default is inferred from this script's path) and a + --calendars option to select which calendars to support (all + available by default). + + Updates various src/corelib/t*/q*_data_p.h files within the qtbase + checkout to contain data extracted from the QLocaleXML file.""" + calendars_map = { + # CLDR name: Qt file name fragment + 'gregorian': 'roman', + 'persian': 'jalali', + 'islamic': 'hijri', + } + all_calendars = list(calendars_map.keys()) + + parser = argparse.ArgumentParser( + prog=Path(argv[0]).name, + description='Generate C++ code from CLDR data in QLocaleXML form.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('input_file', help='input XML file name', + metavar='input-file.xml') + parser.add_argument('iso_path', help='path to the ISO 639-3 data file', + metavar='iso-639-3.tab') + parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree', + nargs='?', default=qtbase_root) + parser.add_argument('--calendars', help='select calendars to emit data for', + nargs='+', metavar='CALENDAR', + choices=all_calendars, default=all_calendars) + parser.add_argument('-v', '--verbose', help='more verbose output', + action='count', default=0) + parser.add_argument('-q', '--quiet', help='less output', + dest='verbose', action='store_const', const=-1) + args = parser.parse_args(argv[1:]) + + qlocalexml = args.input_file + qtsrcdir = Path(args.qtbase_path) + calendars = {cal: calendars_map[cal] for cal in args.calendars} + + if not (qtsrcdir.is_dir() + and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file() for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))): - usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir) - return 1 + parser.error(f'Missing expected files under qtbase source root {qtsrcdir}') reader = QLocaleXmlReader(qlocalexml) locale_map = dict(reader.loadLocaleMap(calendars, err.write)) + locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap())) - locale_keys = locale_map.keys() - compareLocaleKeys.default_map = dict(reader.defaultMap()) - locale_keys.sort(compareLocaleKeys) - - try: - writer = LocaleDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', - 'qlocale_data_p.h'), - qtsrcdir, reader.cldrVersion) - except IOError as e: - err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1])) - return 1 + code_data = LanguageCodeData(args.iso_path) try: - writer.likelySubtags(reader.likelyMap()) - writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map))) - writer.localeData(locale_map, locale_keys) - writer.writer.write('\n') - writer.languageNames(reader.languages) - writer.scriptNames(reader.scripts) - writer.countryNames(reader.countries) - # TODO: merge the next three into the previous three - writer.languageCodes(reader.languages) - writer.scriptCodes(reader.scripts) - writer.countryCodes(reader.countries) - except Error as e: - writer.cleanup() - err.write('\nError updating locale data: ' + e.message + '\n') + with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'), + qtsrcdir, reader.cldrVersion) as writer: + writer.likelySubtags(reader.likelyMap()) + writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map))) + writer.localeData(locale_map, locale_keys) + writer.writer.write('\n') + writer.languageNames(reader.languages) + writer.scriptNames(reader.scripts) + writer.territoryNames(reader.territories) + # TODO: merge the next three into the previous three + writer.languageCodes(reader.languages, code_data) + writer.scriptCodes(reader.scripts) + writer.territoryCodes(reader.territories) + except Exception as e: + err.write(f'\nError updating locale data: {e}\n') + if args.verbose > 0: + raise return 1 - writer.close() - # Generate calendar data for calendar, stem in calendars.items(): try: - writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time', - 'q{}calendar_data_p.h'.format(stem)), - qtsrcdir, reader.cldrVersion) - except IOError as e: - err.write('Failed to open files to transcribe ' + calendar - + ' data ' + (e.message or e.args[1])) - return 1 - - try: - writer.write(calendar, locale_map, locale_keys) - except Error as e: - writer.cleanup() - err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n') + with CalendarDataWriter( + qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'), + qtsrcdir, reader.cldrVersion) as writer: + writer.write(calendar, locale_map, locale_keys) + except Exception as e: + err.write(f'\nError updating {calendar} locale data: {e}\n') + if args.verbose > 0: + raise return 1 - writer.close() - # qlocale.h try: - writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'), - qtsrcdir, reader.dupes) - except IOError as e: - err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1])) + with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'), + qtsrcdir, reader.enumify) as writer: + writer.languages(reader.languages) + writer.scripts(reader.scripts) + writer.territories(reader.territories) + except Exception as e: + err.write(f'\nError updating qlocale.h: {e}\n') + if args.verbose > 0: + raise return 1 + # qlocale.qdoc try: - writer.languages(reader.languages) - writer.scripts(reader.scripts) - writer.countries(reader.countries) - except Error as e: - writer.cleanup() - err.write('\nError updating qlocale.h: ' + e.message + '\n') + with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc: + DOCSTRING = " QLocale's data is based on Common Locale Data Repository " + for line in qdoc.reader: + if DOCSTRING in line: + qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n') + else: + qdoc.writer.write(line) + except Exception as e: + err.write(f'\nError updating qlocale.h: {e}\n') + if args.verbose > 0: + raise return 1 - writer.close() - - # qlocale.qdoc + # Locale-independent timezone data try: - writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'), - qtsrcdir) - except IOError as e: - err.write('Failed to open files to transcribe qlocale.qdoc: ' + (e.message or e.args[1])) + with TimeZoneDataWriter(qtsrcdir.joinpath( + 'src/corelib/time/qtimezoneprivate_data_p.h'), + qtsrcdir, reader.cldrVersion) as writer: + writer.aliasToIana(reader.aliasToIana()) + writer.msLandIanas(reader.msLandIanas()) + writer.msToIana(reader.msToIana()) + writer.utcTable() + writer.writeTables() + except Exception as e: + err.write(f'\nError updating qtimezoneprivate_data_p.h: {e}\n') + if args.verbose > 0: + raise return 1 - DOCSTRING = " QLocale's data is based on Common Locale Data Repository " + # ./testlocales/localemodel.cpp try: - for line in writer.reader: - if DOCSTRING in line: - writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n') - else: - writer.writer.write(line) - except Error as e: - writer.cleanup() - err.write('\nError updating qlocale.qdoc: ' + e.message + '\n') + path = 'util/locale_database/testlocales/localemodel.cpp' + with TestLocaleWriter(qtsrcdir.joinpath(path), qtsrcdir, + reader.cldrVersion) as test: + test.localeList(locale_keys) + except Exception as e: + err.write(f'\nError updating localemodel.cpp: {e}\n') + if args.verbose > 0: + raise return 1 - writer.close() return 0 if __name__ == "__main__": |