#!/usr/bin/env python2
#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
"""Script to generate C++ code from CLDR data in qLocaleXML form

See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
Pass the output file from that as first parameter to this script; pass
the root of the qtbase check-out as second parameter.
"""

import os
import datetime

from qlocalexml import QLocaleXmlReader
from xml.dom import minidom
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor

def compareLocaleKeys(key1, key2):
    """Old-style cmp() comparator for locale keys.

    Each key is indexed as (language, script, country); the values are
    subtracted from one another, so they are expected to be numeric
    IDs.  Returns a negative, zero or positive number as key1 sorts
    before, equal to or after key2.

    Keys are ordered by language first.  Within one language, a key
    whose country is the default for a (language, script) pair is
    moved ahead; otherwise keys sharing a script are ordered by
    country and the rest by script.

    The attribute compareLocaleKeys.default_map is read whenever two
    distinct keys share a language, so it has to be assigned before
    sorting (main() does this).
    """
    if key1 == key2:
        return 0

    if key1[0] != key2[0]: # First sort by language:
        return key1[0] - key2[0]

    defaults = compareLocaleKeys.default_map
    # maps {(language, script): country} by ID
    try:
        country = defaults[key1[:2]]
    except KeyError:
        pass
    else:
        # Whichever key carries the default country for key1's
        # (language, script) comes first; key1 is checked first.
        if key1[2] == country:
            return -1
        if key2[2] == country:
            return 1

    if key1[1] == key2[1]:
        # Same language and script: order by country.
        return key1[2] - key2[2]

    try:
        country = defaults[key2[:2]]
    except KeyError:
        pass
    else:
        # Same test again, this time against the default country for
        # key2's (language, script); key2 is checked first.
        if key2[2] == country:
            return 1
        if key1[2] == country:
            return -1

    # Scripts differ and no default applied: order by script.
    return key1[1] - key2[1]


class StringDataToken:
    """An (index, length) pair locating one entry in a StringData table."""
    def __init__(self, index, length):
        """Store index and length; raise Error if either exceeds 0xFFFF."""
        if index > 0xFFFF or length > 0xFFFF:
            raise Error("Position exceeds ushort range: {},{}".format(index, length))
        self.index = index
        self.length = length
    def __str__(self):
        """Render as the text " index,length " used in generated table rows."""
        return " {},{} ".format(self.index, self.length)

class StringData:
    """A named, de-duplicating table of string data.

    Strings are appended to one flat list; each distinct string is
    stored once and identified by a StringDataToken.
    """
    def __init__(self, name):
        self.data = [] # flat list of everything appended so far
        self.hash = {} # maps each string already added to its token
        self.name = name # name of the C++ array emitted by write()

    def append(self, s):
        """Add s to the table (if not already present) and return its token.

        Raises Error if the start index or the size of the converted
        data does not fit the limits checked below.
        """
        if s in self.hash:
            return self.hash[s]

        # unicode2hex() comes from localetools; presumably it returns
        # a list of per-code-unit entries - verify against that module.
        lst = unicode2hex(s)
        index = len(self.data)
        if index > 0xffff:
            raise Error('Data index {} is too big for uint16!'.format(index))
        size = len(lst)
        if size >= 0xffff:
            raise Error('Data is too big ({}) for uint16 size!'.format(size))
        token = None
        try:
            token = StringDataToken(index, size)
        except Error as e:
            # Add the offending string to the message before re-raising.
            e.message += '(on data "{}")'.format(s)
            raise
        self.hash[s] = token
        self.data += lst
        return token

    def write(self, fd):
        """Write the table to fd as a 'static const ushort NAME[]' definition."""
        fd.write("\nstatic const ushort {}[] = {{\n".format(self.name))
        fd.write(wrap_list(self.data))
        fd.write("\n};\n")

def currencyIsoCodeData(s):
    """Return s as a brace-enclosed, comma-separated list of ord() values.

    A false value (empty string, 0, None) gives "{0,0,0}".
    """
    if s:
        return '{' + ",".join(str(ord(x)) for x in s) + '}'
    return "{0,0,0}"

class LocaleSourceEditor (SourceFileEditor):
    """SourceFileEditor that starts its output with a 'generated' banner."""
    # Alias for the base-class initialiser, called as self.__upinit(...).
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, version):
        """Set up the base editor with path and temp, then write the banner.

        The banner is a C comment recording today's date and version
        (the CLDR version, as passed by main()).
        """
        self.__upinit(path, temp)
        # NOTE(review): the banner text below is emitted as run-on
        # lines; confirm whether its intended line layout has been lost.
        self.writer.write(""" /* This part of the file was 
generated on {} from the Common Locale Data Repository v{} http://www.unicode.org/cldr/ Do not edit this section: instead regenerate it using cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or edited) CLDR data; see qtbase/util/locale_database/. */ """.format(datetime.date.today(), version))

class LocaleDataWriter (LocaleSourceEditor):
    """Writes the locale data tables (main() targets qlocale_data_p.h)."""
    def likelySubtags(self, likely):
        """Write the likely_subtags[] table.

        Each entry of likely unpacks as (had, have, got, give, last):
        have and give are each formatted as three integers, had and
        got only appear in the trailing comment, and a true last
        replaces the entry's trailing comma with a space.
        """
        self.writer.write('static const QLocaleId likely_subtags[] = {\n')
        for had, have, got, give, last in likely:
            self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
            self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
            self.writer.write(' ' if last else ',')
            self.writer.write(' // {} -> {}\n'.format(had, got))
        self.writer.write('};\n\n')

    def localeIndex(self, indices):
        """Write the locale_index[] table, terminated by a 0 entry.

        Each entry of indices supplies an integer and the text of its
        trailing comment.
        """
        self.writer.write('static const quint16 locale_index[] = {\n')
        for pair in indices:
            self.writer.write('{:6d}, // {}\n'.format(*pair))
        self.writer.write(' 0 // trailing 0\n')
        self.writer.write('};\n\n')

    def localeData(self, locales, names):
        """Write the locale_data[] table and the string tables it refers to.

        names gives the locale keys in output order and locales maps
        each key to its locale object.  String-valued fields are
        stored in StringData tables; each row holds the tokens that
        locate them.  A row of zeros ends the table, after which every
        StringData table is written out.
        """
        list_pattern_part_data = StringData('list_pattern_part_data')
        date_format_data = StringData('date_format_data')
        time_format_data = StringData('time_format_data')
        days_data = StringData('days_data')
        am_data = StringData('am_data')
        pm_data = StringData('pm_data')
        byte_unit_data = StringData('byte_unit_data')
        currency_symbol_data = StringData('currency_symbol_data')
        currency_display_name_data = StringData('currency_display_name_data')
        currency_format_data = StringData('currency_format_data')
        endonyms_data = StringData('endonyms_data')

        # Locale data
        self.writer.write('static const QLocaleData locale_data[] = {\n')
        # Table headings: keep each label centred in its field, matching line_format:
        # NOTE(review): the labels below are narrower than the widths
        # their comments name (e.g. ' lang ' is 6 characters, where
        # "width 6 + comma" needs 7); confirm whether padding was lost.
        self.writer.write(' // '
                          # Width 6 + comma
                          ' lang ' # IDs
                          'script '
                          ' terr '
                          ' dec ' # Numeric punctuation
                          ' group '
                          ' list ' # Delimiter for *numeric* lists
                          ' prcnt ' # Arithmetic symbols
                          ' zero '
                          ' minus '
                          ' plus '
                          ' exp '
                          # Width 8 + comma - to make space for these wide labels !
                          ' quotOpn ' # Quotation marks
                          ' quotEnd '
                          'altQtOpn '
                          'altQtEnd '
                          # Width 11 + comma
                          ' lpStart ' # List pattern
                          ' lpMid '
                          ' lpEnd '
                          ' lpTwo '
                          ' sDtFmt ' # Date format
                          ' lDtFmt '
                          ' sTmFmt ' # Time format
                          ' lTmFmt '
                          ' ssDays ' # Days
                          ' slDays '
                          ' snDays '
                          ' sDays '
                          ' lDays '
                          ' nDays '
                          ' am ' # am/pm indicators
                          ' pm '
                          # Width 8 + comma
                          ' byte '
                          ' siQuant '
                          'iecQuant '
                          # Width 8+4 + comma
                          ' currISO '
                          # Width 11 + comma
                          ' currSym ' # Currency formatting
                          ' currDsply '
                          ' currFmt '
                          ' currFmtNeg '
                          ' endoLang ' # Name of language in itself, and of country
                          ' endoCntry '
                          # Width 6 + comma
                          'curDgt ' # Currency number representation
                          'curRnd '
                          'dow1st ' # First day of week
                          ' wknd+ ' # Week-end start/end days
                          ' wknd-' # No trailing space on last entry (be sure to
                          # pad before adding anything after it).
                          '\n')

        # One format string for a whole table row; the 's' fields take
        # StringDataToken objects (or pre-built strings), the 'd'
        # fields take integers.
        formatLine = ''.join((
            ' {{ ',
            # Locale-identifier
            '{:6d},' * 3,
            # Numeric formats, list delimiter
            '{:6d},' * 8,
            # Quotation marks
            '{:8d},' * 4,
            # List patterns, date/time formats, month/day names, am/pm
            '{:>11s},' * 16,
            # SI/IEC byte-unit abbreviations
            '{:>8s},' * 3,
            # Currency ISO code
            ' {:>10s}, ',
            # Currency and endonyms
            '{:>11s},' * 6,
            # Currency formatting
            '{:6d},{:6d}',
            # Day of week and week-end
            ',{:6d}' * 3,
            ' }}')).format
        for key in names:
            locale = locales[key]
            self.writer.write(formatLine(
                    key[0], key[1], key[2],
                    locale.decimal,
                    locale.group,
                    locale.listDelim,
                    locale.percent,
                    locale.zero,
                    locale.minus,
                    locale.plus,
                    locale.exp,
                    locale.quotationStart,
                    locale.quotationEnd,
                    locale.alternateQuotationStart,
                    locale.alternateQuotationEnd,
                    list_pattern_part_data.append(locale.listPatternPartStart),
                    list_pattern_part_data.append(locale.listPatternPartMiddle),
                    list_pattern_part_data.append(locale.listPatternPartEnd),
                    list_pattern_part_data.append(locale.listPatternPartTwo),
                    date_format_data.append(locale.shortDateFormat),
                    date_format_data.append(locale.longDateFormat),
                    time_format_data.append(locale.shortTimeFormat),
                    time_format_data.append(locale.longTimeFormat),
                    days_data.append(locale.standaloneShortDays),
                    days_data.append(locale.standaloneLongDays),
                    days_data.append(locale.standaloneNarrowDays),
                    days_data.append(locale.shortDays),
                    days_data.append(locale.longDays),
                    days_data.append(locale.narrowDays),
                    am_data.append(locale.am),
                    pm_data.append(locale.pm),
                    byte_unit_data.append(locale.byte_unit),
                    byte_unit_data.append(locale.byte_si_quantified),
                    byte_unit_data.append(locale.byte_iec_quantified),
                    currencyIsoCodeData(locale.currencyIsoCode),
                    currency_symbol_data.append(locale.currencySymbol),
                    currency_display_name_data.append(locale.currencyDisplayName),
                    currency_format_data.append(locale.currencyFormat),
                    currency_format_data.append(locale.currencyNegativeFormat),
                    endonyms_data.append(locale.languageEndonym),
                    endonyms_data.append(locale.countryEndonym),
                    locale.currencyDigits,
                    locale.currencyRounding, # unused (QTBUG-81343)
                    locale.firstDayOfWeek,
                    locale.weekendStart,
                    locale.weekendEnd)
                              + ', // {}/{}/{}\n'.format(
                    locale.language,
                    locale.script,
                    locale.country))
        # Terminating row: '0,0' strings fill the token fields and
        # currencyIsoCodeData(0) yields "{0,0,0}".
        self.writer.write(formatLine(*( # All zeros, matching the format:
                    (0,) * (3 + 8 + 4) + ('0,0',) * (16 + 3)
                    + (currencyIsoCodeData(0),)
                    + ('0,0',) * 6 + (0,) * (2 + 3) ))
                          + ' // trailing zeros\n')
        self.writer.write('};\n')

        # StringData tables:
        for data in (list_pattern_part_data, date_format_data,
                     time_format_data, days_data,
                     byte_unit_data, am_data, pm_data, currency_symbol_data,
                     currency_display_name_data, currency_format_data,
                     endonyms_data):
            data.write(self.writer)

    @staticmethod
    def __writeNameData(out, book, form):
        """Write the FORM_name_list[] and FORM_name_index[] tables.

        out is called with each piece of text to emit, book maps keys
        to sequences whose first item is a name, and form ('language',
        'script' or 'country' in this file) names the tables.  The
        entry with key 0 is skipped in both tables; its place is taken
        by the fixed "Default" name and index 0.
        """
        out('static const char {}_name_list[] =\n'.format(form))
        out('"Default\\0"\n')
        for key, value in book.items():
            if key == 0:
                continue
            out('"' + value[0] + '\\0"\n')
        out(';\n\n')

        out('static const quint16 {}_name_index[] = {{\n'.format(form))
        out(' 0, // Any{}\n'.format(form.capitalize()))
        # Offset of the first real name: len("Default") plus its
        # terminating \0; each later name likewise adds its length + 1.
        index = 8
        for key, value in book.items():
            if key == 0:
                continue
            name = value[0]
            out('{:6d}, // {}\n'.format(index, name))
            index += len(name) + 1
        out('};\n\n')

    @staticmethod
    def __writeCodeList(out, book, form, width):
        """Write the FORM_code_list[] table.

        Each value in book supplies a code (second item) and a name
        (first item, used in the comment).  Codes shorter than width
        are padded with \\0 escapes in the generated C++ text.
        """
        out('static const unsigned char {}_code_list[] =\n'.format(form))
        for key, value in book.items():
            code = value[1]
            # Raw string: appends the two characters backslash and
            # zero to the emitted source, not a NUL byte.
            code += r'\0' * max(width - len(code), 0)
            out('"{}" // {}\n'.format(code, value[0]))
        out(';\n\n')

    def languageNames(self, languages):
        """Write the language name list and index tables."""
        self.__writeNameData(self.writer.write, languages, 'language')

    def scriptNames(self, scripts):
        """Write the script name list and index tables."""
        self.__writeNameData(self.writer.write, scripts, 'script')

    def countryNames(self, countries):
        """Write the country name list and index tables."""
        self.__writeNameData(self.writer.write, countries, 'country')

    # TODO: unify these next three into the previous three; kept
    # separate for now to verify we're not changing data.

    def languageCodes(self, languages):
        """Write the language code list, padding codes to 3 characters."""
        self.__writeCodeList(self.writer.write, languages, 'language', 3)

    def scriptCodes(self, scripts):
        """Write the script code list, padding codes to 4 characters."""
        self.__writeCodeList(self.writer.write, scripts, 'script', 4)

    def countryCodes(self, countries): # TODO: unify with countryNames()
        """Write the country code list, padding codes to 3 characters."""
        self.__writeCodeList(self.writer.write, countries, 'country', 3)

class CalendarDataWriter (LocaleSourceEditor):
    """Writes the month-name tables for one calendar."""
    # Row format: three integer IDs, then six brace-wrapped fields that
    # take StringDataToken objects (or '0,0' for the terminating row).
    formatCalendar = ''.join((
        ' {{',
        '{:6d}',
        ',{:6d}' * 2,
        ',{{{:>5s}}}' * 6,
        '}}, ')).format
    def write(self, calendar, locales, names):
        """Write the QCalendarLocale locale_data[] table and months_data[].

        calendar selects the entry to use from each of the locale's
        month-name mappings; names gives the locale keys in output
        order and locales maps each key to its locale object.
        """
        months_data = StringData('months_data')

        self.writer.write('static const QCalendarLocale locale_data[] = {\n')
        self.writer.write(' // '
                          # IDs, width 7 (6 + comma)
                          + ' lang '
                          + ' script'
                          + ' terr '
                          # Month-name start-end pairs, width 8 (5 plus '{},'):
                          + ' sShort '
                          + ' sLong '
                          + ' sNarrow'
                          + ' short '
                          + ' long '
                          + ' narrow'
                          # No trailing space on last; be sure
                          # to pad before adding later entries.
                          + '\n')
        for key in names:
            locale = locales[key]
            self.writer.write(
                self.formatCalendar(
                    key[0], key[1], key[2],
                    months_data.append(locale.standaloneShortMonths[calendar]),
                    months_data.append(locale.standaloneLongMonths[calendar]),
                    months_data.append(locale.standaloneNarrowMonths[calendar]),
                    months_data.append(locale.shortMonths[calendar]),
                    months_data.append(locale.longMonths[calendar]),
                    months_data.append(locale.narrowMonths[calendar]))
                + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country))
        # Terminating row of zeros, matching formatCalendar's fields.
        self.writer.write(self.formatCalendar(*( (0,) * 3 + ('0,0',) * 6 ))
                          + '// trailing zeros\n')
        self.writer.write('};\n')
        months_data.write(self.writer)

class LocaleHeaderWriter (SourceFileEditor):
    """Writes the Language, Script and Country enums (main() targets qlocale.h)."""
    # Alias for the base-class initialiser, called as self.__upinit(...).
    __upinit = SourceFileEditor.__init__
    def __init__(self, path, temp, dupes):
        """Set up the base editor and remember dupes.

        dupes is tested with 'in' against candidate enum member names;
        see __enum() for how a match is handled.
        """
        self.__upinit(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Write the Language enum, followed by a blank line."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def countries(self, countries):
        """Write the Country enum."""
        self.__enum('Country', countries, self.__country)

    def scripts(self, scripts):
        """Write the Script enum, followed by a blank line."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    # Imported inside the class body so that the double-underscore
    # aliases are name-mangled like the other private members and can
    # be reached as self.__language etc. from the methods above.
    from enumdata import (language_aliases as __language,
                          country_aliases as __country,
                          script_aliases as __script)

    def __enum(self, name, book, alias):
        """Write 'enum NAME { ... }' from book, then the alias entries.

        book maps each numeric value to a sequence whose first item is
        the display name; white-space is removed from that name to
        form the member.  Script members also get each word's first
        letter upper-cased and a 'Script' suffix, and raise Error if
        the result is in dupes; other members that are in dupes get
        name appended.  alias maps extra member names to values; its
        entries are written in sorted order.  The final LastNAME
        member is set to the last member written from book.
        """
        assert book
        out, dupes = self.writer.write, self.__dupes
        out(' enum {} {{\n'.format(name))
        for key, value in book.items():
            member = value[0]
            if name == 'Script':
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                member = ''.join(word[0].upper() + word[1:] for word in member.split())
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error('The script name "{}" is messy'.format(member))
            else:
                member = ''.join(member.split())
                member = member + name if member in dupes else member
            out(' {} = {},\n'.format(member, key))

        # member still holds the loop's final value here; the assert
        # above guarantees the loop ran at least once.
        out('\n '
            + ',\n '.join('{} = {}'.format(*pair)
                          for pair in sorted(alias.items()))
            + ',\n\n Last{} = {}\n }};\n'.format(name, member))

def usage(name, err, message = ''):
    """Write a usage summary for program name to err.

    If message is non-empty it is written afterwards, set off by
    newlines.
    """
    err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase """.format(name)) # TODO: elaborate
    if message:
        err.write('\n' + message + '\n')

def main(args, out, err):
    """Regenerate the locale data sources under a qtbase check-out.

    args is an argv-style list: the program name, the qLocaleXML file
    and the qtbase root.  Note that args is emptied in the process.
    err receives usage and error messages; out is not used by this
    function.  Returns 0 on success and 1 on any reported failure.

    In order, this updates qlocale_data_p.h, one calendar data header
    per entry in calendars, qlocale.h and qlocale.qdoc.
    """
    # TODO: Make calendars a command-line parameter
    # map { CLDR name: Qt file name }
    calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',

    name = args.pop(0)
    if len(args) != 2:
        usage(name, err, 'I expect two arguments')
        return 1

    qlocalexml = args.pop(0)
    qtsrcdir = args.pop(0)

    # Check the files to be rewritten exist before reading any data.
    if not (os.path.isdir(qtsrcdir)
            and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf))
                    for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
        usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
        return 1

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))

    # Python 2 idiom: keys() returns a list, and list.sort() accepts a
    # cmp-style function as its first argument.
    locale_keys = locale_map.keys()
    compareLocaleKeys.default_map = dict(reader.defaultMap())
    locale_keys.sort(compareLocaleKeys)

    try:
        writer = LocaleDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text',
                                               'qlocale_data_p.h'),
                                  qtsrcdir, reader.cldrVersion)
    except IOError as e:
        err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1]))
        return 1

    try:
        writer.likelySubtags(reader.likelyMap())
        writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
        writer.localeData(locale_map, locale_keys)
        writer.writer.write('\n')
        writer.languageNames(reader.languages)
        writer.scriptNames(reader.scripts)
        writer.countryNames(reader.countries)
        # TODO: merge the next three into the previous three
        writer.languageCodes(reader.languages)
        writer.scriptCodes(reader.scripts)
        writer.countryCodes(reader.countries)
    except Error as e:
        # cleanup() is used on failure where close() is used on success.
        writer.cleanup()
        err.write('\nError updating locale data: ' + e.message + '\n')
        return 1

    writer.close()

    # Generate calendar data
    for calendar, stem in calendars.items():
        try:
            writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time',
                                                     'q{}calendar_data_p.h'.format(stem)),
                                        qtsrcdir, reader.cldrVersion)
        except IOError as e:
            err.write('Failed to open files to transcribe ' + calendar
                      + ' data ' + (e.message or e.args[1]))
            return 1

        try:
            writer.write(calendar, locale_map, locale_keys)
        except Error as e:
            writer.cleanup()
            err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
            return 1

        writer.close()

    # qlocale.h
    try:
        writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'),
                                    qtsrcdir, reader.dupes)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1]))
        return 1

    try:
        writer.languages(reader.languages)
        writer.scripts(reader.scripts)
        writer.countries(reader.countries)
    except Error as e:
        writer.cleanup()
        err.write('\nError updating qlocale.h: ' + e.message + '\n')
        return 1

    writer.close()

    # qlocale.qdoc
    try:
        writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'),
                             qtsrcdir)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.qdoc: ' + (e.message or e.args[1]))
        return 1

    # Any line containing this text is replaced by the text plus the
    # current CLDR version; every other line is copied unchanged.
    DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
    try:
        for line in writer.reader:
            if DOCSTRING in line:
                writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n')
            else:
                writer.writer.write(line)
    except Error as e:
        writer.cleanup()
        err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
        return 1

    writer.close()
    return 0

if __name__ == "__main__":
    import sys
    sys.exit(main(sys.argv, sys.stdout, sys.stderr))