diff options
Diffstat (limited to 'util/locale_database')
-rw-r--r-- | util/locale_database/cldr.py | 301 | ||||
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 153 | ||||
-rwxr-xr-x | util/locale_database/cldr2qtimezone.py | 530 | ||||
-rw-r--r--[-rwxr-xr-x] | util/locale_database/dateconverter.py | 302 | ||||
-rw-r--r-- | util/locale_database/enumdata.py | 218 | ||||
-rw-r--r-- | util/locale_database/iso639_3.py | 80 | ||||
-rw-r--r-- | util/locale_database/ldml.py | 215 | ||||
-rw-r--r-- | util/locale_database/localetools.py | 239 | ||||
-rw-r--r-- | util/locale_database/qlocalexml.py | 302 | ||||
-rw-r--r-- | util/locale_database/qlocalexml.rnc | 119 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 466 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.cpp | 913 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.h | 51 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.cpp | 53 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.h | 29 | ||||
-rw-r--r-- | util/locale_database/testlocales/main.cpp | 29 | ||||
-rw-r--r-- | util/locale_database/testlocales/testlocales.pro | 3 |
17 files changed, 2308 insertions, 1695 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 9b08d8a652..9e0bae9667 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -1,31 +1,5 @@ -# -*- coding: utf-8; -*- -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Digesting the CLDR's data. Provides two classes: @@ -36,15 +10,17 @@ The former should normally be all you need to access. See individual classes for further detail. 
""" +from typing import Iterable, TextIO from xml.dom import minidom from weakref import WeakValueDictionary as CacheDict -import os +from pathlib import Path from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner +from localetools import names_clash from qlocalexml import Locale class CldrReader (object): - def __init__(self, root, grumble = lambda msg: None, whitter = lambda msg: None): + def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None): """Set up a reader object for reading CLDR data. Single parameter, root, is the file-system path to the root of @@ -67,7 +43,7 @@ class CldrReader (object): Yields pairs (have, give) of 4-tuples; if what you have matches the left member, giving the right member is probably sensible. Each 4-tuple's entries are the full names of a - language, a script, a country (strictly territory) and a + language, a script, a territory (usually a country) and a variant (currently ignored).""" skips = [] for got, use in self.root.likelySubTags(): @@ -79,7 +55,7 @@ class CldrReader (object): and e.message.startswith('Unknown ') and ' code ' in e.message): skips.append(use) else: - self.grumble('Skipping likelySubtag "{}" -> "{}" ({})\n'.format(got, use, e.message)) + self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n') continue if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]): continue @@ -99,50 +75,49 @@ class CldrReader (object): pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips) def readLocales(self, calendars = ('gregorian',)): - locales = tuple(self.__allLocales(calendars)) - return dict(((k.language_id, k.script_id, k.country_id, k.variant_code), - k) for k in locales) + return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k + for k in self.__allLocales(calendars)} def __allLocales(self, calendars): def skip(locale, reason): - return 'Skipping defaultContent locale "{}" ({})\n'.format(locale, 
reason) + return f'Skipping defaultContent locale "{locale}" ({reason})\n' for locale in self.root.defaultContentLocales: try: - language, script, country, variant = self.__splitLocale(locale) + language, script, territory, variant = self.__splitLocale(locale) except ValueError: self.whitter(skip(locale, 'only language tag')) continue - if not (script or country): + if not (script or territory): self.grumble(skip(locale, 'second tag is neither script nor territory')) continue - if not (language and country): + if not (language and territory): continue try: yield self.__getLocaleData(self.root.locale(locale), calendars, - language, script, country, variant) + language, script, territory, variant) except Error as e: self.grumble(skip(locale, e.message)) for locale in self.root.fileLocales: try: chain = self.root.locale(locale) - language, script, country, variant = chain.tagCodes() + language, script, territory, variant = chain.tagCodes() assert language # TODO: this skip should probably be based on likely - # sub-tags, instead of empty country: if locale has a + # sub-tags, instead of empty territory: if locale has a # likely-subtag expansion, that's what QLocale uses, # and we'll be saving its data for the expanded locale # anyway, so don't need to record it for itself. # See also QLocaleXmlReader.loadLocaleMap's grumble. 
- if not country: + if not territory: continue - yield self.__getLocaleData(chain, calendars, language, script, country, variant) + yield self.__getLocaleData(chain, calendars, language, script, territory, variant) except Error as e: - self.grumble('Skipping file locale "{}" ({})\n'.format(locale, e.message)) + self.grumble(f'Skipping file locale "{locale}" ({e})\n') import textwrap @staticmethod @@ -153,13 +128,13 @@ class CldrReader (object): def __parseTags(self, locale): tags = self.__splitLocale(locale) - language = tags.next() - script = country = variant = '' + language = next(tags) + script = territory = variant = '' try: - script, country, variant = tags + script, territory, variant = tags except ValueError: pass - return tuple(p[1] for p in self.root.codesToIdName(language, script, country, variant)) + return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant)) def __splitLocale(self, name): """Generate (language, script, territory, variant) from a locale name @@ -171,14 +146,18 @@ class CldrReader (object): single tag (i.e. contains no underscores). 
Always yields 1 or 4 values, never 2 or 3.""" tags = iter(name.split('_')) - yield tags.next() # Language - tag = tags.next() # may raise StopIteration + yield next(tags) # Language + + try: + tag = next(tags) + except StopIteration: + return # Script is always four letters, always capitalised: if len(tag) == 4 and tag[0].isupper() and tag[1:].islower(): yield tag try: - tag = tags.next() + tag = next(tags) except StopIteration: tag = '' else: @@ -188,7 +167,7 @@ class CldrReader (object): if tag and tag.isupper() or tag.isdigit(): yield tag try: - tag = tags.next() + tag = next(tags) except StopIteration: tag = '' else: @@ -201,21 +180,22 @@ class CldrReader (object): else: yield '' - # If nothing is left, StopIteration will avoid the warning: - if not tag: - tag = tags.next() - self.grumble('Ignoring unparsed cruft {} in {}\n'.format('_'.join(tag + tuple(tags)), name)) + rest = [tag] if tag else [] + rest.extend(tags) - def __getLocaleData(self, scan, calendars, language, script, country, variant): - ids, names = zip(*self.root.codesToIdName(language, script, country, variant)) - assert ids[0] > 0 and ids[2] > 0, (language, script, country, variant) + if rest: + self.grumble(f'Ignoring unparsed cruft {"_".join(rest)} in {name}\n') + + def __getLocaleData(self, scan, calendars, language, script, territory, variant): + ids, names = zip(*self.root.codesToIdName(language, script, territory, variant)) + assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant) locale = Locale( language = names[0], language_code = language, language_id = ids[0], script = names[1], script_code = script, script_id = ids[1], - country = names[2], country_code = country, country_id = ids[2], + territory = names[2], territory_code = territory, territory_id = ids[2], variant_code = variant) - firstDay, weStart, weEnd = self.root.weekData(country) + firstDay, weStart, weEnd = self.root.weekData(territory) assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') for 
day in (firstDay, weStart, weEnd)) @@ -223,7 +203,7 @@ class CldrReader (object): weekendStart = weStart, weekendEnd = weEnd) - iso, digits, rounding = self.root.currencyData(country) + iso, digits, rounding = self.root.currencyData(territory) locale.update(currencyIsoCode = iso, currencyDigits = int(digits), currencyRounding = int(rounding)) @@ -231,7 +211,7 @@ class CldrReader (object): locale.update(scan.currencyData(iso)) locale.update(scan.numericData(self.root.numberSystem, self.whitter)) locale.update(scan.textPatternData()) - locale.update(scan.endonyms(language, script, country, variant)) + locale.update(scan.endonyms(language, script, territory, variant)) locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ... locale.update(scan.calendarNames(calendars)) # Names of days and months @@ -242,7 +222,7 @@ class CldrReader (object): # the cache. If a process were to instantiate this class with distinct # roots, each cache would be filled by the first to need it ! class CldrAccess (object): - def __init__(self, root): + def __init__(self, root: Path): """Set up a master object for accessing CLDR data. Single parameter, root, is the file-system path to the root of @@ -250,18 +230,18 @@ class CldrAccess (object): contain dtd/, main/ and supplemental/ sub-directories.""" self.root = root - def xml(self, *path): + def xml(self, relative_path: str): """Load a single XML file and return its root element as an XmlScanner. The path is interpreted relative to self.root""" - return XmlScanner(Node(self.__xml(path))) + return XmlScanner(Node(self.__xml(relative_path))) def supplement(self, name): """Loads supplemental data as a Supplement object. The name should be that of a file in common/supplemental/, without path. """ - return Supplement(Node(self.__xml(('common', 'supplemental', name)))) + return Supplement(Node(self.__xml(f'common/supplemental/{name}'))) def locale(self, name): """Loads all data for a locale as a LocaleScanner object. 
@@ -273,17 +253,18 @@ class CldrAccess (object): inheritance, where relevant.""" return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale) + def englishNaming(self, tag): # see QLocaleXmlWriter.enumData() + return self.__codeMap(tag).get + @property - def fileLocales(self, joinPath = os.path.join, listDirectory = os.listdir, - splitExtension = os.path.splitext): + def fileLocales(self) -> Iterable[str]: """Generator for locale IDs seen in file-names. All *.xml other than root.xml in common/main/ are assumed to identify locales.""" - for name in listDirectory(joinPath(self.root, 'common', 'main')): - stem, ext = splitExtension(name) - if ext == '.xml' and stem != 'root': - yield stem + for path in self.root.joinpath('common/main').glob('*.xml'): + if path.stem != 'root': + yield path.stem @property def defaultContentLocales(self): @@ -304,44 +285,44 @@ class CldrAccess (object): def numberSystem(self, system): """Get a description of a numbering system. - Returns a mapping, with keys u'digits', u'type' and u'id'; the + Returns a mapping, with keys 'digits', 'type' and 'id'; the value for this last is system. Raises KeyError for unknown number system, ldml.Error on failure to load data.""" try: return self.__numberSystems[system] except KeyError: - raise Error('Unsupported number system: {}'.format(system)) + raise Error(f'Unsupported number system: {system}') - def weekData(self, country): + def weekData(self, territory): """Data on the weekly cycle. Returns a triple (W, S, E) of en's short names for week-days; W is the first day of the week, S the start of the week-end - and E the end of the week-end. Where data for a country is + and E the end of the week-end. 
Where data for a territory is unavailable, the data for CLDR's territory 001 (The World) is used.""" try: - return self.__weekData[country] + return self.__weekData[territory] except KeyError: return self.__weekData['001'] - def currencyData(self, country): - """Returns currency data for the given country code. + def currencyData(self, territory): + """Returns currency data for the given territory code. Return value is a tuple (ISO4217 code, digit count, rounding - mode). If CLDR provides no data for this country, ('', 2, 1) + mode). If CLDR provides no data for this territory, ('', 2, 1) is the default result. """ try: - return self.__currencyData[country] + return self.__currencyData[territory] except KeyError: return '', 2, 1 - def codesToIdName(self, language, script, country, variant = ''): + def codesToIdName(self, language, script, territory, variant = ''): """Maps each code to the appropriate ID and name. Returns a 4-tuple of (ID, name) pairs corresponding to the - language, script, country and variant given. Raises a + language, script, territory and variant given. Raises a suitable error if any of them is unknown, indicating all that are unknown plus suitable names for any that could sensibly be added to enumdata.py to make them known. 
@@ -353,33 +334,33 @@ class CldrAccess (object): try: return (enum('language')[language], enum('script')[script], - enum('country')[country], + enum('territory')[territory], enum('variant')[variant]) except KeyError: pass - parts, values = [], [language, script, country, variant] - for index, key in enumerate(('language', 'script', 'country', 'variant')): + parts, values = [], [language, script, territory, variant] + for index, key in enumerate(('language', 'script', 'territory', 'variant')): naming, enums = self.__codeMap(key), enum(key) value = values[index] if value not in enums: - text = '{} code {}'.format(key, value) + text = f'{key} code {value}' name = naming.get(value) if name and value != 'POSIX': - text += u' (could add {})'.format(name) + text += f' (could add {name})' parts.append(text) if len(parts) > 1: parts[-1] = 'and ' + parts[-1] - assert parts + else: + assert parts + if parts[0].startswith('variant'): + raise Error(f'No support for {parts[0]}', + language, script, territory, variant) raise Error('Unknown ' + ', '.join(parts), - language, script, country, variant) + language, script, territory, variant) @staticmethod - def __checkEnum(given, proper, scraps, - remap = { u'å': 'a', u'ã': 'a', u'ç': 'c', u'é': 'e', u'í': 'i', u'ü': 'u'}, - prefix = { 'St.': 'Saint', 'U.S.': 'United States' }, - suffixes = ( 'Han', ), - skip = u'\u02bc'): + def __checkEnum(given, proper, scraps): # Each is a { code: full name } mapping for code, name in given.items(): try: right = proper[code] @@ -387,35 +368,23 @@ class CldrAccess (object): # No en.xml name for this code, but supplementalData's # parentLocale may still believe in it: if code not in scraps: - yield name, '[Found no CLDR name for code {}]'.format(code) - continue - if name == right: continue - ok = right.replace('&', 'And') - for k, v in prefix.items(): - if ok.startswith(k + ' '): - ok = v + ok[len(k):] - while '(' in ok: - try: f, t = ok.index('('), ok.index(')') - except ValueError: break - ok = 
ok[:f].rstrip() + ' ' + ok[t:].lstrip() - if any(name == ok + ' ' + s for s in suffixes): - continue - if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join( - remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip): + yield name, f'[Found no CLDR name for code {code}]' continue - yield name, ok + cleaned = names_clash(right, name) + if cleaned: + yield name, cleaned def checkEnumData(self, grumble): scraps = set() for k in self.__parentLocale.keys(): for f in k.split('_'): scraps.add(f) - from enumdata import language_list, country_list, script_list - language = dict((v, k) for k, v in language_list.values() if not v.isspace()) - country = dict((v, k) for k, v in country_list.values() if v != 'ZZ') - script = dict((v, k) for k, v in script_list.values() if v != 'Zzzz') + from enumdata import language_map, territory_map, script_map + language = {v: k for k, v in language_map.values() if not v.isspace()} + territory = {v: k for k, v in territory_map.values() if v != 'ZZ'} + script = {v: k for k, v in script_map.values() if v != 'Zzzz'} lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps)) - land = dict(self.__checkEnum(country, self.__codeMap('country'), scraps)) + land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps)) text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps)) if lang or land or text: grumble("""\ @@ -424,15 +393,15 @@ enumdata.py (keeping the old name as an alias): """) if lang: grumble('Language:\n\t' - + '\n\t'.join('{} -> {}'.format(k, v) for k, v in lang.items()) + + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items()) + '\n') if land: - grumble('Country:\n\t' - + '\n\t'.join('{} -> {}'.format(k, v) for k, v in land.items()) + grumble('Territory:\n\t' + + '\n\t'.join(f'{k} -> {v}' for k, v in land.items()) + '\n') if text: grumble('Script:\n\t' - + '\n\t'.join('{} -> {}'.format(k, v) for k, v in text.items()) + + '\n\t'.join(f'{k} -> {v}' for k, v in 
text.items()) + '\n') grumble('\n') @@ -442,7 +411,7 @@ enumdata.py (keeping the old name as an alias): MS-Win have their own eccentric names for time-zones. CLDR helpfully provides a translation to more orthodox names. - Singe argument, lookup, is a mapping from known MS-Win names + Single argument, lookup, is a mapping from known MS-Win names for locales to a unique integer index (starting at 1). The XML structure we read has the form: @@ -460,7 +429,7 @@ enumdata.py (keeping the old name as an alias): </supplementalData> """ zones = self.supplement('windowsZones.xml') - enum = self.__enumMap('country') + enum = self.__enumMap('territory') badZones, unLands, defaults, windows = set(), set(), {}, {} for name, attrs in zones.find('windowsZones/mapTimezones'): @@ -469,8 +438,8 @@ enumdata.py (keeping the old name as an alias): wid, code = attrs['other'], attrs['territory'] data = dict(windowsId = wid, - countryCode = code, - ianaList = attrs['type']) + territoryCode = code, + ianaList = ' '.join(attrs['type'].split())) try: key = lookup[wid] @@ -479,7 +448,7 @@ enumdata.py (keeping the old name as an alias): key = 0 data['windowsKey'] = key - if code == u'001': + if code == '001': defaults[key] = data['ianaList'] else: try: @@ -487,11 +456,11 @@ enumdata.py (keeping the old name as an alias): except KeyError: unLands.append(code) continue - data.update(countryId = cid, country = name) + data.update(territoryId = cid, territory = name) windows[key, cid] = data if unLands: - raise Error('Unknown country codes, please add to enumdata.py: ' + raise Error('Unknown territory codes, please add to enumdata.py: ' + ', '.join(sorted(unLands))) if badZones: @@ -507,20 +476,20 @@ enumdata.py (keeping the old name as an alias): return self.__cldrVersion # Implementation details - def __xml(self, path, cache = CacheDict(), read = minidom.parse, joinPath = os.path.join): + def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse): try: - doc = cache[path] + 
doc = cache[relative_path] except KeyError: - cache[path] = doc = read(joinPath(self.root, *path)).documentElement + cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement return doc - def __open(self, path, joinPath=os.path.join): - return open(joinPath(self.root, *path)) + def __open(self, relative_path: str) -> TextIO: + return self.root.joinpath(relative_path).open() @property def __rootLocale(self, cache = []): if not cache: - cache.append(self.xml('common', 'main', 'root.xml')) + cache.append(self.xml('common/main/root.xml')) return cache[0] @property @@ -530,7 +499,7 @@ enumdata.py (keeping the old name as an alias): return cache[0] @property - def __numberSystems(self, cache = {}, joinPath=os.path.join): + def __numberSystems(self, cache = {}): if not cache: for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'): cache[attrs['id']] = attrs @@ -563,7 +532,7 @@ enumdata.py (keeping the old name as an alias): source = self.__supplementalData for key in ('firstDay', 'weekendStart', 'weekendEnd'): result = {} - for ignore, attrs in source.find('weekData/' + key): + for ignore, attrs in source.find(f'weekData/{key}'): assert ignore == key day = attrs['day'] assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day @@ -580,7 +549,7 @@ enumdata.py (keeping the old name as an alias): for elt in source.findNodes('currencyData/region'): iso, digits, rounding = '', 2, 1 try: - country = elt.dom.attributes['iso3166'].nodeValue + territory = elt.dom.attributes['iso3166'].nodeValue except KeyError: continue for child in elt.findAllChildren('currency'): @@ -596,16 +565,16 @@ enumdata.py (keeping the old name as an alias): break if iso: for tag, data in source.find( - 'currencyData/fractions/info[iso4217={}]'.format(iso)): + f'currencyData/fractions/info[iso4217={iso}]'): digits = data['digits'] rounding = data['rounding'] - cache[country] = iso, digits, rounding + cache[territory] = iso, digits, 
rounding assert cache return cache @property - def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join): + def __unDistinguishedAttributes(self, cache = {}): """Mapping from tag names to lists of attributes. LDML defines some attributes as 'distinguishing': if a node @@ -625,7 +594,7 @@ enumdata.py (keeping the old name as an alias): return cache - def __scanLdmlDtd(self, joinPath = os.path.join): + def __scanLdmlDtd(self): """Scan the LDML DTD, record CLDR version Yields (tag, attrs) pairs: on elements with a given tag, @@ -635,7 +604,7 @@ enumdata.py (keeping the old name as an alias): Sets self.__cldrVersion as a side-effect, since this information is found in the same file.""" - with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd: + with self.__open('common/dtd/ldml.dtd') as dtd: tag, ignored, last = None, None, None for line in dtd: @@ -670,15 +639,15 @@ enumdata.py (keeping the old name as an alias): def __enumMap(self, key, cache = {}): if not cache: cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')} - # They're not actually lists: mappings from numeric value - # to pairs of full name and short code. What we want, in - # each case, is a mapping from code to the other two. - from enumdata import language_list, script_list, country_list - for form, book, empty in (('language', language_list, 'AnyLanguage'), - ('script', script_list, 'AnyScript'), - ('country', country_list, 'AnyTerritory')): - cache[form] = dict((pair[1], (num, pair[0])) - for num, pair in book.items() if pair[0] != 'C') + # They're mappings from numeric value to pairs of full + # name and short code. What we want, in each case, is a + # mapping from code to the other two. 
+ from enumdata import language_map, script_map, territory_map + for form, book, empty in (('language', language_map, 'AnyLanguage'), + ('script', script_map, 'AnyScript'), + ('territory', territory_map, 'AnyTerritory')): + cache[form] = {pair[1]: (num, pair[0]) + for num, pair in book.items() if pair[0] != 'C'} # (Have to filter out the C locale, as we give it the # same (all space) code as AnyLanguage, whose code # should probably be 'und' instead.) @@ -693,9 +662,9 @@ enumdata.py (keeping the old name as an alias): def __codeMap(self, key, cache = {}, # Maps our name for it to CLDR's name: naming = {'language': 'languages', 'script': 'scripts', - 'country': 'territories', 'variant': 'variants'}): + 'territory': 'territories', 'variant': 'variants'}): if not cache: - root = self.xml('common', 'main', 'en.xml').root.findUniqueChild('localeDisplayNames') + root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames') for dst, src in naming.items(): cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src))) assert cache @@ -721,7 +690,13 @@ enumdata.py (keeping the old name as an alias): except (KeyError, ValueError, TypeError): pass else: - if key not in seen or not elt.attributes.has_key('alt'): + # Prefer stand-alone forms of names when present, ignore other + # alt="..." entries. For example, Traditional and Simplified + # Han omit "Han" in the plain form, but include it for + # stand-alone. As the stand-alone version appears later, it + # over-writes the plain one. 
+ if (key not in seen or 'alt' not in elt.attributes + or elt.attributes['alt'].nodeValue == 'stand-alone'): yield key, value seen.add(key) @@ -730,7 +705,8 @@ enumdata.py (keeping the old name as an alias): def __parentLocale(self, cache = {}): # see http://www.unicode.org/reports/tr35/#Parent_Locales if not cache: - for tag, attrs in self.__supplementalData.find('parentLocales'): + for tag, attrs in self.__supplementalData.find('parentLocales', + ('component',)): parent = attrs.get('parent', '') for child in attrs['locales'].split(): cache[child] = parent @@ -738,10 +714,9 @@ enumdata.py (keeping the old name as an alias): return cache - def __localeAsDoc(self, name, aliasFor = None, - joinPath = os.path.join, exists = os.path.isfile): - path = ('common', 'main', name + '.xml') - if exists(joinPath(self.root, *path)): + def __localeAsDoc(self, name: str, aliasFor = None): + path = f'common/main/{name}.xml' + if self.root.joinpath(path).exists(): elt = self.__xml(path) for child in Node(elt).findAllChildren('alias'): try: @@ -754,8 +729,8 @@ enumdata.py (keeping the old name as an alias): return elt if aliasFor: - raise Error('Fatal error: found an alias "{}" -> "{}", but found no file for the alias' - .format(aliasFor, name)) + raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", ' + 'but found no file for the alias') def __scanLocaleRoots(self, name): while name and name != 'root': @@ -780,4 +755,4 @@ enumdata.py (keeping the old name as an alias): return chain # Unpolute the namespace: we don't need to export these. 
-del minidom, CacheDict, os +del minidom, CacheDict diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index 20dda77965..d3aa88ec38 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -1,48 +1,27 @@ -#!/usr/bin/env python2 -# coding=utf8 -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# -"""Convert CLDR data to qLocaleXML - -The CLDR data can be downloaded from CLDR_, which has a sub-directory -for each version; you need the ``core.zip`` file for your version of -choice (typically the latest). This script has had updates to cope up -to v38.1; for later versions, we may need adaptations. 
Unpack the -downloaded ``core.zip`` and check it has a common/main/ sub-directory: -pass the path of that root of the download to this script as its first -command-line argument. Pass the name of the file in which to write -output as the second argument; either omit it or use '-' to select the -standard output. This file is the input needed by -``./qlocalexml2cpp.py`` +#!/usr/bin/env python3 +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 +"""Convert CLDR data to QLocaleXML + +The CLDR data can be downloaded as a zip-file from CLDR_, which has a +sub-directory for each version; you need the ``core.zip`` file for +your version of choice (typically the latest), which you should then +unpack. Alternatively, you can clone the git repo from github_, which +has a tag for each release and a maint/maint-$ver branch for each +major version. Either way, the CLDR top-level directory should have a +subdirectory called common/ which contains (among other things) +subdirectories main/ and supplemental/. + +This script has had updates to cope up to v44.1; for later versions, +we may need adaptations. Pass the path of the CLDR top-level directory +to this script as its first command-line argument. Pass the name of +the file in which to write output as the second argument; either omit +it or use '-' to select the standard output. This file is the input +needed by ``./qlocalexml2cpp.py`` When you update the CLDR data, be sure to also update src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check -this script's output for unknown language, country or script messages; +this script's output for unknown language, territory or script messages; if any can be resolved, use their entry in common/main/en.xml to append new entries to enumdata.py's lists and update documentation in src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic @@ -51,71 +30,73 @@ order. 
While updating the locale data, check also for updates to MS-Win's time zone names; see cldr2qtimezone.py for details. -.. _CLDR: ftp://unicode.org/Public/cldr/ +All the scripts mentioned support --help to tell you how to use them. + +.. _CLDR: https://unicode.org/Public/cldr/ +.. _github: https://github.com/unicode-org/cldr """ -import os -import sys +from pathlib import Path +import argparse from cldr import CldrReader from qlocalexml import QLocaleXmlWriter -from enumdata import language_list, script_list, country_list - -def usage(name, err, message = ''): - err.write("""Usage: {} path/to/cldr/common/main [out-file.xml] -""".format(name)) # TODO: expand command-line, improve help message - if message: - err.write('\n' + message + '\n') - -def main(args, out, err): - # TODO: make calendars a command-line option - calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' - - # TODO: make argument parsing more sophisticated - name = args.pop(0) - if not args: - usage(name, err, 'Where is your CLDR data tree ?') - return 1 - - root = args.pop(0) - if not os.path.exists(os.path.join(root, 'common', 'main', 'root.xml')): - usage(name, err, - 'First argument is the root of the CLDR tree: found no common/main/root.xml under ' - + root) - return 1 - - xml = args.pop(0) if args else None + + +def main(argv, out, err): + """Generate a QLocaleXML file from CLDR data. + + Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as + arguments. In argv[1:], it expects the root of the CLDR data + directory as first parameter and the name of the file in which to + save QLocaleXML data as second parameter. 
It accepts a --calendars + option to select which calendars to support (all available by + default).""" + all_calendars = ['gregorian', 'persian', 'islamic'] + + parser = argparse.ArgumentParser( + prog=Path(argv[0]).name, + description='Generate QLocaleXML from CLDR data.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('cldr_path', help='path to the root of the CLDR tree') + parser.add_argument('out_file', help='output XML file name', + nargs='?', metavar='out-file.xml') + parser.add_argument('--calendars', help='select calendars to emit data for', + nargs='+', metavar='CALENDAR', + choices=all_calendars, default=all_calendars) + + args = parser.parse_args(argv[1:]) + + root = Path(args.cldr_path) + root_xml_path = 'common/main/root.xml' + + if not root.joinpath(root_xml_path).exists(): + parser.error('First argument is the root of the CLDR tree: ' + f'found no {root_xml_path} under {root}') + + xml = args.out_file if not xml or xml == '-': emit = out elif not xml.endswith('.xml'): - usage(name, err, 'Please use a .xml extension on your output file name, not ' + xml) - return 1 + parser.error(f'Please use a .xml extension on your output file name, not {xml}') else: try: emit = open(xml, 'w') except IOError as e: - usage(name, err, 'Failed to open "{}" to write output to it\n'.format(xml)) - return 1 - - if args: - usage(name, err, 'Too many arguments - excess: ' + ' '.join(args)) - return 1 - - if emit.encoding != 'UTF-8' or (emit.encoding is None and sys.getdefaultencoding() != 'UTF-8'): - reload(sys) # Weirdly, this gets a richer sys module than the plain import got us ! 
- sys.setdefaultencoding('UTF-8') + parser.error(f'Failed to open "{xml}" to write output to it') # TODO - command line options to tune choice of grumble and whitter: reader = CldrReader(root, err.write, err.write) writer = QLocaleXmlWriter(emit.write) writer.version(reader.root.cldrVersion) - writer.enumData(language_list, script_list, country_list) + writer.enumData(reader.root.englishNaming) writer.likelySubTags(reader.likelySubTags()) - writer.locales(reader.readLocales(calendars), calendars) + writer.locales(reader.readLocales(args.calendars), args.calendars) - writer.close() + writer.close(err.write) return 0 if __name__ == '__main__': + import sys sys.exit(main(sys.argv, sys.stdout, sys.stderr)) diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index 7c06fe8561..27987d5a58 100755 --- a/util/locale_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -1,46 +1,23 @@ -#!/usr/bin/env python2 -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. 
-## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +#!/usr/bin/env python3 +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Parse CLDR data for QTimeZone use with MS-Windows Script to parse the CLDR common/supplemental/windowsZones.xml file and -encode for use in QTimeZone. See ``./cldr2qlocalexml.py`` for where -to get the CLDR data. Pass its root directory as first parameter to -this script and the qtbase root directory as second parameter. It -shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready -for use. +prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for +where to get the CLDR data. Pass its root directory as first parameter +to this script. You can optionally pass the qtbase root directory as +second parameter; it defaults to the root of the checkout containing +this script. This script updates qtbase's +src/corelib/time/qtimezoneprivate_data_p.h with the new data. """ -import os import datetime +from pathlib import Path import textwrap +import argparse -from localetools import unicode2hex, wrap_list, Error, SourceFileEditor +from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root from cldr import CldrAccess ### Data that may need updates in response to new entries in the CLDR file ### @@ -54,194 +31,208 @@ from cldr import CldrAccess # Not public so may safely be changed. 
Please keep in alphabetic order by ID. # ( Windows Id, Offset Seconds ) windowsIdList = ( - (u'Afghanistan Standard Time', 16200), - (u'Alaskan Standard Time', -32400), - (u'Aleutian Standard Time', -36000), - (u'Altai Standard Time', 25200), - (u'Arab Standard Time', 10800), - (u'Arabian Standard Time', 14400), - (u'Arabic Standard Time', 10800), - (u'Argentina Standard Time', -10800), - (u'Astrakhan Standard Time', 14400), - (u'Atlantic Standard Time', -14400), - (u'AUS Central Standard Time', 34200), - (u'Aus Central W. Standard Time', 31500), - (u'AUS Eastern Standard Time', 36000), - (u'Azerbaijan Standard Time', 14400), - (u'Azores Standard Time', -3600), - (u'Bahia Standard Time', -10800), - (u'Bangladesh Standard Time', 21600), - (u'Belarus Standard Time', 10800), - (u'Bougainville Standard Time', 39600), - (u'Canada Central Standard Time', -21600), - (u'Cape Verde Standard Time', -3600), - (u'Caucasus Standard Time', 14400), - (u'Cen. Australia Standard Time', 34200), - (u'Central America Standard Time', -21600), - (u'Central Asia Standard Time', 21600), - (u'Central Brazilian Standard Time', -14400), - (u'Central Europe Standard Time', 3600), - (u'Central European Standard Time', 3600), - (u'Central Pacific Standard Time', 39600), - (u'Central Standard Time (Mexico)', -21600), - (u'Central Standard Time', -21600), - (u'China Standard Time', 28800), - (u'Chatham Islands Standard Time', 45900), - (u'Cuba Standard Time', -18000), - (u'Dateline Standard Time', -43200), - (u'E. Africa Standard Time', 10800), - (u'E. Australia Standard Time', 36000), - (u'E. Europe Standard Time', 7200), - (u'E. 
South America Standard Time', -10800), - (u'Easter Island Standard Time', -21600), - (u'Eastern Standard Time', -18000), - (u'Eastern Standard Time (Mexico)', -18000), - (u'Egypt Standard Time', 7200), - (u'Ekaterinburg Standard Time', 18000), - (u'Fiji Standard Time', 43200), - (u'FLE Standard Time', 7200), - (u'Georgian Standard Time', 14400), - (u'GMT Standard Time', 0), - (u'Greenland Standard Time', -10800), - (u'Greenwich Standard Time', 0), - (u'GTB Standard Time', 7200), - (u'Haiti Standard Time', -18000), - (u'Hawaiian Standard Time', -36000), - (u'India Standard Time', 19800), - (u'Iran Standard Time', 12600), - (u'Israel Standard Time', 7200), - (u'Jordan Standard Time', 7200), - (u'Kaliningrad Standard Time', 7200), - (u'Korea Standard Time', 32400), - (u'Libya Standard Time', 7200), - (u'Line Islands Standard Time', 50400), - (u'Lord Howe Standard Time', 37800), - (u'Magadan Standard Time', 36000), - (u'Magallanes Standard Time', -10800), # permanent DST - (u'Marquesas Standard Time', -34200), - (u'Mauritius Standard Time', 14400), - (u'Middle East Standard Time', 7200), - (u'Montevideo Standard Time', -10800), - (u'Morocco Standard Time', 0), - (u'Mountain Standard Time (Mexico)', -25200), - (u'Mountain Standard Time', -25200), - (u'Myanmar Standard Time', 23400), - (u'N. Central Asia Standard Time', 21600), - (u'Namibia Standard Time', 3600), - (u'Nepal Standard Time', 20700), - (u'New Zealand Standard Time', 43200), - (u'Newfoundland Standard Time', -12600), - (u'Norfolk Standard Time', 39600), - (u'North Asia East Standard Time', 28800), - (u'North Asia Standard Time', 25200), - (u'North Korea Standard Time', 30600), - (u'Omsk Standard Time', 21600), - (u'Pacific SA Standard Time', -10800), - (u'Pacific Standard Time', -28800), - (u'Pacific Standard Time (Mexico)', -28800), - (u'Pakistan Standard Time', 18000), - (u'Paraguay Standard Time', -14400), - (u'Qyzylorda Standard Time', 18000), # a.k.a. 
Kyzylorda, in Kazakhstan - (u'Romance Standard Time', 3600), - (u'Russia Time Zone 3', 14400), - (u'Russia Time Zone 10', 39600), - (u'Russia Time Zone 11', 43200), - (u'Russian Standard Time', 10800), - (u'SA Eastern Standard Time', -10800), - (u'SA Pacific Standard Time', -18000), - (u'SA Western Standard Time', -14400), - (u'Saint Pierre Standard Time', -10800), # New France - (u'Sakhalin Standard Time', 39600), - (u'Samoa Standard Time', 46800), - (u'Sao Tome Standard Time', 0), - (u'Saratov Standard Time', 14400), - (u'SE Asia Standard Time', 25200), - (u'Singapore Standard Time', 28800), - (u'South Africa Standard Time', 7200), - (u'Sri Lanka Standard Time', 19800), - (u'Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00 - (u'Syria Standard Time', 7200), - (u'Taipei Standard Time', 28800), - (u'Tasmania Standard Time', 36000), - (u'Tocantins Standard Time', -10800), - (u'Tokyo Standard Time', 32400), - (u'Tomsk Standard Time', 25200), - (u'Tonga Standard Time', 46800), - (u'Transbaikal Standard Time', 32400), # Yakutsk - (u'Turkey Standard Time', 7200), - (u'Turks And Caicos Standard Time', -14400), - (u'Ulaanbaatar Standard Time', 28800), - (u'US Eastern Standard Time', -18000), - (u'US Mountain Standard Time', -25200), - (u'UTC-11', -39600), - (u'UTC-09', -32400), - (u'UTC-08', -28800), - (u'UTC-02', -7200), - (u'UTC', 0), - (u'UTC+12', 43200), - (u'UTC+13', 46800), - (u'Venezuela Standard Time', -16200), - (u'Vladivostok Standard Time', 36000), - (u'Volgograd Standard Time', 14400), - (u'W. Australia Standard Time', 28800), - (u'W. Central Africa Standard Time', 3600), - (u'W. Europe Standard Time', 3600), - (u'W. 
Mongolia Standard Time', 25200), # Hovd - (u'West Asia Standard Time', 18000), - (u'West Bank Standard Time', 7200), - (u'West Pacific Standard Time', 36000), - (u'Yakutsk Standard Time', 32400), - (u'Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01 + ('Afghanistan Standard Time', 16200), + ('Alaskan Standard Time', -32400), + ('Aleutian Standard Time', -36000), + ('Altai Standard Time', 25200), + ('Arab Standard Time', 10800), + ('Arabian Standard Time', 14400), + ('Arabic Standard Time', 10800), + ('Argentina Standard Time', -10800), + ('Astrakhan Standard Time', 14400), + ('Atlantic Standard Time', -14400), + ('AUS Central Standard Time', 34200), + ('Aus Central W. Standard Time', 31500), + ('AUS Eastern Standard Time', 36000), + ('Azerbaijan Standard Time', 14400), + ('Azores Standard Time', -3600), + ('Bahia Standard Time', -10800), + ('Bangladesh Standard Time', 21600), + ('Belarus Standard Time', 10800), + ('Bougainville Standard Time', 39600), + ('Canada Central Standard Time', -21600), + ('Cape Verde Standard Time', -3600), + ('Caucasus Standard Time', 14400), + ('Cen. Australia Standard Time', 34200), + ('Central America Standard Time', -21600), + ('Central Asia Standard Time', 21600), + ('Central Brazilian Standard Time', -14400), + ('Central Europe Standard Time', 3600), + ('Central European Standard Time', 3600), + ('Central Pacific Standard Time', 39600), + ('Central Standard Time', -21600), + ('Central Standard Time (Mexico)', -21600), + ('Chatham Islands Standard Time', 45900), + ('China Standard Time', 28800), + ('Cuba Standard Time', -18000), + ('Dateline Standard Time', -43200), + ('E. Africa Standard Time', 10800), + ('E. Australia Standard Time', 36000), + ('E. Europe Standard Time', 7200), + ('E. 
South America Standard Time', -10800), + ('Easter Island Standard Time', -21600), + ('Eastern Standard Time', -18000), + ('Eastern Standard Time (Mexico)', -18000), + ('Egypt Standard Time', 7200), + ('Ekaterinburg Standard Time', 18000), + ('Fiji Standard Time', 43200), + ('FLE Standard Time', 7200), + ('Georgian Standard Time', 14400), + ('GMT Standard Time', 0), + ('Greenland Standard Time', -10800), + ('Greenwich Standard Time', 0), + ('GTB Standard Time', 7200), + ('Haiti Standard Time', -18000), + ('Hawaiian Standard Time', -36000), + ('India Standard Time', 19800), + ('Iran Standard Time', 12600), + ('Israel Standard Time', 7200), + ('Jordan Standard Time', 7200), + ('Kaliningrad Standard Time', 7200), + ('Korea Standard Time', 32400), + ('Libya Standard Time', 7200), + ('Line Islands Standard Time', 50400), + ('Lord Howe Standard Time', 37800), + ('Magadan Standard Time', 36000), + ('Magallanes Standard Time', -10800), # permanent DST + ('Marquesas Standard Time', -34200), + ('Mauritius Standard Time', 14400), + ('Middle East Standard Time', 7200), + ('Montevideo Standard Time', -10800), + ('Morocco Standard Time', 0), + ('Mountain Standard Time', -25200), + ('Mountain Standard Time (Mexico)', -25200), + ('Myanmar Standard Time', 23400), + ('N. Central Asia Standard Time', 21600), + ('Namibia Standard Time', 3600), + ('Nepal Standard Time', 20700), + ('New Zealand Standard Time', 43200), + ('Newfoundland Standard Time', -12600), + ('Norfolk Standard Time', 39600), + ('North Asia East Standard Time', 28800), + ('North Asia Standard Time', 25200), + ('North Korea Standard Time', 30600), + ('Omsk Standard Time', 21600), + ('Pacific SA Standard Time', -10800), + ('Pacific Standard Time', -28800), + ('Pacific Standard Time (Mexico)', -28800), + ('Pakistan Standard Time', 18000), + ('Paraguay Standard Time', -14400), + ('Qyzylorda Standard Time', 18000), # a.k.a. 
Kyzylorda, in Kazakhstan + ('Romance Standard Time', 3600), + ('Russia Time Zone 10', 39600), + ('Russia Time Zone 11', 43200), + ('Russia Time Zone 3', 14400), + ('Russian Standard Time', 10800), + ('SA Eastern Standard Time', -10800), + ('SA Pacific Standard Time', -18000), + ('SA Western Standard Time', -14400), + ('Saint Pierre Standard Time', -10800), # New France + ('Sakhalin Standard Time', 39600), + ('Samoa Standard Time', 46800), + ('Sao Tome Standard Time', 0), + ('Saratov Standard Time', 14400), + ('SE Asia Standard Time', 25200), + ('Singapore Standard Time', 28800), + ('South Africa Standard Time', 7200), + ('South Sudan Standard Time', 7200), + ('Sri Lanka Standard Time', 19800), + ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00 + ('Syria Standard Time', 7200), + ('Taipei Standard Time', 28800), + ('Tasmania Standard Time', 36000), + ('Tocantins Standard Time', -10800), + ('Tokyo Standard Time', 32400), + ('Tomsk Standard Time', 25200), + ('Tonga Standard Time', 46800), + ('Transbaikal Standard Time', 32400), # Yakutsk + ('Turkey Standard Time', 7200), + ('Turks And Caicos Standard Time', -14400), + ('Ulaanbaatar Standard Time', 28800), + ('US Eastern Standard Time', -18000), + ('US Mountain Standard Time', -25200), + ('UTC', 0), + # Lexical order: '+' < '-' + ('UTC+12', 43200), + ('UTC+13', 46800), + ('UTC-02', -7200), + ('UTC-08', -28800), + ('UTC-09', -32400), + ('UTC-11', -39600), + ('Venezuela Standard Time', -16200), + ('Vladivostok Standard Time', 36000), + ('Volgograd Standard Time', 14400), + ('W. Australia Standard Time', 28800), + ('W. Central Africa Standard Time', 3600), + ('W. Europe Standard Time', 3600), + ('W. 
Mongolia Standard Time', 25200), # Hovd + ('West Asia Standard Time', 18000), + ('West Bank Standard Time', 7200), + ('West Pacific Standard Time', 36000), + ('Yakutsk Standard Time', 32400), + ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01 ) # List of standard UTC IDs to use. Not public so may be safely changed. # Do not remove IDs, as each entry is part of the API/behavior guarantee. +# IDs for the same offset shall be space-joined; list the preferred ID first. # ( UTC Id, Offset Seconds ) utcIdList = ( - (u'UTC', 0), # Goes first so is default - (u'UTC-14:00', -50400), - (u'UTC-13:00', -46800), - (u'UTC-12:00', -43200), - (u'UTC-11:00', -39600), - (u'UTC-10:00', -36000), - (u'UTC-09:00', -32400), - (u'UTC-08:00', -28800), - (u'UTC-07:00', -25200), - (u'UTC-06:00', -21600), - (u'UTC-05:00', -18000), - (u'UTC-04:30', -16200), - (u'UTC-04:00', -14400), - (u'UTC-03:30', -12600), - (u'UTC-03:00', -10800), - (u'UTC-02:00', -7200), - (u'UTC-01:00', -3600), - (u'UTC-00:00', 0), - (u'UTC+00:00', 0), - (u'UTC+01:00', 3600), - (u'UTC+02:00', 7200), - (u'UTC+03:00', 10800), - (u'UTC+03:30', 12600), - (u'UTC+04:00', 14400), - (u'UTC+04:30', 16200), - (u'UTC+05:00', 18000), - (u'UTC+05:30', 19800), - (u'UTC+05:45', 20700), - (u'UTC+06:00', 21600), - (u'UTC+06:30', 23400), - (u'UTC+07:00', 25200), - (u'UTC+08:00', 28800), - (u'UTC+08:30', 30600), - (u'UTC+09:00', 32400), - (u'UTC+09:30', 34200), - (u'UTC+10:00', 36000), - (u'UTC+11:00', 39600), - (u'UTC+12:00', 43200), - (u'UTC+13:00', 46800), - (u'UTC+14:00', 50400), + ('UTC-14:00', -50400), + ('UTC-13:00', -46800), + ('UTC-12:00', -43200), + ('UTC-11:00', -39600), + ('UTC-10:00', -36000), + ('UTC-09:00', -32400), + ('UTC-08:00', -28800), + ('UTC-07:00', -25200), + ('UTC-06:00', -21600), + ('UTC-05:00', -18000), + ('UTC-04:30', -16200), + ('UTC-04:00', -14400), + ('UTC-03:30', -12600), + ('UTC-03:00', -10800), + ('UTC-02:00', -7200), + ('UTC-01:00', -3600), + ('UTC', 0), # Goes first (among 
zero-offset) to be default + ('UTC+00:00', 0), + ('UTC-00:00', 0), # Should recognize, but avoid using (see Note below). + ('UTC+01:00', 3600), + ('UTC+02:00', 7200), + ('UTC+03:00', 10800), + ('UTC+03:30', 12600), + ('UTC+04:00', 14400), + ('UTC+04:30', 16200), + ('UTC+05:00', 18000), + ('UTC+05:30', 19800), + ('UTC+05:45', 20700), + ('UTC+06:00', 21600), + ('UTC+06:30', 23400), + ('UTC+07:00', 25200), + ('UTC+08:00', 28800), + ('UTC+08:30', 30600), + ('UTC+09:00', 32400), + ('UTC+09:30', 34200), + ('UTC+10:00', 36000), + ('UTC+11:00', 39600), + ('UTC+12:00', 43200), + ('UTC+13:00', 46800), + ('UTC+14:00', 50400), ) ### End of data that may need updates in response to CLDR ### +# Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a +# way to indicate that a date-time has been converted to UTC but its +# use should not be understood to say anything about the local time of +# the origin of the message using it. However, ISO 8601 has, since +# 2000, forbidden this as an offset suffix. The more recent compromise +# is to use Z to convey the meaning RFC3339 gave to -00:00. So the use +# of -00:00 as offset suffix should be avoided (and, by extension, +# likewise for UTC-00:00 as a zone ID), but this suffix (and ID) +# should be recognized when consuming data generated by other sources, +# for backwards compatibility. + class ByteArrayData: def __init__(self): self.data = [] @@ -255,17 +246,19 @@ class ByteArrayData: lst = unicode2hex(s) index = len(self.data) if index > 0xffff: - raise Error('Index ({}) outside the uint16 range !'.format(index)) + raise Error(f'Index ({index}) outside the uint16 range !') self.hash[s] = index self.data += lst return index def write(self, out, name): - out('\nstatic const char {}[] = {{\n'.format(name)) - out(wrap_list(self.data)) + out(f'\nstatic constexpr char {name}[] = {{\n') + out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ') + # Will over-spill 100-col if some 4-digit hex show up, but none do (yet). 
out('\n};\n') class ZoneIdWriter (SourceFileEditor): + # All the output goes into namespace QtTimeZoneCldr. def write(self, version, defaults, windowsIds): self.__writeWarning(version) windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds) @@ -273,10 +266,10 @@ class ZoneIdWriter (SourceFileEditor): iana.write(self.writer.write, 'ianaIdData') def __writeWarning(self, version): - self.writer.write(""" + self.writer.write(f""" /* - This part of the file was generated on {} from the - Common Locale Data Repository v{} file supplemental/windowsZones.xml + This part of the file was generated on {datetime.date.today()} from the + Common Locale Data Repository v{version} file supplemental/windowsZones.xml http://www.unicode.org/cldr/ @@ -284,111 +277,110 @@ class ZoneIdWriter (SourceFileEditor): edited) CLDR data; see qtbase/util/locale_database/. */ -""".format(str(datetime.date.today()), version)) +""") @staticmethod def __writeTables(out, defaults, windowsIds): windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData() # Write Windows/IANA table - out('// Windows ID Key, Country Enum, IANA ID Index\n') - out('static const QZoneData zoneDataTable[] = {\n') + out('// Windows ID Key, Territory Enum, IANA ID Index\n') + out('static constexpr ZoneData zoneDataTable[] = {\n') + # Sorted by (Windows ID Key, territory enum) for index, data in sorted(windowsIds.items()): out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format( - data['windowsKey'], data['countryId'], + data['windowsKey'], data['territoryId'], ianaIdData.append(data['ianaList']), - data['windowsId'], data['country'])) - out(' { 0, 0, 0 } // Trailing zeroes\n') + data['windowsId'], data['territory'])) out('};\n\n') # Write Windows ID key table out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n') - out('static const QWindowsData windowsDataTable[] = {\n') + out('static constexpr WindowsData windowsDataTable[] = {\n') + # Sorted by Windows ID key; sorting case-insensitively by 
+ # Windows ID must give the same order. + winIdNames = [x.lower() for x, y in windowsIdList] + assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \ + [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y] for index, pair in enumerate(windowsIdList, 1): out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format( index, windowsIdData.append(pair[0]), ianaIdData.append(defaults[index]), pair[1], pair[0])) - out(' { 0, 0, 0, 0 } // Trailing zeroes\n') out('};\n\n') + offsetMap = {} + for pair in utcIdList: + offsetMap[pair[1]] = offsetMap.get(pair[1], ()) + (pair[0],) # Write UTC ID key table out('// IANA ID Index, UTC Offset\n') - out('static const QUtcData utcDataTable[] = {\n') - for pair in utcIdList: + out('static constexpr UtcData utcDataTable[] = {\n') + for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop. + names = offsetMap[offset]; out(' {{ {:6d},{:6d} }}, // {}\n'.format( - ianaIdData.append(pair[0]), pair[1], pair[0])) - out(' { 0, 0 } // Trailing zeroes\n') + ianaIdData.append(' '.join(names)), offset, names[0])) out('};\n') return windowsIdData, ianaIdData -def usage(err, name, message=''): - err.write("""Usage: {} path/to/cldr/root path/to/qtbase -""".format(name)) # TODO: more interesting message - if message: - err.write('\n' + message + '\n') -def main(args, out, err): +def main(out, err): """Parses CLDR's data and updates Qt's representation of it. - Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as + Takes sys.stdout, sys.stderr (or equivalents) as arguments. Expects two command-line options: the root of the unpacked CLDR data-file tree and the root of the qtbase module's checkout. 
Updates QTimeZone's private data about Windows time-zone IDs.""" - name = args.pop(0) - if len(args) != 2: - usage(err, name, "Expected two arguments") - return 1 + parser = argparse.ArgumentParser( + description="Update Qt's CLDR-derived timezone data.") + parser.add_argument('cldr_path', help='path to the root of the CLDR tree') + parser.add_argument('qtbase_path', + help='path to the root of the qtbase source tree', + nargs='?', default=qtbase_root) - cldrPath = args.pop(0) - qtPath = args.pop(0) + args = parser.parse_args() - if not os.path.isdir(qtPath): - usage(err, name, "No such Qt directory: " + qtPath) - return 1 - if not os.path.isdir(cldrPath): - usage(err, name, "No such CLDR directory: " + cldrPath) - return 1 + cldrPath = Path(args.cldr_path) + qtPath = Path(args.qtbase_path) - dataFilePath = os.path.join(qtPath, 'src', 'corelib', 'time', 'qtimezoneprivate_data_p.h') - if not os.path.isfile(dataFilePath): - usage(err, name, 'No such file: ' + dataFilePath) - return 1 + if not qtPath.is_dir(): + parser.error(f"No such Qt directory: {qtPath}") + + if not cldrPath.is_dir(): + parser.error(f"No such CLDR directory: {cldrPath}") + + dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h') + + if not dataFilePath.is_file(): + parser.error(f'No such file: {dataFilePath}') try: version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones( dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1))) except IOError as e: - usage(err, name, - 'Failed to open common/supplemental/windowsZones.xml: ' + (e.message or e.args[1])) + parser.error( + f'Failed to open common/supplemental/windowsZones.xml: {e}') return 1 except Error as e: err.write('\n'.join(textwrap.wrap( - 'Failed to read windowsZones.xml: ' + (e.message or e.args[1]), + f'Failed to read windowsZones.xml: {e}', subsequent_indent=' ', width=80)) + '\n') return 1 out.write('Input file parsed, now writing data\n') - try: - writer = 
ZoneIdWriter(dataFilePath, qtPath) - except IOError as e: - err.write('Failed to open files to transcribe: {}'.format(e.message or e.args[1])) - return 1 try: - writer.write(version, defaults, winIds) - except Error as e: - writer.cleanup() - err.write('\nError in Windows ID data: ' + e.message + '\n') + with ZoneIdWriter(dataFilePath, qtPath) as writer: + writer.write(version, defaults, winIds) + except Exception as e: + err.write(f'\nError while updating timezone data: {e}\n') return 1 - writer.close() - out.write('Data generation completed, please check the new file at ' + dataFilePath + '\n') + out.write(f'Data generation completed, please check the new file at {dataFilePath}\n') return 0 if __name__ == '__main__': import sys - sys.exit(main(sys.argv, sys.stdout, sys.stderr)) + sys.exit(main(sys.stdout, sys.stderr)) diff --git a/util/locale_database/dateconverter.py b/util/locale_database/dateconverter.py index 1990fe0c61..8ca15405f7 100755..100644 --- a/util/locale_database/dateconverter.py +++ b/util/locale_database/dateconverter.py @@ -1,107 +1,195 @@ -#!/usr/bin/env python -############################################################################# -## -## Copyright (C) 2016 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. 
-## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# - -import re - -def _convert_pattern(pattern): - # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns - qt_regexps = { - r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year - r"L" : "M", # stand-alone month names. not supported. - r"g{1,}": "", # modified julian day. not supported. - r"S{1,}" : "", # fractional seconds. not supported. - r"A{1,}" : "" # milliseconds in day. not supported. - } - qt_patterns = { - "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported. - "y" : "yyyy", # four-digit year without leading zeroes - "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported. - "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported. - "MMMMM" : "MMM", # narrow month name. - "LLLLL" : "MMM", # stand-alone narrow month name. - "l" : "", # special symbol for chinese leap month. not supported. - "w" : "", "W" : "", # week of year/month. not supported. - "D" : "", "DD" : "", "DDD" : "", # day of year. not supported. - "F" : "", # day of week in month. not supported. 
- "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week - "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week - "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week - "a" : "AP", # AM/PM - "K" : "h", # Hour 0-11 - "k" : "H", # Hour 1-24 - "j" : "", # special reserved symbol. - "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone - "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone - "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone - "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone - } - if qt_patterns.has_key(pattern): - return qt_patterns[pattern] - for r,v in qt_regexps.items(): - pattern = re.sub(r, v, pattern) - return pattern - -def convert_date(input): - result = "" - patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV" - last = "" - inquote = 0 - chars_to_strip = " -" - for c in input: - if c == "'": - inquote = inquote + 1 - if inquote % 2 == 0: - if c in patterns: - if not last: - last = c - else: - if c in last: - last += c - else: - # pattern changed - converted = _convert_pattern(last) - result += converted - if not converted: - result = result.rstrip(chars_to_strip) - last = c - continue - if last: - # pattern ended - converted = _convert_pattern(last) - result += converted - if not converted: - result = result.rstrip(chars_to_strip) - last = "" - result += c - if last: - converted = _convert_pattern(last) - result += converted - if not converted: - result = result.rstrip(chars_to_strip) - return result.lstrip(chars_to_strip) +# Copyright (C) 2016 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +class Converter (object): + """Conversion between CLDR and Qt datetime formats. + + Keep in sync with qlocale_mac.mm's macToQtFormat(). 
+ The definitive source of truth is: + https://www.unicode.org/reports/tr35/tr35-68/tr35-dates.html#Date_Field_Symbol_Table + + See convert() for explanation of the approach taken. Each method + with a single-letter name is used to scan a prefix of a text, + presumed to begin with that letter (or one Qt treats as equivalent + to it) and returns a pair (Qt format, length), to use the given Qt + format in place of text[:length]. In all cases, length must be + positive.""" + + @staticmethod + def __is_reserved(ch): + """Every ASCII letter is a reserved symbol in CLDR datetime formats""" + assert len(ch) == 1, ch + return ch.isascii() and ch.isalpha(); + @staticmethod + def __count_first(text): + """How many of text[0] appear at the start of text ?""" + assert text + return len(text) - len(text.lstrip(text[0])) + @classmethod + def __verbatim(cls, text): + # Used where our format coincides with LDML's, including on length. + n = cls.__count_first(text) + return text[:n], n + @classmethod + def __treat_as(cls, mimic, text): + # Helper for aliases + n = cls.__count_first(text) + return mimic * n, n + + # Please follow alphabetic order, with two cases of the same + # letter adjacent, lower before upper. + @classmethod + def a(cls, text): # AM/PM indicator; use locale-appropriate case + return 'Ap', cls.__count_first(text) + + # A: Milliseconds in day. Not supported. + b = a # AM/PM/noon/midnight + B = a # "Flexible day period" (e.g. "at night" / "in the day") + # (Only zh_Hant_TW affected; zh_Hant_{HK,MO} use 'ah', mapped to + # 'APh', so do the same here.) + + @classmethod + def c(cls, text): # Stand-alone local day of week + # Has length-variants for several cases Qt doesn't support, as + # do 'e' and 'E': just map all simply to weekday, abbreviated + # or full. + n = cls.__count_first(text) + return ('dddd' if n == 4 else 'ddd'), n + + # C: Input skeleton symbol + d = __verbatim # day (of month or of week, depends on length) + # D: Day of year. Not supported. 
+ e = c # Local day of week + E = c # Just plain day of week + # F: Day of week in month. Not supported. + # g: Modified julian day. Not supported. + # G: Era. Not supported. + h = __verbatim # Hour 1-12, treat as 0-11 + H = __verbatim # Hour 0-23 + # j: Input skeleton symbol + # J: Input skeleton symbol + + @classmethod + def k(cls, text): # Hour 1-24, treat as 0-23 + return cls.__treat_as('H', text) + @classmethod + def K(cls, text): # Hour 0-11 + return cls.__treat_as('h', text) + + # l: Deprecated Chinese leap month indicator. + @classmethod + def L(cls, text): # Stand-alone month names: treat as plain month names. + n = cls.__count_first(text) + # Length five is narrow; treat same as abbreviated; anything + # shorter matches Qt's month forms. + return ('MMM' if n > 4 else 'M' * n), n + + m = __verbatim # Minute within the hour. + M = L # Plain month names, possibly abbreviated, and numbers. + + @classmethod + def O(cls, text): # Localized GMT±offset formats. Map to Z-or-UTC±HH:mm + return 't', cls.__count_first(text) + + # q: Quarter. Not supported. + # Q: Quarter. Not supported. + + s = __verbatim # Seconds within the minute. + @classmethod + def S(cls, text): # Fractional seconds. Only milliseconds supported. + # FIXME: spec is unclear, do we need to include the leading + # dot or not ? For now, no known locale actually exercises + # this, so stick with what we've done on Darwin since long + # before adding support here. + n = cls.__count_first(text) + return ('z' if n < 3 else 'zzz'), n + + @classmethod + def u(cls, text): # Extended year (numeric) + # Officially, 'u' is simply the full year number, zero-padded + # to the length of the field. Qt's closest to that is four-digit. + # It explicitly has no special case for two-digit year. + return 'yyyy', cls.__count_first(text) + + # U: Cyclic Year Name. Not supported + @classmethod + def v(cls, text): # Generic non-location format. Map to name. 
+ return 'tttt', cls.__count_first(text) + + V = v # Zone ID in various forms; VV is IANA ID. Map to name. + # w: Week of year. Not supported. + # W: Week of month. Not supported. + + @classmethod + def x(cls, text): # Variations on offset format. + n = cls.__count_first(text) + # Ignore: n == 1 may omit minutes, n > 3 may include seconds. + return ('ttt' if n > 1 and n & 1 else 'tt'), n + X = x # Should use Z for zero offset. + + @classmethod + def y(cls, text): # Year number. + n = cls.__count_first(text) + return ('yy' if n == 2 else 'yyyy'), n + # Y: Year for Week-of-year calendars + + z = v # Specific (i.e. distinguish standard from DST) non-location format. + @classmethod + def Z(cls, text): # Offset format, optionally with GMT (Qt uses UTC) prefix. + n = cls.__count_first(text) + return ('tt' if n < 4 else 'ttt' if n > 4 else 't'), n + + @staticmethod + def scanQuote(text): # Can't have ' as a method name, so handle specially + assert text.startswith("'") + i = text.find("'", 1) # Find the next; -1 if not present. + i = len(text) if i < 0 else i + 1 # Include the close-quote. + return text[:i], i + + # Now put all of those to use: + @classmethod + def convert(cls, text): + """Convert a CLDR datetime format string into a Qt one. + + Presumes that the caller will ''.join() the fragments it + yields. Each sequence of CLDR field symbols that corresponds + to a Qt format token is converted to it; all other CLDR field + symbols are discarded; the literals in between fields are + preserved verbatim, except that space and hyphen separators + immediately before a discarded field are discarded with it. 
+ + The approach is to look at the first symbol of the remainder + of the text, at each iteration, and use that first symbol to + select a function that will identify how much of the text to + consume and what to replace it with.""" + sep = '' + while text: + ch = text[0] + if ch == "'": + quoted, length = cls.scanQuote(text) + text = text[length:] + sep += quoted + elif hasattr(cls, ch): + qtform, length = getattr(cls, ch)(text) + assert qtform and length > 0, (ch, text, qtform, length) + text = text[length:] + if sep: + yield sep + sep = '' + yield qtform + elif cls.__is_reserved(ch): + text = text[cls.__count_first(text):] + # Discard space or dash separator that was only there + # for the sake of the unsupported field: + sep = sep.rstrip(' -') + # TODO: should we also strip [ -]* from text + # immediately following unsupported forms ? + else: + sep += ch + text = text[1:] + if sep: + yield sep + +def convert_date(text): + # See Converter.convert() + return ''.join(Converter.convert(text)) diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py index 4181e290ac..66b8840cb1 100644 --- a/util/locale_database/enumdata.py +++ b/util/locale_database/enumdata.py @@ -1,58 +1,62 @@ -# -*- coding: utf-8; -*- -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. 
-## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 -# A run of cldr2qlocalexml.py will produce output reporting any -# language, script and country codes it sees, in data, for which it -# can find a name (taken always from en.xml) that could potentially be -# used. There is no point adding a mapping for such a code unless the -# CLDR's common/main/ contains an XML file for at least one locale -# that exerciss it. +"""Assorted enumerations implicated in public API. -# Each *_list reflects the current values of its enums in qlocale.h; -# if new xml language files are available in CLDR, these languages and -# countries need to be *appended* to this list (for compatibility -# between versions). Include any spaces present in names (scripts -# shall squish them out for the enum entries) in *_list, but use the -# squished forms of names in the *_aliases mappings. +The numberings of these enumerations can only change at major +versions. When new CLDR data implies adding entries, the new ones must +go after all existing ones. See also zonedata.py for enumerations +related to timezones and CLDR, which can more freely be changed +between versions. -# For a new major version (and only then), we can change the -# numbering, so re-sort each list into alphabetic order (e.g. using -# sort -k2); but keep the Any and C entries first. 
That's why those -# are offset with a blank line, below. After doing that, regenerate -# locale data as usual; this will cause a binary-incompatible change. +A run of cldr2qlocalexml.py will produce output reporting any +language, script and territory codes it sees, in data, for which it +can find a name (taken always from en.xml) that could potentially be +used. There is no point adding a mapping for such a code unless the +CLDR's common/main/ contains an XML file for at least one locale that +exercises it (and little point, even then, absent substantial data, +ignoring draft='unconfirmed' entries). -# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on -# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of -# languages so closely related to one another that they could also be -# regarded as divergent dialects of the macrolanguage. +Each *_map reflects the current values of its enums in qlocale.h; if +new xml language files are available in CLDR, these languages and +territories need to be *appended* to this list (for compatibility +between versions). Include any spaces and dashes present in names +(they'll be squished out for the enum entries) in *_map, but use the +squished forms of names in the *_aliases mappings. The squishing also +turns the first letter of each word into a capital so you can safely +preserve the case of en.xml's name; but omit (or replace with space) +any punctuation aside from dashes and map any accented letters to +their un-accented plain ASCII. The two tables, for each enum, have +the forms: +* map { Numeric value: ("Proper name", "ISO code") } +* alias { "OldName": "CurrentName" } -language_list = { +TODO: add support for marking entries as deprecated from a specified +version. For aliases that merely deprecates the name. 
Where we have a +name for which CLDR offers no data, we may also want to deprecate +entries in the map - although they may be worth keeping for the +benefit of QLocaleSelector (see QTBUG-112765), if other +locale-specific resources might have use of them. + +For a new major version (and only then), we can change the numbering, +so re-sort each list into alphabetic order (e.g. using sort -k2); but +keep the Any and C entries first. That's why those are offset with a +blank line, below. After doing that, regenerate locale data as usual; +this will cause a binary-incompatible change. + +Note on 'macrolanguage' comments: see QTBUG-107781 and 'ISO 639 +macrolanguage' on Wikipedia. A 'macrolanguage' is (loosely-speaking) a +group of languages so closely related to one another that they could +also be regarded as divergent dialects of the macrolanguage. In some +cases this may mean a resource (such as translation or text-to-speech +data) may describe itself as pertaining to the macrolanguage, implying +its suitability for use in any of the languages within the +macrolanguage. For example, no_NO might be used for a generic +Norwegian resource, embracing both nb_NO and nn_NO. 
+ +""" + +language_map = { 0: ("AnyLanguage", " "), 1: ("C", " "), @@ -177,7 +181,7 @@ language_list = { 120: ("Japanese", "ja"), 121: ("Javanese", "jv"), 122: ("Jju", "kaj"), - 123: ("Jola Fonyi", "dyo"), + 123: ("Jola-Fonyi", "dyo"), 124: ("Kabuverdianu", "kea"), 125: ("Kabyle", "kab"), 126: ("Kako", "kkj"), @@ -218,7 +222,7 @@ language_list = { 161: ("Lojban", "jbo"), 162: ("Lower Sorbian", "dsb"), 163: ("Low German", "nds"), - 164: ("Luba Katanga", "lu"), + 164: ("Luba-Katanga", "lu"), 165: ("Lule Sami", "smj"), 166: ("Luo", "luo"), 167: ("Luxembourgish", "lb"), @@ -226,7 +230,7 @@ language_list = { 169: ("Macedonian", "mk"), 170: ("Machame", "jmc"), 171: ("Maithili", "mai"), - 172: ("Makhuwa Meetto", "mgh"), + 172: ("Makhuwa-Meetto", "mgh"), 173: ("Makonde", "kde"), 174: ("Malagasy", "mg"), # macrolanguage 175: ("Malayalam", "ml"), @@ -382,7 +386,31 @@ language_list = { 325: ("Zarma", "dje"), 326: ("Zhuang", "za"), # macrolanguage 327: ("Zulu", "zu"), + # added in CLDR v40 + 328: ("Kaingang", "kgp"), + 329: ("Nheengatu", "yrl"), + # added in CLDR v42 + 330: ("Haryanvi", "bgc"), + 331: ("Northern Frisian", "frr"), + 332: ("Rajasthani", "raj"), + 333: ("Moksha", "mdf"), + 334: ("Toki Pona", "tok"), + 335: ("Pijin", "pis"), + 336: ("Obolo", "ann"), + # added in CLDR v43 + 337: ("Baluchi", "bal"), + 338: ("Ligurian", "lij"), + 339: ("Rohingya", "rhg"), + 340: ("Torwali", "trw"), + # added in CLDR v44 + 341: ("Anii", "blo"), + 342: ("Kangri", "xnr"), + 343: ("Venetian", "vec"), } +# Don't add languages just because they exist; check CLDR does provide +# substantial data for locales using it; and check, once added, they +# don't show up in cldr2qlocalexml.py's unused listing. Do also check +# the data's draft status; if it's (nearly) all unconfirmed, leave it. 
language_aliases = { # Renamings prior to Qt 6.0 (CLDR v37): @@ -406,9 +434,9 @@ language_aliases = { 'Navaho': 'Navajo', 'Oriya': 'Odia', 'Kirghiz': 'Kyrgyz' - } +} -country_list = { +territory_map = { 0: ("AnyTerritory", "ZZ"), 1: ("Afghanistan", "AF"), @@ -420,7 +448,7 @@ country_list = { 7: ("Angola", "AO"), 8: ("Anguilla", "AI"), 9: ("Antarctica", "AQ"), - 10: ("Antigua And Barbuda", "AG"), + 10: ("Antigua and Barbuda", "AG"), 11: ("Argentina", "AR"), 12: ("Armenia", "AM"), 13: ("Aruba", "AW"), @@ -439,7 +467,7 @@ country_list = { 26: ("Bermuda", "BM"), 27: ("Bhutan", "BT"), 28: ("Bolivia", "BO"), - 29: ("Bosnia And Herzegovina", "BA"), + 29: ("Bosnia and Herzegovina", "BA"), 30: ("Botswana", "BW"), 31: ("Bouvet Island", "BV"), 32: ("Brazil", "BR"), @@ -457,7 +485,7 @@ country_list = { 44: ("Caribbean Netherlands", "BQ"), 45: ("Cayman Islands", "KY"), 46: ("Central African Republic", "CF"), - 47: ("Ceuta And Melilla", "EA"), + 47: ("Ceuta and Melilla", "EA"), 48: ("Chad", "TD"), 49: ("Chile", "CL"), 50: ("China", "CN"), @@ -466,8 +494,8 @@ country_list = { 53: ("Cocos Islands", "CC"), 54: ("Colombia", "CO"), 55: ("Comoros", "KM"), - 56: ("Congo Brazzaville", "CG"), - 57: ("Congo Kinshasa", "CD"), + 56: ("Congo - Brazzaville", "CG"), + 57: ("Congo - Kinshasa", "CD"), 58: ("Cook Islands", "CK"), 59: ("Costa Rica", "CR"), 60: ("Croatia", "HR"), @@ -511,11 +539,11 @@ country_list = { 98: ("Guam", "GU"), 99: ("Guatemala", "GT"), 100: ("Guernsey", "GG"), - 101: ("Guinea Bissau", "GW"), + 101: ("Guinea-Bissau", "GW"), 102: ("Guinea", "GN"), 103: ("Guyana", "GY"), 104: ("Haiti", "HT"), - 105: ("Heard And McDonald Islands", "HM"), + 105: ("Heard and McDonald Islands", "HM"), 106: ("Honduras", "HN"), 107: ("Hong Kong", "HK"), 108: ("Hungary", "HU"), @@ -525,12 +553,12 @@ country_list = { 112: ("Iran", "IR"), 113: ("Iraq", "IQ"), 114: ("Ireland", "IE"), - 115: ("Isle Of Man", "IM"), + 115: ("Isle of Man", "IM"), 116: ("Israel", "IL"), 117: ("Italy", "IT"), - # Officially 
Côte d’Ivoire, which we'd ned to map to CotedIvoire - # or CoteDIvoire, either failing to make the d' separate from - # Cote or messing with its case. So stick with Ivory Coast: + # Officially Côte d’Ivoire, which we'd need to map to CotedIvoire + # or CoteDIvoire, either failing to make the d' separate from Cote + # or messing with its case. So stick with Ivory Coast: 118: ("Ivory Coast", "CI"), 119: ("Jamaica", "JM"), 120: ("Japan", "JP"), @@ -610,14 +638,14 @@ country_list = { 194: ("Rwanda", "RW"), 195: ("Saint Barthelemy", "BL"), 196: ("Saint Helena", "SH"), - 197: ("Saint Kitts And Nevis", "KN"), + 197: ("Saint Kitts and Nevis", "KN"), 198: ("Saint Lucia", "LC"), 199: ("Saint Martin", "MF"), - 200: ("Saint Pierre And Miquelon", "PM"), - 201: ("Saint Vincent And Grenadines", "VC"), + 200: ("Saint Pierre and Miquelon", "PM"), + 201: ("Saint Vincent and Grenadines", "VC"), 202: ("Samoa", "WS"), 203: ("San Marino", "SM"), - 204: ("Sao Tome And Principe", "ST"), + 204: ("Sao Tome and Principe", "ST"), 205: ("Saudi Arabia", "SA"), 206: ("Senegal", "SN"), 207: ("Serbia", "RS"), @@ -630,14 +658,14 @@ country_list = { 214: ("Solomon Islands", "SB"), 215: ("Somalia", "SO"), 216: ("South Africa", "ZA"), - 217: ("South Georgia And South Sandwich Islands", "GS"), + 217: ("South Georgia and South Sandwich Islands", "GS"), 218: ("South Korea", "KR"), 219: ("South Sudan", "SS"), 220: ("Spain", "ES"), 221: ("Sri Lanka", "LK"), 222: ("Sudan", "SD"), 223: ("Suriname", "SR"), - 224: ("Svalbard And Jan Mayen", "SJ"), + 224: ("Svalbard and Jan Mayen", "SJ"), 225: ("Sweden", "SE"), 226: ("Switzerland", "CH"), 227: ("Syria", "SY"), @@ -649,12 +677,12 @@ country_list = { 233: ("Togo", "TG"), 234: ("Tokelau", "TK"), 235: ("Tonga", "TO"), - 236: ("Trinidad And Tobago", "TT"), - 237: ("Tristan Da Cunha", "TA"), + 236: ("Trinidad and Tobago", "TT"), + 237: ("Tristan da Cunha", "TA"), 238: ("Tunisia", "TN"), 239: ("Turkey", "TR"), 240: ("Turkmenistan", "TM"), - 241: ("Turks And Caicos 
Islands", "TC"), + 241: ("Turks and Caicos Islands", "TC"), 242: ("Tuvalu", "TV"), 243: ("Uganda", "UG"), 244: ("Ukraine", "UA"), @@ -669,15 +697,15 @@ country_list = { 253: ("Vatican City", "VA"), 254: ("Venezuela", "VE"), 255: ("Vietnam", "VN"), - 256: ("Wallis And Futuna", "WF"), + 256: ("Wallis and Futuna", "WF"), 257: ("Western Sahara", "EH"), - 258: ("World", "001"), + 258: ("world", "001"), 259: ("Yemen", "YE"), 260: ("Zambia", "ZM"), 261: ("Zimbabwe", "ZW"), } -country_aliases = { +territory_aliases = { # Renamings prior to Qt 6.0 (CLDR v37): 'DemocraticRepublicOfCongo': 'CongoKinshasa', 'PeoplesRepublicOfCongo': 'CongoBrazzaville', @@ -708,7 +736,7 @@ country_aliases = { 'TuvaluCountry': 'TuvaluTerritory', } -script_list = { +script_map = { 0: ("AnyScript", "Zzzz"), 1: ("Adlam", "Adlm"), @@ -741,7 +769,7 @@ script_list = { 28: ("Deseret", "Dsrt"), 29: ("Devanagari", "Deva"), 30: ("Duployan", "Dupl"), - 31: ("Egyptian Hieroglyphs", "Egyp"), + 31: ("Egyptian hieroglyphs", "Egyp"), 32: ("Elbasan", "Elba"), 33: ("Ethiopic", "Ethi"), 34: ("Fraser", "Lisu"), @@ -816,7 +844,7 @@ script_list = { 103: ("Pahawh Hmong", "Hmng"), 104: ("Palmyrene", "Palm"), 105: ("Pau Cin Hau", "Pauc"), - 106: ("Phags Pa", "Phag"), + 106: ("Phags-pa", "Phag"), 107: ("Phoenician", "Phnx"), 108: ("Pollard Phonetic", "Plrd"), 109: ("Psalter Pahlavi", "Phlp"), @@ -827,7 +855,7 @@ script_list = { 114: ("Sharada", "Shrd"), 115: ("Shavian", "Shaw"), 116: ("Siddham", "Sidd"), - 117: ("Sign Writing", "Sgnw"), + 117: ("SignWriting", "Sgnw"), # Oddly, en.xml leaves no space in it. 
118: ("Simplified Han", "Hans"), 119: ("Sinhala", "Sinh"), 120: ("Sora Sompeng", "Sora"), @@ -852,6 +880,8 @@ script_list = { 139: ("Vai", "Vaii"), 140: ("Varang Kshiti", "Wara"), 141: ("Yi", "Yiii"), + # Added at CLDR v43 + 142: ("Hanifi", "Rohg"), # Used for Rohingya } script_aliases = { @@ -863,27 +893,3 @@ script_aliases = { 'MendeKikakuiScript': 'MendeScript', 'BengaliScript': 'BanglaScript', } - -def countryCodeToId(code): - if not code: - return 0 - for country_id in country_list: - if country_list[country_id][1] == code: - return country_id - return -1 - -def languageCodeToId(code): - if not code: - return 0 - for language_id in language_list: - if language_list[language_id][1] == code: - return language_id - return -1 - -def scriptCodeToId(code): - if not code: - return 0 - for script_id in script_list: - if script_list[script_id][1] == code: - return script_id - return -1 diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py new file mode 100644 index 0000000000..0d23065cf9 --- /dev/null +++ b/util/locale_database/iso639_3.py @@ -0,0 +1,80 @@ +# Copyright (C) 2021 The Qt Company Ltd. 
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass +class LanguageCodeEntry: + part3Code: str + part2BCode: Optional[str] + part2TCode: Optional[str] + part1Code: Optional[str] + + def id(self) -> str: + if self.part1Code: + return self.part1Code + if self.part2BCode: + return self.part2BCode + return self.part3Code + + def __repr__(self) -> str: + parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}'] + if self.part2BCode is not None and self.part2BCode != self.part3Code: + parts.append(f', part2BCode={self.part2BCode!r}') + if self.part2TCode != self.part2BCode: + parts.append(f', part2TCode={self.part2TCode!r}') + if self.part1Code is not None: + parts.append(f', part1Code={self.part1Code!r}') + parts.append(')') + return ''.join(parts) + + +class LanguageCodeData: + """ + Representation of ISO639-2 language code data. + """ + def __init__(self, fileName: str): + """ + Construct the object populating the data from the given file. 
+ """ + self.__codeMap: Dict[str, LanguageCodeEntry] = {} + + with open(fileName, 'r', encoding='utf-8') as stream: + stream.readline() # skip the header + for line in stream.readlines(): + part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4) + + # sanity checks + assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \ + f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\ + f'{part2TCode!r} {part1Code!r}' + + assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}' + assert not part1Code or len(part1Code) == 2, \ + f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}' + assert not part2BCode or len(part2BCode) == 3, \ + f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}' + assert not part2TCode or len(part2TCode) == 3, \ + f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}' + + assert (part2BCode == '') == (part2TCode == ''), \ + f'Only one Part 2 code is specified for {part3Code!r}: ' \ + f'{part2BCode!r} vs {part2TCode!r}' + assert not part2TCode or part2TCode == part3Code, \ + f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}' + + entry = LanguageCodeEntry(part3Code, part2BCode or None, + part2TCode or None, part1Code or None) + + self.__codeMap[entry.id()] = entry + + def query(self, code: str) -> Optional[LanguageCodeEntry]: + """ + Lookup the entry with the given code and return it. + + The entries can be looked up by using either the Alpha2 code or the bibliographical + Alpha3 code. + """ + return self.__codeMap.get(code) diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index 110e5b7573..b94c242172 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -1,30 +1,5 @@ -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. 
-## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2020 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Parsing the Locale Data Markup Language It's an XML format, so the raw parsing of XML is, of course, delegated @@ -46,6 +21,13 @@ See individual classes for further detail. from localetools import Error from dateconverter import convert_date +# The github version of CLDR uses '↑↑↑' to indicate "inherit" +INHERIT = '↑↑↑' + +def _attrsFromDom(dom): + return { k: (v if isinstance(v, str) else v.nodeValue) + for k, v in dom.attributes.items() } + class Node (object): """Wrapper for an arbitrary DOM node. 
@@ -75,6 +57,9 @@ class Node (object): else: self.draft = max(draft, self.draftScore(attr)) + def attributes(self): + return _attrsFromDom(self.dom) + def findAllChildren(self, tag, wanted = None, allDull = False): """All children that do have the given tag and attributes. @@ -124,7 +109,7 @@ class Node (object): one.""" seq = self.findAllChildren(tag) try: - node = seq.next() + node = next(seq) except StopIteration: raise Error('No child found where one was expected', tag) for it in seq: @@ -191,17 +176,35 @@ class XmlScanner (object): return elts class Supplement (XmlScanner): - def find(self, xpath): + def find(self, xpath, exclude=()): + """Finds nodes by matching a specified xpath. + + If exclude is passed, it should be a sequence of attribute names (its + default is empty). Any matches to the given xpath that also have any + attribute in this sequence will be excluded. + + For each childless node matching the xpath, or child of a node matching + the xpath, this yields a twople (name, attrs) where name is the + nodeName and attrs is a dict mapping the node's attribute's names to + their values. For attribute values that are not simple strings, the + nodeValue of the attribute node is used.""" elts = self.findNodes(xpath) - for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,) - for e in elts): + for elt in _iterateEach(e.dom.childNodes or (e.dom,) + for e in elts + if not any(a in e.dom.attributes + for a in exclude)): if elt.attributes: - yield (elt.nodeName, - dict((k, v if isinstance(v, basestring) else v.nodeValue) - for k, v in elt.attributes.items())) + yield elt.nodeName, _attrsFromDom(elt) class LocaleScanner (object): def __init__(self, name, nodes, root): + """Set up to scan data for a specified locale. + + First parameter is the name of the locale; it will be used in + error messages. Second is a tuple of DOM root-nodes of files + with locale data, later ones serving as fall-backs for data + missing in earlier ones. 
Third parameter is the root locale's + DOM node.""" self.name, self.nodes, self.base = name, nodes, root def find(self, xpath, default = None, draft = None): @@ -227,7 +230,7 @@ class LocaleScanner (object): def tagCodes(self): """Yields four tag codes - The tag codes are language, script, country and variant; an + The tag codes are language, script, territory and variant; an empty value for any of them indicates that no value was provided. The values are obtained from the primary file's top-level <identity> element. An Error is raised if any @@ -241,7 +244,7 @@ class LocaleScanner (object): except (KeyError, AttributeError): pass else: - raise Error('Alias to {}'.format(source)) + raise Error(f'Alias to {source}') ids = root.findUniqueChild('identity') for code in ('language', 'script', 'territory', 'variant'): @@ -259,12 +262,12 @@ class LocaleScanner (object): """Fetches currency data for this locale. Single argument, isoCode, is the ISO currency code for the - currency in use in the country. See also numericData, which + currency in use in the territory. See also numericData, which includes some currency formats. """ if isoCode: - stem = 'numbers/currencies/currency[{}]/'.format(isoCode) - symbol = self.find(stem + 'symbol', '') + stem = f'numbers/currencies/currency[{isoCode}]/' + symbol = self.find(f'{stem}symbol', '') name = self.__currencyDisplayName(stem) else: symbol = name = '' @@ -276,31 +279,38 @@ class LocaleScanner (object): First argument, lookup, is a callable that maps a numbering system's name to certain data about the system, as a mapping; - we expect this to have u'digits' as a key. + we expect this to have 'digits' as a key. 
""" system = self.find('numbers/defaultNumberingSystem') - stem = 'numbers/symbols[numberSystem={}]/'.format(system) - decimal = self.find(stem + 'decimal') - group = self.find(stem + 'group') - assert decimal != group, (self.name, system, decimal) + stem = f'numbers/symbols[numberSystem={system}]/' + decimal = self.find(f'{stem}decimal') + group = self.find(f'{stem}group') + if decimal == group: + # mn_Mong_MN @v43 :-( + clean = Node.draftScore('approved') + decimal = self.find(f'{stem}decimal', draft=clean) + group = self.find(f'{stem}group', draft=clean) + assert decimal != group, (self.name, system, decimal) + yield 'decimal', decimal yield 'group', group - yield 'percent', self.find(stem + 'percentSign') - yield 'list', self.find(stem + 'list') - yield 'exp', self.find(stem + 'exponential') + yield 'percent', self.find(f'{stem}percentSign') + yield 'list', self.find(f'{stem}list') + yield 'exp', self.find(f'{stem}exponential') yield 'groupSizes', self.__numberGrouping(system) digits = lookup(system)['digits'] assert len(digits) == 10 zero = digits[0] # Qt's number-formatting code assumes digits are consecutive - # (except Suzhou, CLDR's hanidec - see QTBUG-85409): + # (except Suzhou - see QTBUG-85409 - which shares its zero + # with CLDR's very-non-contiguous hanidec): assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero)) for i, c in enumerate(digits[1:], 1)) yield 'zero', zero - plus = self.find(stem + 'plusSign') - minus = self.find(stem + 'minusSign') + plus = self.find(f'{stem}plusSign') + minus = self.find(f'{stem}minusSign') yield 'plus', plus yield 'minus', minus @@ -308,11 +318,11 @@ class LocaleScanner (object): xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern' try: money = self.find(xpath.replace('Formats/', - 'Formats[numberSystem={}]/'.format(system))) + f'Formats[numberSystem={system}]/')) except Error: money = self.find(xpath) money = self.__currencyFormats(money, plus, minus) - yield 
'currencyFormat', money.next() + yield 'currencyFormat', next(money) neg = '' for it in money: assert not neg, 'There should be at most one more pattern' @@ -322,12 +332,12 @@ class LocaleScanner (object): def textPatternData(self): for key in ('quotationStart', 'alternateQuotationEnd', 'quotationEnd', 'alternateQuotationStart'): - yield key, self.find('delimiters/' + key) + yield key, self.find(f'delimiters/{key}') for key in ('start', 'middle', 'end'): - yield ('listPatternPart' + key.capitalize(), + yield (f'listPatternPart{key.capitalize()}', self.__fromLdmlListPattern(self.find( - 'listPatterns/listPattern/listPatternPart[{}]'.format(key)))) + f'listPatterns/listPattern/listPatternPart[{key}]'))) yield ('listPatternPartTwo', self.__fromLdmlListPattern(self.find( 'listPatterns/listPattern/listPatternPart[2]'))) @@ -335,28 +345,26 @@ class LocaleScanner (object): stem = 'dates/calendars/calendar[gregorian]/' # TODO: is wide really the right width to use here ? # abbreviated might be an option ... or try both ? - meridiem = stem + 'dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/' + meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/' for key in ('am', 'pm'): - yield key, self.find(meridiem + 'dayPeriod[{}]'.format(key), + yield key, self.find(f'{meridiem}dayPeriod[{key}]', draft = Node.draftScore('contributed')) for pair in (('long', 'full'), ('short', 'short')): for key in ('time', 'date'): - yield (pair[0] + key.capitalize() + 'Format', + yield (f'{pair[0]}{key.capitalize()}Format', convert_date(self.find( - stem + '{}Formats/{}FormatLength[{}]/{}Format/pattern'.format( - key, key, pair[1], key)))) + f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern'))) - def endonyms(self, language, script, country, variant): + def endonyms(self, language, script, territory, variant): # TODO: take variant into account ? 
- for seq in ((language, script, country), - (language, script), (language, country), (language,)): + for seq in ((language, script, territory), + (language, script), (language, territory), (language,)): if not all(seq): continue try: yield ('languageEndonym', - self.find('localeDisplayNames/languages/language[{}]' - .format('_'.join(seq)))) + self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]')) except Error: pass else: @@ -365,9 +373,8 @@ class LocaleScanner (object): # grumble(failed to find endonym for language) yield 'languageEndonym', '' - yield ('countryEndonym', - self.find('localeDisplayNames/territories/territory[{}]' - .format(country), '')) + yield ('territoryEndonym', + self.find(f'localeDisplayNames/territories/territory[{territory}]', '')) def unitData(self): yield ('byte_unit', @@ -386,20 +393,20 @@ class LocaleScanner (object): def calendarNames(self, calendars): namings = self.__nameForms for cal in calendars: - stem = 'dates/calendars/calendar[' + cal + ']/months/' + stem = f'dates/calendars/calendar[{cal}]/months/' for key, mode, size in namings: - prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' - yield (key + 'Months_' + cal, - ';'.join(self.find(stem + prop + 'month[{}]'.format(i)) + prop = f'monthContext[{mode}]/monthWidth[{size}]/' + yield (f'{key}Months_{cal}', + ';'.join(self.find(f'{stem}{prop}month[{i}]') for i in range(1, 13))) # Day data (for Gregorian, at least): stem = 'dates/calendars/calendar[gregorian]/days/' days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat') for (key, mode, size) in namings: - prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' - yield (key + 'Days', - ';'.join(self.find(stem + prop + '[' + day + ']') + prop = f'dayContext[{mode}]/dayWidth[{size}]/day' + yield (f'{key}Days', + ';'.join(self.find(f'{stem}{prop}[{day}]') for day in days)) # Implementation details @@ -410,10 +417,10 @@ class LocaleScanner (object): ('long', 'format', 'wide'), ('short', 'format', 
'abbreviated'), ('narrow', 'format', 'narrow'), - ) # Used for month and day names + ) # Used for month and day names def __find(self, xpath): - retries = [ xpath.split('/') ] + retries, foundNone = [ xpath.split('/') ], True while retries: tags, elts, roots = retries.pop(), self.nodes, (self.base.root,) for selector in tags: @@ -423,6 +430,9 @@ class LocaleScanner (object): break else: # Found matching elements + elts = tuple(self.__skipInheritors(elts)) + if elts: + foundNone = False # Possibly filter elts to prefer the least drafty ? for elt in elts: yield elt @@ -442,29 +452,42 @@ class LocaleScanner (object): if not roots: if retries: # Let outer loop fall back on an alias path: break - sought = '/'.join(tags) - if sought != xpath: - sought += ' (for {})'.format(xpath) - raise Error('All lack child {} for {} in {}'.format( - selector, sought, self.name)) + if foundNone: + sought = '/'.join(tags) + if sought != xpath: + sought += f' (for {xpath})' + raise Error(f'All lack child {selector} for {sought} in {self.name}') else: # Found matching elements + roots = tuple(self.__skipInheritors(roots)) + if roots: + foundNone = False for elt in roots: yield elt - sought = '/'.join(tags) - if sought != xpath: - sought += ' (for {})'.format(xpath) - raise Error('No {} in {}'.format(sought, self.name)) + if foundNone: + sought = '/'.join(tags) + if sought != xpath: + sought += f' (for {xpath})' + raise Error(f'No {sought} in {self.name}') + + @staticmethod + def __skipInheritors(elts): + for elt in elts: + try: + if elt.dom.firstChild.nodeValue != INHERIT: + yield elt + except (AttributeError, KeyError): + yield elt def __currencyDisplayName(self, stem): try: return self.find(stem + 'displayName') except Error: pass - for x in ('zero', 'one', 'two', 'few', 'many', 'other'): + for x in ('zero', 'one', 'two', 'few', 'many', 'other'): try: - return self.find(stem + 'displayName[count={}]'.format(x)) + return self.find(f'{stem}displayName[count={x}]') except Error: pass 
return '' @@ -474,10 +497,10 @@ class LocaleScanner (object): # (even for unitLength[narrow]) instead of kB (etc.), so # prefer any unitPattern provided, but prune its placeholder: for size in ('short', 'narrow'): # TODO: reverse order ? - stem = 'units/unitLength[{}]/unit[digital-{}byte]/'.format(size + keySuffix, quantify) + stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/' for count in ('many', 'few', 'two', 'other', 'zero', 'one'): try: - ans = self.find(stem + 'unitPattern[count={}]'.format(count)) + ans = self.find(f'{stem}unitPattern[count={count}]') except Error: continue @@ -490,7 +513,7 @@ class LocaleScanner (object): return ans try: - return self.find(stem + 'displayName') + return self.find(f'{stem}displayName') except Error: pass @@ -518,10 +541,10 @@ class LocaleScanner (object): if cache: byte = cache.pop() if all(byte == k for k in cache): - suffix = 'i' + byte + suffix = f'i{byte}' for q in siQuantifiers: # Those don't (yet, v36) exist in CLDR, so we always get the fall-back: - yield self.__findUnit(keySuffix, q[:2], q[0].upper() + suffix) + yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}') else: # first call tail = suffix = suffix or 'B' for q in siQuantifiers: @@ -556,8 +579,8 @@ class LocaleScanner (object): elsewhere).""" top = int(self.find('numbers/minimumGroupingDigits')) assert top < 4, top # We store it in a 2-bit field - grouping = self.find('numbers/decimalFormats[numberSystem=' - + system + ']/decimalFormatLength/decimalFormat/pattern') + grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/' + 'decimalFormatLength/decimalFormat/pattern') groups = grouping.split('.')[0].split(',')[-3:] assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields if len(groups) > 2: @@ -580,7 +603,7 @@ class LocaleScanner (object): # According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns # there can be doubled or tripled currency sign, however none of
the # locales use that. - p = p.replace(u'\xa4', "%2") + p = p.replace('\xa4', "%2") # Single quote goes away, but double goes to single: p = p.replace("''", '###').replace("'", '').replace('###', "'") # Use number system's signs: diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py index 29153366b3..02ec7cafc7 100644 --- a/util/locale_database/localetools.py +++ b/util/locale_database/localetools.py @@ -1,30 +1,5 @@ -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2020 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Utilities shared among the CLDR extraction tools. 
Functions: @@ -37,13 +12,16 @@ Classes: SourceFileEditor -- adds standard prelude and tail handling to Transcriber. """ -import os -import tempfile +from contextlib import ExitStack, contextmanager +from pathlib import Path +from tempfile import NamedTemporaryFile -class Error (StandardError): - __upinit = StandardError.__init__ +qtbase_root = Path(__file__).parents[2] +assert qtbase_root.name == 'qtbase' + +class Error (Exception): def __init__(self, msg, *args): - self.__upinit(msg, *args) + super().__init__(msg, *args) self.message = msg def __str__(self): return self.message @@ -63,48 +41,141 @@ def unicode2hex(s): lst.append(hex(v)) return lst -def wrap_list(lst): +def wrap_list(lst, perline=20): def split(lst, size): while lst: head, lst = lst[:size], lst[size:] yield head - return ",\n".join(", ".join(x) for x in split(lst, 20)) - -class Transcriber (object): - """Helper class to facilitate rewriting source files. - - This class takes care of the temporary file manipulation. Derived - classes need to implement transcribing of the content, with + return ",\n".join(", ".join(x) for x in split(lst, perline)) + +def names_clash(cldr, enum): + """True if the reader might not recognize cldr as the name of enum + + First argument, cldr, is the name CLDR gives for some language, + script or territory; second, enum, is the name enumdata.py gives + for it. If these are enough alike, returns None; otherwise, a + non-empty string that results from adapting cldr to be more like + how enumdata.py would express it.""" + if cldr == enum: + return None + + # Some common substitutions: + cldr = cldr.replace('&', 'And') + prefix = { 'St.': 'Saint', 'U.S.': 'United States' } + for k, v in prefix.items(): + if cldr.startswith(k + ' '): + cldr = v + cldr[len(k):] + + # Chop out any parenthesised part, e.g. 
(Burma): + while '(' in cldr: + try: + f, t = cldr.index('('), cldr.rindex(')') + except ValueError: + break + cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip() + + # Various accented letters: + remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'} + skip = '\u02bc' # Punctuation for which .isalpha() is true. + # Let cldr match (ignoring non-letters and case) any substring as enum: + if ''.join(enum.lower().split()) in ''.join( + remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip): + return None + return cldr + + +@contextmanager +def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path): + """Context manager for safe file update via a temporary file. + + Accepts path to the file to be updated. Yields a temporary file to the user + code, open for writing. + + On success closes the temporary file and moves its content to the original + location. On error, removes temporary file, without disturbing the original. + """ + tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False) + try: + yield tempFile + tempFile.close() + # Move the modified file to the original location + Path(tempFile.name).rename(originalLocation) + except Exception: + # delete the temporary file in case of error + tempFile.close() + Path(tempFile.name).unlink() + raise + + +class Transcriber: + """Context manager base-class to manage source file rewrites. + + Derived classes need to implement transcribing of the content, with whatever modifications they may want. Members reader and writer are exposed; use writer.write() to output to the new file; use reader.readline() or iterate reader to read the original. - Callers should call close() on success or cleanup() on failure (to - clear away the temporary file). + This class is intended to be used as context manager only (inside a + `with` statement). + + Reimplement onEnter() to write any preamble the file may have, + onExit() to write any tail. 
The body of the with statement takes + care of anything in between, using methods provided by derived classes. + + The data is written to a temporary file first. The temporary file data + is then moved to the original location if there were no errors. Otherwise + the temporary file is removed and the original is left unchanged. """ - def __init__(self, path, temp): - # Open the old file - self.reader = open(path) - # Create a temp file to write the new data into - temp, tempPath = tempfile.mkstemp(os.path.split(path)[1], dir = temp) - self.__names = path, tempPath - self.writer = os.fdopen(temp, "w") - - def close(self): - self.reader.close() - self.writer.close() - self.reader = self.writer = None - source, temp = self.__names - os.remove(source) - os.rename(temp, source) - - def cleanup(self): - if self.__names: - self.reader.close() - self.writer.close() - # Remove temp-file: - os.remove(self.__names[1]) - self.__names = () + def __init__(self, path: Path, temp_dir: Path): + self.path = path + self.tempDir = temp_dir + + def onEnter(self) -> None: + """ + Called before transferring control to user code. + + This function can be overridden in derived classes to perform actions + before transferring control to the user code. + + The default implementation does nothing. + """ + pass + + def onExit(self) -> None: + """ + Called after return from user code. + + This function can be overridden in derived classes to perform actions + after successful return from user code. + + The default implementation does nothing. 
+ """ + pass + + def __enter__(self): + with ExitStack() as resources: + # Create a temp file to write the new data into + self.writer = resources.enter_context( + AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir)) + # Open the old file + self.reader = resources.enter_context(open(self.path)) + + self.onEnter() + + # Prevent resources from being closed on normal return from this + # method and make them available inside __exit__(): + self.__resources = resources.pop_all() + return self + + def __exit__(self, exc_type, exc_value, traceback): + if exc_type is None: + with self.__resources: + self.onExit() + else: + self.__resources.__exit__(exc_type, exc_value, traceback) + + return False + class SourceFileEditor (Transcriber): """Transcriber with transcription of code around a generated block. @@ -117,43 +188,27 @@ class SourceFileEditor (Transcriber): the new version to replace it. This class takes care of transcribing the parts before and after - the generated content; on creation, an instance will copy the - preamble up to the start marker; its close() will skip over the - original's generated content and resume transcribing with the end - marker. Derived classes need only implement the generation of the - content in between. - - Callers should call close() on success or cleanup() on failure (to - clear away the temporary file); see Transcriber. + the generated content; on entering the context, an instance will + copy the preamble up to the start marker; on exit from the context + it will skip over the original's generated content and resume + transcribing with the end marker. + + This class is only intended to be used as a context manager: + see Transcriber. Derived classes implement suitable methods for use in + the body of the with statement, using self.writer to rewrite the part + of the file between the start and end markers. """ - __upinit = Transcriber.__init__ - def __init__(self, path, temp): - """Set up the source file editor.
- - Requires two arguments: the path to the source file to be read - and, on success, replaced with a new version; and the - directory in which to store the temporary file during the - rewrite.""" - self.__upinit(path, temp) - self.__copyPrelude() - - __upclose = Transcriber.close - def close(self): - self.__copyTail() - self.__upclose() - - # Implementation details: GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE' GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE' - def __copyPrelude(self): + def onEnter(self) -> None: # Copy over the first non-generated section to the new file for line in self.reader: self.writer.write(line) if line.strip() == self.GENERATED_BLOCK_START: break - def __copyTail(self): + def onExit(self) -> None: # Skip through the old generated data in the old file for line in self.reader: if line.strip() == self.GENERATED_BLOCK_END: diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 4fcfe32a43..5cb56c2165 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -1,31 +1,5 @@ -# coding=utf8 -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. 
-## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Shared serialization-scanning code for QLocaleXML format. Provides classes: @@ -35,15 +9,23 @@ Provides classes: Support: Spacer -- provides control over indentation of the output. + +RelaxNG schema for the used file format can be found in qlocalexml.rnc. +QLocaleXML files can be validated using: + + jing -c qlocalexml.rnc <file.xml> + +You can download jing from https://relaxng.org/jclark/jing.html if your +package manager lacks the jing package. 
""" -from __future__ import print_function + from xml.sax.saxutils import escape from localetools import Error # Tools used by Locale: def camel(seq): - yield seq.next() + yield next(seq) for word in seq: yield word.capitalize() @@ -51,88 +33,39 @@ def camelCase(words): return ''.join(camel(iter(words))) def addEscapes(s): - return ''.join(c if n < 128 else '\\x{:02x}'.format(n) + return ''.join(c if n < 128 else f'\\x{n:02x}' for n, c in ((ord(c), c) for c in s)) def startCount(c, text): # strspn """First index in text where it doesn't have a character in c""" assert text and text[0] in c try: - return (j for j, d in enumerate(text) if d not in c).next() + return next((j for j, d in enumerate(text) if d not in c)) except StopIteration: return len(text) -def convertFormat(format): - """Convert date/time format-specier from CLDR to Qt - - Match up (as best we can) the differences between: - * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table - * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString() - """ - # Compare and contrast dateconverter.py's convert_date(). - # Need to (check consistency and) reduce redundancy ! 
- result = "" - i = 0 - while i < len(format): - if format[i] == "'": - result += "'" - i += 1 - while i < len(format) and format[i] != "'": - result += format[i] - i += 1 - if i < len(format): - result += "'" - i += 1 - else: - s = format[i:] - if s.startswith('E'): # week-day - n = startCount('E', s) - if n < 3: - result += 'ddd' - elif n == 4: - result += 'dddd' - else: # 5: narrow, 6 short; but should be name, not number :-( - result += 'd' if n < 6 else 'dd' - i += n - elif s[0] in 'ab': # am/pm - # 'b' should distinguish noon/midnight, too :-( - result += "AP" - i += startCount('ab', s) - elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show - result += 'z' - i += startCount('S', s) - elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID) - result += 't' - i += startCount('V', s) - elif s[0] in 'zv': # zone - # Should use full name, e.g. "Central European Time", if 'zzzz' :-( - # 'v' should get generic non-location format, e.g. 
PT for "Pacific Time", no DST indicator - result += "t" - i += startCount('zv', s) - else: - result += format[i] - i += 1 - - return result - class QLocaleXmlReader (object): def __init__(self, filename): self.root = self.__parse(filename) - # Lists of (id, name, code) triples: - languages = tuple(self.__loadMap('language')) - scripts = tuple(self.__loadMap('script')) - countries = tuple(self.__loadMap('country')) + + from enumdata import language_map, script_map, territory_map + # Lists of (id, enum name, code, en.xml name) tuples: + languages = tuple(self.__loadMap('language', language_map)) + scripts = tuple(self.__loadMap('script', script_map)) + territories = tuple(self.__loadMap('territory', territory_map)) self.__likely = tuple(self.__likelySubtagsMap()) - # Mappings {ID: (name, code)} - self.languages = dict((v[0], v[1:]) for v in languages) - self.scripts = dict((v[0], v[1:]) for v in scripts) - self.countries = dict((v[0], v[1:]) for v in countries) - # Private mappings {name: (ID, code)} - self.__langByName = dict((v[1], (v[0], v[2])) for v in languages) - self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts) - self.__landByName = dict((v[1], (v[0], v[2])) for v in countries) + + # Mappings {ID: (enum name, code, en.xml name)} + self.languages = {v[0]: v[1:] for v in languages} + self.scripts = {v[0]: v[1:] for v in scripts} + self.territories = {v[0]: v[1:] for v in territories} + + # Private mappings {enum name: (ID, code)} + self.__langByName = {v[1]: (v[0], v[2]) for v in languages} + self.__textByName = {v[1]: (v[0], v[2]) for v in scripts} + self.__landByName = {v[1]: (v[0], v[2]) for v in territories} # Other properties: - self.dupes = set(v[1] for v in languages) & set(v[1] for v in countries) + self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories) self.cldrVersion = self.__firstChildText(self.root, "version") def loadLocaleMap(self, calendars, grumble = lambda text: None): @@ -142,18 +75,18 @@ class 
QLocaleXmlReader (object): locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars) language = self.__langByName[locale.language][0] script = self.__textByName[locale.script][0] - country = self.__landByName[locale.country][0] + territory = self.__landByName[locale.territory][0] if language != 1: # C - if country == 0: - grumble('loadLocaleMap: No country id for "{}"\n'.format(locale.language)) + if territory == 0: + grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n') if script == 0: - # Find default script for the given language and country - see: + # Find default script for the given language and territory - see: # http://www.unicode.org/reports/tr35/#Likely_Subtags try: try: - to = likely[(locale.language, 'AnyScript', locale.country)] + to = likely[(locale.language, 'AnyScript', locale.territory)] except KeyError: to = likely[(locale.language, 'AnyScript', 'AnyTerritory')] except KeyError: @@ -162,11 +95,11 @@ class QLocaleXmlReader (object): locale.script = to[1] script = self.__textByName[locale.script][0] - yield (language, script, country), locale + yield (language, script, territory), locale def languageIndices(self, locales): index = 0 - for key, value in self.languages.iteritems(): + for key, value in self.languages.items(): i, count = 0, locales.count(key) if count > 0: i = index @@ -190,11 +123,11 @@ class QLocaleXmlReader (object): '_'.join(tag(give)), ids(give)) def defaultMap(self): - """Map language and script to their default country by ID. + """Map language and script to their default territory by ID. 
- Yields ((language, script), country) wherever the likely + Yields ((language, script), territory) wherever the likely sub-tags mapping says language's default locale uses the given - script and country.""" + script and territory.""" for have, give in self.__likely: if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory': assert have[0] == give[0], (have, give) @@ -202,14 +135,41 @@ class QLocaleXmlReader (object): self.__textByName[give[1]][0]), self.__landByName[give[2]][0]) + def enumify(self, name, suffix): + """Stick together the parts of an enumdata.py name. + + Names given in enumdata.py include spaces and hyphens that we + can't include in an identifier, such as the name of a member + of an enum type. Removing those would lose the word + boundaries, so make sure each word starts with a capital (but + don't simply capitalize() as some names contain words, + e.g. McDonald, that have later capitals in them). + + We also need to resolve duplication between languages and + territories (by adding a suffix to each) and add Script to the + ends of script-names that don't already end in it.""" + name = name.replace('-', ' ') + # Don't .capitalize() as McDonald is already camel-case (see enumdata.py): + name = ''.join(word[0].upper() + word[1:] for word in name.split()) + if suffix != 'Script': + assert not(name in self.__dupes and name.endswith(suffix)) + return name + suffix if name in self.__dupes else name + + if not name.endswith(suffix): + name += suffix + if name in self.__dupes: + raise Error(f'The script name "{name}" is messy') + return name + # Implementation details: - def __loadMap(self, category): + def __loadMap(self, category, enum): kid = self.__firstChildText - for element in self.__eachEltInGroup(self.root, category + 'List', category): - yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code') + for element in self.__eachEltInGroup(self.root, f'{category}List', category): + key = int(kid(element, 'id')) + yield 
key, enum[key][0], kid(element, 'code'), kid(element, 'name') def __likelySubtagsMap(self): - def triplet(element, keys=('language', 'script', 'country'), kid = self.__firstChildText): + def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText): return tuple(kid(element, key) for key in keys) kid = self.__firstChildElt @@ -246,7 +206,7 @@ class QLocaleXmlReader (object): return child child = child.nextSibling - raise Error('No {} child found'.format(name)) + raise Error(f'No {name} child found') @classmethod def __firstChildText(cls, elt, key): @@ -302,7 +262,7 @@ class Spacer (object): elif line.startswith('<') and not line.startswith('<!'): cut = line.find('>') tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0] - if '</{}>'.format(tag) not in line: + if f'</{tag}>' not in line: self.current += self.__each return indent + line + '\n' @@ -333,10 +293,28 @@ class QLocaleXmlWriter (object): self.__write('<localeDatabase>') # Output of various sections, in their usual order: - def enumData(self, languages, scripts, countries): - self.__enumTable('language', languages) - self.__enumTable('script', scripts) - self.__enumTable('country', countries) + def enumData(self, code2name): + """Output name/id/code tables for language, script and territory. + + Parameter, code2name, is a function taking 'language', + 'script' or 'territory' and returning a lookup function that + maps codes, of the relevant type, to their English names. This + lookup function is passed a code and the name, both taken from + enumdata.py, that QLocale uses, so the .get() of a dict will + work. 
The English name from this lookup will be used by + QLocale::*ToString() for the enum member whose name is based + on the enumdata.py name passed as fallback to the lookup.""" + from enumdata import language_map, script_map, territory_map + self.__enumTable('language', language_map, code2name) + self.__enumTable('script', script_map, code2name) + self.__enumTable('territory', territory_map, code2name) + # Prepare to detect any unused codes (see __writeLocale(), close()): + self.__languages = set(p[1] for p in language_map.values() + if not p[1].isspace()) + self.__scripts = set(p[1] for p in script_map.values() + if p[1] != 'Zzzz') + self.__territories = set(p[1] for p in territory_map.values() + if p[1] != 'ZZ') def likelySubTags(self, entries): self.__openTag('likelySubtags') @@ -350,13 +328,11 @@ class QLocaleXmlWriter (object): def locales(self, locales, calendars): self.__openTag('localeList') self.__openTag('locale') - Locale.C(calendars).toXml(self.inTag, calendars) + self.__writeLocale(Locale.C(calendars), calendars) self.__closeTag('locale') - keys = locales.keys() - keys.sort() - for key in keys: + for key in sorted(locales.keys()): self.__openTag('locale') - locales[key].toXml(self.inTag, calendars) + self.__writeLocale(locales[key], calendars) self.__closeTag('locale') self.__closeTag('localeList') @@ -364,13 +340,27 @@ class QLocaleXmlWriter (object): self.inTag('version', cldrVersion) def inTag(self, tag, text): - self.__write('<{0}>{1}</{0}>'.format(tag, text)) + self.__write(f'<{tag}>{text}</{tag}>') - def close(self): + def close(self, grumble): + """Finish writing and grumble about any issues discovered.""" if self.__rawOutput != self.__complain: self.__write('</localeDatabase>') self.__rawOutput = self.__complain + if self.__languages or self.__scripts or self.__territories: + grumble('Some enum members are unused, corresponding to these tags:\n') + import textwrap + def kvetch(kind, seq, g = grumble, w = textwrap.wrap): + g('\n\t'.join(w(f' 
{kind}: {", ".join(sorted(seq))}', width=80)) + '\n') + if self.__languages: + kvetch('Languages', self.__languages) + if self.__scripts: + kvetch('Scripts', self.__scripts) + if self.__territories: + kvetch('Territories', self.__territories) + grumble('It may make sense to deprecate them.\n') + # Implementation details @staticmethod def __printit(text): @@ -379,28 +369,39 @@ class QLocaleXmlWriter (object): def __complain(text): raise Error('Attempted to write data after closing :-(') - def __enumTable(self, tag, table): - self.__openTag(tag + 'List') - for key, value in table.iteritems(): + @staticmethod + def __xmlSafe(text): + return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;') + + def __enumTable(self, tag, table, code2name): + self.__openTag(f'{tag}List') + enname, safe = code2name(tag), self.__xmlSafe + for key, (name, code) in table.items(): self.__openTag(tag) - self.inTag('name', value[0]) + self.inTag('name', safe(enname(code, name))) self.inTag('id', key) - self.inTag('code', value[1]) + self.inTag('code', code) self.__closeTag(tag) - self.__closeTag(tag + 'List') + self.__closeTag(f'{tag}List') def __likelySubTag(self, tag, likely): self.__openTag(tag) self.inTag('language', likely[0]) self.inTag('script', likely[1]) - self.inTag('country', likely[2]) + self.inTag('territory', likely[2]) # self.inTag('variant', likely[3]) self.__closeTag(tag) + def __writeLocale(self, locale, calendars): + locale.toXml(self.inTag, calendars) + self.__languages.discard(locale.language_code) + self.__scripts.discard(locale.script_code) + self.__territories.discard(locale.territory_code) + def __openTag(self, tag): - self.__write('<{}>'.format(tag)) + self.__write(f'<{tag}>') def __closeTag(self, tag): - self.__write('</{}>'.format(tag)) + self.__write(f'</{tag}>') def __write(self, line): self.__rawOutput(self.__wrap(line)) @@ -432,16 +433,16 @@ class Locale (object): __asint = ("currencyDigits", "currencyRounding") # Convert day-name to Qt day-of-week
number: __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd") - # Convert from CLDR format-strings to QDateTimeParser ones: - __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat") # Just use the raw text: - __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym", + __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym", "decimal", "group", "zero", "list", "percent", "minus", "plus", "exp", "quotationStart", "quotationEnd", "alternateQuotationStart", "alternateQuotationEnd", "listPatternPartStart", "listPatternPartMiddle", "listPatternPartEnd", "listPatternPartTwo", "am", "pm", + "longDateFormat", "shortDateFormat", + "longTimeFormat", "shortTimeFormat", 'byte_unit', 'byte_si_quantified', 'byte_iec_quantified', "currencyIsoCode", "currencySymbol", "currencyDisplayName", "currencyFormat", "currencyNegativeFormat") @@ -466,14 +467,11 @@ class Locale (object): for k in cls.__asdow: data[k] = cls.__qDoW[lookup(k)] - for k in cls.__asfmt: - data[k] = convertFormat(lookup(k)) - for k in cls.__astxt + tuple(cls.propsMonthDay('days')): data['listDelim' if k == 'list' else k] = lookup(k) for k in cls.propsMonthDay('months'): - data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars) + data[k] = {cal: lookup('_'.join((k, cal))) for cal in calendars} grouping = lookup('groupSizes').split(';') data.update(groupLeast = int(grouping[0]), @@ -493,15 +491,15 @@ class Locale (object): form used by CLDR; its default is ('gregorian',). 
""" get = lambda k: getattr(self, k) - for key in ('language', 'script', 'country'): + for key in ('language', 'script', 'territory'): write(key, get(key)) - write('{}code'.format(key), get('{}_code'.format(key))) + write(f'{key}code', get(f'{key}_code')) for key in ('decimal', 'group', 'zero', 'list', 'percent', 'minus', 'plus', 'exp'): write(key, get(key)) - for key in ('languageEndonym', 'countryEndonym', + for key in ('languageEndonym', 'territoryEndonym', 'quotationStart', 'quotationEnd', 'alternateQuotationStart', 'alternateQuotationEnd', 'listPatternPartStart', 'listPatternPartMiddle', @@ -517,7 +515,7 @@ class Locale (object): '_'.join((k, cal)) for k in self.propsMonthDay('months') for cal in calendars): - write(key, escape(get(key)).encode('utf-8')) + write(key, escape(get(key))) write('groupSizes', ';'.join(str(x) for x in get('groupSizes'))) for key in ('currencyDigits', 'currencyRounding'): @@ -554,9 +552,9 @@ class Locale (object): (fullName, fullName), (firstThree, firstThree), (number, initial)), - 'islamic': ((u'Muharram', u'Safar', u'Rabiʻ I', u'Rabiʻ II', u'Jumada I', - u'Jumada II', u'Rajab', u'Shaʻban', u'Ramadan', u'Shawwal', - u'Dhuʻl-Qiʻdah', u'Dhuʻl-Hijjah'), + 'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I', + 'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal', + 'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'), (fullName, fullName), (islamicShort, islamicShort), (number, number)), @@ -565,7 +563,7 @@ class Locale (object): (fullName, fullName), (fullName, fullName), (number, number)), - }, + }, sizes=('long', 'short', 'narrow')): for cal in calendars: try: @@ -590,7 +588,7 @@ class Locale (object): return cls(cls.__monthNames(calendars), language='C', language_code='0', languageEndonym='', script='AnyScript', script_code='0', - country='AnyTerritory', country_code='0', countryEndonym='', + territory='AnyTerritory', territory_code='0', territoryEndonym='', groupSizes=(3, 3, 1), decimal='.', group=',', list=';', percent='%', zero='0', 
minus='-', plus='+', exp='e', @@ -605,8 +603,8 @@ class Locale (object): byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers), am='AM', pm='PM', firstDayOfWeek='mon', weekendStart='sat', weekendEnd='sun', - longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy', - longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss', + longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy', + longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss', longDays=';'.join(days), shortDays=';'.join(d[:3] for d in days), narrowDays='7;1;2;3;4;5;6', diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc new file mode 100644 index 0000000000..818aa8f9c3 --- /dev/null +++ b/util/locale_database/qlocalexml.rnc @@ -0,0 +1,119 @@ +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 +# This is RelaxNG compact schema for qLocaleXML intermediate locale data +# representation format produced and consumed by the qlocalexml module. +# +# To validate an xml file run: +# +# jing -c qlocalexml.rnc <your-file.xml> +# +# You can download jing from https://relaxng.org/jclark/jing.html if your +# package manager lacks the jing package. 
+ +start = element localeDatabase { + element version { text }, + element languageList { Language+ }, + element scriptList { Script+ }, + element territoryList { Territory+ }, + element likelySubtags { LikelySubtag+ }, + element localeList { Locale+ } +} + +Language = element language { TagDescriptor } +Script = element script { TagDescriptor } +Territory = element territory { TagDescriptor } +TagDescriptor = ( + element name { text }, + element id { xsd:nonNegativeInteger }, + element code { text } +) + +LikelySubtag = element likelySubtag { + element from { LocaleTriplet }, + element to { LocaleTriplet } +} + +LocaleTriplet = ( + element language { text }, + element script { text }, + element territory { text } +) + +WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat") +Digit = xsd:string { pattern = "\d" } +Punctuation = xsd:string { pattern = "\p{P}" } +GroupSizes = xsd:string { pattern = "\d;\d;\d" } + +Locale = element locale { + element language { text }, + element languagecode { text }, + element script { text }, + element scriptcode { text }, + element territory { text }, + element territorycode { text }, + element decimal { Punctuation }, + element group { text }, + element zero { Digit }, + element list { Punctuation }, + element percent { text }, + element minus { text }, + element plus { text }, + element exp { text }, + element languageEndonym { text }, + element territoryEndonym { text }, + element quotationStart { Punctuation }, + element quotationEnd { Punctuation }, + element alternateQuotationStart { Punctuation }, + element alternateQuotationEnd { Punctuation }, + element listPatternPartStart { text }, + element listPatternPartMiddle { text }, + element listPatternPartEnd { text }, + element listPatternPartTwo { text }, + element byte_unit { text }, + element byte_si_quantified { text }, + element byte_iec_quantified { text }, + element am { text }, + element pm { text }, + element firstDayOfWeek { text }, + element weekendStart { 
WeekDay }, + element weekendEnd { WeekDay }, + element longDateFormat { text }, + element shortDateFormat { text }, + element longTimeFormat { text }, + element shortTimeFormat { text }, + element currencyIsoCode { text }, + element currencySymbol { text }, + element currencyDisplayName { text }, + element currencyFormat { text }, + element currencyNegativeFormat { text }, + element longDays { text }, + element standaloneLongDays { text }, + element shortDays { text }, + element standaloneShortDays { text }, + element narrowDays { text }, + element standaloneNarrowDays { text }, + + # Some of these entries may be absent depending on command line arguments + element longMonths_gregorian { text }?, + element longMonths_persian { text }?, + element longMonths_islamic { text }?, + element standaloneLongMonths_gregorian { text }?, + element standaloneLongMonths_persian { text }?, + element standaloneLongMonths_islamic { text }?, + element shortMonths_gregorian { text }?, + element shortMonths_persian { text }?, + element shortMonths_islamic { text }?, + element standaloneShortMonths_gregorian { text }?, + element standaloneShortMonths_persian { text }?, + element standaloneShortMonths_islamic { text }?, + element narrowMonths_gregorian { text }?, + element narrowMonths_persian { text }?, + element narrowMonths_islamic { text }?, + element standaloneNarrowMonths_gregorian { text }?, + element standaloneNarrowMonths_persian { text }?, + element standaloneNarrowMonths_islamic { text }?, + + element groupSizes { GroupSizes }, + element currencyDigits { xsd:nonNegativeInteger }, + element currencyRounding { xsd:nonNegativeInteger } +} diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index c15d6d2f55..b20e4fd155 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -1,85 +1,58 @@ -#!/usr/bin/env python2 -############################################################################# -## 
-## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# -"""Script to generate C++ code from CLDR data in qLocaleXML form - -See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself. -Pass the output file from that as first parameter to this script; pass -the root of the qtbase check-out as second parameter. -""" - -import os -import datetime - -from qlocalexml import QLocaleXmlReader -from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor - -def compareLocaleKeys(key1, key2): - if key1 == key2: - return 0 +#!/usr/bin/env python3 +# Copyright (C) 2021 The Qt Company Ltd. 
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 +"""Script to generate C++ code from CLDR data in QLocaleXML form - if key1[0] != key2[0]: # First sort by language: - return key1[0] - key2[0] +See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself. +Pass the output file from that as first parameter to this script; pass the ISO +639-3 data file as second parameter. You can optionally pass the root of the +qtbase check-out as third parameter; it defaults to the root of the qtbase +check-out containing this script. - defaults = compareLocaleKeys.default_map - # maps {(language, script): country} by ID - try: - country = defaults[key1[:2]] - except KeyError: - pass - else: - if key1[2] == country: - return -1 - if key2[2] == country: - return 1 - - if key1[1] == key2[1]: - return key1[2] - key2[2] +The ISO 639-3 data file can be downloaded from the SIL website: - try: - country = defaults[key2[:2]] - except KeyError: - pass - else: - if key2[2] == country: - return 1 - if key1[2] == country: - return -1 + https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab +""" - return key1[1] - key2[1] +import datetime +import argparse +from pathlib import Path +from typing import Optional +from qlocalexml import QLocaleXmlReader +from localetools import * +from iso639_3 import LanguageCodeData + +class LocaleKeySorter: + """Sort-ordering representation of a locale key. + + This is for passing to a sorting algorithm as key-function, that + it applies to each entry in the list to decide which belong + earlier. It adds an entry to the (language, script, territory) + triple, just before script, that sorts earlier if the territory is + the default for the given language and script, later otherwise. + """ + + # TODO: study the relationship between this and CLDR's likely + # sub-tags algorithm. Work out how locale sort-order impacts + # QLocale's likely sub-tag matching algorithms. 
Make sure this is + # sorting in an order compatible with those algorithms. + + def __init__(self, defaults): + self.map = dict(defaults) + def foreign(self, key): + default = self.map.get(key[:2]) + return default is None or default != key[2] + def __call__(self, key): + # TODO: should we compare territory before or after script ? + return (key[0], self.foreign(key)) + key[1:] class StringDataToken: def __init__(self, index, length, bits): if index > 0xffff: - raise ValueError('Start-index ({}) exceeds the uint16 range!'.format(index)) + raise ValueError(f'Start-index ({index}) exceeds the uint16 range!') if length >= (1 << bits): - raise ValueError('Data size ({}) exceeds the {}-bit range!'.format(length, bits)) + raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!') self.index = index self.length = length @@ -131,11 +104,10 @@ class StringData: def write(self, fd): if len(self.data) > 0xffff: - raise ValueError('Data is too big ({}) for quint16 index to its end!' - .format(len(self.data)), + raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!', self.name) - fd.write("\nstatic const char16_t {}[] = {{\n".format(self.name)) - fd.write(wrap_list(self.data)) + fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n") + fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ') fd.write("\n};\n") def currencyIsoCodeData(s): @@ -144,13 +116,16 @@ def currencyIsoCodeData(s): return "{0,0,0}" class LocaleSourceEditor (SourceFileEditor): - __upinit = SourceFileEditor.__init__ - def __init__(self, path, temp, version): - self.__upinit(path, temp) - self.writer.write(""" + def __init__(self, path: Path, temp: Path, version: str): + super().__init__(path, temp) + self.version = version + + def onEnter(self) -> None: + super().onEnter() + self.writer.write(f""" /* - This part of the file was generated on {} from the - Common Locale Data Repository v{} + This part of the file was generated on {datetime.date.today()} 
from the + Common Locale Data Repository v{self.version} http://www.unicode.org/cldr/ @@ -159,7 +134,7 @@ class LocaleSourceEditor (SourceFileEditor): edited) CLDR data; see qtbase/util/locale_database/. */ -""".format(datetime.date.today(), version)) +""") class LocaleDataWriter (LocaleSourceEditor): def likelySubtags(self, likely): @@ -173,23 +148,22 @@ class LocaleDataWriter (LocaleSourceEditor): def keyLikely(entry): have = entry[1] # Numeric id triple return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script - likely = list(likely) # Turn generator into list so we can sort it - likely.sort(key=keyLikely) + likely = sorted(likely, key=keyLikely) i = 0 - self.writer.write('static const QLocaleId likely_subtags[] = {\n') + self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n') for had, have, got, give in likely: i += 1 self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have)) self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give)) self.writer.write(' ' if i == len(likely) else ',') - self.writer.write(' // {} -> {}\n'.format(had, got)) + self.writer.write(f' // {had} -> {got}\n') self.writer.write('};\n\n') def localeIndex(self, indices): - self.writer.write('static const quint16 locale_index[] = {\n') - for pair in indices: - self.writer.write('{:6d}, // {}\n'.format(*pair)) + self.writer.write('static constexpr quint16 locale_index[] = {\n') + for index, name in indices: + self.writer.write(f'{index:6d}, // {name}\n') self.writer.write(' 0 // trailing 0\n') self.writer.write('};\n\n') @@ -208,7 +182,7 @@ class LocaleDataWriter (LocaleSourceEditor): endonyms_data = StringData('endonyms_data') # Locale data - self.writer.write('static const QLocaleData locale_data[] = {\n') + self.writer.write('static constexpr QLocaleData locale_data[] = {\n') # Table headings: keep each label centred in its field, matching line_format: self.writer.write(' // ' # Width 6 + comma @@ -309,7 +283,7 @@ class LocaleDataWriter 
(LocaleSourceEditor): locale.minus, locale.plus, locale.exp, locale.quotationStart, locale.quotationEnd, locale.alternateQuotationStart, locale.alternateQuotationEnd)) + - tuple (date_format_data.append(f) for f in # 2 entries: + tuple(date_format_data.append(f) for f in # 2 entries: (locale.longDateFormat, locale.shortDateFormat)) + tuple(time_format_data.append(f) for f in # 2 entries: (locale.longTimeFormat, locale.shortTimeFormat)) + @@ -327,7 +301,7 @@ class LocaleDataWriter (LocaleSourceEditor): currency_format_data.append(locale.currencyFormat), currency_format_data.append(locale.currencyNegativeFormat), endonyms_data.append(locale.languageEndonym), - endonyms_data.append(locale.countryEndonym)) # 6 entries + endonyms_data.append(locale.territoryEndonym)) # 6 entries ) # Total: 37 entries assert len(ranges) == 37 @@ -340,8 +314,7 @@ class LocaleDataWriter (LocaleSourceEditor): locale.currencyRounding, # unused (QTBUG-81343) locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd, locale.groupTop, locale.groupHigher, locale.groupLeast) )) - + ', // {}/{}/{}\n'.format( - locale.language, locale.script, locale.country)) + + f', // {locale.language}/{locale.script}/{locale.territory}\n') self.writer.write(formatLine(*( # All zeros, matching the format: (0,) * 3 + (0,) * 37 * 2 + (currencyIsoCodeData(0),) @@ -359,32 +332,35 @@ class LocaleDataWriter (LocaleSourceEditor): @staticmethod def __writeNameData(out, book, form): - out('static const char {}_name_list[] =\n'.format(form)) + out(f'static constexpr char {form}_name_list[] =\n') out('"Default\\0"\n') for key, value in book.items(): if key == 0: continue - out('"' + value[0] + '\\0"\n') + enum, name = value[0], value[-1] + if names_clash(name, enum): + out(f'"{name}\\0" // {enum}\n') + else: + out(f'"{name}\\0"\n') # Automagically utf-8 encoded out(';\n\n') - out('static const quint16 {}_name_index[] = {{\n'.format(form)) - out(' 0, // Any{}\n'.format(form.capitalize())) + out(f'static constexpr quint16 
{form}_name_index[] = {{\n') + out(f' 0, // Any{form.capitalize()}\n') index = 8 for key, value in book.items(): if key == 0: continue - name = value[0] - out('{:6d}, // {}\n'.format(index, name)) - index += len(name) + 1 + out(f'{index:6d}, // {value[0]}\n') + index += len(value[-1].encode('utf-8')) + 1 out('};\n\n') @staticmethod def __writeCodeList(out, book, form, width): - out('static const unsigned char {}_code_list[] =\n'.format(form)) + out(f'static constexpr unsigned char {form}_code_list[] =\n') for key, value in book.items(): code = value[1] code += r'\0' * max(width - len(code), 0) - out('"{}" // {}\n'.format(code, value[0])) + out(f'"{code}" // {value[0]}\n') out(';\n\n') def languageNames(self, languages): @@ -393,20 +369,44 @@ class LocaleDataWriter (LocaleSourceEditor): def scriptNames(self, scripts): self.__writeNameData(self.writer.write, scripts, 'script') - def countryNames(self, countries): - self.__writeNameData(self.writer.write, countries, 'territory') + def territoryNames(self, territories): + self.__writeNameData(self.writer.write, territories, 'territory') # TODO: unify these next three into the previous three; kept # separate for now to verify we're not changing data. 
- def languageCodes(self, languages): - self.__writeCodeList(self.writer.write, languages, 'language', 3) + def languageCodes(self, languages, code_data: LanguageCodeData): + out = self.writer.write + + out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n') + + def q(val: Optional[str], size: int) -> str: + """Quote the value and adjust the result for tabular view.""" + s = '' if val is None else ', '.join(f"'{c}'" for c in val) + return f'{{{s}}}' if size == 0 else f'{{{s}}},'.ljust(size * 5 + 2) + + for key, value in languages.items(): + code = value[1] + if key < 2: + result = code_data.query('und') + else: + result = code_data.query(code) + assert code == result.id() + assert result is not None + + codeString = q(result.part1Code, 2) + codeString += q(result.part2BCode, 3) + codeString += q(result.part2TCode, 3) + codeString += q(result.part3Code, 0) + out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n') + + out('};\n\n') def scriptCodes(self, scripts): self.__writeCodeList(self.writer.write, scripts, 'script', 4) - def countryCodes(self, countries): # TODO: unify with countryNames() - self.__writeCodeList(self.writer.write, countries, 'territory', 3) + def territoryCodes(self, territories): # TODO: unify with territoryNames() + self.__writeCodeList(self.writer.write, territories, 'territory', 3) class CalendarDataWriter (LocaleSourceEditor): formatCalendar = ( @@ -416,7 +416,7 @@ class CalendarDataWriter (LocaleSourceEditor): def write(self, calendar, locales, names): months_data = StringData('months_data') - self.writer.write('static const QCalendarLocale locale_data[] = {\n') + self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n') self.writer.write( ' //' # IDs, width 7 (6 + comma) @@ -444,7 +444,7 @@ class CalendarDataWriter (LocaleSourceEditor): (locale.standaloneShortMonths, locale.shortMonths, locale.standaloneNarrowMonths, locale.narrowMonths))) except ValueError as e: - e.args += 
(locale.language, locale.script, locale.country, stem) + e.args += (locale.language, locale.script, locale.territory) raise self.writer.write( @@ -452,25 +452,41 @@ class CalendarDataWriter (LocaleSourceEditor): key + tuple(r.index for r in ranges) + tuple(r.length for r in ranges) )) - + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country)) + + f'// {locale.language}/{locale.script}/{locale.territory}\n') self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) )) + '// trailing zeros\n') self.writer.write('};\n') months_data.write(self.writer) + +class TestLocaleWriter (LocaleSourceEditor): + def localeList(self, locales): + self.writer.write('const LocaleListItem g_locale_list[] = {\n') + from enumdata import language_map, territory_map + # TODO: update testlocales/ to include script. + # For now, only mention each (lang, land) pair once: + pairs = set((lang, land) for lang, script, land in locales) + for lang, script, land in locales: + if (lang, land) in pairs: + pairs.discard((lang, land)) + langName = language_map[lang][0] + landName = territory_map[land][0] + self.writer.write(f' {{ {lang:6d},{land:6d} }}, // {langName}/{landName}\n') + self.writer.write('};\n\n') + + class LocaleHeaderWriter (SourceFileEditor): - __upinit = SourceFileEditor.__init__ - def __init__(self, path, temp, dupes): - self.__upinit(path, temp) - self.__dupes = dupes + def __init__(self, path, temp, enumify): + super().__init__(path, temp) + self.__enumify = enumify def languages(self, languages): self.__enum('Language', languages, self.__language) self.writer.write('\n') - def countries(self, countries): + def territories(self, territories): self.writer.write(" // ### Qt 7: Rename to Territory\n") - self.__enum('Country', countries, self.__country, 'Territory') + self.__enum('Country', territories, self.__territory, 'Territory') def scripts(self, scripts): self.__enum('Script', scripts, self.__script) @@ -478,7 +494,7 @@ class LocaleHeaderWriter 
(SourceFileEditor): # Implementation details from enumdata import (language_aliases as __language, - country_aliases as __country, + territory_aliases as __territory, script_aliases as __script) def __enum(self, name, book, alias, suffix = None): @@ -487,153 +503,135 @@ class LocaleHeaderWriter (SourceFileEditor): if suffix is None: suffix = name - out, dupes = self.writer.write, self.__dupes - out(' enum {} : ushort {{\n'.format(name)) + out, enumify = self.writer.write, self.__enumify + out(f' enum {name} : ushort {{\n') for key, value in book.items(): - member = value[0].replace('-', ' ') - if name == 'Script': - # Don't .capitalize() as some names are already camel-case (see enumdata.py): - member = ''.join(word[0].upper() + word[1:] for word in member.split()) - if not member.endswith('Script'): - member += 'Script' - if member in dupes: - raise Error('The script name "{}" is messy'.format(member)) - else: - member = ''.join(member.split()) - member = member + suffix if member in dupes else member - out(' {} = {},\n'.format(member, key)) + member = enumify(value[0], suffix) + out(f' {member} = {key},\n') out('\n ' - + ',\n '.join('{} = {}'.format(*pair) - for pair in sorted(alias.items())) - + ',\n\n Last{} = {}'.format(suffix, member)) + + ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items())) + + f',\n\n Last{suffix} = {member}') # for "LastCountry = LastTerritory" # ### Qt 7: Remove if suffix != name: - out(',\n Last{} = Last{}'.format(name, suffix)) + out(f',\n Last{name} = Last{suffix}') out('\n };\n') -def usage(name, err, message = ''): - err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase -""".format(name)) # TODO: elaborate - if message: - err.write('\n' + message + '\n') - -def main(args, out, err): - # TODO: Make calendars a command-line parameter - # map { CLDR name: Qt file name } - calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew', - - name = args.pop(0) - if len(args) != 2: - usage(name, 
err, 'I expect two arguments') - return 1 - qlocalexml = args.pop(0) - qtsrcdir = args.pop(0) - - if not (os.path.isdir(qtsrcdir) - and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf)) +def main(argv, out, err): + """Updates QLocale's CLDR data from a QLocaleXML file. + + Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as + arguments. In argv[1:] it expects the QLocaleXML file as first + parameter and the ISO 639-3 data table as second + parameter. Accepts the root of the qtbase checkout as third + parameter (default is inferred from this script's path) and a + --calendars option to select which calendars to support (all + available by default). + + Updates various src/corelib/t*/q*_data_p.h files within the qtbase + checkout to contain data extracted from the QLocaleXML file.""" + calendars_map = { + # CLDR name: Qt file name fragment + 'gregorian': 'roman', + 'persian': 'jalali', + 'islamic': 'hijri', + } + all_calendars = list(calendars_map.keys()) + + parser = argparse.ArgumentParser( + prog=Path(argv[0]).name, + description='Generate C++ code from CLDR data in QLocaleXML form.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('input_file', help='input XML file name', + metavar='input-file.xml') + parser.add_argument('iso_path', help='path to the ISO 639-3 data file', + metavar='iso-639-3.tab') + parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree', + nargs='?', default=qtbase_root) + parser.add_argument('--calendars', help='select calendars to emit data for', + nargs='+', metavar='CALENDAR', + choices=all_calendars, default=all_calendars) + args = parser.parse_args(argv[1:]) + + qlocalexml = args.input_file + qtsrcdir = Path(args.qtbase_path) + calendars = {cal: calendars_map[cal] for cal in args.calendars} + + if not (qtsrcdir.is_dir() + and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file() for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))): 
- usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir) - return 1 + parser.error(f'Missing expected files under qtbase source root {qtsrcdir}') reader = QLocaleXmlReader(qlocalexml) locale_map = dict(reader.loadLocaleMap(calendars, err.write)) + locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap())) - locale_keys = locale_map.keys() - compareLocaleKeys.default_map = dict(reader.defaultMap()) - locale_keys.sort(compareLocaleKeys) - - try: - writer = LocaleDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', - 'qlocale_data_p.h'), - qtsrcdir, reader.cldrVersion) - except IOError as e: - err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1])) - return 1 + code_data = LanguageCodeData(args.iso_path) try: - writer.likelySubtags(reader.likelyMap()) - writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map))) - writer.localeData(locale_map, locale_keys) - writer.writer.write('\n') - writer.languageNames(reader.languages) - writer.scriptNames(reader.scripts) - writer.countryNames(reader.countries) - # TODO: merge the next three into the previous three - writer.languageCodes(reader.languages) - writer.scriptCodes(reader.scripts) - writer.countryCodes(reader.countries) - except Error as e: - writer.cleanup() - err.write('\nError updating locale data: ' + e.message + '\n') + with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'), + qtsrcdir, reader.cldrVersion) as writer: + writer.likelySubtags(reader.likelyMap()) + writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map))) + writer.localeData(locale_map, locale_keys) + writer.writer.write('\n') + writer.languageNames(reader.languages) + writer.scriptNames(reader.scripts) + writer.territoryNames(reader.territories) + # TODO: merge the next three into the previous three + writer.languageCodes(reader.languages, code_data) + writer.scriptCodes(reader.scripts) + 
writer.territoryCodes(reader.territories) + except Exception as e: + err.write(f'\nError updating locale data: {e}\n') return 1 - writer.close() - # Generate calendar data for calendar, stem in calendars.items(): try: - writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time', - 'q{}calendar_data_p.h'.format(stem)), - qtsrcdir, reader.cldrVersion) - except IOError as e: - err.write('Failed to open files to transcribe ' + calendar - + ' data ' + (e.message or e.args[1])) - return 1 - - try: - writer.write(calendar, locale_map, locale_keys) - except Error as e: - writer.cleanup() - err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n') - return 1 - - writer.close() + with CalendarDataWriter( + qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'), + qtsrcdir, reader.cldrVersion) as writer: + writer.write(calendar, locale_map, locale_keys) + except Exception as e: + err.write(f'\nError updating {calendar} locale data: {e}\n') # qlocale.h try: - writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'), - qtsrcdir, reader.dupes) - except IOError as e: - err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1])) - return 1 - - try: - writer.languages(reader.languages) - writer.scripts(reader.scripts) - writer.countries(reader.countries) - except Error as e: - writer.cleanup() - err.write('\nError updating qlocale.h: ' + e.message + '\n') - return 1 - - writer.close() + with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'), + qtsrcdir, reader.enumify) as writer: + writer.languages(reader.languages) + writer.scripts(reader.scripts) + writer.territories(reader.territories) + except Exception as e: + err.write(f'\nError updating qlocale.h: {e}\n') # qlocale.qdoc try: - writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'), - qtsrcdir) - except IOError as e: - err.write('Failed to open files to transcribe 
qlocale.qdoc: ' + (e.message or e.args[1])) + with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc: + DOCSTRING = " QLocale's data is based on Common Locale Data Repository " + for line in qdoc.reader: + if DOCSTRING in line: + qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n') + else: + qdoc.writer.write(line) + except Exception as e: + err.write(f'\nError updating qlocale.h: {e}\n') return 1 - DOCSTRING = " QLocale's data is based on Common Locale Data Repository " + # ./testlocales/localemodel.cpp try: - for line in writer.reader: - if DOCSTRING in line: - writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n') - else: - writer.writer.write(line) - except Error as e: - writer.cleanup() - err.write('\nError updating qlocale.qdoc: ' + e.message + '\n') - return 1 + path = 'util/locale_database/testlocales/localemodel.cpp' + with TestLocaleWriter(qtsrcdir.joinpath(path), qtsrcdir, + reader.cldrVersion) as test: + test.localeList(locale_keys) + except Exception as e: + err.write(f'\nError updating localemodel.cpp: {e}\n') - writer.close() return 0 if __name__ == "__main__": diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp index d171bc9855..7f0150c7e0 100644 --- a/util/locale_database/testlocales/localemodel.cpp +++ b/util/locale_database/testlocales/localemodel.cpp @@ -1,30 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. 
For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include "localemodel.h" #include <QLocale> @@ -39,236 +14,658 @@ struct LocaleListItem int territory; }; +// GENERATED PART STARTS HERE + +/* + This part of the file was generated on 2024-04-22 from the + Common Locale Data Repository v44.1 + + http://www.unicode.org/cldr/ + + Do not edit this section: instead regenerate it using + cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or + edited) CLDR data; see qtbase/util/locale_database/. 
+*/ + const LocaleListItem g_locale_list[] = { { 1, 0 }, // C/AnyTerritory - { 3, 69 }, // Afan/Ethiopia - { 3, 111 }, // Afan/Kenya - { 4, 59 }, // Afar/Djibouti - { 4, 67 }, // Afar/Eritrea - { 4, 69 }, // Afar/Ethiopia - { 5, 195 }, // Afrikaans/SouthAfrica - { 5, 148 }, // Afrikaans/Namibia - { 6, 2 }, // Albanian/Albania - { 7, 69 }, // Amharic/Ethiopia - { 8, 186 }, // Arabic/SaudiArabia - { 8, 3 }, // Arabic/Algeria - { 8, 17 }, // Arabic/Bahrain - { 8, 64 }, // Arabic/Egypt - { 8, 103 }, // Arabic/Iraq - { 8, 109 }, // Arabic/Jordan - { 8, 115 }, // Arabic/Kuwait - { 8, 119 }, // Arabic/Lebanon - { 8, 122 }, // Arabic/LibyanArabJamahiriya - { 8, 145 }, // Arabic/Morocco - { 8, 162 }, // Arabic/Oman - { 8, 175 }, // Arabic/Qatar - { 8, 201 }, // Arabic/Sudan - { 8, 207 }, // Arabic/SyrianArabRepublic - { 8, 216 }, // Arabic/Tunisia - { 8, 223 }, // Arabic/UnitedArabEmirates - { 8, 237 }, // Arabic/Yemen - { 9, 11 }, // Armenian/Armenia - { 10, 100 }, // Assamese/India - { 12, 15 }, // Azerbaijani/Azerbaijan - { 14, 197 }, // Basque/Spain - { 15, 18 }, // Bengali/Bangladesh - { 15, 100 }, // Bengali/India - { 16, 25 }, // Bhutani/Bhutan - { 20, 33 }, // Bulgarian/Bulgaria - { 22, 20 }, // Byelorussian/Belarus - { 23, 36 }, // Cambodian/Cambodia - { 24, 197 }, // Catalan/Spain - { 25, 44 }, // Chinese/China - { 25, 97 }, // Chinese/HongKong - { 25, 126 }, // Chinese/Macau - { 25, 190 }, // Chinese/Singapore - { 25, 208 }, // Chinese/Taiwan - { 27, 54 }, // Croatian/Croatia - { 28, 57 }, // Czech/CzechRepublic - { 29, 58 }, // Danish/Denmark - { 30, 151 }, // Dutch/Netherlands - { 30, 21 }, // Dutch/Belgium - { 31, 225 }, // English/UnitedStates - { 31, 4 }, // English/AmericanSamoa - { 31, 13 }, // English/Australia - { 31, 21 }, // English/Belgium - { 31, 22 }, // English/Belize - { 31, 28 }, // English/Botswana - { 31, 38 }, // English/Canada - { 31, 89 }, // English/Guam - { 31, 97 }, // English/HongKong - { 31, 100 }, // English/India - { 31, 104 }, // 
English/Ireland - { 31, 107 }, // English/Jamaica - { 31, 133 }, // English/Malta - { 31, 134 }, // English/MarshallIslands - { 31, 148 }, // English/Namibia - { 31, 154 }, // English/NewZealand - { 31, 160 }, // English/NorthernMarianaIslands - { 31, 163 }, // English/Pakistan - { 31, 170 }, // English/Philippines - { 31, 190 }, // English/Singapore - { 31, 195 }, // English/SouthAfrica - { 31, 215 }, // English/TrinidadAndTobago - { 31, 224 }, // English/UnitedKingdom - { 31, 226 }, // English/UnitedStatesMinorOutlyingIslands - { 31, 234 }, // English/USVirginIslands - { 31, 240 }, // English/Zimbabwe - { 33, 68 }, // Estonian/Estonia - { 34, 71 }, // Faroese/FaroeIslands - { 36, 73 }, // Finnish/Finland - { 37, 74 }, // French/France - { 37, 21 }, // French/Belgium - { 37, 38 }, // French/Canada - { 37, 125 }, // French/Luxembourg - { 37, 142 }, // French/Monaco - { 37, 206 }, // French/Switzerland - { 40, 197 }, // Galician/Spain - { 41, 81 }, // Georgian/Georgia - { 42, 82 }, // German/Germany - { 42, 14 }, // German/Austria - { 42, 21 }, // German/Belgium - { 42, 123 }, // German/Liechtenstein - { 42, 125 }, // German/Luxembourg - { 42, 206 }, // German/Switzerland - { 43, 85 }, // Greek/Greece - { 43, 56 }, // Greek/Cyprus - { 44, 86 }, // Greenlandic/Greenland - { 46, 100 }, // Gujarati/India - { 47, 83 }, // Hausa/Ghana - { 47, 156 }, // Hausa/Niger - { 47, 157 }, // Hausa/Nigeria - { 48, 105 }, // Hebrew/Israel - { 49, 100 }, // Hindi/India - { 50, 98 }, // Hungarian/Hungary - { 51, 99 }, // Icelandic/Iceland - { 52, 101 }, // Indonesian/Indonesia - { 57, 104 }, // Irish/Ireland - { 58, 106 }, // Italian/Italy - { 58, 206 }, // Italian/Switzerland - { 59, 108 }, // Japanese/Japan - { 61, 100 }, // Kannada/India - { 63, 110 }, // Kazakh/Kazakhstan - { 64, 179 }, // Kinyarwanda/Rwanda - { 65, 116 }, // Kirghiz/Kyrgyzstan - { 66, 114 }, // Korean/RepublicOfKorea - { 67, 102 }, // Kurdish/Iran - { 67, 103 }, // Kurdish/Iraq - { 67, 207 }, // 
Kurdish/SyrianArabRepublic - { 67, 217 }, // Kurdish/Turkey - { 69, 117 }, // Laothian/Lao - { 71, 118 }, // Latvian/Latvia - { 72, 49 }, // Lingala/DemocraticRepublicOfCongo - { 72, 50 }, // Lingala/PeoplesRepublicOfCongo - { 73, 124 }, // Lithuanian/Lithuania - { 74, 127 }, // Macedonian/Macedonia - { 76, 130 }, // Malay/Malaysia - { 76, 32 }, // Malay/BruneiDarussalam - { 77, 100 }, // Malayalam/India - { 78, 133 }, // Maltese/Malta - { 80, 100 }, // Marathi/India - { 82, 143 }, // Mongolian/Mongolia - { 84, 150 }, // Nepali/Nepal - { 85, 161 }, // Norwegian/Norway - { 87, 100 }, // Oriya/India - { 88, 1 }, // Pashto/Afghanistan - { 89, 102 }, // Persian/Iran - { 89, 1 }, // Persian/Afghanistan - { 90, 172 }, // Polish/Poland - { 91, 173 }, // Portuguese/Portugal - { 91, 30 }, // Portuguese/Brazil - { 92, 100 }, // Punjabi/India - { 92, 163 }, // Punjabi/Pakistan - { 95, 177 }, // Romanian/Romania - { 96, 178 }, // Russian/RussianFederation - { 96, 222 }, // Russian/Ukraine - { 99, 100 }, // Sanskrit/India - { 100, 241 }, // Serbian/SerbiaAndMontenegro - { 100, 27 }, // Serbian/BosniaAndHerzegowina - { 100, 238 }, // Serbian/Yugoslavia - { 101, 241 }, // SerboCroatian/SerbiaAndMontenegro - { 101, 27 }, // SerboCroatian/BosniaAndHerzegowina - { 101, 238 }, // SerboCroatian/Yugoslavia - { 102, 195 }, // Sesotho/SouthAfrica - { 103, 195 }, // Setswana/SouthAfrica - { 107, 195 }, // Siswati/SouthAfrica - { 108, 191 }, // Slovak/Slovakia - { 109, 192 }, // Slovenian/Slovenia - { 110, 194 }, // Somali/Somalia - { 110, 59 }, // Somali/Djibouti - { 110, 69 }, // Somali/Ethiopia - { 110, 111 }, // Somali/Kenya - { 111, 197 }, // Spanish/Spain - { 111, 10 }, // Spanish/Argentina - { 111, 26 }, // Spanish/Bolivia - { 111, 43 }, // Spanish/Chile - { 111, 47 }, // Spanish/Colombia - { 111, 52 }, // Spanish/CostaRica - { 111, 61 }, // Spanish/DominicanRepublic - { 111, 63 }, // Spanish/Ecuador - { 111, 65 }, // Spanish/ElSalvador - { 111, 90 }, // Spanish/Guatemala - { 111, 
96 }, // Spanish/Honduras - { 111, 139 }, // Spanish/Mexico - { 111, 155 }, // Spanish/Nicaragua - { 111, 166 }, // Spanish/Panama - { 111, 168 }, // Spanish/Paraguay - { 111, 169 }, // Spanish/Peru - { 111, 174 }, // Spanish/PuertoRico - { 111, 225 }, // Spanish/UnitedStates - { 111, 227 }, // Spanish/Uruguay - { 111, 231 }, // Spanish/Venezuela - { 113, 111 }, // Swahili/Kenya - { 113, 210 }, // Swahili/Tanzania - { 114, 205 }, // Swedish/Sweden - { 114, 73 }, // Swedish/Finland - { 116, 209 }, // Tajik/Tajikistan - { 117, 100 }, // Tamil/India - { 118, 178 }, // Tatar/RussianFederation - { 119, 100 }, // Telugu/India - { 120, 211 }, // Thai/Thailand - { 122, 67 }, // Tigrinya/Eritrea - { 122, 69 }, // Tigrinya/Ethiopia - { 124, 195 }, // Tsonga/SouthAfrica - { 125, 217 }, // Turkish/Turkey - { 129, 222 }, // Ukrainian/Ukraine - { 130, 100 }, // Urdu/India - { 130, 163 }, // Urdu/Pakistan - { 131, 228 }, // Uzbek/Uzbekistan - { 131, 1 }, // Uzbek/Afghanistan - { 132, 232 }, // Vietnamese/VietNam - { 134, 224 }, // Welsh/UnitedKingdom - { 136, 195 }, // Xhosa/SouthAfrica - { 138, 157 }, // Yoruba/Nigeria - { 140, 195 }, // Zulu/SouthAfrica - { 141, 161 }, // Nynorsk/Norway - { 142, 27 }, // Bosnian/BosniaAndHerzegowina - { 143, 131 }, // Divehi/Maldives - { 144, 224 }, // Manx/UnitedKingdom - { 145, 224 }, // Cornish/UnitedKingdom - { 146, 83 }, // Akan/Ghana - { 147, 100 }, // Konkani/India - { 148, 83 }, // Ga/Ghana - { 149, 157 }, // Igbo/Nigeria - { 150, 111 }, // Kamba/Kenya - { 151, 207 }, // Syriac/SyrianArabRepublic - { 152, 67 }, // Blin/Eritrea - { 153, 67 }, // Geez/Eritrea - { 153, 69 }, // Geez/Ethiopia - { 154, 157 }, // Koro/Nigeria - { 155, 69 }, // Sidamo/Ethiopia - { 156, 157 }, // Atsam/Nigeria - { 157, 67 }, // Tigre/Eritrea - { 158, 157 }, // Jju/Nigeria - { 159, 106 }, // Friulian/Italy - { 160, 195 }, // Venda/SouthAfrica - { 161, 83 }, // Ewe/Ghana - { 161, 212 }, // Ewe/Togo - { 163, 225 }, // Hawaiian/UnitedStates - { 164, 157 }, // 
Tyap/Nigeria - { 165, 129 }, // Chewa/Malawi + { 2, 90 }, // Abkhazian/Georgia + { 3, 77 }, // Afar/Ethiopia + { 3, 67 }, // Afar/Djibouti + { 3, 74 }, // Afar/Eritrea + { 4, 216 }, // Afrikaans/South Africa + { 4, 162 }, // Afrikaans/Namibia + { 5, 40 }, // Aghem/Cameroon + { 6, 92 }, // Akan/Ghana + { 8, 40 }, // Akoose/Cameroon + { 9, 3 }, // Albanian/Albania + { 9, 126 }, // Albanian/Kosovo + { 9, 140 }, // Albanian/Macedonia + { 11, 77 }, // Amharic/Ethiopia + { 14, 71 }, // Arabic/Egypt + { 14, 4 }, // Arabic/Algeria + { 14, 19 }, // Arabic/Bahrain + { 14, 48 }, // Arabic/Chad + { 14, 55 }, // Arabic/Comoros + { 14, 67 }, // Arabic/Djibouti + { 14, 74 }, // Arabic/Eritrea + { 14, 113 }, // Arabic/Iraq + { 14, 116 }, // Arabic/Israel + { 14, 122 }, // Arabic/Jordan + { 14, 127 }, // Arabic/Kuwait + { 14, 132 }, // Arabic/Lebanon + { 14, 135 }, // Arabic/Libya + { 14, 149 }, // Arabic/Mauritania + { 14, 159 }, // Arabic/Morocco + { 14, 176 }, // Arabic/Oman + { 14, 180 }, // Arabic/Palestinian Territories + { 14, 190 }, // Arabic/Qatar + { 14, 205 }, // Arabic/Saudi Arabia + { 14, 215 }, // Arabic/Somalia + { 14, 219 }, // Arabic/South Sudan + { 14, 222 }, // Arabic/Sudan + { 14, 227 }, // Arabic/Syria + { 14, 238 }, // Arabic/Tunisia + { 14, 245 }, // Arabic/United Arab Emirates + { 14, 257 }, // Arabic/Western Sahara + { 14, 258 }, // Arabic/world + { 14, 259 }, // Arabic/Yemen + { 15, 220 }, // Aragonese/Spain + { 17, 12 }, // Armenian/Armenia + { 18, 110 }, // Assamese/India + { 19, 220 }, // Asturian/Spain + { 20, 230 }, // Asu/Tanzania + { 21, 169 }, // Atsam/Nigeria + { 25, 17 }, // Azerbaijani/Azerbaijan + { 25, 112 }, // Azerbaijani/Iran + { 25, 113 }, // Azerbaijani/Iraq + { 25, 239 }, // Azerbaijani/Turkey + { 26, 40 }, // Bafia/Cameroon + { 28, 145 }, // Bambara/Mali + { 30, 20 }, // Bangla/Bangladesh + { 30, 110 }, // Bangla/India + { 31, 40 }, // Basaa/Cameroon + { 32, 193 }, // Bashkir/Russia + { 33, 220 }, // Basque/Spain + { 35, 22 }, // 
Belarusian/Belarus + { 36, 260 }, // Bemba/Zambia + { 37, 230 }, // Bena/Tanzania + { 38, 110 }, // Bhojpuri/India + { 40, 74 }, // Blin/Eritrea + { 41, 110 }, // Bodo/India + { 42, 29 }, // Bosnian/Bosnia and Herzegovina + { 43, 84 }, // Breton/France + { 45, 36 }, // Bulgarian/Bulgaria + { 46, 161 }, // Burmese/Myanmar + { 47, 107 }, // Cantonese/Hong Kong + { 47, 50 }, // Cantonese/China + { 48, 220 }, // Catalan/Spain + { 48, 6 }, // Catalan/Andorra + { 48, 84 }, // Catalan/France + { 48, 117 }, // Catalan/Italy + { 49, 185 }, // Cebuano/Philippines + { 50, 159 }, // Central Atlas Tamazight/Morocco + { 51, 113 }, // Central Kurdish/Iraq + { 51, 112 }, // Central Kurdish/Iran + { 52, 20 }, // Chakma/Bangladesh + { 52, 110 }, // Chakma/India + { 54, 193 }, // Chechen/Russia + { 55, 248 }, // Cherokee/United States + { 56, 248 }, // Chickasaw/United States + { 57, 243 }, // Chiga/Uganda + { 58, 50 }, // Chinese/China + { 58, 107 }, // Chinese/Hong Kong + { 58, 139 }, // Chinese/Macao + { 58, 210 }, // Chinese/Singapore + { 58, 228 }, // Chinese/Taiwan + { 59, 193 }, // Church/Russia + { 60, 193 }, // Chuvash/Russia + { 61, 91 }, // Colognian/Germany + { 63, 246 }, // Cornish/United Kingdom + { 64, 84 }, // Corsican/France + { 66, 60 }, // Croatian/Croatia + { 66, 29 }, // Croatian/Bosnia and Herzegovina + { 67, 64 }, // Czech/Czechia + { 68, 65 }, // Danish/Denmark + { 68, 95 }, // Danish/Greenland + { 69, 144 }, // Divehi/Maldives + { 70, 110 }, // Dogri/India + { 71, 40 }, // Duala/Cameroon + { 72, 165 }, // Dutch/Netherlands + { 72, 13 }, // Dutch/Aruba + { 72, 23 }, // Dutch/Belgium + { 72, 44 }, // Dutch/Caribbean Netherlands + { 72, 62 }, // Dutch/Curacao + { 72, 211 }, // Dutch/Sint Maarten + { 72, 223 }, // Dutch/Suriname + { 73, 27 }, // Dzongkha/Bhutan + { 74, 124 }, // Embu/Kenya + { 75, 248 }, // English/United States + { 75, 5 }, // English/American Samoa + { 75, 8 }, // English/Anguilla + { 75, 10 }, // English/Antigua and Barbuda + { 75, 15 }, // 
English/Australia + { 75, 16 }, // English/Austria + { 75, 18 }, // English/Bahamas + { 75, 21 }, // English/Barbados + { 75, 23 }, // English/Belgium + { 75, 24 }, // English/Belize + { 75, 26 }, // English/Bermuda + { 75, 30 }, // English/Botswana + { 75, 33 }, // English/British Indian Ocean Territory + { 75, 34 }, // English/British Virgin Islands + { 75, 38 }, // English/Burundi + { 75, 40 }, // English/Cameroon + { 75, 41 }, // English/Canada + { 75, 45 }, // English/Cayman Islands + { 75, 51 }, // English/Christmas Island + { 75, 53 }, // English/Cocos Islands + { 75, 58 }, // English/Cook Islands + { 75, 63 }, // English/Cyprus + { 75, 65 }, // English/Denmark + { 75, 66 }, // English/Diego Garcia + { 75, 68 }, // English/Dominica + { 75, 74 }, // English/Eritrea + { 75, 76 }, // English/Eswatini + { 75, 78 }, // English/Europe + { 75, 80 }, // English/Falkland Islands + { 75, 82 }, // English/Fiji + { 75, 83 }, // English/Finland + { 75, 89 }, // English/Gambia + { 75, 91 }, // English/Germany + { 75, 92 }, // English/Ghana + { 75, 93 }, // English/Gibraltar + { 75, 96 }, // English/Grenada + { 75, 98 }, // English/Guam + { 75, 100 }, // English/Guernsey + { 75, 103 }, // English/Guyana + { 75, 107 }, // English/Hong Kong + { 75, 110 }, // English/India + { 75, 111 }, // English/Indonesia + { 75, 114 }, // English/Ireland + { 75, 115 }, // English/Isle of Man + { 75, 116 }, // English/Israel + { 75, 119 }, // English/Jamaica + { 75, 121 }, // English/Jersey + { 75, 124 }, // English/Kenya + { 75, 125 }, // English/Kiribati + { 75, 133 }, // English/Lesotho + { 75, 134 }, // English/Liberia + { 75, 139 }, // English/Macao + { 75, 141 }, // English/Madagascar + { 75, 142 }, // English/Malawi + { 75, 143 }, // English/Malaysia + { 75, 144 }, // English/Maldives + { 75, 146 }, // English/Malta + { 75, 147 }, // English/Marshall Islands + { 75, 150 }, // English/Mauritius + { 75, 153 }, // English/Micronesia + { 75, 158 }, // English/Montserrat + { 75, 162 }, 
// English/Namibia + { 75, 163 }, // English/Nauru + { 75, 165 }, // English/Netherlands + { 75, 167 }, // English/New Zealand + { 75, 169 }, // English/Nigeria + { 75, 171 }, // English/Niue + { 75, 172 }, // English/Norfolk Island + { 75, 173 }, // English/Northern Mariana Islands + { 75, 178 }, // English/Pakistan + { 75, 179 }, // English/Palau + { 75, 182 }, // English/Papua New Guinea + { 75, 185 }, // English/Philippines + { 75, 186 }, // English/Pitcairn + { 75, 189 }, // English/Puerto Rico + { 75, 194 }, // English/Rwanda + { 75, 196 }, // English/Saint Helena + { 75, 197 }, // English/Saint Kitts and Nevis + { 75, 198 }, // English/Saint Lucia + { 75, 201 }, // English/Saint Vincent and Grenadines + { 75, 202 }, // English/Samoa + { 75, 208 }, // English/Seychelles + { 75, 209 }, // English/Sierra Leone + { 75, 210 }, // English/Singapore + { 75, 211 }, // English/Sint Maarten + { 75, 213 }, // English/Slovenia + { 75, 214 }, // English/Solomon Islands + { 75, 216 }, // English/South Africa + { 75, 219 }, // English/South Sudan + { 75, 222 }, // English/Sudan + { 75, 225 }, // English/Sweden + { 75, 226 }, // English/Switzerland + { 75, 230 }, // English/Tanzania + { 75, 234 }, // English/Tokelau + { 75, 235 }, // English/Tonga + { 75, 236 }, // English/Trinidad and Tobago + { 75, 241 }, // English/Turks and Caicos Islands + { 75, 242 }, // English/Tuvalu + { 75, 243 }, // English/Uganda + { 75, 245 }, // English/United Arab Emirates + { 75, 246 }, // English/United Kingdom + { 75, 247 }, // English/United States Outlying Islands + { 75, 249 }, // English/United States Virgin Islands + { 75, 252 }, // English/Vanuatu + { 75, 258 }, // English/world + { 75, 260 }, // English/Zambia + { 75, 261 }, // English/Zimbabwe + { 76, 193 }, // Erzya/Russia + { 77, 258 }, // Esperanto/world + { 78, 75 }, // Estonian/Estonia + { 79, 92 }, // Ewe/Ghana + { 79, 233 }, // Ewe/Togo + { 80, 40 }, // Ewondo/Cameroon + { 81, 81 }, // Faroese/Faroe Islands + { 81, 65 }, // 
Faroese/Denmark + { 83, 185 }, // Filipino/Philippines + { 84, 83 }, // Finnish/Finland + { 85, 84 }, // French/France + { 85, 4 }, // French/Algeria + { 85, 23 }, // French/Belgium + { 85, 25 }, // French/Benin + { 85, 37 }, // French/Burkina Faso + { 85, 38 }, // French/Burundi + { 85, 40 }, // French/Cameroon + { 85, 41 }, // French/Canada + { 85, 46 }, // French/Central African Republic + { 85, 48 }, // French/Chad + { 85, 55 }, // French/Comoros + { 85, 56 }, // French/Congo - Brazzaville + { 85, 57 }, // French/Congo - Kinshasa + { 85, 67 }, // French/Djibouti + { 85, 73 }, // French/Equatorial Guinea + { 85, 85 }, // French/French Guiana + { 85, 86 }, // French/French Polynesia + { 85, 88 }, // French/Gabon + { 85, 97 }, // French/Guadeloupe + { 85, 102 }, // French/Guinea + { 85, 104 }, // French/Haiti + { 85, 118 }, // French/Ivory Coast + { 85, 138 }, // French/Luxembourg + { 85, 141 }, // French/Madagascar + { 85, 145 }, // French/Mali + { 85, 148 }, // French/Martinique + { 85, 149 }, // French/Mauritania + { 85, 150 }, // French/Mauritius + { 85, 151 }, // French/Mayotte + { 85, 155 }, // French/Monaco + { 85, 159 }, // French/Morocco + { 85, 166 }, // French/New Caledonia + { 85, 170 }, // French/Niger + { 85, 191 }, // French/Reunion + { 85, 194 }, // French/Rwanda + { 85, 195 }, // French/Saint Barthelemy + { 85, 199 }, // French/Saint Martin + { 85, 200 }, // French/Saint Pierre and Miquelon + { 85, 206 }, // French/Senegal + { 85, 208 }, // French/Seychelles + { 85, 226 }, // French/Switzerland + { 85, 227 }, // French/Syria + { 85, 233 }, // French/Togo + { 85, 238 }, // French/Tunisia + { 85, 252 }, // French/Vanuatu + { 85, 256 }, // French/Wallis and Futuna + { 86, 117 }, // Friulian/Italy + { 87, 206 }, // Fulah/Senegal + { 87, 37 }, // Fulah/Burkina Faso + { 87, 40 }, // Fulah/Cameroon + { 87, 89 }, // Fulah/Gambia + { 87, 92 }, // Fulah/Ghana + { 87, 101 }, // Fulah/Guinea-Bissau + { 87, 102 }, // Fulah/Guinea + { 87, 134 }, // 
Fulah/Liberia + { 87, 149 }, // Fulah/Mauritania + { 87, 169 }, // Fulah/Nigeria + { 87, 170 }, // Fulah/Niger + { 87, 209 }, // Fulah/Sierra Leone + { 88, 246 }, // Gaelic/United Kingdom + { 89, 92 }, // Ga/Ghana + { 90, 220 }, // Galician/Spain + { 91, 243 }, // Ganda/Uganda + { 92, 77 }, // Geez/Ethiopia + { 92, 74 }, // Geez/Eritrea + { 93, 90 }, // Georgian/Georgia + { 94, 91 }, // German/Germany + { 94, 16 }, // German/Austria + { 94, 23 }, // German/Belgium + { 94, 117 }, // German/Italy + { 94, 136 }, // German/Liechtenstein + { 94, 138 }, // German/Luxembourg + { 94, 226 }, // German/Switzerland + { 96, 94 }, // Greek/Greece + { 96, 63 }, // Greek/Cyprus + { 97, 183 }, // Guarani/Paraguay + { 98, 110 }, // Gujarati/India + { 99, 124 }, // Gusii/Kenya + { 101, 169 }, // Hausa/Nigeria + { 101, 222 }, // Hausa/Sudan + { 101, 92 }, // Hausa/Ghana + { 101, 170 }, // Hausa/Niger + { 102, 248 }, // Hawaiian/United States + { 103, 116 }, // Hebrew/Israel + { 105, 110 }, // Hindi/India + { 107, 108 }, // Hungarian/Hungary + { 108, 109 }, // Icelandic/Iceland + { 109, 258 }, // Ido/world + { 110, 169 }, // Igbo/Nigeria + { 111, 83 }, // Inari Sami/Finland + { 112, 111 }, // Indonesian/Indonesia + { 114, 258 }, // Interlingua/world + { 115, 75 }, // Interlingue/Estonia + { 116, 41 }, // Inuktitut/Canada + { 118, 114 }, // Irish/Ireland + { 118, 246 }, // Irish/United Kingdom + { 119, 117 }, // Italian/Italy + { 119, 203 }, // Italian/San Marino + { 119, 226 }, // Italian/Switzerland + { 119, 253 }, // Italian/Vatican City + { 120, 120 }, // Japanese/Japan + { 121, 111 }, // Javanese/Indonesia + { 122, 169 }, // Jju/Nigeria + { 123, 206 }, // Jola-Fonyi/Senegal + { 124, 43 }, // Kabuverdianu/Cape Verde + { 125, 4 }, // Kabyle/Algeria + { 126, 40 }, // Kako/Cameroon + { 127, 95 }, // Kalaallisut/Greenland + { 128, 124 }, // Kalenjin/Kenya + { 129, 124 }, // Kamba/Kenya + { 130, 110 }, // Kannada/India + { 132, 110 }, // Kashmiri/India + { 133, 123 }, // 
Kazakh/Kazakhstan + { 134, 40 }, // Kenyang/Cameroon + { 135, 39 }, // Khmer/Cambodia + { 136, 99 }, // Kiche/Guatemala + { 137, 124 }, // Kikuyu/Kenya + { 138, 194 }, // Kinyarwanda/Rwanda + { 141, 110 }, // Konkani/India + { 142, 218 }, // Korean/South Korea + { 142, 50 }, // Korean/China + { 142, 174 }, // Korean/North Korea + { 144, 145 }, // Koyraboro Senni/Mali + { 145, 145 }, // Koyra Chiini/Mali + { 146, 134 }, // Kpelle/Liberia + { 146, 102 }, // Kpelle/Guinea + { 148, 239 }, // Kurdish/Turkey + { 149, 40 }, // Kwasio/Cameroon + { 150, 128 }, // Kyrgyz/Kyrgyzstan + { 151, 248 }, // Lakota/United States + { 152, 230 }, // Langi/Tanzania + { 153, 129 }, // Lao/Laos + { 154, 253 }, // Latin/Vatican City + { 155, 131 }, // Latvian/Latvia + { 158, 57 }, // Lingala/Congo - Kinshasa + { 158, 7 }, // Lingala/Angola + { 158, 46 }, // Lingala/Central African Republic + { 158, 56 }, // Lingala/Congo - Brazzaville + { 160, 137 }, // Lithuanian/Lithuania + { 161, 258 }, // Lojban/world + { 162, 91 }, // Lower Sorbian/Germany + { 163, 91 }, // Low German/Germany + { 163, 165 }, // Low German/Netherlands + { 164, 57 }, // Luba-Katanga/Congo - Kinshasa + { 165, 225 }, // Lule Sami/Sweden + { 165, 175 }, // Lule Sami/Norway + { 166, 124 }, // Luo/Kenya + { 167, 138 }, // Luxembourgish/Luxembourg + { 168, 124 }, // Luyia/Kenya + { 169, 140 }, // Macedonian/Macedonia + { 170, 230 }, // Machame/Tanzania + { 171, 110 }, // Maithili/India + { 172, 160 }, // Makhuwa-Meetto/Mozambique + { 173, 230 }, // Makonde/Tanzania + { 174, 141 }, // Malagasy/Madagascar + { 175, 110 }, // Malayalam/India + { 176, 143 }, // Malay/Malaysia + { 176, 35 }, // Malay/Brunei + { 176, 111 }, // Malay/Indonesia + { 176, 210 }, // Malay/Singapore + { 177, 146 }, // Maltese/Malta + { 179, 110 }, // Manipuri/India + { 180, 115 }, // Manx/Isle of Man + { 181, 167 }, // Maori/New Zealand + { 182, 49 }, // Mapuche/Chile + { 183, 110 }, // Marathi/India + { 185, 124 }, // Masai/Kenya + { 185, 230 }, // 
Masai/Tanzania + { 186, 112 }, // Mazanderani/Iran + { 188, 124 }, // Meru/Kenya + { 189, 40 }, // Meta/Cameroon + { 190, 41 }, // Mohawk/Canada + { 191, 156 }, // Mongolian/Mongolia + { 191, 50 }, // Mongolian/China + { 192, 150 }, // Morisyen/Mauritius + { 193, 40 }, // Mundang/Cameroon + { 194, 248 }, // Muscogee/United States + { 195, 162 }, // Nama/Namibia + { 197, 248 }, // Navajo/United States + { 199, 164 }, // Nepali/Nepal + { 199, 110 }, // Nepali/India + { 201, 40 }, // Ngiemboon/Cameroon + { 202, 40 }, // Ngomba/Cameroon + { 203, 169 }, // Nigerian Pidgin/Nigeria + { 204, 102 }, // Nko/Guinea + { 205, 112 }, // Northern Luri/Iran + { 205, 113 }, // Northern Luri/Iraq + { 206, 175 }, // Northern Sami/Norway + { 206, 83 }, // Northern Sami/Finland + { 206, 225 }, // Northern Sami/Sweden + { 207, 216 }, // Northern Sotho/South Africa + { 208, 261 }, // North Ndebele/Zimbabwe + { 209, 175 }, // Norwegian Bokmal/Norway + { 209, 224 }, // Norwegian Bokmal/Svalbard and Jan Mayen + { 210, 175 }, // Norwegian Nynorsk/Norway + { 211, 219 }, // Nuer/South Sudan + { 212, 142 }, // Nyanja/Malawi + { 213, 243 }, // Nyankole/Uganda + { 214, 84 }, // Occitan/France + { 214, 220 }, // Occitan/Spain + { 215, 110 }, // Odia/India + { 220, 77 }, // Oromo/Ethiopia + { 220, 124 }, // Oromo/Kenya + { 221, 248 }, // Osage/United States + { 222, 90 }, // Ossetic/Georgia + { 222, 193 }, // Ossetic/Russia + { 226, 62 }, // Papiamento/Curacao + { 226, 13 }, // Papiamento/Aruba + { 227, 1 }, // Pashto/Afghanistan + { 227, 178 }, // Pashto/Pakistan + { 228, 112 }, // Persian/Iran + { 228, 1 }, // Persian/Afghanistan + { 230, 187 }, // Polish/Poland + { 231, 32 }, // Portuguese/Brazil + { 231, 7 }, // Portuguese/Angola + { 231, 43 }, // Portuguese/Cape Verde + { 231, 73 }, // Portuguese/Equatorial Guinea + { 231, 101 }, // Portuguese/Guinea-Bissau + { 231, 138 }, // Portuguese/Luxembourg + { 231, 139 }, // Portuguese/Macao + { 231, 160 }, // Portuguese/Mozambique + { 231, 188 }, // 
Portuguese/Portugal + { 231, 204 }, // Portuguese/Sao Tome and Principe + { 231, 226 }, // Portuguese/Switzerland + { 231, 232 }, // Portuguese/Timor-Leste + { 232, 187 }, // Prussian/Poland + { 233, 110 }, // Punjabi/India + { 233, 178 }, // Punjabi/Pakistan + { 234, 184 }, // Quechua/Peru + { 234, 28 }, // Quechua/Bolivia + { 234, 70 }, // Quechua/Ecuador + { 235, 192 }, // Romanian/Romania + { 235, 154 }, // Romanian/Moldova + { 236, 226 }, // Romansh/Switzerland + { 237, 230 }, // Rombo/Tanzania + { 238, 38 }, // Rundi/Burundi + { 239, 193 }, // Russian/Russia + { 239, 22 }, // Russian/Belarus + { 239, 123 }, // Russian/Kazakhstan + { 239, 128 }, // Russian/Kyrgyzstan + { 239, 154 }, // Russian/Moldova + { 239, 244 }, // Russian/Ukraine + { 240, 230 }, // Rwa/Tanzania + { 241, 74 }, // Saho/Eritrea + { 242, 193 }, // Sakha/Russia + { 243, 124 }, // Samburu/Kenya + { 245, 46 }, // Sango/Central African Republic + { 246, 230 }, // Sangu/Tanzania + { 247, 110 }, // Sanskrit/India + { 248, 110 }, // Santali/India + { 249, 117 }, // Sardinian/Italy + { 251, 160 }, // Sena/Mozambique + { 252, 207 }, // Serbian/Serbia + { 252, 29 }, // Serbian/Bosnia and Herzegovina + { 252, 126 }, // Serbian/Kosovo + { 252, 157 }, // Serbian/Montenegro + { 253, 230 }, // Shambala/Tanzania + { 254, 261 }, // Shona/Zimbabwe + { 255, 50 }, // Sichuan Yi/China + { 256, 117 }, // Sicilian/Italy + { 257, 77 }, // Sidamo/Ethiopia + { 258, 187 }, // Silesian/Poland + { 259, 178 }, // Sindhi/Pakistan + { 259, 110 }, // Sindhi/India + { 260, 221 }, // Sinhala/Sri Lanka + { 261, 83 }, // Skolt Sami/Finland + { 262, 212 }, // Slovak/Slovakia + { 263, 213 }, // Slovenian/Slovenia + { 264, 243 }, // Soga/Uganda + { 265, 215 }, // Somali/Somalia + { 265, 67 }, // Somali/Djibouti + { 265, 77 }, // Somali/Ethiopia + { 265, 124 }, // Somali/Kenya + { 266, 112 }, // Southern Kurdish/Iran + { 266, 113 }, // Southern Kurdish/Iraq + { 267, 225 }, // Southern Sami/Sweden + { 267, 175 }, // Southern 
Sami/Norway + { 268, 216 }, // Southern Sotho/South Africa + { 268, 133 }, // Southern Sotho/Lesotho + { 269, 216 }, // South Ndebele/South Africa + { 270, 220 }, // Spanish/Spain + { 270, 11 }, // Spanish/Argentina + { 270, 24 }, // Spanish/Belize + { 270, 28 }, // Spanish/Bolivia + { 270, 32 }, // Spanish/Brazil + { 270, 42 }, // Spanish/Canary Islands + { 270, 47 }, // Spanish/Ceuta and Melilla + { 270, 49 }, // Spanish/Chile + { 270, 54 }, // Spanish/Colombia + { 270, 59 }, // Spanish/Costa Rica + { 270, 61 }, // Spanish/Cuba + { 270, 69 }, // Spanish/Dominican Republic + { 270, 70 }, // Spanish/Ecuador + { 270, 72 }, // Spanish/El Salvador + { 270, 73 }, // Spanish/Equatorial Guinea + { 270, 99 }, // Spanish/Guatemala + { 270, 106 }, // Spanish/Honduras + { 270, 130 }, // Spanish/Latin America + { 270, 152 }, // Spanish/Mexico + { 270, 168 }, // Spanish/Nicaragua + { 270, 181 }, // Spanish/Panama + { 270, 183 }, // Spanish/Paraguay + { 270, 184 }, // Spanish/Peru + { 270, 185 }, // Spanish/Philippines + { 270, 189 }, // Spanish/Puerto Rico + { 270, 248 }, // Spanish/United States + { 270, 250 }, // Spanish/Uruguay + { 270, 254 }, // Spanish/Venezuela + { 271, 159 }, // Standard Moroccan Tamazight/Morocco + { 272, 111 }, // Sundanese/Indonesia + { 273, 230 }, // Swahili/Tanzania + { 273, 57 }, // Swahili/Congo - Kinshasa + { 273, 124 }, // Swahili/Kenya + { 273, 243 }, // Swahili/Uganda + { 274, 216 }, // Swati/South Africa + { 274, 76 }, // Swati/Eswatini + { 275, 225 }, // Swedish/Sweden + { 275, 2 }, // Swedish/Aland Islands + { 275, 83 }, // Swedish/Finland + { 276, 226 }, // Swiss German/Switzerland + { 276, 84 }, // Swiss German/France + { 276, 136 }, // Swiss German/Liechtenstein + { 277, 113 }, // Syriac/Iraq + { 277, 227 }, // Syriac/Syria + { 278, 159 }, // Tachelhit/Morocco + { 280, 255 }, // Tai Dam/Vietnam + { 281, 124 }, // Taita/Kenya + { 282, 229 }, // Tajik/Tajikistan + { 283, 110 }, // Tamil/India + { 283, 143 }, // Tamil/Malaysia + { 283, 210 
}, // Tamil/Singapore + { 283, 221 }, // Tamil/Sri Lanka + { 284, 228 }, // Taroko/Taiwan + { 285, 170 }, // Tasawaq/Niger + { 286, 193 }, // Tatar/Russia + { 287, 110 }, // Telugu/India + { 288, 243 }, // Teso/Uganda + { 288, 124 }, // Teso/Kenya + { 289, 231 }, // Thai/Thailand + { 290, 50 }, // Tibetan/China + { 290, 110 }, // Tibetan/India + { 291, 74 }, // Tigre/Eritrea + { 292, 77 }, // Tigrinya/Ethiopia + { 292, 74 }, // Tigrinya/Eritrea + { 294, 182 }, // Tok Pisin/Papua New Guinea + { 295, 235 }, // Tongan/Tonga + { 296, 216 }, // Tsonga/South Africa + { 297, 216 }, // Tswana/South Africa + { 297, 30 }, // Tswana/Botswana + { 298, 239 }, // Turkish/Turkey + { 298, 63 }, // Turkish/Cyprus + { 299, 240 }, // Turkmen/Turkmenistan + { 301, 169 }, // Tyap/Nigeria + { 303, 244 }, // Ukrainian/Ukraine + { 304, 91 }, // Upper Sorbian/Germany + { 305, 178 }, // Urdu/Pakistan + { 305, 110 }, // Urdu/India + { 306, 50 }, // Uyghur/China + { 307, 251 }, // Uzbek/Uzbekistan + { 307, 1 }, // Uzbek/Afghanistan + { 308, 134 }, // Vai/Liberia + { 309, 216 }, // Venda/South Africa + { 310, 255 }, // Vietnamese/Vietnam + { 311, 258 }, // Volapuk/world + { 312, 230 }, // Vunjo/Tanzania + { 313, 23 }, // Walloon/Belgium + { 314, 226 }, // Walser/Switzerland + { 315, 15 }, // Warlpiri/Australia + { 316, 246 }, // Welsh/United Kingdom + { 317, 178 }, // Western Balochi/Pakistan + { 317, 1 }, // Western Balochi/Afghanistan + { 317, 112 }, // Western Balochi/Iran + { 317, 176 }, // Western Balochi/Oman + { 317, 245 }, // Western Balochi/United Arab Emirates + { 318, 165 }, // Western Frisian/Netherlands + { 319, 77 }, // Wolaytta/Ethiopia + { 320, 206 }, // Wolof/Senegal + { 321, 216 }, // Xhosa/South Africa + { 322, 40 }, // Yangben/Cameroon + { 323, 244 }, // Yiddish/Ukraine + { 324, 169 }, // Yoruba/Nigeria + { 324, 25 }, // Yoruba/Benin + { 325, 170 }, // Zarma/Niger + { 326, 50 }, // Zhuang/China + { 327, 216 }, // Zulu/South Africa + { 328, 32 }, // Kaingang/Brazil + { 329, 
32 }, // Nheengatu/Brazil + { 329, 54 }, // Nheengatu/Colombia + { 329, 254 }, // Nheengatu/Venezuela + { 330, 110 }, // Haryanvi/India + { 331, 91 }, // Northern Frisian/Germany + { 332, 110 }, // Rajasthani/India + { 333, 193 }, // Moksha/Russia + { 334, 258 }, // Toki Pona/world + { 335, 214 }, // Pijin/Solomon Islands + { 336, 169 }, // Obolo/Nigeria + { 337, 178 }, // Baluchi/Pakistan + { 338, 117 }, // Ligurian/Italy + { 339, 161 }, // Rohingya/Myanmar + { 339, 20 }, // Rohingya/Bangladesh + { 340, 178 }, // Torwali/Pakistan + { 341, 25 }, // Anii/Benin + { 342, 110 }, // Kangri/India + { 343, 117 }, // Venetian/Italy }; -static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]); + +// GENERATED PART ENDS HERE + +static const int g_locale_list_count = std::size(g_locale_list); LocaleModel::LocaleModel(QObject *parent) : QAbstractItemModel(parent) @@ -283,7 +680,7 @@ LocaleModel::LocaleModel(QObject *parent) QVariant LocaleModel::data(const QModelIndex &index, int role) const { if (!index.isValid() - || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole + || (role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole) || index.column() >= g_model_cols || index.row() >= g_locale_list_count + 2) return QVariant(); @@ -424,9 +821,9 @@ int LocaleModel::rowCount(const QModelIndex &parent) const Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const { if (!index.isValid()) - return 0; + return {}; if (index.row() == 0 && index.column() == g_model_cols - 1) - return 0; + return {}; if (index.row() == 0) return QAbstractItemModel::flags(index) | Qt::ItemIsEditable; return QAbstractItemModel::flags(index); @@ -438,7 +835,7 @@ bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int r || index.row() != 0 || index.column() >= g_model_cols - 1 || role != Qt::EditRole - || m_data_list.at(index.column()).type() != value.type()) + || m_data_list.at(index.column()).typeId() != 
value.typeId()) return false; m_data_list[index.column()] = value; diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h index f35b984b4c..a0ba45bb15 100644 --- a/util/locale_database/testlocales/localemodel.h +++ b/util/locale_database/testlocales/localemodel.h @@ -1,30 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. 
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #ifndef LOCALEMODEL_H #define LOCALEMODEL_H @@ -38,17 +13,17 @@ class LocaleModel : public QAbstractItemModel public: LocaleModel(QObject *parent = nullptr); - virtual int columnCount(const QModelIndex &parent = QModelIndex()) const; - virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const; - virtual QModelIndex index(int row, int column, - const QModelIndex &parent = QModelIndex()) const; - virtual QModelIndex parent(const QModelIndex &index) const; - virtual int rowCount(const QModelIndex &parent = QModelIndex()) const; - virtual QVariant headerData(int section, Qt::Orientation orientation, - int role = Qt::DisplayRole ) const; - virtual Qt::ItemFlags flags(const QModelIndex &index) const; - virtual bool setData(const QModelIndex &index, const QVariant &value, - int role = Qt::EditRole); + int columnCount(const QModelIndex &parent = QModelIndex()) const override; + QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override; + QModelIndex index(int row, int column, + const QModelIndex &parent = QModelIndex()) const override; + QModelIndex parent(const QModelIndex &index) const override; + int rowCount(const QModelIndex &parent = QModelIndex()) const override; + QVariant headerData(int section, Qt::Orientation orientation, + int role = Qt::DisplayRole ) const override; + Qt::ItemFlags flags(const QModelIndex &index) const override; + bool setData(const QModelIndex &index, const QVariant &value, + int role = Qt::EditRole) override; private: QList<QVariant> m_data_list; }; diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp index 3ff7f73a98..df8a3c28ab 100644 --- a/util/locale_database/testlocales/localewidget.cpp +++ b/util/locale_database/testlocales/localewidget.cpp @@ -1,33 +1,8 @@ 
-/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include <QTableView> #include <QVBoxLayout> -#include <QItemDelegate> +#include <QStyledItemDelegate> #include <QItemEditorFactory> #include <QDoubleSpinBox> @@ -51,26 +26,26 @@ public: class EditorFactory : public QItemEditorFactory { public: - EditorFactory() { - static DoubleEditorCreator double_editor_creator; - registerEditor(QVariant::Double, &double_editor_creator); + EditorFactory() + { + // registerEditor() assumes ownership of the creator. 
+ registerEditor(QVariant::Double, new DoubleEditorCreator); } }; LocaleWidget::LocaleWidget(QWidget *parent) - : QWidget(parent) + : QWidget(parent), + m_model(new LocaleModel(this)), + m_view(new QTableView(this)) { - m_model = new LocaleModel(this); - m_view = new QTableView(this); - - QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate()); + QStyledItemDelegate *delegate = qobject_cast<QStyledItemDelegate*>(m_view->itemDelegate()); Q_ASSERT(delegate != 0); - static EditorFactory editor_factory; - delegate->setItemEditorFactory(&editor_factory); + static EditorFactory editorFactory; + delegate->setItemEditorFactory(&editorFactory); m_view->setModel(m_model); QVBoxLayout *layout = new QVBoxLayout(this); - layout->setMargin(0); + layout->setContentsMargins(0, 0, 0, 0); layout->addWidget(m_view); } diff --git a/util/locale_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h index c562da119b..36613135ea 100644 --- a/util/locale_database/testlocales/localewidget.h +++ b/util/locale_database/testlocales/localewidget.h @@ -1,30 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. 
-** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #ifndef LOCALEWIDGET_H #define LOCALEWIDGET_H diff --git a/util/locale_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp index 0c3c45f989..d94726a2a9 100644 --- a/util/locale_database/testlocales/main.cpp +++ b/util/locale_database/testlocales/main.cpp @@ -1,30 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the utils of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:GPL-EXCEPT$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. 
-** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 3 as published by the Free Software -** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 #include <QApplication> #include "localewidget.h" diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro index a9a6247f96..126c19589b 100644 --- a/util/locale_database/testlocales/testlocales.pro +++ b/util/locale_database/testlocales/testlocales.pro @@ -1,4 +1,5 @@ TARGET = testlocales CONFIG += debug +QT += widgets SOURCES += localemodel.cpp localewidget.cpp main.cpp -HEADERS += localemodel.h localewidget.h
\ No newline at end of file +HEADERS += localemodel.h localewidget.h |