summary refs log tree commit diff stats
path: root/util/locale_database
diff options
context:
space:
mode:
Diffstat (limited to 'util/locale_database')
-rw-r--r-- util/locale_database/cldr.py 301
-rwxr-xr-x util/locale_database/cldr2qlocalexml.py 153
-rwxr-xr-x util/locale_database/cldr2qtimezone.py 530
-rw-r--r--[-rwxr-xr-x] util/locale_database/dateconverter.py 302
-rw-r--r-- util/locale_database/enumdata.py 218
-rw-r--r-- util/locale_database/iso639_3.py 80
-rw-r--r-- util/locale_database/ldml.py 215
-rw-r--r-- util/locale_database/localetools.py 239
-rw-r--r-- util/locale_database/qlocalexml.py 302
-rw-r--r-- util/locale_database/qlocalexml.rnc 119
-rwxr-xr-x util/locale_database/qlocalexml2cpp.py 466
-rw-r--r-- util/locale_database/testlocales/localemodel.cpp 913
-rw-r--r-- util/locale_database/testlocales/localemodel.h 51
-rw-r--r-- util/locale_database/testlocales/localewidget.cpp 53
-rw-r--r-- util/locale_database/testlocales/localewidget.h 29
-rw-r--r-- util/locale_database/testlocales/main.cpp 29
-rw-r--r-- util/locale_database/testlocales/testlocales.pro 3
17 files changed, 2308 insertions, 1695 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 9b08d8a652..9e0bae9667 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -1,31 +1,5 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Digesting the CLDR's data.
Provides two classes:
@@ -36,15 +10,17 @@ The former should normally be all you need to access.
See individual classes for further detail.
"""
+from typing import Iterable, TextIO
from xml.dom import minidom
from weakref import WeakValueDictionary as CacheDict
-import os
+from pathlib import Path
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
from qlocalexml import Locale
class CldrReader (object):
- def __init__(self, root, grumble = lambda msg: None, whitter = lambda msg: None):
+ def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
"""Set up a reader object for reading CLDR data.
Single parameter, root, is the file-system path to the root of
@@ -67,7 +43,7 @@ class CldrReader (object):
Yields pairs (have, give) of 4-tuples; if what you have
matches the left member, giving the right member is probably
sensible. Each 4-tuple's entries are the full names of a
- language, a script, a country (strictly territory) and a
+ language, a script, a territory (usually a country) and a
variant (currently ignored)."""
skips = []
for got, use in self.root.likelySubTags():
@@ -79,7 +55,7 @@ class CldrReader (object):
and e.message.startswith('Unknown ') and ' code ' in e.message):
skips.append(use)
else:
- self.grumble('Skipping likelySubtag "{}" -> "{}" ({})\n'.format(got, use, e.message))
+ self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
continue
if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
continue
@@ -99,50 +75,49 @@ class CldrReader (object):
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
def readLocales(self, calendars = ('gregorian',)):
- locales = tuple(self.__allLocales(calendars))
- return dict(((k.language_id, k.script_id, k.country_id, k.variant_code),
- k) for k in locales)
+ return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
+ for k in self.__allLocales(calendars)}
def __allLocales(self, calendars):
def skip(locale, reason):
- return 'Skipping defaultContent locale "{}" ({})\n'.format(locale, reason)
+ return f'Skipping defaultContent locale "{locale}" ({reason})\n'
for locale in self.root.defaultContentLocales:
try:
- language, script, country, variant = self.__splitLocale(locale)
+ language, script, territory, variant = self.__splitLocale(locale)
except ValueError:
self.whitter(skip(locale, 'only language tag'))
continue
- if not (script or country):
+ if not (script or territory):
self.grumble(skip(locale, 'second tag is neither script nor territory'))
continue
- if not (language and country):
+ if not (language and territory):
continue
try:
yield self.__getLocaleData(self.root.locale(locale), calendars,
- language, script, country, variant)
+ language, script, territory, variant)
except Error as e:
self.grumble(skip(locale, e.message))
for locale in self.root.fileLocales:
try:
chain = self.root.locale(locale)
- language, script, country, variant = chain.tagCodes()
+ language, script, territory, variant = chain.tagCodes()
assert language
# TODO: this skip should probably be based on likely
- # sub-tags, instead of empty country: if locale has a
+ # sub-tags, instead of empty territory: if locale has a
# likely-subtag expansion, that's what QLocale uses,
# and we'll be saving its data for the expanded locale
# anyway, so don't need to record it for itself.
# See also QLocaleXmlReader.loadLocaleMap's grumble.
- if not country:
+ if not territory:
continue
- yield self.__getLocaleData(chain, calendars, language, script, country, variant)
+ yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
except Error as e:
- self.grumble('Skipping file locale "{}" ({})\n'.format(locale, e.message))
+ self.grumble(f'Skipping file locale "{locale}" ({e})\n')
import textwrap
@staticmethod
@@ -153,13 +128,13 @@ class CldrReader (object):
def __parseTags(self, locale):
tags = self.__splitLocale(locale)
- language = tags.next()
- script = country = variant = ''
+ language = next(tags)
+ script = territory = variant = ''
try:
- script, country, variant = tags
+ script, territory, variant = tags
except ValueError:
pass
- return tuple(p[1] for p in self.root.codesToIdName(language, script, country, variant))
+ return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant))
def __splitLocale(self, name):
"""Generate (language, script, territory, variant) from a locale name
@@ -171,14 +146,18 @@ class CldrReader (object):
single tag (i.e. contains no underscores). Always yields 1 or
4 values, never 2 or 3."""
tags = iter(name.split('_'))
- yield tags.next() # Language
- tag = tags.next() # may raise StopIteration
+ yield next(tags) # Language
+
+ try:
+ tag = next(tags)
+ except StopIteration:
+ return
# Script is always four letters, always capitalised:
if len(tag) == 4 and tag[0].isupper() and tag[1:].islower():
yield tag
try:
- tag = tags.next()
+ tag = next(tags)
except StopIteration:
tag = ''
else:
@@ -188,7 +167,7 @@ class CldrReader (object):
if tag and tag.isupper() or tag.isdigit():
yield tag
try:
- tag = tags.next()
+ tag = next(tags)
except StopIteration:
tag = ''
else:
@@ -201,21 +180,22 @@ class CldrReader (object):
else:
yield ''
- # If nothing is left, StopIteration will avoid the warning:
- if not tag:
- tag = tags.next()
- self.grumble('Ignoring unparsed cruft {} in {}\n'.format('_'.join(tag + tuple(tags)), name))
+ rest = [tag] if tag else []
+ rest.extend(tags)
- def __getLocaleData(self, scan, calendars, language, script, country, variant):
- ids, names = zip(*self.root.codesToIdName(language, script, country, variant))
- assert ids[0] > 0 and ids[2] > 0, (language, script, country, variant)
+ if rest:
+ self.grumble(f'Ignoring unparsed cruft {"_".join(rest)} in {name}\n')
+
+ def __getLocaleData(self, scan, calendars, language, script, territory, variant):
+ ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
+ assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
locale = Locale(
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
- country = names[2], country_code = country, country_id = ids[2],
+ territory = names[2], territory_code = territory, territory_id = ids[2],
variant_code = variant)
- firstDay, weStart, weEnd = self.root.weekData(country)
+ firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
for day in (firstDay, weStart, weEnd))
@@ -223,7 +203,7 @@ class CldrReader (object):
weekendStart = weStart,
weekendEnd = weEnd)
- iso, digits, rounding = self.root.currencyData(country)
+ iso, digits, rounding = self.root.currencyData(territory)
locale.update(currencyIsoCode = iso,
currencyDigits = int(digits),
currencyRounding = int(rounding))
@@ -231,7 +211,7 @@ class CldrReader (object):
locale.update(scan.currencyData(iso))
locale.update(scan.numericData(self.root.numberSystem, self.whitter))
locale.update(scan.textPatternData())
- locale.update(scan.endonyms(language, script, country, variant))
+ locale.update(scan.endonyms(language, script, territory, variant))
locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
locale.update(scan.calendarNames(calendars)) # Names of days and months
@@ -242,7 +222,7 @@ class CldrReader (object):
# the cache. If a process were to instantiate this class with distinct
# roots, each cache would be filled by the first to need it !
class CldrAccess (object):
- def __init__(self, root):
+ def __init__(self, root: Path):
"""Set up a master object for accessing CLDR data.
Single parameter, root, is the file-system path to the root of
@@ -250,18 +230,18 @@ class CldrAccess (object):
contain dtd/, main/ and supplemental/ sub-directories."""
self.root = root
- def xml(self, *path):
+ def xml(self, relative_path: str):
"""Load a single XML file and return its root element as an XmlScanner.
The path is interpreted relative to self.root"""
- return XmlScanner(Node(self.__xml(path)))
+ return XmlScanner(Node(self.__xml(relative_path)))
def supplement(self, name):
"""Loads supplemental data as a Supplement object.
The name should be that of a file in common/supplemental/, without path.
"""
- return Supplement(Node(self.__xml(('common', 'supplemental', name))))
+ return Supplement(Node(self.__xml(f'common/supplemental/{name}')))
def locale(self, name):
"""Loads all data for a locale as a LocaleScanner object.
@@ -273,17 +253,18 @@ class CldrAccess (object):
inheritance, where relevant."""
return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
+ def englishNaming(self, tag): # see QLocaleXmlWriter.enumData()
+ return self.__codeMap(tag).get
+
@property
- def fileLocales(self, joinPath = os.path.join, listDirectory = os.listdir,
- splitExtension = os.path.splitext):
+ def fileLocales(self) -> Iterable[str]:
"""Generator for locale IDs seen in file-names.
All *.xml other than root.xml in common/main/ are assumed to
identify locales."""
- for name in listDirectory(joinPath(self.root, 'common', 'main')):
- stem, ext = splitExtension(name)
- if ext == '.xml' and stem != 'root':
- yield stem
+ for path in self.root.joinpath('common/main').glob('*.xml'):
+ if path.stem != 'root':
+ yield path.stem
@property
def defaultContentLocales(self):
@@ -304,44 +285,44 @@ class CldrAccess (object):
def numberSystem(self, system):
"""Get a description of a numbering system.
- Returns a mapping, with keys u'digits', u'type' and u'id'; the
+ Returns a mapping, with keys 'digits', 'type' and 'id'; the
value for this last is system. Raises KeyError for unknown
number system, ldml.Error on failure to load data."""
try:
return self.__numberSystems[system]
except KeyError:
- raise Error('Unsupported number system: {}'.format(system))
+ raise Error(f'Unsupported number system: {system}')
- def weekData(self, country):
+ def weekData(self, territory):
"""Data on the weekly cycle.
Returns a triple (W, S, E) of en's short names for week-days;
W is the first day of the week, S the start of the week-end
- and E the end of the week-end. Where data for a country is
+ and E the end of the week-end. Where data for a territory is
unavailable, the data for CLDR's territory 001 (The World) is
used."""
try:
- return self.__weekData[country]
+ return self.__weekData[territory]
except KeyError:
return self.__weekData['001']
- def currencyData(self, country):
- """Returns currency data for the given country code.
+ def currencyData(self, territory):
+ """Returns currency data for the given territory code.
Return value is a tuple (ISO4217 code, digit count, rounding
- mode). If CLDR provides no data for this country, ('', 2, 1)
+ mode). If CLDR provides no data for this territory, ('', 2, 1)
is the default result.
"""
try:
- return self.__currencyData[country]
+ return self.__currencyData[territory]
except KeyError:
return '', 2, 1
- def codesToIdName(self, language, script, country, variant = ''):
+ def codesToIdName(self, language, script, territory, variant = ''):
"""Maps each code to the appropriate ID and name.
Returns a 4-tuple of (ID, name) pairs corresponding to the
- language, script, country and variant given. Raises a
+ language, script, territory and variant given. Raises a
suitable error if any of them is unknown, indicating all that
are unknown plus suitable names for any that could sensibly be
added to enumdata.py to make them known.
@@ -353,33 +334,33 @@ class CldrAccess (object):
try:
return (enum('language')[language],
enum('script')[script],
- enum('country')[country],
+ enum('territory')[territory],
enum('variant')[variant])
except KeyError:
pass
- parts, values = [], [language, script, country, variant]
- for index, key in enumerate(('language', 'script', 'country', 'variant')):
+ parts, values = [], [language, script, territory, variant]
+ for index, key in enumerate(('language', 'script', 'territory', 'variant')):
naming, enums = self.__codeMap(key), enum(key)
value = values[index]
if value not in enums:
- text = '{} code {}'.format(key, value)
+ text = f'{key} code {value}'
name = naming.get(value)
if name and value != 'POSIX':
- text += u' (could add {})'.format(name)
+ text += f' (could add {name})'
parts.append(text)
if len(parts) > 1:
parts[-1] = 'and ' + parts[-1]
- assert parts
+ else:
+ assert parts
+ if parts[0].startswith('variant'):
+ raise Error(f'No support for {parts[0]}',
+ language, script, territory, variant)
raise Error('Unknown ' + ', '.join(parts),
- language, script, country, variant)
+ language, script, territory, variant)
@staticmethod
- def __checkEnum(given, proper, scraps,
- remap = { u'å': 'a', u'ã': 'a', u'ç': 'c', u'é': 'e', u'í': 'i', u'ü': 'u'},
- prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
- suffixes = ( 'Han', ),
- skip = u'\u02bc'):
+ def __checkEnum(given, proper, scraps):
# Each is a { code: full name } mapping
for code, name in given.items():
try: right = proper[code]
@@ -387,35 +368,23 @@ class CldrAccess (object):
# No en.xml name for this code, but supplementalData's
# parentLocale may still believe in it:
if code not in scraps:
- yield name, '[Found no CLDR name for code {}]'.format(code)
- continue
- if name == right: continue
- ok = right.replace('&', 'And')
- for k, v in prefix.items():
- if ok.startswith(k + ' '):
- ok = v + ok[len(k):]
- while '(' in ok:
- try: f, t = ok.index('('), ok.index(')')
- except ValueError: break
- ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
- if any(name == ok + ' ' + s for s in suffixes):
- continue
- if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
- remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
+ yield name, f'[Found no CLDR name for code {code}]'
continue
- yield name, ok
+ cleaned = names_clash(right, name)
+ if cleaned:
+ yield name, cleaned
def checkEnumData(self, grumble):
scraps = set()
for k in self.__parentLocale.keys():
for f in k.split('_'):
scraps.add(f)
- from enumdata import language_list, country_list, script_list
- language = dict((v, k) for k, v in language_list.values() if not v.isspace())
- country = dict((v, k) for k, v in country_list.values() if v != 'ZZ')
- script = dict((v, k) for k, v in script_list.values() if v != 'Zzzz')
+ from enumdata import language_map, territory_map, script_map
+ language = {v: k for k, v in language_map.values() if not v.isspace()}
+ territory = {v: k for k, v in territory_map.values() if v != 'ZZ'}
+ script = {v: k for k, v in script_map.values() if v != 'Zzzz'}
lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
- land = dict(self.__checkEnum(country, self.__codeMap('country'), scraps))
+ land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
if lang or land or text:
grumble("""\
@@ -424,15 +393,15 @@ enumdata.py (keeping the old name as an alias):
""")
if lang:
grumble('Language:\n\t'
- + '\n\t'.join('{} -> {}'.format(k, v) for k, v in lang.items())
+ + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
+ '\n')
if land:
- grumble('Country:\n\t'
- + '\n\t'.join('{} -> {}'.format(k, v) for k, v in land.items())
+ grumble('Territory:\n\t'
+ + '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
+ '\n')
if text:
grumble('Script:\n\t'
- + '\n\t'.join('{} -> {}'.format(k, v) for k, v in text.items())
+ + '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
+ '\n')
grumble('\n')
@@ -442,7 +411,7 @@ enumdata.py (keeping the old name as an alias):
MS-Win have their own eccentric names for time-zones. CLDR
helpfully provides a translation to more orthodox names.
- Singe argument, lookup, is a mapping from known MS-Win names
+ Single argument, lookup, is a mapping from known MS-Win names
for locales to a unique integer index (starting at 1).
The XML structure we read has the form:
@@ -460,7 +429,7 @@ enumdata.py (keeping the old name as an alias):
</supplementalData>
"""
zones = self.supplement('windowsZones.xml')
- enum = self.__enumMap('country')
+ enum = self.__enumMap('territory')
badZones, unLands, defaults, windows = set(), set(), {}, {}
for name, attrs in zones.find('windowsZones/mapTimezones'):
@@ -469,8 +438,8 @@ enumdata.py (keeping the old name as an alias):
wid, code = attrs['other'], attrs['territory']
data = dict(windowsId = wid,
- countryCode = code,
- ianaList = attrs['type'])
+ territoryCode = code,
+ ianaList = ' '.join(attrs['type'].split()))
try:
key = lookup[wid]
@@ -479,7 +448,7 @@ enumdata.py (keeping the old name as an alias):
key = 0
data['windowsKey'] = key
- if code == u'001':
+ if code == '001':
defaults[key] = data['ianaList']
else:
try:
@@ -487,11 +456,11 @@ enumdata.py (keeping the old name as an alias):
except KeyError:
unLands.append(code)
continue
- data.update(countryId = cid, country = name)
+ data.update(territoryId = cid, territory = name)
windows[key, cid] = data
if unLands:
- raise Error('Unknown country codes, please add to enumdata.py: '
+ raise Error('Unknown territory codes, please add to enumdata.py: '
+ ', '.join(sorted(unLands)))
if badZones:
@@ -507,20 +476,20 @@ enumdata.py (keeping the old name as an alias):
return self.__cldrVersion
# Implementation details
- def __xml(self, path, cache = CacheDict(), read = minidom.parse, joinPath = os.path.join):
+ def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
try:
- doc = cache[path]
+ doc = cache[relative_path]
except KeyError:
- cache[path] = doc = read(joinPath(self.root, *path)).documentElement
+ cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
return doc
- def __open(self, path, joinPath=os.path.join):
- return open(joinPath(self.root, *path))
+ def __open(self, relative_path: str) -> TextIO:
+ return self.root.joinpath(relative_path).open()
@property
def __rootLocale(self, cache = []):
if not cache:
- cache.append(self.xml('common', 'main', 'root.xml'))
+ cache.append(self.xml('common/main/root.xml'))
return cache[0]
@property
@@ -530,7 +499,7 @@ enumdata.py (keeping the old name as an alias):
return cache[0]
@property
- def __numberSystems(self, cache = {}, joinPath=os.path.join):
+ def __numberSystems(self, cache = {}):
if not cache:
for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
cache[attrs['id']] = attrs
@@ -563,7 +532,7 @@ enumdata.py (keeping the old name as an alias):
source = self.__supplementalData
for key in ('firstDay', 'weekendStart', 'weekendEnd'):
result = {}
- for ignore, attrs in source.find('weekData/' + key):
+ for ignore, attrs in source.find(f'weekData/{key}'):
assert ignore == key
day = attrs['day']
assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
@@ -580,7 +549,7 @@ enumdata.py (keeping the old name as an alias):
for elt in source.findNodes('currencyData/region'):
iso, digits, rounding = '', 2, 1
try:
- country = elt.dom.attributes['iso3166'].nodeValue
+ territory = elt.dom.attributes['iso3166'].nodeValue
except KeyError:
continue
for child in elt.findAllChildren('currency'):
@@ -596,16 +565,16 @@ enumdata.py (keeping the old name as an alias):
break
if iso:
for tag, data in source.find(
- 'currencyData/fractions/info[iso4217={}]'.format(iso)):
+ f'currencyData/fractions/info[iso4217={iso}]'):
digits = data['digits']
rounding = data['rounding']
- cache[country] = iso, digits, rounding
+ cache[territory] = iso, digits, rounding
assert cache
return cache
@property
- def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join):
+ def __unDistinguishedAttributes(self, cache = {}):
"""Mapping from tag names to lists of attributes.
LDML defines some attributes as 'distinguishing': if a node
@@ -625,7 +594,7 @@ enumdata.py (keeping the old name as an alias):
return cache
- def __scanLdmlDtd(self, joinPath = os.path.join):
+ def __scanLdmlDtd(self):
"""Scan the LDML DTD, record CLDR version
Yields (tag, attrs) pairs: on elements with a given tag,
@@ -635,7 +604,7 @@ enumdata.py (keeping the old name as an alias):
Sets self.__cldrVersion as a side-effect, since this
information is found in the same file."""
- with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd:
+ with self.__open('common/dtd/ldml.dtd') as dtd:
tag, ignored, last = None, None, None
for line in dtd:
@@ -670,15 +639,15 @@ enumdata.py (keeping the old name as an alias):
def __enumMap(self, key, cache = {}):
if not cache:
cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
- # They're not actually lists: mappings from numeric value
- # to pairs of full name and short code. What we want, in
- # each case, is a mapping from code to the other two.
- from enumdata import language_list, script_list, country_list
- for form, book, empty in (('language', language_list, 'AnyLanguage'),
- ('script', script_list, 'AnyScript'),
- ('country', country_list, 'AnyTerritory')):
- cache[form] = dict((pair[1], (num, pair[0]))
- for num, pair in book.items() if pair[0] != 'C')
+ # They're mappings from numeric value to pairs of full
+ # name and short code. What we want, in each case, is a
+ # mapping from code to the other two.
+ from enumdata import language_map, script_map, territory_map
+ for form, book, empty in (('language', language_map, 'AnyLanguage'),
+ ('script', script_map, 'AnyScript'),
+ ('territory', territory_map, 'AnyTerritory')):
+ cache[form] = {pair[1]: (num, pair[0])
+ for num, pair in book.items() if pair[0] != 'C'}
# (Have to filter out the C locale, as we give it the
# same (all space) code as AnyLanguage, whose code
# should probably be 'und' instead.)
@@ -693,9 +662,9 @@ enumdata.py (keeping the old name as an alias):
def __codeMap(self, key, cache = {},
# Maps our name for it to CLDR's name:
naming = {'language': 'languages', 'script': 'scripts',
- 'country': 'territories', 'variant': 'variants'}):
+ 'territory': 'territories', 'variant': 'variants'}):
if not cache:
- root = self.xml('common', 'main', 'en.xml').root.findUniqueChild('localeDisplayNames')
+ root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
for dst, src in naming.items():
cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
assert cache
@@ -721,7 +690,13 @@ enumdata.py (keeping the old name as an alias):
except (KeyError, ValueError, TypeError):
pass
else:
- if key not in seen or not elt.attributes.has_key('alt'):
+ # Prefer stand-alone forms of names when present, ignore other
+ # alt="..." entries. For example, Traditional and Simplified
+ # Han omit "Han" in the plain form, but include it for
+ # stand-alone. As the stand-alone version appears later, it
+ # over-writes the plain one.
+ if (key not in seen or 'alt' not in elt.attributes
+ or elt.attributes['alt'].nodeValue == 'stand-alone'):
yield key, value
seen.add(key)
@@ -730,7 +705,8 @@ enumdata.py (keeping the old name as an alias):
def __parentLocale(self, cache = {}):
# see http://www.unicode.org/reports/tr35/#Parent_Locales
if not cache:
- for tag, attrs in self.__supplementalData.find('parentLocales'):
+ for tag, attrs in self.__supplementalData.find('parentLocales',
+ ('component',)):
parent = attrs.get('parent', '')
for child in attrs['locales'].split():
cache[child] = parent
@@ -738,10 +714,9 @@ enumdata.py (keeping the old name as an alias):
return cache
- def __localeAsDoc(self, name, aliasFor = None,
- joinPath = os.path.join, exists = os.path.isfile):
- path = ('common', 'main', name + '.xml')
- if exists(joinPath(self.root, *path)):
+ def __localeAsDoc(self, name: str, aliasFor = None):
+ path = f'common/main/{name}.xml'
+ if self.root.joinpath(path).exists():
elt = self.__xml(path)
for child in Node(elt).findAllChildren('alias'):
try:
@@ -754,8 +729,8 @@ enumdata.py (keeping the old name as an alias):
return elt
if aliasFor:
- raise Error('Fatal error: found an alias "{}" -> "{}", but found no file for the alias'
- .format(aliasFor, name))
+ raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
+ 'but found no file for the alias')
def __scanLocaleRoots(self, name):
while name and name != 'root':
@@ -780,4 +755,4 @@ enumdata.py (keeping the old name as an alias):
return chain
# Unpolute the namespace: we don't need to export these.
-del minidom, CacheDict, os
+del minidom, CacheDict
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index 20dda77965..d3aa88ec38 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -1,48 +1,27 @@
-#!/usr/bin/env python2
-# coding=utf8
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-"""Convert CLDR data to qLocaleXML
-
-The CLDR data can be downloaded from CLDR_, which has a sub-directory
-for each version; you need the ``core.zip`` file for your version of
-choice (typically the latest). This script has had updates to cope up
-to v38.1; for later versions, we may need adaptations. Unpack the
-downloaded ``core.zip`` and check it has a common/main/ sub-directory:
-pass the path of that root of the download to this script as its first
-command-line argument. Pass the name of the file in which to write
-output as the second argument; either omit it or use '-' to select the
-standard output. This file is the input needed by
-``./qlocalexml2cpp.py``
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+"""Convert CLDR data to QLocaleXML
+
+The CLDR data can be downloaded as a zip-file from CLDR_, which has a
+sub-directory for each version; you need the ``core.zip`` file for
+your version of choice (typically the latest), which you should then
+unpack. Alternatively, you can clone the git repo from github_, which
+has a tag for each release and a maint/maint-$ver branch for each
+major version. Either way, the CLDR top-level directory should have a
+subdirectory called common/ which contains (among other things)
+subdirectories main/ and supplemental/.
+
+This script has had updates to cope up to v44.1; for later versions,
+we may need adaptations. Pass the path of the CLDR top-level directory
+to this script as its first command-line argument. Pass the name of
+the file in which to write output as the second argument; either omit
+it or use '-' to select the standard output. This file is the input
+needed by ``./qlocalexml2cpp.py``
When you update the CLDR data, be sure to also update
src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
-this script's output for unknown language, country or script messages;
+this script's output for unknown language, territory or script messages;
if any can be resolved, use their entry in common/main/en.xml to
append new entries to enumdata.py's lists and update documentation in
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
@@ -51,71 +30,73 @@ order.
While updating the locale data, check also for updates to MS-Win's
time zone names; see cldr2qtimezone.py for details.
-.. _CLDR: ftp://unicode.org/Public/cldr/
+All the scripts mentioned support --help to tell you how to use them.
+
+.. _CLDR: https://unicode.org/Public/cldr/
+.. _github: https://github.com/unicode-org/cldr
"""
-import os
-import sys
+from pathlib import Path
+import argparse
from cldr import CldrReader
from qlocalexml import QLocaleXmlWriter
-from enumdata import language_list, script_list, country_list
-
-def usage(name, err, message = ''):
- err.write("""Usage: {} path/to/cldr/common/main [out-file.xml]
-""".format(name)) # TODO: expand command-line, improve help message
- if message:
- err.write('\n' + message + '\n')
-
-def main(args, out, err):
- # TODO: make calendars a command-line option
- calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
-
- # TODO: make argument parsing more sophisticated
- name = args.pop(0)
- if not args:
- usage(name, err, 'Where is your CLDR data tree ?')
- return 1
-
- root = args.pop(0)
- if not os.path.exists(os.path.join(root, 'common', 'main', 'root.xml')):
- usage(name, err,
- 'First argument is the root of the CLDR tree: found no common/main/root.xml under '
- + root)
- return 1
-
- xml = args.pop(0) if args else None
+
+
+def main(argv, out, err):
+ """Generate a QLocaleXML file from CLDR data.
+
+ Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+ arguments. In argv[1:], it expects the root of the CLDR data
+ directory as first parameter and, optionally, the name of the file
+ in which to save QLocaleXML data as second parameter (omit it or
+ pass '-' to write to out instead). It accepts a --calendars
+ option to select which calendars to support (all available by
+ default)."""
+ all_calendars = ['gregorian', 'persian', 'islamic']
+
+ parser = argparse.ArgumentParser(
+ prog=Path(argv[0]).name,
+ description='Generate QLocaleXML from CLDR data.',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
+ parser.add_argument('out_file', help='output XML file name',
+ nargs='?', metavar='out-file.xml')
+ parser.add_argument('--calendars', help='select calendars to emit data for',
+ nargs='+', metavar='CALENDAR',
+ choices=all_calendars, default=all_calendars)
+
+ args = parser.parse_args(argv[1:])
+
+ root = Path(args.cldr_path)
+ root_xml_path = 'common/main/root.xml'
+
+ if not root.joinpath(root_xml_path).exists():
+ parser.error('First argument is the root of the CLDR tree: '
+ f'found no {root_xml_path} under {root}')
+
+ xml = args.out_file
if not xml or xml == '-':
emit = out
elif not xml.endswith('.xml'):
- usage(name, err, 'Please use a .xml extension on your output file name, not ' + xml)
- return 1
+ parser.error(f'Please use a .xml extension on your output file name, not {xml}')
else:
try:
emit = open(xml, 'w')
except IOError as e:
- usage(name, err, 'Failed to open "{}" to write output to it\n'.format(xml))
- return 1
-
- if args:
- usage(name, err, 'Too many arguments - excess: ' + ' '.join(args))
- return 1
-
- if emit.encoding != 'UTF-8' or (emit.encoding is None and sys.getdefaultencoding() != 'UTF-8'):
- reload(sys) # Weirdly, this gets a richer sys module than the plain import got us !
- sys.setdefaultencoding('UTF-8')
+ parser.error(f'Failed to open "{xml}" to write output to it')
# TODO - command line options to tune choice of grumble and whitter:
reader = CldrReader(root, err.write, err.write)
writer = QLocaleXmlWriter(emit.write)
writer.version(reader.root.cldrVersion)
- writer.enumData(language_list, script_list, country_list)
+ writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
- writer.locales(reader.readLocales(calendars), calendars)
+ writer.locales(reader.readLocales(args.calendars), args.calendars)
- writer.close()
+ writer.close(err.write)
return 0
if __name__ == '__main__':
+ import sys
sys.exit(main(sys.argv, sys.stdout, sys.stderr))
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
index 7c06fe8561..27987d5a58 100755
--- a/util/locale_database/cldr2qtimezone.py
+++ b/util/locale_database/cldr2qtimezone.py
@@ -1,46 +1,23 @@
-#!/usr/bin/env python2
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parse CLDR data for QTimeZone use with MS-Windows
Script to parse the CLDR common/supplemental/windowsZones.xml file and
-encode for use in QTimeZone. See ``./cldr2qlocalexml.py`` for where
-to get the CLDR data. Pass its root directory as first parameter to
-this script and the qtbase root directory as second parameter. It
-shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready
-for use.
+prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
+where to get the CLDR data. Pass its root directory as first parameter
+to this script. You can optionally pass the qtbase root directory as
+second parameter; it defaults to the root of the checkout containing
+this script. This script updates qtbase's
+src/corelib/time/qtimezoneprivate_data_p.h with the new data.
"""
-import os
import datetime
+from pathlib import Path
import textwrap
+import argparse
-from localetools import unicode2hex, wrap_list, Error, SourceFileEditor
+from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
from cldr import CldrAccess
### Data that may need updates in response to new entries in the CLDR file ###
@@ -54,194 +31,208 @@ from cldr import CldrAccess
# Not public so may safely be changed. Please keep in alphabetic order by ID.
# ( Windows Id, Offset Seconds )
windowsIdList = (
- (u'Afghanistan Standard Time', 16200),
- (u'Alaskan Standard Time', -32400),
- (u'Aleutian Standard Time', -36000),
- (u'Altai Standard Time', 25200),
- (u'Arab Standard Time', 10800),
- (u'Arabian Standard Time', 14400),
- (u'Arabic Standard Time', 10800),
- (u'Argentina Standard Time', -10800),
- (u'Astrakhan Standard Time', 14400),
- (u'Atlantic Standard Time', -14400),
- (u'AUS Central Standard Time', 34200),
- (u'Aus Central W. Standard Time', 31500),
- (u'AUS Eastern Standard Time', 36000),
- (u'Azerbaijan Standard Time', 14400),
- (u'Azores Standard Time', -3600),
- (u'Bahia Standard Time', -10800),
- (u'Bangladesh Standard Time', 21600),
- (u'Belarus Standard Time', 10800),
- (u'Bougainville Standard Time', 39600),
- (u'Canada Central Standard Time', -21600),
- (u'Cape Verde Standard Time', -3600),
- (u'Caucasus Standard Time', 14400),
- (u'Cen. Australia Standard Time', 34200),
- (u'Central America Standard Time', -21600),
- (u'Central Asia Standard Time', 21600),
- (u'Central Brazilian Standard Time', -14400),
- (u'Central Europe Standard Time', 3600),
- (u'Central European Standard Time', 3600),
- (u'Central Pacific Standard Time', 39600),
- (u'Central Standard Time (Mexico)', -21600),
- (u'Central Standard Time', -21600),
- (u'China Standard Time', 28800),
- (u'Chatham Islands Standard Time', 45900),
- (u'Cuba Standard Time', -18000),
- (u'Dateline Standard Time', -43200),
- (u'E. Africa Standard Time', 10800),
- (u'E. Australia Standard Time', 36000),
- (u'E. Europe Standard Time', 7200),
- (u'E. South America Standard Time', -10800),
- (u'Easter Island Standard Time', -21600),
- (u'Eastern Standard Time', -18000),
- (u'Eastern Standard Time (Mexico)', -18000),
- (u'Egypt Standard Time', 7200),
- (u'Ekaterinburg Standard Time', 18000),
- (u'Fiji Standard Time', 43200),
- (u'FLE Standard Time', 7200),
- (u'Georgian Standard Time', 14400),
- (u'GMT Standard Time', 0),
- (u'Greenland Standard Time', -10800),
- (u'Greenwich Standard Time', 0),
- (u'GTB Standard Time', 7200),
- (u'Haiti Standard Time', -18000),
- (u'Hawaiian Standard Time', -36000),
- (u'India Standard Time', 19800),
- (u'Iran Standard Time', 12600),
- (u'Israel Standard Time', 7200),
- (u'Jordan Standard Time', 7200),
- (u'Kaliningrad Standard Time', 7200),
- (u'Korea Standard Time', 32400),
- (u'Libya Standard Time', 7200),
- (u'Line Islands Standard Time', 50400),
- (u'Lord Howe Standard Time', 37800),
- (u'Magadan Standard Time', 36000),
- (u'Magallanes Standard Time', -10800), # permanent DST
- (u'Marquesas Standard Time', -34200),
- (u'Mauritius Standard Time', 14400),
- (u'Middle East Standard Time', 7200),
- (u'Montevideo Standard Time', -10800),
- (u'Morocco Standard Time', 0),
- (u'Mountain Standard Time (Mexico)', -25200),
- (u'Mountain Standard Time', -25200),
- (u'Myanmar Standard Time', 23400),
- (u'N. Central Asia Standard Time', 21600),
- (u'Namibia Standard Time', 3600),
- (u'Nepal Standard Time', 20700),
- (u'New Zealand Standard Time', 43200),
- (u'Newfoundland Standard Time', -12600),
- (u'Norfolk Standard Time', 39600),
- (u'North Asia East Standard Time', 28800),
- (u'North Asia Standard Time', 25200),
- (u'North Korea Standard Time', 30600),
- (u'Omsk Standard Time', 21600),
- (u'Pacific SA Standard Time', -10800),
- (u'Pacific Standard Time', -28800),
- (u'Pacific Standard Time (Mexico)', -28800),
- (u'Pakistan Standard Time', 18000),
- (u'Paraguay Standard Time', -14400),
- (u'Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
- (u'Romance Standard Time', 3600),
- (u'Russia Time Zone 3', 14400),
- (u'Russia Time Zone 10', 39600),
- (u'Russia Time Zone 11', 43200),
- (u'Russian Standard Time', 10800),
- (u'SA Eastern Standard Time', -10800),
- (u'SA Pacific Standard Time', -18000),
- (u'SA Western Standard Time', -14400),
- (u'Saint Pierre Standard Time', -10800), # New France
- (u'Sakhalin Standard Time', 39600),
- (u'Samoa Standard Time', 46800),
- (u'Sao Tome Standard Time', 0),
- (u'Saratov Standard Time', 14400),
- (u'SE Asia Standard Time', 25200),
- (u'Singapore Standard Time', 28800),
- (u'South Africa Standard Time', 7200),
- (u'Sri Lanka Standard Time', 19800),
- (u'Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
- (u'Syria Standard Time', 7200),
- (u'Taipei Standard Time', 28800),
- (u'Tasmania Standard Time', 36000),
- (u'Tocantins Standard Time', -10800),
- (u'Tokyo Standard Time', 32400),
- (u'Tomsk Standard Time', 25200),
- (u'Tonga Standard Time', 46800),
- (u'Transbaikal Standard Time', 32400), # Yakutsk
- (u'Turkey Standard Time', 7200),
- (u'Turks And Caicos Standard Time', -14400),
- (u'Ulaanbaatar Standard Time', 28800),
- (u'US Eastern Standard Time', -18000),
- (u'US Mountain Standard Time', -25200),
- (u'UTC-11', -39600),
- (u'UTC-09', -32400),
- (u'UTC-08', -28800),
- (u'UTC-02', -7200),
- (u'UTC', 0),
- (u'UTC+12', 43200),
- (u'UTC+13', 46800),
- (u'Venezuela Standard Time', -16200),
- (u'Vladivostok Standard Time', 36000),
- (u'Volgograd Standard Time', 14400),
- (u'W. Australia Standard Time', 28800),
- (u'W. Central Africa Standard Time', 3600),
- (u'W. Europe Standard Time', 3600),
- (u'W. Mongolia Standard Time', 25200), # Hovd
- (u'West Asia Standard Time', 18000),
- (u'West Bank Standard Time', 7200),
- (u'West Pacific Standard Time', 36000),
- (u'Yakutsk Standard Time', 32400),
- (u'Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
+ ('Afghanistan Standard Time', 16200),
+ ('Alaskan Standard Time', -32400),
+ ('Aleutian Standard Time', -36000),
+ ('Altai Standard Time', 25200),
+ ('Arab Standard Time', 10800),
+ ('Arabian Standard Time', 14400),
+ ('Arabic Standard Time', 10800),
+ ('Argentina Standard Time', -10800),
+ ('Astrakhan Standard Time', 14400),
+ ('Atlantic Standard Time', -14400),
+ ('AUS Central Standard Time', 34200),
+ ('Aus Central W. Standard Time', 31500),
+ ('AUS Eastern Standard Time', 36000),
+ ('Azerbaijan Standard Time', 14400),
+ ('Azores Standard Time', -3600),
+ ('Bahia Standard Time', -10800),
+ ('Bangladesh Standard Time', 21600),
+ ('Belarus Standard Time', 10800),
+ ('Bougainville Standard Time', 39600),
+ ('Canada Central Standard Time', -21600),
+ ('Cape Verde Standard Time', -3600),
+ ('Caucasus Standard Time', 14400),
+ ('Cen. Australia Standard Time', 34200),
+ ('Central America Standard Time', -21600),
+ ('Central Asia Standard Time', 21600),
+ ('Central Brazilian Standard Time', -14400),
+ ('Central Europe Standard Time', 3600),
+ ('Central European Standard Time', 3600),
+ ('Central Pacific Standard Time', 39600),
+ ('Central Standard Time', -21600),
+ ('Central Standard Time (Mexico)', -21600),
+ ('Chatham Islands Standard Time', 45900),
+ ('China Standard Time', 28800),
+ ('Cuba Standard Time', -18000),
+ ('Dateline Standard Time', -43200),
+ ('E. Africa Standard Time', 10800),
+ ('E. Australia Standard Time', 36000),
+ ('E. Europe Standard Time', 7200),
+ ('E. South America Standard Time', -10800),
+ ('Easter Island Standard Time', -21600),
+ ('Eastern Standard Time', -18000),
+ ('Eastern Standard Time (Mexico)', -18000),
+ ('Egypt Standard Time', 7200),
+ ('Ekaterinburg Standard Time', 18000),
+ ('Fiji Standard Time', 43200),
+ ('FLE Standard Time', 7200),
+ ('Georgian Standard Time', 14400),
+ ('GMT Standard Time', 0),
+ ('Greenland Standard Time', -10800),
+ ('Greenwich Standard Time', 0),
+ ('GTB Standard Time', 7200),
+ ('Haiti Standard Time', -18000),
+ ('Hawaiian Standard Time', -36000),
+ ('India Standard Time', 19800),
+ ('Iran Standard Time', 12600),
+ ('Israel Standard Time', 7200),
+ ('Jordan Standard Time', 7200),
+ ('Kaliningrad Standard Time', 7200),
+ ('Korea Standard Time', 32400),
+ ('Libya Standard Time', 7200),
+ ('Line Islands Standard Time', 50400),
+ ('Lord Howe Standard Time', 37800),
+ ('Magadan Standard Time', 36000),
+ ('Magallanes Standard Time', -10800), # permanent DST
+ ('Marquesas Standard Time', -34200),
+ ('Mauritius Standard Time', 14400),
+ ('Middle East Standard Time', 7200),
+ ('Montevideo Standard Time', -10800),
+ ('Morocco Standard Time', 0),
+ ('Mountain Standard Time', -25200),
+ ('Mountain Standard Time (Mexico)', -25200),
+ ('Myanmar Standard Time', 23400),
+ ('N. Central Asia Standard Time', 21600),
+ ('Namibia Standard Time', 3600),
+ ('Nepal Standard Time', 20700),
+ ('New Zealand Standard Time', 43200),
+ ('Newfoundland Standard Time', -12600),
+ ('Norfolk Standard Time', 39600),
+ ('North Asia East Standard Time', 28800),
+ ('North Asia Standard Time', 25200),
+ ('North Korea Standard Time', 30600),
+ ('Omsk Standard Time', 21600),
+ ('Pacific SA Standard Time', -10800),
+ ('Pacific Standard Time', -28800),
+ ('Pacific Standard Time (Mexico)', -28800),
+ ('Pakistan Standard Time', 18000),
+ ('Paraguay Standard Time', -14400),
+ ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
+ ('Romance Standard Time', 3600),
+ ('Russia Time Zone 10', 39600),
+ ('Russia Time Zone 11', 43200),
+ ('Russia Time Zone 3', 14400),
+ ('Russian Standard Time', 10800),
+ ('SA Eastern Standard Time', -10800),
+ ('SA Pacific Standard Time', -18000),
+ ('SA Western Standard Time', -14400),
+ ('Saint Pierre Standard Time', -10800), # New France
+ ('Sakhalin Standard Time', 39600),
+ ('Samoa Standard Time', 46800),
+ ('Sao Tome Standard Time', 0),
+ ('Saratov Standard Time', 14400),
+ ('SE Asia Standard Time', 25200),
+ ('Singapore Standard Time', 28800),
+ ('South Africa Standard Time', 7200),
+ ('South Sudan Standard Time', 7200),
+ ('Sri Lanka Standard Time', 19800),
+ ('Sudan Standard Time', 7200), # Sudan itself; South Sudan has its own entry, above
+ ('Syria Standard Time', 7200),
+ ('Taipei Standard Time', 28800),
+ ('Tasmania Standard Time', 36000),
+ ('Tocantins Standard Time', -10800),
+ ('Tokyo Standard Time', 32400),
+ ('Tomsk Standard Time', 25200),
+ ('Tonga Standard Time', 46800),
+ ('Transbaikal Standard Time', 32400), # Yakutsk
+ ('Turkey Standard Time', 7200),
+ ('Turks And Caicos Standard Time', -14400),
+ ('Ulaanbaatar Standard Time', 28800),
+ ('US Eastern Standard Time', -18000),
+ ('US Mountain Standard Time', -25200),
+ ('UTC', 0),
+ # Lexical order: '+' < '-'
+ ('UTC+12', 43200),
+ ('UTC+13', 46800),
+ ('UTC-02', -7200),
+ ('UTC-08', -28800),
+ ('UTC-09', -32400),
+ ('UTC-11', -39600),
+ ('Venezuela Standard Time', -16200),
+ ('Vladivostok Standard Time', 36000),
+ ('Volgograd Standard Time', 14400),
+ ('W. Australia Standard Time', 28800),
+ ('W. Central Africa Standard Time', 3600),
+ ('W. Europe Standard Time', 3600),
+ ('W. Mongolia Standard Time', 25200), # Hovd
+ ('West Asia Standard Time', 18000),
+ ('West Bank Standard Time', 7200),
+ ('West Pacific Standard Time', 36000),
+ ('Yakutsk Standard Time', 32400),
+ ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
)
# List of standard UTC IDs to use. Not public so may be safely changed.
# Do not remove IDs, as each entry is part of the API/behavior guarantee.
+# IDs for the same offset shall be space-joined; list the preferred ID first.
# ( UTC Id, Offset Seconds )
utcIdList = (
- (u'UTC', 0), # Goes first so is default
- (u'UTC-14:00', -50400),
- (u'UTC-13:00', -46800),
- (u'UTC-12:00', -43200),
- (u'UTC-11:00', -39600),
- (u'UTC-10:00', -36000),
- (u'UTC-09:00', -32400),
- (u'UTC-08:00', -28800),
- (u'UTC-07:00', -25200),
- (u'UTC-06:00', -21600),
- (u'UTC-05:00', -18000),
- (u'UTC-04:30', -16200),
- (u'UTC-04:00', -14400),
- (u'UTC-03:30', -12600),
- (u'UTC-03:00', -10800),
- (u'UTC-02:00', -7200),
- (u'UTC-01:00', -3600),
- (u'UTC-00:00', 0),
- (u'UTC+00:00', 0),
- (u'UTC+01:00', 3600),
- (u'UTC+02:00', 7200),
- (u'UTC+03:00', 10800),
- (u'UTC+03:30', 12600),
- (u'UTC+04:00', 14400),
- (u'UTC+04:30', 16200),
- (u'UTC+05:00', 18000),
- (u'UTC+05:30', 19800),
- (u'UTC+05:45', 20700),
- (u'UTC+06:00', 21600),
- (u'UTC+06:30', 23400),
- (u'UTC+07:00', 25200),
- (u'UTC+08:00', 28800),
- (u'UTC+08:30', 30600),
- (u'UTC+09:00', 32400),
- (u'UTC+09:30', 34200),
- (u'UTC+10:00', 36000),
- (u'UTC+11:00', 39600),
- (u'UTC+12:00', 43200),
- (u'UTC+13:00', 46800),
- (u'UTC+14:00', 50400),
+ ('UTC-14:00', -50400),
+ ('UTC-13:00', -46800),
+ ('UTC-12:00', -43200),
+ ('UTC-11:00', -39600),
+ ('UTC-10:00', -36000),
+ ('UTC-09:00', -32400),
+ ('UTC-08:00', -28800),
+ ('UTC-07:00', -25200),
+ ('UTC-06:00', -21600),
+ ('UTC-05:00', -18000),
+ ('UTC-04:30', -16200),
+ ('UTC-04:00', -14400),
+ ('UTC-03:30', -12600),
+ ('UTC-03:00', -10800),
+ ('UTC-02:00', -7200),
+ ('UTC-01:00', -3600),
+ ('UTC', 0), # Goes first (among zero-offset) to be default
+ ('UTC+00:00', 0),
+ ('UTC-00:00', 0), # Should recognize, but avoid using (see Note below).
+ ('UTC+01:00', 3600),
+ ('UTC+02:00', 7200),
+ ('UTC+03:00', 10800),
+ ('UTC+03:30', 12600),
+ ('UTC+04:00', 14400),
+ ('UTC+04:30', 16200),
+ ('UTC+05:00', 18000),
+ ('UTC+05:30', 19800),
+ ('UTC+05:45', 20700),
+ ('UTC+06:00', 21600),
+ ('UTC+06:30', 23400),
+ ('UTC+07:00', 25200),
+ ('UTC+08:00', 28800),
+ ('UTC+08:30', 30600),
+ ('UTC+09:00', 32400),
+ ('UTC+09:30', 34200),
+ ('UTC+10:00', 36000),
+ ('UTC+11:00', 39600),
+ ('UTC+12:00', 43200),
+ ('UTC+13:00', 46800),
+ ('UTC+14:00', 50400),
)
### End of data that may need updates in response to CLDR ###
+# Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a
+# way to indicate that a date-time has been converted to UTC but its
+# use should not be understood to say anything about the local time of
+# the origin of the message using it. However, ISO 8601 has, since
+# 2000, forbidden this as an offset suffix. The more recent compromise
+# is to use Z to convey the meaning RFC3339 gave to -00:00. So the use
+# of -00:00 as offset suffix should be avoided (and, by extension,
+# likewise for UTC-00:00 as a zone ID), but this suffix (and ID)
+# should be recognized when consuming data generated by other sources,
+# for backwards compatibility.
+
class ByteArrayData:
def __init__(self):
self.data = []
@@ -255,17 +246,19 @@ class ByteArrayData:
lst = unicode2hex(s)
index = len(self.data)
if index > 0xffff:
- raise Error('Index ({}) outside the uint16 range !'.format(index))
+ raise Error(f'Index ({index}) outside the uint16 range !')
self.hash[s] = index
self.data += lst
return index
def write(self, out, name):
- out('\nstatic const char {}[] = {{\n'.format(name))
- out(wrap_list(self.data))
+ out(f'\nstatic constexpr char {name}[] = {{\n')
+ out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
+ # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
out('\n};\n')
class ZoneIdWriter (SourceFileEditor):
+ # All the output goes into namespace QtTimeZoneCldr.
def write(self, version, defaults, windowsIds):
self.__writeWarning(version)
windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
@@ -273,10 +266,10 @@ class ZoneIdWriter (SourceFileEditor):
iana.write(self.writer.write, 'ianaIdData')
def __writeWarning(self, version):
- self.writer.write("""
+ self.writer.write(f"""
/*
- This part of the file was generated on {} from the
- Common Locale Data Repository v{} file supplemental/windowsZones.xml
+ This part of the file was generated on {datetime.date.today()} from the
+ Common Locale Data Repository v{version} file supplemental/windowsZones.xml
http://www.unicode.org/cldr/
@@ -284,111 +277,110 @@ class ZoneIdWriter (SourceFileEditor):
edited) CLDR data; see qtbase/util/locale_database/.
*/
-""".format(str(datetime.date.today()), version))
+""")
@staticmethod
def __writeTables(out, defaults, windowsIds):
windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()
# Write Windows/IANA table
- out('// Windows ID Key, Country Enum, IANA ID Index\n')
- out('static const QZoneData zoneDataTable[] = {\n')
+ out('// Windows ID Key, Territory Enum, IANA ID Index\n')
+ out('static constexpr ZoneData zoneDataTable[] = {\n')
+ # Sorted by (Windows ID Key, territory enum)
for index, data in sorted(windowsIds.items()):
out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
- data['windowsKey'], data['countryId'],
+ data['windowsKey'], data['territoryId'],
ianaIdData.append(data['ianaList']),
- data['windowsId'], data['country']))
- out(' { 0, 0, 0 } // Trailing zeroes\n')
+ data['windowsId'], data['territory']))
out('};\n\n')
# Write Windows ID key table
out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
- out('static const QWindowsData windowsDataTable[] = {\n')
+ out('static constexpr WindowsData windowsDataTable[] = {\n')
+ # Sorted by Windows ID key; sorting case-insensitively by
+ # Windows ID must give the same order.
+ winIdNames = [x.lower() for x, y in windowsIdList]
+ assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \
+ [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y]
for index, pair in enumerate(windowsIdList, 1):
out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
index,
windowsIdData.append(pair[0]),
ianaIdData.append(defaults[index]),
pair[1], pair[0]))
- out(' { 0, 0, 0, 0 } // Trailing zeroes\n')
out('};\n\n')
+ offsetMap = {}
+ for pair in utcIdList:
+ offsetMap[pair[1]] = offsetMap.get(pair[1], ()) + (pair[0],)
# Write UTC ID key table
out('// IANA ID Index, UTC Offset\n')
- out('static const QUtcData utcDataTable[] = {\n')
- for pair in utcIdList:
+ out('static constexpr UtcData utcDataTable[] = {\n')
+ for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
+ names = offsetMap[offset];
out(' {{ {:6d},{:6d} }}, // {}\n'.format(
- ianaIdData.append(pair[0]), pair[1], pair[0]))
- out(' { 0, 0 } // Trailing zeroes\n')
+ ianaIdData.append(' '.join(names)), offset, names[0]))
out('};\n')
return windowsIdData, ianaIdData
-def usage(err, name, message=''):
- err.write("""Usage: {} path/to/cldr/root path/to/qtbase
-""".format(name)) # TODO: more interesting message
- if message:
- err.write('\n' + message + '\n')
-def main(args, out, err):
+def main(out, err):
"""Parses CLDR's data and updates Qt's representation of it.
- Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+ Takes sys.stdout, sys.stderr (or equivalents) as
arguments. Expects one or two command-line arguments: the root of the
unpacked CLDR data-file tree and, optionally, the root of the qtbase module's
checkout. Updates QTimeZone's private data about Windows time-zone
IDs."""
- name = args.pop(0)
- if len(args) != 2:
- usage(err, name, "Expected two arguments")
- return 1
+ parser = argparse.ArgumentParser(
+ description="Update Qt's CLDR-derived timezone data.")
+ parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
+ parser.add_argument('qtbase_path',
+ help='path to the root of the qtbase source tree',
+ nargs='?', default=qtbase_root)
- cldrPath = args.pop(0)
- qtPath = args.pop(0)
+ args = parser.parse_args()
- if not os.path.isdir(qtPath):
- usage(err, name, "No such Qt directory: " + qtPath)
- return 1
- if not os.path.isdir(cldrPath):
- usage(err, name, "No such CLDR directory: " + cldrPath)
- return 1
+ cldrPath = Path(args.cldr_path)
+ qtPath = Path(args.qtbase_path)
- dataFilePath = os.path.join(qtPath, 'src', 'corelib', 'time', 'qtimezoneprivate_data_p.h')
- if not os.path.isfile(dataFilePath):
- usage(err, name, 'No such file: ' + dataFilePath)
- return 1
+ if not qtPath.is_dir():
+ parser.error(f"No such Qt directory: {qtPath}")
+
+ if not cldrPath.is_dir():
+ parser.error(f"No such CLDR directory: {cldrPath}")
+
+ dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
+
+ if not dataFilePath.is_file():
+ parser.error(f'No such file: {dataFilePath}')
try:
version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
except IOError as e:
- usage(err, name,
- 'Failed to open common/supplemental/windowsZones.xml: ' + (e.message or e.args[1]))
+ parser.error(
+ f'Failed to open common/supplemental/windowsZones.xml: {e}')
return 1
except Error as e:
err.write('\n'.join(textwrap.wrap(
- 'Failed to read windowsZones.xml: ' + (e.message or e.args[1]),
+ f'Failed to read windowsZones.xml: {e}',
subsequent_indent=' ', width=80)) + '\n')
return 1
out.write('Input file parsed, now writing data\n')
- try:
- writer = ZoneIdWriter(dataFilePath, qtPath)
- except IOError as e:
- err.write('Failed to open files to transcribe: {}'.format(e.message or e.args[1]))
- return 1
try:
- writer.write(version, defaults, winIds)
- except Error as e:
- writer.cleanup()
- err.write('\nError in Windows ID data: ' + e.message + '\n')
+ with ZoneIdWriter(dataFilePath, qtPath) as writer:
+ writer.write(version, defaults, winIds)
+ except Exception as e:
+ err.write(f'\nError while updating timezone data: {e}\n')
return 1
- writer.close()
- out.write('Data generation completed, please check the new file at ' + dataFilePath + '\n')
+ out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
return 0
if __name__ == '__main__':
import sys
- sys.exit(main(sys.argv, sys.stdout, sys.stderr))
+ sys.exit(main(sys.stdout, sys.stderr))
diff --git a/util/locale_database/dateconverter.py b/util/locale_database/dateconverter.py
index 1990fe0c61..8ca15405f7 100755..100644
--- a/util/locale_database/dateconverter.py
+++ b/util/locale_database/dateconverter.py
@@ -1,107 +1,195 @@
-#!/usr/bin/env python
-#############################################################################
-##
-## Copyright (C) 2016 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-
-import re
-
-def _convert_pattern(pattern):
- # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
- qt_regexps = {
- r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
- r"L" : "M", # stand-alone month names. not supported.
- r"g{1,}": "", # modified julian day. not supported.
- r"S{1,}" : "", # fractional seconds. not supported.
- r"A{1,}" : "" # milliseconds in day. not supported.
- }
- qt_patterns = {
- "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
- "y" : "yyyy", # four-digit year without leading zeroes
- "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
- "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
- "MMMMM" : "MMM", # narrow month name.
- "LLLLL" : "MMM", # stand-alone narrow month name.
- "l" : "", # special symbol for chinese leap month. not supported.
- "w" : "", "W" : "", # week of year/month. not supported.
- "D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
- "F" : "", # day of week in month. not supported.
- "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
- "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
- "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
- "a" : "AP", # AM/PM
- "K" : "h", # Hour 0-11
- "k" : "H", # Hour 1-24
- "j" : "", # special reserved symbol.
- "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
- "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
- "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
- "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone
- }
- if qt_patterns.has_key(pattern):
- return qt_patterns[pattern]
- for r,v in qt_regexps.items():
- pattern = re.sub(r, v, pattern)
- return pattern
-
-def convert_date(input):
- result = ""
- patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
- last = ""
- inquote = 0
- chars_to_strip = " -"
- for c in input:
- if c == "'":
- inquote = inquote + 1
- if inquote % 2 == 0:
- if c in patterns:
- if not last:
- last = c
- else:
- if c in last:
- last += c
- else:
- # pattern changed
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- last = c
- continue
- if last:
- # pattern ended
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- last = ""
- result += c
- if last:
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- return result.lstrip(chars_to_strip)
+# Copyright (C) 2016 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+class Converter (object):
+ """Conversion between CLDR and Qt datetime formats.
+
+ Keep in sync with qlocale_mac.mm's macToQtFormat().
+ The definitive source of truth is:
+ https://www.unicode.org/reports/tr35/tr35-68/tr35-dates.html#Date_Field_Symbol_Table
+
+ See convert() for explanation of the approach taken. Each method
+ with a single-letter name is used to scan a prefix of a text,
+ presumed to begin with that letter (or one Qt treats as equivalent
+ to it) and returns a pair (Qt format, length), to use the given Qt
+ format in place of text[:length]. In all cases, length must be
+ positive."""
+
+ @staticmethod
+ def __is_reserved(ch):
+ """Every ASCII letter is a reserved symbol in CLDR datetime formats"""
+ assert len(ch) == 1, ch
+ return ch.isascii() and ch.isalpha();
+ @staticmethod
+ def __count_first(text):
+ """How many of text[0] appear at the start of text ?"""
+ assert text
+ return len(text) - len(text.lstrip(text[0]))
+ @classmethod
+ def __verbatim(cls, text):
+ # Used where our format coincides with LDML's, including on length.
+ n = cls.__count_first(text)
+ return text[:n], n
+ @classmethod
+ def __treat_as(cls, mimic, text):
+ # Helper for aliases
+ n = cls.__count_first(text)
+ return mimic * n, n
+
+ # Please follow alphabetic order, with two cases of the same
+ # letter adjacent, lower before upper.
+ @classmethod
+ def a(cls, text): # AM/PM indicator; use locale-appropriate case
+ return 'Ap', cls.__count_first(text)
+
+ # A: Milliseconds in day. Not supported.
+ b = a # AM/PM/noon/midnight
+ B = a # "Flexible day period" (e.g. "at night" / "in the day")
+ # (Only zh_Hant_TW affected; zh_Hant_{HK,MO} use 'ah', mapped to
+ # 'Aph', so do the same here.)
+
+ @classmethod
+ def c(cls, text): # Stand-alone local day of week
+ # Has length-variants for several cases Qt doesn't support, as
+ # do 'e' and 'E': just map all simply to weekday, abbreviated
+ # or full.
+ n = cls.__count_first(text)
+ return ('dddd' if n == 4 else 'ddd'), n
+
+ # C: Input skeleton symbol
+ d = __verbatim # day (of month or of week, depends on length)
+ # D: Day of year. Not supported.
+ e = c # Local day of week
+ E = c # Just plain day of week
+ # F: Day of week in month. Not supported.
+ # g: Modified julian day. Not supported.
+ # G: Era. Not supported.
+ h = __verbatim # Hour 1-12, treat as 0-11
+ H = __verbatim # Hour 0-23
+ # j: Input skeleton symbol
+ # J: Input skeleton symbol
+
+ @classmethod
+ def k(cls, text): # Hour 1-24, treat as 0-23
+ return cls.__treat_as('H', text)
+ @classmethod
+ def K(cls, text): # Hour 0-11
+ return cls.__treat_as('h', text)
+
+ # l: Deprecated Chinese leap month indicator.
+ @classmethod
+ def L(cls, text): # Stand-alone month names: treat as plain month names.
+ n = cls.__count_first(text)
+ # Length five is narrow; treat same as abbreviated; anything
+ # shorter matches Qt's month forms.
+ return ('MMM' if n > 4 else 'M' * n), n
+
+ m = __verbatim # Minute within the hour.
+ M = L # Plain month names, possibly abbreviated, and numbers.
+
+ @classmethod
+ def O(cls, text): # Localized GMT±offset formats. Map to Z-or-UTC±HH:mm
+ return 't', cls.__count_first(text)
+
+ # q: Quarter. Not supported.
+ # Q: Quarter. Not supported.
+
+ s = __verbatim # Seconds within the minute.
+ @classmethod
+ def S(cls, text): # Fractional seconds. Only milliseconds supported.
+ # FIXME: spec is unclear, do we need to include the leading
+ # dot or not ? For now, no known locale actually exercises
+ # this, so stick with what we've done on Darwin since long
+ # before adding support here.
+ n = cls.__count_first(text)
+ return ('z' if n < 3 else 'zzz'), n
+
+ @classmethod
+ def u(cls, text): # Extended year (numeric)
+ # Officially, 'u' is simply the full year number, zero-padded
+ # to the length of the field. Qt's closest to that is four-digit.
+ # It explicitly has no special case for two-digit year.
+ return 'yyyy', cls.__count_first(text)
+
+ # U: Cyclic Year Name. Not supported
+ @classmethod
+ def v(cls, text): # Generic non-location format. Map to name.
+ return 'tttt', cls.__count_first(text)
+
+ V = v # Zone ID in various forms; VV is IANA ID. Map to name.
+ # w: Week of year. Not supported.
+ # W: Week of month. Not supported.
+
+ @classmethod
+ def x(cls, text): # Variations on offset format.
+ n = cls.__count_first(text)
+ # Ignore: n == 1 may omit minutes, n > 3 may include seconds.
+ return ('ttt' if n > 1 and n & 1 else 'tt'), n
+ X = x # Should use Z for zero offset.
+
+ @classmethod
+ def y(cls, text): # Year number.
+ n = cls.__count_first(text)
+ return ('yy' if n == 2 else 'yyyy'), n
+ # Y: Year for Week-of-year calendars
+
+ z = v # Specific (i.e. distinguish standard from DST) non-location format.
+ @classmethod
+ def Z(cls, text): # Offset format, optionally with GMT (Qt uses UTC) prefix.
+ n = cls.__count_first(text)
+ return ('tt' if n < 4 else 'ttt' if n > 4 else 't'), n
+
+ @staticmethod
+ def scanQuote(text): # Can't have ' as a method name, so handle specially
+ assert text.startswith("'")
+ i = text.find("'", 1) # Find the next; -1 if not present.
+ i = len(text) if i < 0 else i + 1 # Include the close-quote.
+ return text[:i], i
+
+ # Now put all of those to use:
+ @classmethod
+ def convert(cls, text):
+ """Convert a CLDR datetime format string into a Qt one.
+
+ Presumes that the caller will ''.join() the fragments it
+ yields. Each sequence of CLDR field symbols that corresponds
+ to a Qt format token is converted to it; all other CLDR field
+ symbols are discarded; the literals in between fields are
+ preserved verbatim, except that space and hyphen separators
+ immediately before a discarded field are discarded with it.
+
+ The approach is to look at the first symbol of the remainder
+ of the text, at each iteration, and use that first symbol to
+ select a function that will identify how much of the text to
+ consume and what to replace it with."""
+ sep = ''
+ while text:
+ ch = text[0]
+ if ch == "'":
+ quoted, length = cls.scanQuote(text)
+ text = text[length:]
+ sep += quoted
+ elif hasattr(cls, ch):
+ qtform, length = getattr(cls, ch)(text)
+ assert qtform and length > 0, (ch, text, qtform, length)
+ text = text[length:]
+ if sep:
+ yield sep
+ sep = ''
+ yield qtform
+ elif cls.__is_reserved(ch):
+ text = text[cls.__count_first(text):]
+ # Discard space or dash separator that was only there
+ # for the sake of the unsupported field:
+ sep = sep.rstrip(' -')
+ # TODO: should we also strip [ -]* from text
+ # immediately following unsupported forms ?
+ else:
+ sep += ch
+ text = text[1:]
+ if sep:
+ yield sep
+
+def convert_date(text):
+ # See Converter.convert()
+ return ''.join(Converter.convert(text))
diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py
index 4181e290ac..66b8840cb1 100644
--- a/util/locale_database/enumdata.py
+++ b/util/locale_database/enumdata.py
@@ -1,58 +1,62 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-# A run of cldr2qlocalexml.py will produce output reporting any
-# language, script and country codes it sees, in data, for which it
-# can find a name (taken always from en.xml) that could potentially be
-# used. There is no point adding a mapping for such a code unless the
-# CLDR's common/main/ contains an XML file for at least one locale
-# that exerciss it.
+"""Assorted enumerations implicated in public API.
-# Each *_list reflects the current values of its enums in qlocale.h;
-# if new xml language files are available in CLDR, these languages and
-# countries need to be *appended* to this list (for compatibility
-# between versions). Include any spaces present in names (scripts
-# shall squish them out for the enum entries) in *_list, but use the
-# squished forms of names in the *_aliases mappings.
+The numberings of these enumerations can only change at major
+versions. When new CLDR data implies adding entries, the new ones must
+go after all existing ones. See also zonedata.py for enumerations
+related to timezones and CLDR, which can more freely be changed
+between versions.
-# For a new major version (and only then), we can change the
-# numbering, so re-sort each list into alphabetic order (e.g. using
-# sort -k2); but keep the Any and C entries first. That's why those
-# are offset with a blank line, below. After doing that, regenerate
-# locale data as usual; this will cause a binary-incompatible change.
+A run of cldr2qlocalexml.py will produce output reporting any
+language, script and territory codes it sees, in data, for which it
+can find a name (taken always from en.xml) that could potentially be
+used. There is no point adding a mapping for such a code unless the
+CLDR's common/main/ contains an XML file for at least one locale that
+exercises it (and little point, even then, absent substantial data,
+ignoring draft='unconfirmed' entries).
-# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
-# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
-# languages so closely related to one another that they could also be
-# regarded as divergent dialects of the macrolanguage.
+Each *_map reflects the current values of its enums in qlocale.h; if
+new xml language files are available in CLDR, these languages and
+territories need to be *appended* to this list (for compatibility
+between versions). Include any spaces and dashes present in names
+(they'll be squished out for the enum entries) in *_map, but use the
+squished forms of names in the *_aliases mappings. The squishing also
+turns the first letter of each word into a capital so you can safely
+preserve the case of en.xml's name; but omit (or replace with space)
+any punctuation aside from dashes and map any accented letters to
+their un-accented plain ASCII. The two tables, for each enum, have
+the forms:
+* map { Numeric value: ("Proper name", "ISO code") }
+* alias { "OldName": "CurrentName" }
-language_list = {
+TODO: add support for marking entries as deprecated from a specified
+version. For aliases, that merely deprecates the name. Where we have a
+name for which CLDR offers no data, we may also want to deprecate
+entries in the map - although they may be worth keeping for the
+benefit of QLocaleSelector (see QTBUG-112765), if other
+locale-specific resources might have use of them.
+
+For a new major version (and only then), we can change the numbering,
+so re-sort each list into alphabetic order (e.g. using sort -k2); but
+keep the Any and C entries first. That's why those are offset with a
+blank line, below. After doing that, regenerate locale data as usual;
+this will cause a binary-incompatible change.
+
+Note on 'macrolanguage' comments: see QTBUG-107781 and 'ISO 639
+macrolanguage' on Wikipedia. A 'macrolanguage' is (loosely-speaking) a
+group of languages so closely related to one another that they could
+also be regarded as divergent dialects of the macrolanguage. In some
+cases this may mean a resource (such as translation or text-to-speech
+data) may describe itself as pertaining to the macrolanguage, implying
+its suitability for use in any of the languages within the
+macrolanguage. For example, no_NO might be used for a generic
+Norwegian resource, embracing both nb_NO and nn_NO.
+
+"""
+
+language_map = {
0: ("AnyLanguage", " "),
1: ("C", " "),
@@ -177,7 +181,7 @@ language_list = {
120: ("Japanese", "ja"),
121: ("Javanese", "jv"),
122: ("Jju", "kaj"),
- 123: ("Jola Fonyi", "dyo"),
+ 123: ("Jola-Fonyi", "dyo"),
124: ("Kabuverdianu", "kea"),
125: ("Kabyle", "kab"),
126: ("Kako", "kkj"),
@@ -218,7 +222,7 @@ language_list = {
161: ("Lojban", "jbo"),
162: ("Lower Sorbian", "dsb"),
163: ("Low German", "nds"),
- 164: ("Luba Katanga", "lu"),
+ 164: ("Luba-Katanga", "lu"),
165: ("Lule Sami", "smj"),
166: ("Luo", "luo"),
167: ("Luxembourgish", "lb"),
@@ -226,7 +230,7 @@ language_list = {
169: ("Macedonian", "mk"),
170: ("Machame", "jmc"),
171: ("Maithili", "mai"),
- 172: ("Makhuwa Meetto", "mgh"),
+ 172: ("Makhuwa-Meetto", "mgh"),
173: ("Makonde", "kde"),
174: ("Malagasy", "mg"), # macrolanguage
175: ("Malayalam", "ml"),
@@ -382,7 +386,31 @@ language_list = {
325: ("Zarma", "dje"),
326: ("Zhuang", "za"), # macrolanguage
327: ("Zulu", "zu"),
+ # added in CLDR v40
+ 328: ("Kaingang", "kgp"),
+ 329: ("Nheengatu", "yrl"),
+ # added in CLDR v42
+ 330: ("Haryanvi", "bgc"),
+ 331: ("Northern Frisian", "frr"),
+ 332: ("Rajasthani", "raj"),
+ 333: ("Moksha", "mdf"),
+ 334: ("Toki Pona", "tok"),
+ 335: ("Pijin", "pis"),
+ 336: ("Obolo", "ann"),
+ # added in CLDR v43
+ 337: ("Baluchi", "bal"),
+ 338: ("Ligurian", "lij"),
+ 339: ("Rohingya", "rhg"),
+ 340: ("Torwali", "trw"),
+ # added in CLDR v44
+ 341: ("Anii", "blo"),
+ 342: ("Kangri", "xnr"),
+ 343: ("Venetian", "vec"),
}
+# Don't add languages just because they exist; check CLDR does provide
+# substantial data for locales using it; and check, once added, they
+# don't show up in cldr2qlocalexml.py's unused listing. Do also check
+# the data's draft status; if it's (nearly) all unconfirmed, leave it.
language_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
@@ -406,9 +434,9 @@ language_aliases = {
'Navaho': 'Navajo',
'Oriya': 'Odia',
'Kirghiz': 'Kyrgyz'
- }
+}
-country_list = {
+territory_map = {
0: ("AnyTerritory", "ZZ"),
1: ("Afghanistan", "AF"),
@@ -420,7 +448,7 @@ country_list = {
7: ("Angola", "AO"),
8: ("Anguilla", "AI"),
9: ("Antarctica", "AQ"),
- 10: ("Antigua And Barbuda", "AG"),
+ 10: ("Antigua and Barbuda", "AG"),
11: ("Argentina", "AR"),
12: ("Armenia", "AM"),
13: ("Aruba", "AW"),
@@ -439,7 +467,7 @@ country_list = {
26: ("Bermuda", "BM"),
27: ("Bhutan", "BT"),
28: ("Bolivia", "BO"),
- 29: ("Bosnia And Herzegovina", "BA"),
+ 29: ("Bosnia and Herzegovina", "BA"),
30: ("Botswana", "BW"),
31: ("Bouvet Island", "BV"),
32: ("Brazil", "BR"),
@@ -457,7 +485,7 @@ country_list = {
44: ("Caribbean Netherlands", "BQ"),
45: ("Cayman Islands", "KY"),
46: ("Central African Republic", "CF"),
- 47: ("Ceuta And Melilla", "EA"),
+ 47: ("Ceuta and Melilla", "EA"),
48: ("Chad", "TD"),
49: ("Chile", "CL"),
50: ("China", "CN"),
@@ -466,8 +494,8 @@ country_list = {
53: ("Cocos Islands", "CC"),
54: ("Colombia", "CO"),
55: ("Comoros", "KM"),
- 56: ("Congo Brazzaville", "CG"),
- 57: ("Congo Kinshasa", "CD"),
+ 56: ("Congo - Brazzaville", "CG"),
+ 57: ("Congo - Kinshasa", "CD"),
58: ("Cook Islands", "CK"),
59: ("Costa Rica", "CR"),
60: ("Croatia", "HR"),
@@ -511,11 +539,11 @@ country_list = {
98: ("Guam", "GU"),
99: ("Guatemala", "GT"),
100: ("Guernsey", "GG"),
- 101: ("Guinea Bissau", "GW"),
+ 101: ("Guinea-Bissau", "GW"),
102: ("Guinea", "GN"),
103: ("Guyana", "GY"),
104: ("Haiti", "HT"),
- 105: ("Heard And McDonald Islands", "HM"),
+ 105: ("Heard and McDonald Islands", "HM"),
106: ("Honduras", "HN"),
107: ("Hong Kong", "HK"),
108: ("Hungary", "HU"),
@@ -525,12 +553,12 @@ country_list = {
112: ("Iran", "IR"),
113: ("Iraq", "IQ"),
114: ("Ireland", "IE"),
- 115: ("Isle Of Man", "IM"),
+ 115: ("Isle of Man", "IM"),
116: ("Israel", "IL"),
117: ("Italy", "IT"),
- # Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
- # or CoteDIvoire, either failing to make the d' separate from
- # Cote or messing with its case. So stick with Ivory Coast:
+ # Officially Côte d’Ivoire, which we'd need to map to CotedIvoire
+ # or CoteDIvoire, either failing to make the d' separate from Cote
+ # or messing with its case. So stick with Ivory Coast:
118: ("Ivory Coast", "CI"),
119: ("Jamaica", "JM"),
120: ("Japan", "JP"),
@@ -610,14 +638,14 @@ country_list = {
194: ("Rwanda", "RW"),
195: ("Saint Barthelemy", "BL"),
196: ("Saint Helena", "SH"),
- 197: ("Saint Kitts And Nevis", "KN"),
+ 197: ("Saint Kitts and Nevis", "KN"),
198: ("Saint Lucia", "LC"),
199: ("Saint Martin", "MF"),
- 200: ("Saint Pierre And Miquelon", "PM"),
- 201: ("Saint Vincent And Grenadines", "VC"),
+ 200: ("Saint Pierre and Miquelon", "PM"),
+ 201: ("Saint Vincent and Grenadines", "VC"),
202: ("Samoa", "WS"),
203: ("San Marino", "SM"),
- 204: ("Sao Tome And Principe", "ST"),
+ 204: ("Sao Tome and Principe", "ST"),
205: ("Saudi Arabia", "SA"),
206: ("Senegal", "SN"),
207: ("Serbia", "RS"),
@@ -630,14 +658,14 @@ country_list = {
214: ("Solomon Islands", "SB"),
215: ("Somalia", "SO"),
216: ("South Africa", "ZA"),
- 217: ("South Georgia And South Sandwich Islands", "GS"),
+ 217: ("South Georgia and South Sandwich Islands", "GS"),
218: ("South Korea", "KR"),
219: ("South Sudan", "SS"),
220: ("Spain", "ES"),
221: ("Sri Lanka", "LK"),
222: ("Sudan", "SD"),
223: ("Suriname", "SR"),
- 224: ("Svalbard And Jan Mayen", "SJ"),
+ 224: ("Svalbard and Jan Mayen", "SJ"),
225: ("Sweden", "SE"),
226: ("Switzerland", "CH"),
227: ("Syria", "SY"),
@@ -649,12 +677,12 @@ country_list = {
233: ("Togo", "TG"),
234: ("Tokelau", "TK"),
235: ("Tonga", "TO"),
- 236: ("Trinidad And Tobago", "TT"),
- 237: ("Tristan Da Cunha", "TA"),
+ 236: ("Trinidad and Tobago", "TT"),
+ 237: ("Tristan da Cunha", "TA"),
238: ("Tunisia", "TN"),
239: ("Turkey", "TR"),
240: ("Turkmenistan", "TM"),
- 241: ("Turks And Caicos Islands", "TC"),
+ 241: ("Turks and Caicos Islands", "TC"),
242: ("Tuvalu", "TV"),
243: ("Uganda", "UG"),
244: ("Ukraine", "UA"),
@@ -669,15 +697,15 @@ country_list = {
253: ("Vatican City", "VA"),
254: ("Venezuela", "VE"),
255: ("Vietnam", "VN"),
- 256: ("Wallis And Futuna", "WF"),
+ 256: ("Wallis and Futuna", "WF"),
257: ("Western Sahara", "EH"),
- 258: ("World", "001"),
+ 258: ("world", "001"),
259: ("Yemen", "YE"),
260: ("Zambia", "ZM"),
261: ("Zimbabwe", "ZW"),
}
-country_aliases = {
+territory_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
'DemocraticRepublicOfCongo': 'CongoKinshasa',
'PeoplesRepublicOfCongo': 'CongoBrazzaville',
@@ -708,7 +736,7 @@ country_aliases = {
'TuvaluCountry': 'TuvaluTerritory',
}
-script_list = {
+script_map = {
0: ("AnyScript", "Zzzz"),
1: ("Adlam", "Adlm"),
@@ -741,7 +769,7 @@ script_list = {
28: ("Deseret", "Dsrt"),
29: ("Devanagari", "Deva"),
30: ("Duployan", "Dupl"),
- 31: ("Egyptian Hieroglyphs", "Egyp"),
+ 31: ("Egyptian hieroglyphs", "Egyp"),
32: ("Elbasan", "Elba"),
33: ("Ethiopic", "Ethi"),
34: ("Fraser", "Lisu"),
@@ -816,7 +844,7 @@ script_list = {
103: ("Pahawh Hmong", "Hmng"),
104: ("Palmyrene", "Palm"),
105: ("Pau Cin Hau", "Pauc"),
- 106: ("Phags Pa", "Phag"),
+ 106: ("Phags-pa", "Phag"),
107: ("Phoenician", "Phnx"),
108: ("Pollard Phonetic", "Plrd"),
109: ("Psalter Pahlavi", "Phlp"),
@@ -827,7 +855,7 @@ script_list = {
114: ("Sharada", "Shrd"),
115: ("Shavian", "Shaw"),
116: ("Siddham", "Sidd"),
- 117: ("Sign Writing", "Sgnw"),
+ 117: ("SignWriting", "Sgnw"), # Oddly, en.xml leaves no space in it.
118: ("Simplified Han", "Hans"),
119: ("Sinhala", "Sinh"),
120: ("Sora Sompeng", "Sora"),
@@ -852,6 +880,8 @@ script_list = {
139: ("Vai", "Vaii"),
140: ("Varang Kshiti", "Wara"),
141: ("Yi", "Yiii"),
+ # Added at CLDR v43
+ 142: ("Hanifi", "Rohg"), # Used for Rohingya
}
script_aliases = {
@@ -863,27 +893,3 @@ script_aliases = {
'MendeKikakuiScript': 'MendeScript',
'BengaliScript': 'BanglaScript',
}
-
-def countryCodeToId(code):
- if not code:
- return 0
- for country_id in country_list:
- if country_list[country_id][1] == code:
- return country_id
- return -1
-
-def languageCodeToId(code):
- if not code:
- return 0
- for language_id in language_list:
- if language_list[language_id][1] == code:
- return language_id
- return -1
-
-def scriptCodeToId(code):
- if not code:
- return 0
- for script_id in script_list:
- if script_list[script_id][1] == code:
- return script_id
- return -1
diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py
new file mode 100644
index 0000000000..0d23065cf9
--- /dev/null
+++ b/util/locale_database/iso639_3.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+
+@dataclass
+class LanguageCodeEntry:
+ part3Code: str
+ part2BCode: Optional[str]
+ part2TCode: Optional[str]
+ part1Code: Optional[str]
+
+ def id(self) -> str:
+ if self.part1Code:
+ return self.part1Code
+ if self.part2BCode:
+ return self.part2BCode
+ return self.part3Code
+
+ def __repr__(self) -> str:
+ parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}']
+ if self.part2BCode is not None and self.part2BCode != self.part3Code:
+ parts.append(f', part2BCode={self.part2BCode!r}')
+ if self.part2TCode != self.part2BCode:
+ parts.append(f', part2TCode={self.part2TCode!r}')
+ if self.part1Code is not None:
+ parts.append(f', part1Code={self.part1Code!r}')
+ parts.append(')')
+ return ''.join(parts)
+
+
+class LanguageCodeData:
+ """
+ Representation of ISO 639-3 language code data.
+ """
+ def __init__(self, fileName: str):
+ """
+ Construct the object populating the data from the given file.
+ """
+ self.__codeMap: Dict[str, LanguageCodeEntry] = {}
+
+ with open(fileName, 'r', encoding='utf-8') as stream:
+ stream.readline() # skip the header
+ for line in stream.readlines():
+ part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
+
+ # sanity checks
+ assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
+ f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
+ f'{part2TCode!r} {part1Code!r}'
+
+ assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
+ assert not part1Code or len(part1Code) == 2, \
+ f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
+ assert not part2BCode or len(part2BCode) == 3, \
+ f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
+ assert not part2TCode or len(part2TCode) == 3, \
+ f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'
+
+ assert (part2BCode == '') == (part2TCode == ''), \
+ f'Only one Part 2 code is specified for {part3Code!r}: ' \
+ f'{part2BCode!r} vs {part2TCode!r}'
+ assert not part2TCode or part2TCode == part3Code, \
+ f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'
+
+ entry = LanguageCodeEntry(part3Code, part2BCode or None,
+ part2TCode or None, part1Code or None)
+
+ self.__codeMap[entry.id()] = entry
+
+ def query(self, code: str) -> Optional[LanguageCodeEntry]:
+ """
+ Lookup the entry with the given code and return it.
+
+ Each entry is keyed by its id(): the Part 1 code if it has one, else the
+ Part 2B (bibliographic) code, else the Part 3 code.
+ """
+ return self.__codeMap.get(code)
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index 110e5b7573..b94c242172 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -1,30 +1,5 @@
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2020 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Parsing the Locale Data Markup Language
It's an XML format, so the raw parsing of XML is, of course, delegated
@@ -46,6 +21,13 @@ See individual classes for further detail.
from localetools import Error
from dateconverter import convert_date
+# The github version of CLDR uses '↑↑↑' to indicate "inherit"
+INHERIT = '↑↑↑'
+
+def _attrsFromDom(dom):
+ return { k: (v if isinstance(v, str) else v.nodeValue)
+ for k, v in dom.attributes.items() }
+
class Node (object):
"""Wrapper for an arbitrary DOM node.
@@ -75,6 +57,9 @@ class Node (object):
else:
self.draft = max(draft, self.draftScore(attr))
+ def attributes(self):
+ return _attrsFromDom(self.dom)
+
def findAllChildren(self, tag, wanted = None, allDull = False):
"""All children that do have the given tag and attributes.
@@ -124,7 +109,7 @@ class Node (object):
one."""
seq = self.findAllChildren(tag)
try:
- node = seq.next()
+ node = next(seq)
except StopIteration:
raise Error('No child found where one was expected', tag)
for it in seq:
@@ -191,17 +176,35 @@ class XmlScanner (object):
return elts
class Supplement (XmlScanner):
- def find(self, xpath):
+ def find(self, xpath, exclude=()):
+ """Finds nodes by matching a specified xpath.
+
+ If exclude is passed, it should be a sequence of attribute names (its
+ default is empty). Any matches to the given xpath that also have any
+ attribute in this sequence will be excluded.
+
+ For each childless node matching the xpath, or child of a node matching
+ the xpath, this yields a pair (name, attrs) where name is the
+ nodeName and attrs is a dict mapping the node's attribute's names to
+ their values. For attribute values that are not simple strings, the
+ nodeValue of the attribute node is used."""
elts = self.findNodes(xpath)
- for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,)
- for e in elts):
+ for elt in _iterateEach(e.dom.childNodes or (e.dom,)
+ for e in elts
+ if not any(a in e.dom.attributes
+ for a in exclude)):
if elt.attributes:
- yield (elt.nodeName,
- dict((k, v if isinstance(v, basestring) else v.nodeValue)
- for k, v in elt.attributes.items()))
+ yield elt.nodeName, _attrsFromDom(elt)
class LocaleScanner (object):
def __init__(self, name, nodes, root):
+ """Set up to scan data for a specified locale.
+
+ First parameter is the name of the locale; it will be used in
+ error messages. Second is a tuple of DOM root-nodes of files
+ with locale data, later ones serving as fall-backs for data
+ missing in earlier ones. Third parameter is the root locale's
+ DOM node."""
self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, default = None, draft = None):
@@ -227,7 +230,7 @@ class LocaleScanner (object):
def tagCodes(self):
"""Yields four tag codes
- The tag codes are language, script, country and variant; an
+ The tag codes are language, script, territory and variant; an
empty value for any of them indicates that no value was
provided. The values are obtained from the primary file's
top-level <identity> element. An Error is raised if any
@@ -241,7 +244,7 @@ class LocaleScanner (object):
except (KeyError, AttributeError):
pass
else:
- raise Error('Alias to {}'.format(source))
+ raise Error(f'Alias to {source}')
ids = root.findUniqueChild('identity')
for code in ('language', 'script', 'territory', 'variant'):
@@ -259,12 +262,12 @@ class LocaleScanner (object):
"""Fetches currency data for this locale.
Single argument, isoCode, is the ISO currency code for the
- currency in use in the country. See also numericData, which
+ currency in use in the territory. See also numericData, which
includes some currency formats.
"""
if isoCode:
- stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
- symbol = self.find(stem + 'symbol', '')
+ stem = f'numbers/currencies/currency[{isoCode}]/'
+ symbol = self.find(f'{stem}symbol', '')
name = self.__currencyDisplayName(stem)
else:
symbol = name = ''
@@ -276,31 +279,38 @@ class LocaleScanner (object):
First argument, lookup, is a callable that maps a numbering
system's name to certain data about the system, as a mapping;
- we expect this to have u'digits' as a key.
+ we expect this to have 'digits' as a key.
"""
system = self.find('numbers/defaultNumberingSystem')
- stem = 'numbers/symbols[numberSystem={}]/'.format(system)
- decimal = self.find(stem + 'decimal')
- group = self.find(stem + 'group')
- assert decimal != group, (self.name, system, decimal)
+ stem = f'numbers/symbols[numberSystem={system}]/'
+ decimal = self.find(f'{stem}decimal')
+ group = self.find(f'{stem}group')
+ if decimal == group:
+ # mn_Mong_MN @v43 :-(
+ clean = Node.draftScore('approved')
+ decimal = self.find(f'{stem}decimal', draft=clean)
+ group = self.find(f'{stem}group', draft=clean)
+ assert decimal != group, (self.name, system, decimal)
+
yield 'decimal', decimal
yield 'group', group
- yield 'percent', self.find(stem + 'percentSign')
- yield 'list', self.find(stem + 'list')
- yield 'exp', self.find(stem + 'exponential')
+ yield 'percent', self.find(f'{stem}percentSign')
+ yield 'list', self.find(f'{stem}list')
+ yield 'exp', self.find(f'{stem}exponential')
yield 'groupSizes', self.__numberGrouping(system)
digits = lookup(system)['digits']
assert len(digits) == 10
zero = digits[0]
# Qt's number-formatting code assumes digits are consecutive
- # (except Suzhou, CLDR's hanidec - see QTBUG-85409):
+ # (except Suzhou - see QTBUG-85409 - which shares its zero
+ # with CLDR's very-non-contiguous hanidec):
assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero))
for i, c in enumerate(digits[1:], 1))
yield 'zero', zero
- plus = self.find(stem + 'plusSign')
- minus = self.find(stem + 'minusSign')
+ plus = self.find(f'{stem}plusSign')
+ minus = self.find(f'{stem}minusSign')
yield 'plus', plus
yield 'minus', minus
@@ -308,11 +318,11 @@ class LocaleScanner (object):
xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
try:
money = self.find(xpath.replace('Formats/',
- 'Formats[numberSystem={}]/'.format(system)))
+ f'Formats[numberSystem={system}]/'))
except Error:
money = self.find(xpath)
money = self.__currencyFormats(money, plus, minus)
- yield 'currencyFormat', money.next()
+ yield 'currencyFormat', next(money)
neg = ''
for it in money:
assert not neg, 'There should be at most one more pattern'
@@ -322,12 +332,12 @@ class LocaleScanner (object):
def textPatternData(self):
for key in ('quotationStart', 'alternateQuotationEnd',
'quotationEnd', 'alternateQuotationStart'):
- yield key, self.find('delimiters/' + key)
+ yield key, self.find(f'delimiters/{key}')
for key in ('start', 'middle', 'end'):
- yield ('listPatternPart' + key.capitalize(),
+ yield (f'listPatternPart{key.capitalize()}',
self.__fromLdmlListPattern(self.find(
- 'listPatterns/listPattern/listPatternPart[{}]'.format(key))))
+ f'listPatterns/listPattern/listPatternPart[{key}]')))
yield ('listPatternPartTwo',
self.__fromLdmlListPattern(self.find(
'listPatterns/listPattern/listPatternPart[2]')))
@@ -335,28 +345,26 @@ class LocaleScanner (object):
stem = 'dates/calendars/calendar[gregorian]/'
# TODO: is wide really the right width to use here ?
# abbreviated might be an option ... or try both ?
- meridiem = stem + 'dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
+ meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
for key in ('am', 'pm'):
- yield key, self.find(meridiem + 'dayPeriod[{}]'.format(key),
+ yield key, self.find(f'{meridiem}dayPeriod[{key}]',
draft = Node.draftScore('contributed'))
for pair in (('long', 'full'), ('short', 'short')):
for key in ('time', 'date'):
- yield (pair[0] + key.capitalize() + 'Format',
+ yield (f'{pair[0]}{key.capitalize()}Format',
convert_date(self.find(
- stem + '{}Formats/{}FormatLength[{}]/{}Format/pattern'.format(
- key, key, pair[1], key))))
+ f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
- def endonyms(self, language, script, country, variant):
+ def endonyms(self, language, script, territory, variant):
# TODO: take variant into account ?
- for seq in ((language, script, country),
- (language, script), (language, country), (language,)):
+ for seq in ((language, script, territory),
+ (language, script), (language, territory), (language,)):
if not all(seq):
continue
try:
yield ('languageEndonym',
- self.find('localeDisplayNames/languages/language[{}]'
- .format('_'.join(seq))))
+ self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]'))
except Error:
pass
else:
@@ -365,9 +373,8 @@ class LocaleScanner (object):
# grumble(failed to find endonym for language)
yield 'languageEndonym', ''
- yield ('countryEndonym',
- self.find('localeDisplayNames/territories/territory[{}]'
- .format(country), ''))
+ yield ('territoryEndonym',
+ self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
def unitData(self):
yield ('byte_unit',
@@ -386,20 +393,20 @@ class LocaleScanner (object):
def calendarNames(self, calendars):
namings = self.__nameForms
for cal in calendars:
- stem = 'dates/calendars/calendar[' + cal + ']/months/'
+ stem = f'dates/calendars/calendar[{cal}]/months/'
for key, mode, size in namings:
- prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
- yield (key + 'Months_' + cal,
- ';'.join(self.find(stem + prop + 'month[{}]'.format(i))
+ prop = f'monthContext[{mode}]/monthWidth[{size}]/'
+ yield (f'{key}Months_{cal}',
+ ';'.join(self.find(f'{stem}{prop}month[{i}]')
for i in range(1, 13)))
# Day data (for Gregorian, at least):
stem = 'dates/calendars/calendar[gregorian]/days/'
days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
for (key, mode, size) in namings:
- prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
- yield (key + 'Days',
- ';'.join(self.find(stem + prop + '[' + day + ']')
+ prop = f'dayContext[{mode}]/dayWidth[{size}]/day'
+ yield (f'{key}Days',
+ ';'.join(self.find(f'{stem}{prop}[{day}]')
for day in days))
# Implementation details
@@ -410,10 +417,10 @@ class LocaleScanner (object):
('long', 'format', 'wide'),
('short', 'format', 'abbreviated'),
('narrow', 'format', 'narrow'),
- ) # Used for month and day names
+ ) # Used for month and day names
def __find(self, xpath):
- retries = [ xpath.split('/') ]
+ retries, foundNone = [ xpath.split('/') ], True
while retries:
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
for selector in tags:
@@ -423,6 +430,9 @@ class LocaleScanner (object):
break
else: # Found matching elements
+ elts = tuple(self.__skipInheritors(elts))
+ if elts:
+ foundNone = False
# Possibly filter elts to prefer the least drafty ?
for elt in elts:
yield elt
@@ -442,29 +452,42 @@ class LocaleScanner (object):
if not roots:
if retries: # Let outer loop fall back on an alias path:
break
- sought = '/'.join(tags)
- if sought != xpath:
- sought += ' (for {})'.format(xpath)
- raise Error('All lack child {} for {} in {}'.format(
- selector, sought, self.name))
+ if foundNone:
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += f' (for {xpath})'
+ raise Error(f'All lack child {selector} for {sought} in {self.name}')
else: # Found matching elements
+ roots = tuple(self.__skipInheritors(roots))
+ if roots:
+ foundNone = False
for elt in roots:
yield elt
- sought = '/'.join(tags)
- if sought != xpath:
- sought += ' (for {})'.format(xpath)
- raise Error('No {} in {}'.format(sought, self.name))
+ if foundNone:
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += f' (for {xpath})'
+ raise Error(f'No {sought} in {self.name}')
+
+ @staticmethod
+ def __skipInheritors(elts):
+ for elt in elts:
+ try:
+ if elt.dom.firstChild.nodeValue != INHERIT:
+ yield elt
+ except (AttributeError, KeyError):
+ yield elt
def __currencyDisplayName(self, stem):
try:
return self.find(stem + 'displayName')
except Error:
pass
- for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
+ for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
try:
- return self.find(stem + 'displayName[count={}]'.format(x))
+ return self.find(f'{stem}displayName[count={x}]')
except Error:
pass
return ''
@@ -474,10 +497,10 @@ class LocaleScanner (object):
# (even for unitLength[narrow]) instead of kB (etc.), so
# prefer any unitPattern provided, but prune its placeholder:
for size in ('short', 'narrow'): # TODO: reverse order ?
- stem = 'units/unitLength[{}]/unit[digital-{}byte]/'.format(size + keySuffix, quantify)
+ stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try:
- ans = self.find(stem + 'unitPattern[count={}]'.format(count))
+ ans = self.find(f'{stem}unitPattern[count={count}]')
except Error:
continue
@@ -490,7 +513,7 @@ class LocaleScanner (object):
return ans
try:
- return self.find(stem + 'displayName')
+ return self.find(f'{stem}displayName')
except Error:
pass
@@ -518,10 +541,10 @@ class LocaleScanner (object):
if cache:
byte = cache.pop()
if all(byte == k for k in cache):
- suffix = 'i' + byte
+ suffix = f'i{byte}'
for q in siQuantifiers:
# Those don't (yet, v36) exist in CLDR, so we always get the fall-back:
- yield self.__findUnit(keySuffix, q[:2], q[0].upper() + suffix)
+ yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}')
else: # first call
tail = suffix = suffix or 'B'
for q in siQuantifiers:
@@ -556,8 +579,8 @@ class LocaleScanner (object):
elsewhere)."""
top = int(self.find('numbers/minimumGroupingDigits'))
assert top < 4, top # We store it in a 2-bit field
- grouping = self.find('numbers/decimalFormats[numberSystem='
- + system + ']/decimalFormatLength/decimalFormat/pattern')
+ grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
+ 'decimalFormatLength/decimalFormat/pattern')
groups = grouping.split('.')[0].split(',')[-3:]
assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
if len(groups) > 2:
@@ -580,7 +603,7 @@ class LocaleScanner (object):
# According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
# there can be doubled or tripled currency sign, however none of the
# locales use that.
- p = p.replace(u'\xa4', "%2")
+ p = p.replace('\xa4', "%2")
# Single quote goes away, but double goes to single:
p = p.replace("''", '###').replace("'", '').replace('###', "'")
# Use number system's signs:
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index 29153366b3..02ec7cafc7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -1,30 +1,5 @@
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2020 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Utilities shared among the CLDR extraction tools.
Functions:
@@ -37,13 +12,16 @@ Classes:
SourceFileEditor -- adds standard prelude and tail handling to Transcriber.
"""
-import os
-import tempfile
+from contextlib import ExitStack, contextmanager
+from pathlib import Path
+from tempfile import NamedTemporaryFile
-class Error (StandardError):
- __upinit = StandardError.__init__
+qtbase_root = Path(__file__).parents[2]
+assert qtbase_root.name == 'qtbase'
+
+class Error (Exception):
def __init__(self, msg, *args):
- self.__upinit(msg, *args)
+ super().__init__(msg, *args)
self.message = msg
def __str__(self):
return self.message
@@ -63,48 +41,141 @@ def unicode2hex(s):
lst.append(hex(v))
return lst
-def wrap_list(lst):
+def wrap_list(lst, perline=20):
def split(lst, size):
while lst:
head, lst = lst[:size], lst[size:]
yield head
- return ",\n".join(", ".join(x) for x in split(lst, 20))
-
-class Transcriber (object):
- """Helper class to facilitate rewriting source files.
-
- This class takes care of the temporary file manipulation. Derived
- classes need to implement transcribing of the content, with
+ return ",\n".join(", ".join(x) for x in split(lst, perline))
+
+def names_clash(cldr, enum):
+ """True if the reader might not recognize cldr as the name of enum
+
+ First argument, cldr, is the name CLDR gives for some language,
+ script or territory; second, enum, is the name enumdata.py gives
+ for it. If these are enough alike, returns None; otherwise, a
+ non-empty string that results from adapting cldr to be more like
+ how enumdata.py would express it."""
+ if cldr == enum:
+ return None
+
+ # Some common substitutions:
+ cldr = cldr.replace('&', 'And')
+ prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+ for k, v in prefix.items():
+ if cldr.startswith(k + ' '):
+ cldr = v + cldr[len(k):]
+
+ # Chop out any parenthesised part, e.g. (Burma):
+ while '(' in cldr:
+ try:
+ f, t = cldr.index('('), cldr.rindex(')')
+ except ValueError:
+ break
+ cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+ # Various accented letters:
+ remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+ skip = '\u02bc' # Punctuation for which .isalpha() is true.
+ # Let cldr match (ignoring non-letters and case) any substring as enum:
+ if ''.join(enum.lower().split()) in ''.join(
+ remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+ return None
+ return cldr
+
+
+@contextmanager
+def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
+ """Context manager for safe file update via a temporary file.
+
+ Accepts path to the file to be updated. Yields a temporary file to the user
+ code, open for writing.
+
+ On success closes the temporary file and moves its content to the original
+ location. On error, removes temporary file, without disturbing the original.
+ """
+ tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False)
+ try:
+ yield tempFile
+ tempFile.close()
+ # Move the modified file to the original location
+ Path(tempFile.name).rename(originalLocation)
+ except Exception:
+ # delete the temporary file in case of error
+ tempFile.close()
+ Path(tempFile.name).unlink()
+ raise
+
+
+class Transcriber:
+ """Context manager base-class to manage source file rewrites.
+
+ Derived classes need to implement transcribing of the content, with
whatever modifications they may want. Members reader and writer
are exposed; use writer.write() to output to the new file; use
reader.readline() or iterate reader to read the original.
- Callers should call close() on success or cleanup() on failure (to
- clear away the temporary file).
+ This class is intended to be used as context manager only (inside a
+ `with` statement).
+
+ Reimplement onEnter() to write any preamble the file may have,
+ onExit() to write any tail. The body of the with statement takes
+ care of anything in between, using methods provided by derived classes.
+
+ The data is written to a temporary file first. The temporary file data
+ is then moved to the original location if there were no errors. Otherwise
+ the temporary file is removed and the original is left unchanged.
"""
- def __init__(self, path, temp):
- # Open the old file
- self.reader = open(path)
- # Create a temp file to write the new data into
- temp, tempPath = tempfile.mkstemp(os.path.split(path)[1], dir = temp)
- self.__names = path, tempPath
- self.writer = os.fdopen(temp, "w")
-
- def close(self):
- self.reader.close()
- self.writer.close()
- self.reader = self.writer = None
- source, temp = self.__names
- os.remove(source)
- os.rename(temp, source)
-
- def cleanup(self):
- if self.__names:
- self.reader.close()
- self.writer.close()
- # Remove temp-file:
- os.remove(self.__names[1])
- self.__names = ()
+ def __init__(self, path: Path, temp_dir: Path):
+ self.path = path
+ self.tempDir = temp_dir
+
+ def onEnter(self) -> None:
+ """
+ Called before transferring control to user code.
+
+ This function can be overridden in derived classes to perform actions
+ before transferring control to the user code.
+
+ The default implementation does nothing.
+ """
+ pass
+
+ def onExit(self) -> None:
+ """
+ Called after return from user code.
+
+ This function can be overridden in derived classes to perform actions
+ after successful return from user code.
+
+ The default implementation does nothing.
+ """
+ pass
+
+ def __enter__(self):
+ with ExitStack() as resources:
+ # Create a temp file to write the new data into
+ self.writer = resources.enter_context(
+ AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir))
+ # Open the old file
+ self.reader = resources.enter_context(open(self.path))
+
+ self.onEnter()
+
+ # Prevent resources from being closed on normal return from this
+ # method and make them available inside __exit__():
+ self.__resources = resources.pop_all()
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ if exc_type is None:
+ with self.__resources:
+ self.onExit()
+ else:
+ self.__resources.__exit__(exc_type, exc_value, traceback)
+
+ return False
+
class SourceFileEditor (Transcriber):
"""Transcriber with transcription of code around a generated block.
@@ -117,43 +188,27 @@ class SourceFileEditor (Transcriber):
the new version to replace it.
This class takes care of transcribing the parts before and after
- the generated content; on creation, an instance will copy the
- preamble up to the start marker; its close() will skip over the
- original's generated content and resume transcribing with the end
- marker. Derived classes need only implement the generation of the
- content in between.
-
- Callers should call close() on success or cleanup() on failure (to
- clear away the temporary file); see Transcriber.
+ the generated content; on entering the context, an instance will
+ copy the preamble up to the start marker; on exit from the context
+ it will skip over the original's generated content and resume
+ transcribing with the end marker.
+
+ This class is only intended to be used as a context manager:
+ see Transcriber. Derived classes implement suitable methods for use in
+ the body of the with statement, using self.writer to rewrite the part
+ of the file between the start and end markers.
"""
- __upinit = Transcriber.__init__
- def __init__(self, path, temp):
- """Set up the source file editor.
-
- Requires two arguments: the path to the source file to be read
- and, on success, replaced with a new version; and the
- directory in which to store the temporary file during the
- rewrite."""
- self.__upinit(path, temp)
- self.__copyPrelude()
-
- __upclose = Transcriber.close
- def close(self):
- self.__copyTail()
- self.__upclose()
-
- # Implementation details:
GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE'
GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE'
- def __copyPrelude(self):
+ def onEnter(self) -> None:
# Copy over the first non-generated section to the new file
for line in self.reader:
self.writer.write(line)
if line.strip() == self.GENERATED_BLOCK_START:
break
- def __copyTail(self):
+ def onExit(self) -> None:
# Skip through the old generated data in the old file
for line in self.reader:
if line.strip() == self.GENERATED_BLOCK_END:
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 4fcfe32a43..5cb56c2165 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -1,31 +1,5 @@
-# coding=utf8
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Shared serialization-scanning code for QLocaleXML format.
Provides classes:
@@ -35,15 +9,23 @@ Provides classes:
Support:
Spacer -- provides control over indentation of the output.
+
+RelaxNG schema for the used file format can be found in qlocalexml.rnc.
+QLocaleXML files can be validated using:
+
+ jing -c qlocalexml.rnc <file.xml>
+
+You can download jing from https://relaxng.org/jclark/jing.html if your
+package manager lacks the jing package.
"""
-from __future__ import print_function
+
from xml.sax.saxutils import escape
from localetools import Error
# Tools used by Locale:
def camel(seq):
- yield seq.next()
+ yield next(seq)
for word in seq:
yield word.capitalize()
@@ -51,88 +33,39 @@ def camelCase(words):
return ''.join(camel(iter(words)))
def addEscapes(s):
- return ''.join(c if n < 128 else '\\x{:02x}'.format(n)
+ return ''.join(c if n < 128 else f'\\x{n:02x}'
for n, c in ((ord(c), c) for c in s))
def startCount(c, text): # strspn
"""First index in text where it doesn't have a character in c"""
assert text and text[0] in c
try:
- return (j for j, d in enumerate(text) if d not in c).next()
+ return next((j for j, d in enumerate(text) if d not in c))
except StopIteration:
return len(text)
-def convertFormat(format):
- """Convert date/time format-specier from CLDR to Qt
-
- Match up (as best we can) the differences between:
- * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
- * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
- """
- # Compare and contrast dateconverter.py's convert_date().
- # Need to (check consistency and) reduce redundancy !
- result = ""
- i = 0
- while i < len(format):
- if format[i] == "'":
- result += "'"
- i += 1
- while i < len(format) and format[i] != "'":
- result += format[i]
- i += 1
- if i < len(format):
- result += "'"
- i += 1
- else:
- s = format[i:]
- if s.startswith('E'): # week-day
- n = startCount('E', s)
- if n < 3:
- result += 'ddd'
- elif n == 4:
- result += 'dddd'
- else: # 5: narrow, 6 short; but should be name, not number :-(
- result += 'd' if n < 6 else 'dd'
- i += n
- elif s[0] in 'ab': # am/pm
- # 'b' should distinguish noon/midnight, too :-(
- result += "AP"
- i += startCount('ab', s)
- elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
- result += 'z'
- i += startCount('S', s)
- elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
- result += 't'
- i += startCount('V', s)
- elif s[0] in 'zv': # zone
- # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
- # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
- result += "t"
- i += startCount('zv', s)
- else:
- result += format[i]
- i += 1
-
- return result
-
class QLocaleXmlReader (object):
def __init__(self, filename):
self.root = self.__parse(filename)
- # Lists of (id, name, code) triples:
- languages = tuple(self.__loadMap('language'))
- scripts = tuple(self.__loadMap('script'))
- countries = tuple(self.__loadMap('country'))
+
+ from enumdata import language_map, script_map, territory_map
+ # Lists of (id, enum name, code, en.xml name) tuples:
+ languages = tuple(self.__loadMap('language', language_map))
+ scripts = tuple(self.__loadMap('script', script_map))
+ territories = tuple(self.__loadMap('territory', territory_map))
self.__likely = tuple(self.__likelySubtagsMap())
- # Mappings {ID: (name, code)}
- self.languages = dict((v[0], v[1:]) for v in languages)
- self.scripts = dict((v[0], v[1:]) for v in scripts)
- self.countries = dict((v[0], v[1:]) for v in countries)
- # Private mappings {name: (ID, code)}
- self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
- self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
- self.__landByName = dict((v[1], (v[0], v[2])) for v in countries)
+
+ # Mappings {ID: (enum name, code, en.xml name)}
+ self.languages = {v[0]: v[1:] for v in languages}
+ self.scripts = {v[0]: v[1:] for v in scripts}
+ self.territories = {v[0]: v[1:] for v in territories}
+
+ # Private mappings {enum name: (ID, code)}
+ self.__langByName = {v[1]: (v[0], v[2]) for v in languages}
+ self.__textByName = {v[1]: (v[0], v[2]) for v in scripts}
+ self.__landByName = {v[1]: (v[0], v[2]) for v in territories}
# Other properties:
- self.dupes = set(v[1] for v in languages) & set(v[1] for v in countries)
+ self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
self.cldrVersion = self.__firstChildText(self.root, "version")
def loadLocaleMap(self, calendars, grumble = lambda text: None):
@@ -142,18 +75,18 @@ class QLocaleXmlReader (object):
locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
language = self.__langByName[locale.language][0]
script = self.__textByName[locale.script][0]
- country = self.__landByName[locale.country][0]
+ territory = self.__landByName[locale.territory][0]
if language != 1: # C
- if country == 0:
- grumble('loadLocaleMap: No country id for "{}"\n'.format(locale.language))
+ if territory == 0:
+ grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')
if script == 0:
- # Find default script for the given language and country - see:
+ # Find default script for the given language and territory - see:
# http://www.unicode.org/reports/tr35/#Likely_Subtags
try:
try:
- to = likely[(locale.language, 'AnyScript', locale.country)]
+ to = likely[(locale.language, 'AnyScript', locale.territory)]
except KeyError:
to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
except KeyError:
@@ -162,11 +95,11 @@ class QLocaleXmlReader (object):
locale.script = to[1]
script = self.__textByName[locale.script][0]
- yield (language, script, country), locale
+ yield (language, script, territory), locale
def languageIndices(self, locales):
index = 0
- for key, value in self.languages.iteritems():
+ for key, value in self.languages.items():
i, count = 0, locales.count(key)
if count > 0:
i = index
@@ -190,11 +123,11 @@ class QLocaleXmlReader (object):
'_'.join(tag(give)), ids(give))
def defaultMap(self):
- """Map language and script to their default country by ID.
+ """Map language and script to their default territory by ID.
- Yields ((language, script), country) wherever the likely
+ Yields ((language, script), territory) wherever the likely
sub-tags mapping says language's default locale uses the given
- script and country."""
+ script and territory."""
for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
assert have[0] == give[0], (have, give)
@@ -202,14 +135,41 @@ class QLocaleXmlReader (object):
self.__textByName[give[1]][0]),
self.__landByName[give[2]][0])
+ def enumify(self, name, suffix):
+ """Stick together the parts of an enumdata.py name.
+
+ Names given in enumdata.py include spaces and hyphens that we
+ can't include in an identifier, such as the name of a member
+ of an enum type. Removing those would lose the word
+ boundaries, so make sure each word starts with a capital (but
+ don't simply capitalize() as some names contain words,
+ e.g. McDonald, that have later capitals in them).
+
+ We also need to resolve duplication between languages and
+ territories (by adding a suffix to each) and add Script to the
+ ends of script-names that don't already end in it."""
+ name = name.replace('-', ' ')
+ # Don't .capitalize() as McDonald is already camel-case (see enumdata.py):
+ name = ''.join(word[0].upper() + word[1:] for word in name.split())
+ if suffix != 'Script':
+ assert not(name in self.__dupes and name.endswith(suffix))
+ return name + suffix if name in self.__dupes else name
+
+ if not name.endswith(suffix):
+ name += suffix
+ if name in self.__dupes:
+ raise Error(f'The script name "{name}" is messy')
+ return name
+
# Implementation details:
- def __loadMap(self, category):
+ def __loadMap(self, category, enum):
kid = self.__firstChildText
- for element in self.__eachEltInGroup(self.root, category + 'List', category):
- yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
+ for element in self.__eachEltInGroup(self.root, f'{category}List', category):
+ key = int(kid(element, 'id'))
+ yield key, enum[key][0], kid(element, 'code'), kid(element, 'name')
def __likelySubtagsMap(self):
- def triplet(element, keys=('language', 'script', 'country'), kid = self.__firstChildText):
+ def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
return tuple(kid(element, key) for key in keys)
kid = self.__firstChildElt
@@ -246,7 +206,7 @@ class QLocaleXmlReader (object):
return child
child = child.nextSibling
- raise Error('No {} child found'.format(name))
+ raise Error(f'No {name} child found')
@classmethod
def __firstChildText(cls, elt, key):
@@ -302,7 +262,7 @@ class Spacer (object):
elif line.startswith('<') and not line.startswith('<!'):
cut = line.find('>')
tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0]
- if '</{}>'.format(tag) not in line:
+ if f'</{tag}>' not in line:
self.current += self.__each
return indent + line + '\n'
@@ -333,10 +293,28 @@ class QLocaleXmlWriter (object):
self.__write('<localeDatabase>')
# Output of various sections, in their usual order:
- def enumData(self, languages, scripts, countries):
- self.__enumTable('language', languages)
- self.__enumTable('script', scripts)
- self.__enumTable('country', countries)
+ def enumData(self, code2name):
+ """Output name/id/code tables for language, script and territory.
+
+ Parameter, code2name, is a function taking 'language',
+ 'script' or 'territory' and returning a lookup function that
+ maps codes, of the relevant type, to their English names. This
+ lookup function is passed a code and the name, both taken from
+ enumdata.py, that QLocale uses, so the .get() of a dict will
+ work. The English name from this lookup will be used by
+ QLocale::*ToString() for the enum member whose name is based
+ on the enumdata.py name passed as fallback to the lookup."""
+ from enumdata import language_map, script_map, territory_map
+ self.__enumTable('language', language_map, code2name)
+ self.__enumTable('script', script_map, code2name)
+ self.__enumTable('territory', territory_map, code2name)
+ # Prepare to detect any unused codes (see __writeLocale(), close()):
+ self.__languages = set(p[1] for p in language_map.values()
+ if not p[1].isspace())
+ self.__scripts = set(p[1] for p in script_map.values()
+ if p[1] != 'Zzzz')
+ self.__territories = set(p[1] for p in territory_map.values()
+ if p[1] != 'ZZ')
def likelySubTags(self, entries):
self.__openTag('likelySubtags')
@@ -350,13 +328,11 @@ class QLocaleXmlWriter (object):
def locales(self, locales, calendars):
self.__openTag('localeList')
self.__openTag('locale')
- Locale.C(calendars).toXml(self.inTag, calendars)
+ self.__writeLocale(Locale.C(calendars), calendars)
self.__closeTag('locale')
- keys = locales.keys()
- keys.sort()
- for key in keys:
+ for key in sorted(locales.keys()):
self.__openTag('locale')
- locales[key].toXml(self.inTag, calendars)
+ self.__writeLocale(locales[key], calendars)
self.__closeTag('locale')
self.__closeTag('localeList')
@@ -364,13 +340,27 @@ class QLocaleXmlWriter (object):
self.inTag('version', cldrVersion)
def inTag(self, tag, text):
- self.__write('<{0}>{1}</{0}>'.format(tag, text))
+ self.__write(f'<{tag}>{text}</{tag}>')
- def close(self):
+ def close(self, grumble):
+ """Finish writing and grumble about any issues discovered."""
if self.__rawOutput != self.__complain:
self.__write('</localeDatabase>')
self.__rawOutput = self.__complain
+ if self.__languages or self.__scripts or self.__territories:
+ grumble('Some enum members are unused, corresponding to these tags:\n')
+ import textwrap
+ def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
+ g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
+ if self.__languages:
+ kvetch('Languages', self.__languages)
+ if self.__scripts:
+ kvetch('Scripts', self.__scripts)
+ if self.__territories:
+ kvetch('Territories', self.__territories)
+ grumble('It may make sense to deprecate them.\n')
+
# Implementation details
@staticmethod
def __printit(text):
@@ -379,28 +369,39 @@ class QLocaleXmlWriter (object):
def __complain(text):
raise Error('Attempted to write data after closing :-(')
- def __enumTable(self, tag, table):
- self.__openTag(tag + 'List')
- for key, value in table.iteritems():
+ @staticmethod
+ def __xmlSafe(text):
+ return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+ def __enumTable(self, tag, table, code2name):
+ self.__openTag(f'{tag}List')
+ enname, safe = code2name(tag), self.__xmlSafe
+ for key, (name, code) in table.items():
self.__openTag(tag)
- self.inTag('name', value[0])
+ self.inTag('name', safe(enname(code, name)))
self.inTag('id', key)
- self.inTag('code', value[1])
+ self.inTag('code', code)
self.__closeTag(tag)
- self.__closeTag(tag + 'List')
+ self.__closeTag(f'{tag}List')
def __likelySubTag(self, tag, likely):
self.__openTag(tag)
self.inTag('language', likely[0])
self.inTag('script', likely[1])
- self.inTag('country', likely[2])
+ self.inTag('territory', likely[2])
# self.inTag('variant', likely[3])
self.__closeTag(tag)
+ def __writeLocale(self, locale, calendars):
+ locale.toXml(self.inTag, calendars)
+ self.__languages.discard(locale.language_code)
+ self.__scripts.discard(locale.script_code)
+ self.__territories.discard(locale.territory_code)
+
def __openTag(self, tag):
- self.__write('<{}>'.format(tag))
+ self.__write(f'<{tag}>')
def __closeTag(self, tag):
- self.__write('</{}>'.format(tag))
+ self.__write(f'</{tag}>')
def __write(self, line):
self.__rawOutput(self.__wrap(line))
@@ -432,16 +433,16 @@ class Locale (object):
__asint = ("currencyDigits", "currencyRounding")
# Convert day-name to Qt day-of-week number:
__asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
- # Convert from CLDR format-strings to QDateTimeParser ones:
- __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
# Just use the raw text:
- __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
+ __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
"decimal", "group", "zero",
"list", "percent", "minus", "plus", "exp",
"quotationStart", "quotationEnd",
"alternateQuotationStart", "alternateQuotationEnd",
"listPatternPartStart", "listPatternPartMiddle",
"listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+ "longDateFormat", "shortDateFormat",
+ "longTimeFormat", "shortTimeFormat",
'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
"currencyIsoCode", "currencySymbol", "currencyDisplayName",
"currencyFormat", "currencyNegativeFormat")
@@ -466,14 +467,11 @@ class Locale (object):
for k in cls.__asdow:
data[k] = cls.__qDoW[lookup(k)]
- for k in cls.__asfmt:
- data[k] = convertFormat(lookup(k))
-
for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
data['listDelim' if k == 'list' else k] = lookup(k)
for k in cls.propsMonthDay('months'):
- data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
+ data[k] = {cal: lookup('_'.join((k, cal))) for cal in calendars}
grouping = lookup('groupSizes').split(';')
data.update(groupLeast = int(grouping[0]),
@@ -493,15 +491,15 @@ class Locale (object):
form used by CLDR; its default is ('gregorian',).
"""
get = lambda k: getattr(self, k)
- for key in ('language', 'script', 'country'):
+ for key in ('language', 'script', 'territory'):
write(key, get(key))
- write('{}code'.format(key), get('{}_code'.format(key)))
+ write(f'{key}code', get(f'{key}_code'))
for key in ('decimal', 'group', 'zero', 'list',
'percent', 'minus', 'plus', 'exp'):
write(key, get(key))
- for key in ('languageEndonym', 'countryEndonym',
+ for key in ('languageEndonym', 'territoryEndonym',
'quotationStart', 'quotationEnd',
'alternateQuotationStart', 'alternateQuotationEnd',
'listPatternPartStart', 'listPatternPartMiddle',
@@ -517,7 +515,7 @@ class Locale (object):
'_'.join((k, cal))
for k in self.propsMonthDay('months')
for cal in calendars):
- write(key, escape(get(key)).encode('utf-8'))
+ write(key, escape(get(key)))
write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'):
@@ -554,9 +552,9 @@ class Locale (object):
(fullName, fullName),
(firstThree, firstThree),
(number, initial)),
- 'islamic': ((u'Muharram', u'Safar', u'Rabiʻ I', u'Rabiʻ II', u'Jumada I',
- u'Jumada II', u'Rajab', u'Shaʻban', u'Ramadan', u'Shawwal',
- u'Dhuʻl-Qiʻdah', u'Dhuʻl-Hijjah'),
+ 'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
+ 'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
+ 'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
(fullName, fullName),
(islamicShort, islamicShort),
(number, number)),
@@ -565,7 +563,7 @@ class Locale (object):
(fullName, fullName),
(fullName, fullName),
(number, number)),
- },
+ },
sizes=('long', 'short', 'narrow')):
for cal in calendars:
try:
@@ -590,7 +588,7 @@ class Locale (object):
return cls(cls.__monthNames(calendars),
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
- country='AnyTerritory', country_code='0', countryEndonym='',
+ territory='AnyTerritory', territory_code='0', territoryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',
@@ -605,8 +603,8 @@ class Locale (object):
byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
am='AM', pm='PM', firstDayOfWeek='mon',
weekendStart='sat', weekendEnd='sun',
- longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
- longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
+ longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
+ longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
longDays=';'.join(days),
shortDays=';'.join(d[:3] for d in days),
narrowDays='7;1;2;3;4;5;6',
diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc
new file mode 100644
index 0000000000..818aa8f9c3
--- /dev/null
+++ b/util/locale_database/qlocalexml.rnc
@@ -0,0 +1,119 @@
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+# This is the RelaxNG compact schema for the qLocaleXML intermediate locale data
+# representation format produced and consumed by the qlocalexml module.
+#
+# To validate an xml file run:
+#
+# jing -c qlocalexml.rnc <your-file.xml>
+#
+# You can download jing from https://relaxng.org/jclark/jing.html if your
+# package manager lacks the jing package.
+
+start = element localeDatabase {
+ element version { text },
+ element languageList { Language+ },
+ element scriptList { Script+ },
+ element territoryList { Territory+ },
+ element likelySubtags { LikelySubtag+ },
+ element localeList { Locale+ }
+}
+
+Language = element language { TagDescriptor }
+Script = element script { TagDescriptor }
+Territory = element territory { TagDescriptor }
+TagDescriptor = (
+ element name { text },
+ element id { xsd:nonNegativeInteger },
+ element code { text }
+)
+
+LikelySubtag = element likelySubtag {
+ element from { LocaleTriplet },
+ element to { LocaleTriplet }
+}
+
+LocaleTriplet = (
+ element language { text },
+ element script { text },
+ element territory { text }
+)
+
+WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
+Digit = xsd:string { pattern = "\d" }
+Punctuation = xsd:string { pattern = "\p{P}" }
+GroupSizes = xsd:string { pattern = "\d;\d;\d" }
+
+Locale = element locale {
+ element language { text },
+ element languagecode { text },
+ element script { text },
+ element scriptcode { text },
+ element territory { text },
+ element territorycode { text },
+ element decimal { Punctuation },
+ element group { text },
+ element zero { Digit },
+ element list { Punctuation },
+ element percent { text },
+ element minus { text },
+ element plus { text },
+ element exp { text },
+ element languageEndonym { text },
+ element territoryEndonym { text },
+ element quotationStart { Punctuation },
+ element quotationEnd { Punctuation },
+ element alternateQuotationStart { Punctuation },
+ element alternateQuotationEnd { Punctuation },
+ element listPatternPartStart { text },
+ element listPatternPartMiddle { text },
+ element listPatternPartEnd { text },
+ element listPatternPartTwo { text },
+ element byte_unit { text },
+ element byte_si_quantified { text },
+ element byte_iec_quantified { text },
+ element am { text },
+ element pm { text },
+ element firstDayOfWeek { text },
+ element weekendStart { WeekDay },
+ element weekendEnd { WeekDay },
+ element longDateFormat { text },
+ element shortDateFormat { text },
+ element longTimeFormat { text },
+ element shortTimeFormat { text },
+ element currencyIsoCode { text },
+ element currencySymbol { text },
+ element currencyDisplayName { text },
+ element currencyFormat { text },
+ element currencyNegativeFormat { text },
+ element longDays { text },
+ element standaloneLongDays { text },
+ element shortDays { text },
+ element standaloneShortDays { text },
+ element narrowDays { text },
+ element standaloneNarrowDays { text },
+
+ # Some of these entries may be absent depending on command line arguments
+ element longMonths_gregorian { text }?,
+ element longMonths_persian { text }?,
+ element longMonths_islamic { text }?,
+ element standaloneLongMonths_gregorian { text }?,
+ element standaloneLongMonths_persian { text }?,
+ element standaloneLongMonths_islamic { text }?,
+ element shortMonths_gregorian { text }?,
+ element shortMonths_persian { text }?,
+ element shortMonths_islamic { text }?,
+ element standaloneShortMonths_gregorian { text }?,
+ element standaloneShortMonths_persian { text }?,
+ element standaloneShortMonths_islamic { text }?,
+ element narrowMonths_gregorian { text }?,
+ element narrowMonths_persian { text }?,
+ element narrowMonths_islamic { text }?,
+ element standaloneNarrowMonths_gregorian { text }?,
+ element standaloneNarrowMonths_persian { text }?,
+ element standaloneNarrowMonths_islamic { text }?,
+
+ element groupSizes { GroupSizes },
+ element currencyDigits { xsd:nonNegativeInteger },
+ element currencyRounding { xsd:nonNegativeInteger }
+}
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index c15d6d2f55..b20e4fd155 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -1,85 +1,58 @@
-#!/usr/bin/env python2
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-"""Script to generate C++ code from CLDR data in qLocaleXML form
-
-See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
-Pass the output file from that as first parameter to this script; pass
-the root of the qtbase check-out as second parameter.
-"""
-
-import os
-import datetime
-
-from qlocalexml import QLocaleXmlReader
-from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
-
-def compareLocaleKeys(key1, key2):
- if key1 == key2:
- return 0
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+"""Script to generate C++ code from CLDR data in QLocaleXML form
- if key1[0] != key2[0]: # First sort by language:
- return key1[0] - key2[0]
+See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
+Pass the output file from that as first parameter to this script; pass the ISO
+639-3 data file as second parameter. You can optionally pass the root of the
+qtbase check-out as third parameter; it defaults to the root of the qtbase
+check-out containing this script.
- defaults = compareLocaleKeys.default_map
- # maps {(language, script): country} by ID
- try:
- country = defaults[key1[:2]]
- except KeyError:
- pass
- else:
- if key1[2] == country:
- return -1
- if key2[2] == country:
- return 1
-
- if key1[1] == key2[1]:
- return key1[2] - key2[2]
+The ISO 639-3 data file can be downloaded from the SIL website:
- try:
- country = defaults[key2[:2]]
- except KeyError:
- pass
- else:
- if key2[2] == country:
- return 1
- if key1[2] == country:
- return -1
+ https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
+"""
- return key1[1] - key2[1]
+import datetime
+import argparse
+from pathlib import Path
+from typing import Optional
+from qlocalexml import QLocaleXmlReader
+from localetools import *
+from iso639_3 import LanguageCodeData
+
+class LocaleKeySorter:
+ """Sort-ordering representation of a locale key.
+
+ This is for passing to a sorting algorithm as key-function, that
+ it applies to each entry in the list to decide which belong
+ earlier. It adds an entry to the (language, script, territory)
+ triple, just before script, that sorts earlier if the territory is
+ the default for the given language and script, later otherwise.
+ """
+
+ # TODO: study the relationship between this and CLDR's likely
+ # sub-tags algorithm. Work out how locale sort-order impacts
+ # QLocale's likely sub-tag matching algorithms. Make sure this is
+ # sorting in an order compatible with those algorithms.
+
+ def __init__(self, defaults):
+ self.map = dict(defaults)
+ def foreign(self, key):
+ default = self.map.get(key[:2])
+ return default is None or default != key[2]
+ def __call__(self, key):
+ # TODO: should we compare territory before or after script ?
+ return (key[0], self.foreign(key)) + key[1:]
class StringDataToken:
def __init__(self, index, length, bits):
if index > 0xffff:
- raise ValueError('Start-index ({}) exceeds the uint16 range!'.format(index))
+ raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
if length >= (1 << bits):
- raise ValueError('Data size ({}) exceeds the {}-bit range!'.format(length, bits))
+ raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')
self.index = index
self.length = length
@@ -131,11 +104,10 @@ class StringData:
def write(self, fd):
if len(self.data) > 0xffff:
- raise ValueError('Data is too big ({}) for quint16 index to its end!'
- .format(len(self.data)),
+ raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
self.name)
- fd.write("\nstatic const char16_t {}[] = {{\n".format(self.name))
- fd.write(wrap_list(self.data))
+ fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
+ fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
fd.write("\n};\n")
def currencyIsoCodeData(s):
@@ -144,13 +116,16 @@ def currencyIsoCodeData(s):
return "{0,0,0}"
class LocaleSourceEditor (SourceFileEditor):
- __upinit = SourceFileEditor.__init__
- def __init__(self, path, temp, version):
- self.__upinit(path, temp)
- self.writer.write("""
+ def __init__(self, path: Path, temp: Path, version: str):
+ super().__init__(path, temp)
+ self.version = version
+
+ def onEnter(self) -> None:
+ super().onEnter()
+ self.writer.write(f"""
/*
- This part of the file was generated on {} from the
- Common Locale Data Repository v{}
+ This part of the file was generated on {datetime.date.today()} from the
+ Common Locale Data Repository v{self.version}
http://www.unicode.org/cldr/
@@ -159,7 +134,7 @@ class LocaleSourceEditor (SourceFileEditor):
edited) CLDR data; see qtbase/util/locale_database/.
*/
-""".format(datetime.date.today(), version))
+""")
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
@@ -173,23 +148,22 @@ class LocaleDataWriter (LocaleSourceEditor):
def keyLikely(entry):
have = entry[1] # Numeric id triple
return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
- likely = list(likely) # Turn generator into list so we can sort it
- likely.sort(key=keyLikely)
+ likely = sorted(likely, key=keyLikely)
i = 0
- self.writer.write('static const QLocaleId likely_subtags[] = {\n')
+ self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
for had, have, got, give in likely:
i += 1
self.writer.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
self.writer.write(' ' if i == len(likely) else ',')
- self.writer.write(' // {} -> {}\n'.format(had, got))
+ self.writer.write(f' // {had} -> {got}\n')
self.writer.write('};\n\n')
def localeIndex(self, indices):
- self.writer.write('static const quint16 locale_index[] = {\n')
- for pair in indices:
- self.writer.write('{:6d}, // {}\n'.format(*pair))
+ self.writer.write('static constexpr quint16 locale_index[] = {\n')
+ for index, name in indices:
+ self.writer.write(f'{index:6d}, // {name}\n')
self.writer.write(' 0 // trailing 0\n')
self.writer.write('};\n\n')
@@ -208,7 +182,7 @@ class LocaleDataWriter (LocaleSourceEditor):
endonyms_data = StringData('endonyms_data')
# Locale data
- self.writer.write('static const QLocaleData locale_data[] = {\n')
+ self.writer.write('static constexpr QLocaleData locale_data[] = {\n')
# Table headings: keep each label centred in its field, matching line_format:
self.writer.write(' // '
# Width 6 + comma
@@ -309,7 +283,7 @@ class LocaleDataWriter (LocaleSourceEditor):
locale.minus, locale.plus, locale.exp,
locale.quotationStart, locale.quotationEnd,
locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
- tuple (date_format_data.append(f) for f in # 2 entries:
+ tuple(date_format_data.append(f) for f in # 2 entries:
(locale.longDateFormat, locale.shortDateFormat)) +
tuple(time_format_data.append(f) for f in # 2 entries:
(locale.longTimeFormat, locale.shortTimeFormat)) +
@@ -327,7 +301,7 @@ class LocaleDataWriter (LocaleSourceEditor):
currency_format_data.append(locale.currencyFormat),
currency_format_data.append(locale.currencyNegativeFormat),
endonyms_data.append(locale.languageEndonym),
- endonyms_data.append(locale.countryEndonym)) # 6 entries
+ endonyms_data.append(locale.territoryEndonym)) # 6 entries
) # Total: 37 entries
assert len(ranges) == 37
@@ -340,8 +314,7 @@ class LocaleDataWriter (LocaleSourceEditor):
locale.currencyRounding, # unused (QTBUG-81343)
locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
locale.groupTop, locale.groupHigher, locale.groupLeast) ))
- + ', // {}/{}/{}\n'.format(
- locale.language, locale.script, locale.country))
+ + f', // {locale.language}/{locale.script}/{locale.territory}\n')
self.writer.write(formatLine(*( # All zeros, matching the format:
(0,) * 3 + (0,) * 37 * 2
+ (currencyIsoCodeData(0),)
@@ -359,32 +332,35 @@ class LocaleDataWriter (LocaleSourceEditor):
@staticmethod
def __writeNameData(out, book, form):
- out('static const char {}_name_list[] =\n'.format(form))
+ out(f'static constexpr char {form}_name_list[] =\n')
out('"Default\\0"\n')
for key, value in book.items():
if key == 0:
continue
- out('"' + value[0] + '\\0"\n')
+ enum, name = value[0], value[-1]
+ if names_clash(name, enum):
+ out(f'"{name}\\0" // {enum}\n')
+ else:
+ out(f'"{name}\\0"\n') # Automagically utf-8 encoded
out(';\n\n')
- out('static const quint16 {}_name_index[] = {{\n'.format(form))
- out(' 0, // Any{}\n'.format(form.capitalize()))
+ out(f'static constexpr quint16 {form}_name_index[] = {{\n')
+ out(f' 0, // Any{form.capitalize()}\n')
index = 8
for key, value in book.items():
if key == 0:
continue
- name = value[0]
- out('{:6d}, // {}\n'.format(index, name))
- index += len(name) + 1
+ out(f'{index:6d}, // {value[0]}\n')
+ index += len(value[-1].encode('utf-8')) + 1
out('};\n\n')
@staticmethod
def __writeCodeList(out, book, form, width):
- out('static const unsigned char {}_code_list[] =\n'.format(form))
+ out(f'static constexpr unsigned char {form}_code_list[] =\n')
for key, value in book.items():
code = value[1]
code += r'\0' * max(width - len(code), 0)
- out('"{}" // {}\n'.format(code, value[0]))
+ out(f'"{code}" // {value[0]}\n')
out(';\n\n')
def languageNames(self, languages):
@@ -393,20 +369,44 @@ class LocaleDataWriter (LocaleSourceEditor):
def scriptNames(self, scripts):
self.__writeNameData(self.writer.write, scripts, 'script')
- def countryNames(self, countries):
- self.__writeNameData(self.writer.write, countries, 'territory')
+ def territoryNames(self, territories):
+ self.__writeNameData(self.writer.write, territories, 'territory')
# TODO: unify these next three into the previous three; kept
# separate for now to verify we're not changing data.
- def languageCodes(self, languages):
- self.__writeCodeList(self.writer.write, languages, 'language', 3)
+ def languageCodes(self, languages, code_data: LanguageCodeData):
+ out = self.writer.write
+
+ out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
+
+ def q(val: Optional[str], size: int) -> str:
+ """Quote the value and adjust the result for tabular view."""
+ s = '' if val is None else ', '.join(f"'{c}'" for c in val)
+ return f'{{{s}}}' if size == 0 else f'{{{s}}},'.ljust(size * 5 + 2)
+
+ for key, value in languages.items():
+ code = value[1]
+ if key < 2:
+ result = code_data.query('und')
+ else:
+ result = code_data.query(code)
+ assert code == result.id()
+ assert result is not None
+
+ codeString = q(result.part1Code, 2)
+ codeString += q(result.part2BCode, 3)
+ codeString += q(result.part2TCode, 3)
+ codeString += q(result.part3Code, 0)
+ out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
+
+ out('};\n\n')
def scriptCodes(self, scripts):
self.__writeCodeList(self.writer.write, scripts, 'script', 4)
- def countryCodes(self, countries): # TODO: unify with countryNames()
- self.__writeCodeList(self.writer.write, countries, 'territory', 3)
+ def territoryCodes(self, territories): # TODO: unify with territoryNames()
+ self.__writeCodeList(self.writer.write, territories, 'territory', 3)
class CalendarDataWriter (LocaleSourceEditor):
formatCalendar = (
@@ -416,7 +416,7 @@ class CalendarDataWriter (LocaleSourceEditor):
def write(self, calendar, locales, names):
months_data = StringData('months_data')
- self.writer.write('static const QCalendarLocale locale_data[] = {\n')
+ self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
self.writer.write(
' //'
# IDs, width 7 (6 + comma)
@@ -444,7 +444,7 @@ class CalendarDataWriter (LocaleSourceEditor):
(locale.standaloneShortMonths, locale.shortMonths,
locale.standaloneNarrowMonths, locale.narrowMonths)))
except ValueError as e:
- e.args += (locale.language, locale.script, locale.country, stem)
+ e.args += (locale.language, locale.script, locale.territory)
raise
self.writer.write(
@@ -452,25 +452,41 @@ class CalendarDataWriter (LocaleSourceEditor):
key +
tuple(r.index for r in ranges) +
tuple(r.length for r in ranges) ))
- + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country))
+ + f'// {locale.language}/{locale.script}/{locale.territory}\n')
self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
+ '// trailing zeros\n')
self.writer.write('};\n')
months_data.write(self.writer)
+
+class TestLocaleWriter (LocaleSourceEditor):
+ def localeList(self, locales):
+ self.writer.write('const LocaleListItem g_locale_list[] = {\n')
+ from enumdata import language_map, territory_map
+ # TODO: update testlocales/ to include script.
+ # For now, only mention each (lang, land) pair once:
+ pairs = set((lang, land) for lang, script, land in locales)
+ for lang, script, land in locales:
+ if (lang, land) in pairs:
+ pairs.discard((lang, land))
+ langName = language_map[lang][0]
+ landName = territory_map[land][0]
+ self.writer.write(f' {{ {lang:6d},{land:6d} }}, // {langName}/{landName}\n')
+ self.writer.write('};\n\n')
+
+
class LocaleHeaderWriter (SourceFileEditor):
- __upinit = SourceFileEditor.__init__
- def __init__(self, path, temp, dupes):
- self.__upinit(path, temp)
- self.__dupes = dupes
+ def __init__(self, path, temp, enumify):
+ super().__init__(path, temp)
+ self.__enumify = enumify
def languages(self, languages):
self.__enum('Language', languages, self.__language)
self.writer.write('\n')
- def countries(self, countries):
+ def territories(self, territories):
self.writer.write(" // ### Qt 7: Rename to Territory\n")
- self.__enum('Country', countries, self.__country, 'Territory')
+ self.__enum('Country', territories, self.__territory, 'Territory')
def scripts(self, scripts):
self.__enum('Script', scripts, self.__script)
@@ -478,7 +494,7 @@ class LocaleHeaderWriter (SourceFileEditor):
# Implementation details
from enumdata import (language_aliases as __language,
- country_aliases as __country,
+ territory_aliases as __territory,
script_aliases as __script)
def __enum(self, name, book, alias, suffix = None):
@@ -487,153 +503,135 @@ class LocaleHeaderWriter (SourceFileEditor):
if suffix is None:
suffix = name
- out, dupes = self.writer.write, self.__dupes
- out(' enum {} : ushort {{\n'.format(name))
+ out, enumify = self.writer.write, self.__enumify
+ out(f' enum {name} : ushort {{\n')
for key, value in book.items():
- member = value[0].replace('-', ' ')
- if name == 'Script':
- # Don't .capitalize() as some names are already camel-case (see enumdata.py):
- member = ''.join(word[0].upper() + word[1:] for word in member.split())
- if not member.endswith('Script'):
- member += 'Script'
- if member in dupes:
- raise Error('The script name "{}" is messy'.format(member))
- else:
- member = ''.join(member.split())
- member = member + suffix if member in dupes else member
- out(' {} = {},\n'.format(member, key))
+ member = enumify(value[0], suffix)
+ out(f' {member} = {key},\n')
out('\n '
- + ',\n '.join('{} = {}'.format(*pair)
- for pair in sorted(alias.items()))
- + ',\n\n Last{} = {}'.format(suffix, member))
+ + ',\n '.join(f'{k} = {v}' for k, v in sorted(alias.items()))
+ + f',\n\n Last{suffix} = {member}')
# for "LastCountry = LastTerritory"
# ### Qt 7: Remove
if suffix != name:
- out(',\n Last{} = Last{}'.format(name, suffix))
+ out(f',\n Last{name} = Last{suffix}')
out('\n };\n')
-def usage(name, err, message = ''):
- err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase
-""".format(name)) # TODO: elaborate
- if message:
- err.write('\n' + message + '\n')
-
-def main(args, out, err):
- # TODO: Make calendars a command-line parameter
- # map { CLDR name: Qt file name }
- calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',
-
- name = args.pop(0)
- if len(args) != 2:
- usage(name, err, 'I expect two arguments')
- return 1
- qlocalexml = args.pop(0)
- qtsrcdir = args.pop(0)
-
- if not (os.path.isdir(qtsrcdir)
- and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf))
+def main(argv, out, err):
+ """Updates QLocale's CLDR data from a QLocaleXML file.
+
+ Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+ arguments. In argv[1:] it expects the QLocaleXML file as first
+ parameter and the ISO 639-3 data table as second
+ parameter. Accepts the root of the qtbase checkout as third
+ parameter (default is inferred from this script's path) and a
+ --calendars option to select which calendars to support (all
+ available by default).
+
+ Updates various src/corelib/t*/q*_data_p.h files within the qtbase
+ checkout to contain data extracted from the QLocaleXML file."""
+ calendars_map = {
+ # CLDR name: Qt file name fragment
+ 'gregorian': 'roman',
+ 'persian': 'jalali',
+ 'islamic': 'hijri',
+ }
+ all_calendars = list(calendars_map.keys())
+
+ parser = argparse.ArgumentParser(
+ prog=Path(argv[0]).name,
+ description='Generate C++ code from CLDR data in QLocaleXML form.',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('input_file', help='input XML file name',
+ metavar='input-file.xml')
+ parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
+ metavar='iso-639-3.tab')
+ parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree',
+ nargs='?', default=qtbase_root)
+ parser.add_argument('--calendars', help='select calendars to emit data for',
+ nargs='+', metavar='CALENDAR',
+ choices=all_calendars, default=all_calendars)
+ args = parser.parse_args(argv[1:])
+
+ qlocalexml = args.input_file
+ qtsrcdir = Path(args.qtbase_path)
+ calendars = {cal: calendars_map[cal] for cal in args.calendars}
+
+ if not (qtsrcdir.is_dir()
+ and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file()
for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
- usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
- return 1
+ parser.error(f'Missing expected files under qtbase source root {qtsrcdir}')
reader = QLocaleXmlReader(qlocalexml)
locale_map = dict(reader.loadLocaleMap(calendars, err.write))
+ locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))
- locale_keys = locale_map.keys()
- compareLocaleKeys.default_map = dict(reader.defaultMap())
- locale_keys.sort(compareLocaleKeys)
-
- try:
- writer = LocaleDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text',
- 'qlocale_data_p.h'),
- qtsrcdir, reader.cldrVersion)
- except IOError as e:
- err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1]))
- return 1
+ code_data = LanguageCodeData(args.iso_path)
try:
- writer.likelySubtags(reader.likelyMap())
- writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
- writer.localeData(locale_map, locale_keys)
- writer.writer.write('\n')
- writer.languageNames(reader.languages)
- writer.scriptNames(reader.scripts)
- writer.countryNames(reader.countries)
- # TODO: merge the next three into the previous three
- writer.languageCodes(reader.languages)
- writer.scriptCodes(reader.scripts)
- writer.countryCodes(reader.countries)
- except Error as e:
- writer.cleanup()
- err.write('\nError updating locale data: ' + e.message + '\n')
+ with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
+ qtsrcdir, reader.cldrVersion) as writer:
+ writer.likelySubtags(reader.likelyMap())
+ writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
+ writer.localeData(locale_map, locale_keys)
+ writer.writer.write('\n')
+ writer.languageNames(reader.languages)
+ writer.scriptNames(reader.scripts)
+ writer.territoryNames(reader.territories)
+ # TODO: merge the next three into the previous three
+ writer.languageCodes(reader.languages, code_data)
+ writer.scriptCodes(reader.scripts)
+ writer.territoryCodes(reader.territories)
+ except Exception as e:
+ err.write(f'\nError updating locale data: {e}\n')
return 1
- writer.close()
-
# Generate calendar data
for calendar, stem in calendars.items():
try:
- writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time',
- 'q{}calendar_data_p.h'.format(stem)),
- qtsrcdir, reader.cldrVersion)
- except IOError as e:
- err.write('Failed to open files to transcribe ' + calendar
- + ' data ' + (e.message or e.args[1]))
- return 1
-
- try:
- writer.write(calendar, locale_map, locale_keys)
- except Error as e:
- writer.cleanup()
- err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
- return 1
-
- writer.close()
+ with CalendarDataWriter(
+ qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'),
+ qtsrcdir, reader.cldrVersion) as writer:
+ writer.write(calendar, locale_map, locale_keys)
+ except Exception as e:
+ err.write(f'\nError updating {calendar} locale data: {e}\n')
# qlocale.h
try:
- writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'),
- qtsrcdir, reader.dupes)
- except IOError as e:
- err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1]))
- return 1
-
- try:
- writer.languages(reader.languages)
- writer.scripts(reader.scripts)
- writer.countries(reader.countries)
- except Error as e:
- writer.cleanup()
- err.write('\nError updating qlocale.h: ' + e.message + '\n')
- return 1
-
- writer.close()
+ with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
+ qtsrcdir, reader.enumify) as writer:
+ writer.languages(reader.languages)
+ writer.scripts(reader.scripts)
+ writer.territories(reader.territories)
+ except Exception as e:
+ err.write(f'\nError updating qlocale.h: {e}\n')
# qlocale.qdoc
try:
- writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'),
- qtsrcdir)
- except IOError as e:
- err.write('Failed to open files to transcribe qlocale.qdoc: ' + (e.message or e.args[1]))
+ with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc:
+ DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
+ for line in qdoc.reader:
+ if DOCSTRING in line:
+ qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n')
+ else:
+ qdoc.writer.write(line)
+ except Exception as e:
+ err.write(f'\nError updating qlocale.h: {e}\n')
return 1
- DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
+ # ./testlocales/localemodel.cpp
try:
- for line in writer.reader:
- if DOCSTRING in line:
- writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n')
- else:
- writer.writer.write(line)
- except Error as e:
- writer.cleanup()
- err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
- return 1
+ path = 'util/locale_database/testlocales/localemodel.cpp'
+ with TestLocaleWriter(qtsrcdir.joinpath(path), qtsrcdir,
+ reader.cldrVersion) as test:
+ test.localeList(locale_keys)
+ except Exception as e:
+ err.write(f'\nError updating localemodel.cpp: {e}\n')
- writer.close()
return 0
if __name__ == "__main__":
diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp
index d171bc9855..7f0150c7e0 100644
--- a/util/locale_database/testlocales/localemodel.cpp
+++ b/util/locale_database/testlocales/localemodel.cpp
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include "localemodel.h"
#include <QLocale>
@@ -39,236 +14,658 @@ struct LocaleListItem
int territory;
};
+// GENERATED PART STARTS HERE
+
+/*
+ This part of the file was generated on 2024-04-22 from the
+ Common Locale Data Repository v44.1
+
+ http://www.unicode.org/cldr/
+
+ Do not edit this section: instead regenerate it using
+ cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
+ edited) CLDR data; see qtbase/util/locale_database/.
+*/
+
const LocaleListItem g_locale_list[] = {
{ 1, 0 }, // C/AnyTerritory
- { 3, 69 }, // Afan/Ethiopia
- { 3, 111 }, // Afan/Kenya
- { 4, 59 }, // Afar/Djibouti
- { 4, 67 }, // Afar/Eritrea
- { 4, 69 }, // Afar/Ethiopia
- { 5, 195 }, // Afrikaans/SouthAfrica
- { 5, 148 }, // Afrikaans/Namibia
- { 6, 2 }, // Albanian/Albania
- { 7, 69 }, // Amharic/Ethiopia
- { 8, 186 }, // Arabic/SaudiArabia
- { 8, 3 }, // Arabic/Algeria
- { 8, 17 }, // Arabic/Bahrain
- { 8, 64 }, // Arabic/Egypt
- { 8, 103 }, // Arabic/Iraq
- { 8, 109 }, // Arabic/Jordan
- { 8, 115 }, // Arabic/Kuwait
- { 8, 119 }, // Arabic/Lebanon
- { 8, 122 }, // Arabic/LibyanArabJamahiriya
- { 8, 145 }, // Arabic/Morocco
- { 8, 162 }, // Arabic/Oman
- { 8, 175 }, // Arabic/Qatar
- { 8, 201 }, // Arabic/Sudan
- { 8, 207 }, // Arabic/SyrianArabRepublic
- { 8, 216 }, // Arabic/Tunisia
- { 8, 223 }, // Arabic/UnitedArabEmirates
- { 8, 237 }, // Arabic/Yemen
- { 9, 11 }, // Armenian/Armenia
- { 10, 100 }, // Assamese/India
- { 12, 15 }, // Azerbaijani/Azerbaijan
- { 14, 197 }, // Basque/Spain
- { 15, 18 }, // Bengali/Bangladesh
- { 15, 100 }, // Bengali/India
- { 16, 25 }, // Bhutani/Bhutan
- { 20, 33 }, // Bulgarian/Bulgaria
- { 22, 20 }, // Byelorussian/Belarus
- { 23, 36 }, // Cambodian/Cambodia
- { 24, 197 }, // Catalan/Spain
- { 25, 44 }, // Chinese/China
- { 25, 97 }, // Chinese/HongKong
- { 25, 126 }, // Chinese/Macau
- { 25, 190 }, // Chinese/Singapore
- { 25, 208 }, // Chinese/Taiwan
- { 27, 54 }, // Croatian/Croatia
- { 28, 57 }, // Czech/CzechRepublic
- { 29, 58 }, // Danish/Denmark
- { 30, 151 }, // Dutch/Netherlands
- { 30, 21 }, // Dutch/Belgium
- { 31, 225 }, // English/UnitedStates
- { 31, 4 }, // English/AmericanSamoa
- { 31, 13 }, // English/Australia
- { 31, 21 }, // English/Belgium
- { 31, 22 }, // English/Belize
- { 31, 28 }, // English/Botswana
- { 31, 38 }, // English/Canada
- { 31, 89 }, // English/Guam
- { 31, 97 }, // English/HongKong
- { 31, 100 }, // English/India
- { 31, 104 }, // English/Ireland
- { 31, 107 }, // English/Jamaica
- { 31, 133 }, // English/Malta
- { 31, 134 }, // English/MarshallIslands
- { 31, 148 }, // English/Namibia
- { 31, 154 }, // English/NewZealand
- { 31, 160 }, // English/NorthernMarianaIslands
- { 31, 163 }, // English/Pakistan
- { 31, 170 }, // English/Philippines
- { 31, 190 }, // English/Singapore
- { 31, 195 }, // English/SouthAfrica
- { 31, 215 }, // English/TrinidadAndTobago
- { 31, 224 }, // English/UnitedKingdom
- { 31, 226 }, // English/UnitedStatesMinorOutlyingIslands
- { 31, 234 }, // English/USVirginIslands
- { 31, 240 }, // English/Zimbabwe
- { 33, 68 }, // Estonian/Estonia
- { 34, 71 }, // Faroese/FaroeIslands
- { 36, 73 }, // Finnish/Finland
- { 37, 74 }, // French/France
- { 37, 21 }, // French/Belgium
- { 37, 38 }, // French/Canada
- { 37, 125 }, // French/Luxembourg
- { 37, 142 }, // French/Monaco
- { 37, 206 }, // French/Switzerland
- { 40, 197 }, // Galician/Spain
- { 41, 81 }, // Georgian/Georgia
- { 42, 82 }, // German/Germany
- { 42, 14 }, // German/Austria
- { 42, 21 }, // German/Belgium
- { 42, 123 }, // German/Liechtenstein
- { 42, 125 }, // German/Luxembourg
- { 42, 206 }, // German/Switzerland
- { 43, 85 }, // Greek/Greece
- { 43, 56 }, // Greek/Cyprus
- { 44, 86 }, // Greenlandic/Greenland
- { 46, 100 }, // Gujarati/India
- { 47, 83 }, // Hausa/Ghana
- { 47, 156 }, // Hausa/Niger
- { 47, 157 }, // Hausa/Nigeria
- { 48, 105 }, // Hebrew/Israel
- { 49, 100 }, // Hindi/India
- { 50, 98 }, // Hungarian/Hungary
- { 51, 99 }, // Icelandic/Iceland
- { 52, 101 }, // Indonesian/Indonesia
- { 57, 104 }, // Irish/Ireland
- { 58, 106 }, // Italian/Italy
- { 58, 206 }, // Italian/Switzerland
- { 59, 108 }, // Japanese/Japan
- { 61, 100 }, // Kannada/India
- { 63, 110 }, // Kazakh/Kazakhstan
- { 64, 179 }, // Kinyarwanda/Rwanda
- { 65, 116 }, // Kirghiz/Kyrgyzstan
- { 66, 114 }, // Korean/RepublicOfKorea
- { 67, 102 }, // Kurdish/Iran
- { 67, 103 }, // Kurdish/Iraq
- { 67, 207 }, // Kurdish/SyrianArabRepublic
- { 67, 217 }, // Kurdish/Turkey
- { 69, 117 }, // Laothian/Lao
- { 71, 118 }, // Latvian/Latvia
- { 72, 49 }, // Lingala/DemocraticRepublicOfCongo
- { 72, 50 }, // Lingala/PeoplesRepublicOfCongo
- { 73, 124 }, // Lithuanian/Lithuania
- { 74, 127 }, // Macedonian/Macedonia
- { 76, 130 }, // Malay/Malaysia
- { 76, 32 }, // Malay/BruneiDarussalam
- { 77, 100 }, // Malayalam/India
- { 78, 133 }, // Maltese/Malta
- { 80, 100 }, // Marathi/India
- { 82, 143 }, // Mongolian/Mongolia
- { 84, 150 }, // Nepali/Nepal
- { 85, 161 }, // Norwegian/Norway
- { 87, 100 }, // Oriya/India
- { 88, 1 }, // Pashto/Afghanistan
- { 89, 102 }, // Persian/Iran
- { 89, 1 }, // Persian/Afghanistan
- { 90, 172 }, // Polish/Poland
- { 91, 173 }, // Portuguese/Portugal
- { 91, 30 }, // Portuguese/Brazil
- { 92, 100 }, // Punjabi/India
- { 92, 163 }, // Punjabi/Pakistan
- { 95, 177 }, // Romanian/Romania
- { 96, 178 }, // Russian/RussianFederation
- { 96, 222 }, // Russian/Ukraine
- { 99, 100 }, // Sanskrit/India
- { 100, 241 }, // Serbian/SerbiaAndMontenegro
- { 100, 27 }, // Serbian/BosniaAndHerzegowina
- { 100, 238 }, // Serbian/Yugoslavia
- { 101, 241 }, // SerboCroatian/SerbiaAndMontenegro
- { 101, 27 }, // SerboCroatian/BosniaAndHerzegowina
- { 101, 238 }, // SerboCroatian/Yugoslavia
- { 102, 195 }, // Sesotho/SouthAfrica
- { 103, 195 }, // Setswana/SouthAfrica
- { 107, 195 }, // Siswati/SouthAfrica
- { 108, 191 }, // Slovak/Slovakia
- { 109, 192 }, // Slovenian/Slovenia
- { 110, 194 }, // Somali/Somalia
- { 110, 59 }, // Somali/Djibouti
- { 110, 69 }, // Somali/Ethiopia
- { 110, 111 }, // Somali/Kenya
- { 111, 197 }, // Spanish/Spain
- { 111, 10 }, // Spanish/Argentina
- { 111, 26 }, // Spanish/Bolivia
- { 111, 43 }, // Spanish/Chile
- { 111, 47 }, // Spanish/Colombia
- { 111, 52 }, // Spanish/CostaRica
- { 111, 61 }, // Spanish/DominicanRepublic
- { 111, 63 }, // Spanish/Ecuador
- { 111, 65 }, // Spanish/ElSalvador
- { 111, 90 }, // Spanish/Guatemala
- { 111, 96 }, // Spanish/Honduras
- { 111, 139 }, // Spanish/Mexico
- { 111, 155 }, // Spanish/Nicaragua
- { 111, 166 }, // Spanish/Panama
- { 111, 168 }, // Spanish/Paraguay
- { 111, 169 }, // Spanish/Peru
- { 111, 174 }, // Spanish/PuertoRico
- { 111, 225 }, // Spanish/UnitedStates
- { 111, 227 }, // Spanish/Uruguay
- { 111, 231 }, // Spanish/Venezuela
- { 113, 111 }, // Swahili/Kenya
- { 113, 210 }, // Swahili/Tanzania
- { 114, 205 }, // Swedish/Sweden
- { 114, 73 }, // Swedish/Finland
- { 116, 209 }, // Tajik/Tajikistan
- { 117, 100 }, // Tamil/India
- { 118, 178 }, // Tatar/RussianFederation
- { 119, 100 }, // Telugu/India
- { 120, 211 }, // Thai/Thailand
- { 122, 67 }, // Tigrinya/Eritrea
- { 122, 69 }, // Tigrinya/Ethiopia
- { 124, 195 }, // Tsonga/SouthAfrica
- { 125, 217 }, // Turkish/Turkey
- { 129, 222 }, // Ukrainian/Ukraine
- { 130, 100 }, // Urdu/India
- { 130, 163 }, // Urdu/Pakistan
- { 131, 228 }, // Uzbek/Uzbekistan
- { 131, 1 }, // Uzbek/Afghanistan
- { 132, 232 }, // Vietnamese/VietNam
- { 134, 224 }, // Welsh/UnitedKingdom
- { 136, 195 }, // Xhosa/SouthAfrica
- { 138, 157 }, // Yoruba/Nigeria
- { 140, 195 }, // Zulu/SouthAfrica
- { 141, 161 }, // Nynorsk/Norway
- { 142, 27 }, // Bosnian/BosniaAndHerzegowina
- { 143, 131 }, // Divehi/Maldives
- { 144, 224 }, // Manx/UnitedKingdom
- { 145, 224 }, // Cornish/UnitedKingdom
- { 146, 83 }, // Akan/Ghana
- { 147, 100 }, // Konkani/India
- { 148, 83 }, // Ga/Ghana
- { 149, 157 }, // Igbo/Nigeria
- { 150, 111 }, // Kamba/Kenya
- { 151, 207 }, // Syriac/SyrianArabRepublic
- { 152, 67 }, // Blin/Eritrea
- { 153, 67 }, // Geez/Eritrea
- { 153, 69 }, // Geez/Ethiopia
- { 154, 157 }, // Koro/Nigeria
- { 155, 69 }, // Sidamo/Ethiopia
- { 156, 157 }, // Atsam/Nigeria
- { 157, 67 }, // Tigre/Eritrea
- { 158, 157 }, // Jju/Nigeria
- { 159, 106 }, // Friulian/Italy
- { 160, 195 }, // Venda/SouthAfrica
- { 161, 83 }, // Ewe/Ghana
- { 161, 212 }, // Ewe/Togo
- { 163, 225 }, // Hawaiian/UnitedStates
- { 164, 157 }, // Tyap/Nigeria
- { 165, 129 }, // Chewa/Malawi
+ { 2, 90 }, // Abkhazian/Georgia
+ { 3, 77 }, // Afar/Ethiopia
+ { 3, 67 }, // Afar/Djibouti
+ { 3, 74 }, // Afar/Eritrea
+ { 4, 216 }, // Afrikaans/South Africa
+ { 4, 162 }, // Afrikaans/Namibia
+ { 5, 40 }, // Aghem/Cameroon
+ { 6, 92 }, // Akan/Ghana
+ { 8, 40 }, // Akoose/Cameroon
+ { 9, 3 }, // Albanian/Albania
+ { 9, 126 }, // Albanian/Kosovo
+ { 9, 140 }, // Albanian/Macedonia
+ { 11, 77 }, // Amharic/Ethiopia
+ { 14, 71 }, // Arabic/Egypt
+ { 14, 4 }, // Arabic/Algeria
+ { 14, 19 }, // Arabic/Bahrain
+ { 14, 48 }, // Arabic/Chad
+ { 14, 55 }, // Arabic/Comoros
+ { 14, 67 }, // Arabic/Djibouti
+ { 14, 74 }, // Arabic/Eritrea
+ { 14, 113 }, // Arabic/Iraq
+ { 14, 116 }, // Arabic/Israel
+ { 14, 122 }, // Arabic/Jordan
+ { 14, 127 }, // Arabic/Kuwait
+ { 14, 132 }, // Arabic/Lebanon
+ { 14, 135 }, // Arabic/Libya
+ { 14, 149 }, // Arabic/Mauritania
+ { 14, 159 }, // Arabic/Morocco
+ { 14, 176 }, // Arabic/Oman
+ { 14, 180 }, // Arabic/Palestinian Territories
+ { 14, 190 }, // Arabic/Qatar
+ { 14, 205 }, // Arabic/Saudi Arabia
+ { 14, 215 }, // Arabic/Somalia
+ { 14, 219 }, // Arabic/South Sudan
+ { 14, 222 }, // Arabic/Sudan
+ { 14, 227 }, // Arabic/Syria
+ { 14, 238 }, // Arabic/Tunisia
+ { 14, 245 }, // Arabic/United Arab Emirates
+ { 14, 257 }, // Arabic/Western Sahara
+ { 14, 258 }, // Arabic/world
+ { 14, 259 }, // Arabic/Yemen
+ { 15, 220 }, // Aragonese/Spain
+ { 17, 12 }, // Armenian/Armenia
+ { 18, 110 }, // Assamese/India
+ { 19, 220 }, // Asturian/Spain
+ { 20, 230 }, // Asu/Tanzania
+ { 21, 169 }, // Atsam/Nigeria
+ { 25, 17 }, // Azerbaijani/Azerbaijan
+ { 25, 112 }, // Azerbaijani/Iran
+ { 25, 113 }, // Azerbaijani/Iraq
+ { 25, 239 }, // Azerbaijani/Turkey
+ { 26, 40 }, // Bafia/Cameroon
+ { 28, 145 }, // Bambara/Mali
+ { 30, 20 }, // Bangla/Bangladesh
+ { 30, 110 }, // Bangla/India
+ { 31, 40 }, // Basaa/Cameroon
+ { 32, 193 }, // Bashkir/Russia
+ { 33, 220 }, // Basque/Spain
+ { 35, 22 }, // Belarusian/Belarus
+ { 36, 260 }, // Bemba/Zambia
+ { 37, 230 }, // Bena/Tanzania
+ { 38, 110 }, // Bhojpuri/India
+ { 40, 74 }, // Blin/Eritrea
+ { 41, 110 }, // Bodo/India
+ { 42, 29 }, // Bosnian/Bosnia and Herzegovina
+ { 43, 84 }, // Breton/France
+ { 45, 36 }, // Bulgarian/Bulgaria
+ { 46, 161 }, // Burmese/Myanmar
+ { 47, 107 }, // Cantonese/Hong Kong
+ { 47, 50 }, // Cantonese/China
+ { 48, 220 }, // Catalan/Spain
+ { 48, 6 }, // Catalan/Andorra
+ { 48, 84 }, // Catalan/France
+ { 48, 117 }, // Catalan/Italy
+ { 49, 185 }, // Cebuano/Philippines
+ { 50, 159 }, // Central Atlas Tamazight/Morocco
+ { 51, 113 }, // Central Kurdish/Iraq
+ { 51, 112 }, // Central Kurdish/Iran
+ { 52, 20 }, // Chakma/Bangladesh
+ { 52, 110 }, // Chakma/India
+ { 54, 193 }, // Chechen/Russia
+ { 55, 248 }, // Cherokee/United States
+ { 56, 248 }, // Chickasaw/United States
+ { 57, 243 }, // Chiga/Uganda
+ { 58, 50 }, // Chinese/China
+ { 58, 107 }, // Chinese/Hong Kong
+ { 58, 139 }, // Chinese/Macao
+ { 58, 210 }, // Chinese/Singapore
+ { 58, 228 }, // Chinese/Taiwan
+ { 59, 193 }, // Church/Russia
+ { 60, 193 }, // Chuvash/Russia
+ { 61, 91 }, // Colognian/Germany
+ { 63, 246 }, // Cornish/United Kingdom
+ { 64, 84 }, // Corsican/France
+ { 66, 60 }, // Croatian/Croatia
+ { 66, 29 }, // Croatian/Bosnia and Herzegovina
+ { 67, 64 }, // Czech/Czechia
+ { 68, 65 }, // Danish/Denmark
+ { 68, 95 }, // Danish/Greenland
+ { 69, 144 }, // Divehi/Maldives
+ { 70, 110 }, // Dogri/India
+ { 71, 40 }, // Duala/Cameroon
+ { 72, 165 }, // Dutch/Netherlands
+ { 72, 13 }, // Dutch/Aruba
+ { 72, 23 }, // Dutch/Belgium
+ { 72, 44 }, // Dutch/Caribbean Netherlands
+ { 72, 62 }, // Dutch/Curacao
+ { 72, 211 }, // Dutch/Sint Maarten
+ { 72, 223 }, // Dutch/Suriname
+ { 73, 27 }, // Dzongkha/Bhutan
+ { 74, 124 }, // Embu/Kenya
+ { 75, 248 }, // English/United States
+ { 75, 5 }, // English/American Samoa
+ { 75, 8 }, // English/Anguilla
+ { 75, 10 }, // English/Antigua and Barbuda
+ { 75, 15 }, // English/Australia
+ { 75, 16 }, // English/Austria
+ { 75, 18 }, // English/Bahamas
+ { 75, 21 }, // English/Barbados
+ { 75, 23 }, // English/Belgium
+ { 75, 24 }, // English/Belize
+ { 75, 26 }, // English/Bermuda
+ { 75, 30 }, // English/Botswana
+ { 75, 33 }, // English/British Indian Ocean Territory
+ { 75, 34 }, // English/British Virgin Islands
+ { 75, 38 }, // English/Burundi
+ { 75, 40 }, // English/Cameroon
+ { 75, 41 }, // English/Canada
+ { 75, 45 }, // English/Cayman Islands
+ { 75, 51 }, // English/Christmas Island
+ { 75, 53 }, // English/Cocos Islands
+ { 75, 58 }, // English/Cook Islands
+ { 75, 63 }, // English/Cyprus
+ { 75, 65 }, // English/Denmark
+ { 75, 66 }, // English/Diego Garcia
+ { 75, 68 }, // English/Dominica
+ { 75, 74 }, // English/Eritrea
+ { 75, 76 }, // English/Eswatini
+ { 75, 78 }, // English/Europe
+ { 75, 80 }, // English/Falkland Islands
+ { 75, 82 }, // English/Fiji
+ { 75, 83 }, // English/Finland
+ { 75, 89 }, // English/Gambia
+ { 75, 91 }, // English/Germany
+ { 75, 92 }, // English/Ghana
+ { 75, 93 }, // English/Gibraltar
+ { 75, 96 }, // English/Grenada
+ { 75, 98 }, // English/Guam
+ { 75, 100 }, // English/Guernsey
+ { 75, 103 }, // English/Guyana
+ { 75, 107 }, // English/Hong Kong
+ { 75, 110 }, // English/India
+ { 75, 111 }, // English/Indonesia
+ { 75, 114 }, // English/Ireland
+ { 75, 115 }, // English/Isle of Man
+ { 75, 116 }, // English/Israel
+ { 75, 119 }, // English/Jamaica
+ { 75, 121 }, // English/Jersey
+ { 75, 124 }, // English/Kenya
+ { 75, 125 }, // English/Kiribati
+ { 75, 133 }, // English/Lesotho
+ { 75, 134 }, // English/Liberia
+ { 75, 139 }, // English/Macao
+ { 75, 141 }, // English/Madagascar
+ { 75, 142 }, // English/Malawi
+ { 75, 143 }, // English/Malaysia
+ { 75, 144 }, // English/Maldives
+ { 75, 146 }, // English/Malta
+ { 75, 147 }, // English/Marshall Islands
+ { 75, 150 }, // English/Mauritius
+ { 75, 153 }, // English/Micronesia
+ { 75, 158 }, // English/Montserrat
+ { 75, 162 }, // English/Namibia
+ { 75, 163 }, // English/Nauru
+ { 75, 165 }, // English/Netherlands
+ { 75, 167 }, // English/New Zealand
+ { 75, 169 }, // English/Nigeria
+ { 75, 171 }, // English/Niue
+ { 75, 172 }, // English/Norfolk Island
+ { 75, 173 }, // English/Northern Mariana Islands
+ { 75, 178 }, // English/Pakistan
+ { 75, 179 }, // English/Palau
+ { 75, 182 }, // English/Papua New Guinea
+ { 75, 185 }, // English/Philippines
+ { 75, 186 }, // English/Pitcairn
+ { 75, 189 }, // English/Puerto Rico
+ { 75, 194 }, // English/Rwanda
+ { 75, 196 }, // English/Saint Helena
+ { 75, 197 }, // English/Saint Kitts and Nevis
+ { 75, 198 }, // English/Saint Lucia
+ { 75, 201 }, // English/Saint Vincent and Grenadines
+ { 75, 202 }, // English/Samoa
+ { 75, 208 }, // English/Seychelles
+ { 75, 209 }, // English/Sierra Leone
+ { 75, 210 }, // English/Singapore
+ { 75, 211 }, // English/Sint Maarten
+ { 75, 213 }, // English/Slovenia
+ { 75, 214 }, // English/Solomon Islands
+ { 75, 216 }, // English/South Africa
+ { 75, 219 }, // English/South Sudan
+ { 75, 222 }, // English/Sudan
+ { 75, 225 }, // English/Sweden
+ { 75, 226 }, // English/Switzerland
+ { 75, 230 }, // English/Tanzania
+ { 75, 234 }, // English/Tokelau
+ { 75, 235 }, // English/Tonga
+ { 75, 236 }, // English/Trinidad and Tobago
+ { 75, 241 }, // English/Turks and Caicos Islands
+ { 75, 242 }, // English/Tuvalu
+ { 75, 243 }, // English/Uganda
+ { 75, 245 }, // English/United Arab Emirates
+ { 75, 246 }, // English/United Kingdom
+ { 75, 247 }, // English/United States Outlying Islands
+ { 75, 249 }, // English/United States Virgin Islands
+ { 75, 252 }, // English/Vanuatu
+ { 75, 258 }, // English/world
+ { 75, 260 }, // English/Zambia
+ { 75, 261 }, // English/Zimbabwe
+ { 76, 193 }, // Erzya/Russia
+ { 77, 258 }, // Esperanto/world
+ { 78, 75 }, // Estonian/Estonia
+ { 79, 92 }, // Ewe/Ghana
+ { 79, 233 }, // Ewe/Togo
+ { 80, 40 }, // Ewondo/Cameroon
+ { 81, 81 }, // Faroese/Faroe Islands
+ { 81, 65 }, // Faroese/Denmark
+ { 83, 185 }, // Filipino/Philippines
+ { 84, 83 }, // Finnish/Finland
+ { 85, 84 }, // French/France
+ { 85, 4 }, // French/Algeria
+ { 85, 23 }, // French/Belgium
+ { 85, 25 }, // French/Benin
+ { 85, 37 }, // French/Burkina Faso
+ { 85, 38 }, // French/Burundi
+ { 85, 40 }, // French/Cameroon
+ { 85, 41 }, // French/Canada
+ { 85, 46 }, // French/Central African Republic
+ { 85, 48 }, // French/Chad
+ { 85, 55 }, // French/Comoros
+ { 85, 56 }, // French/Congo - Brazzaville
+ { 85, 57 }, // French/Congo - Kinshasa
+ { 85, 67 }, // French/Djibouti
+ { 85, 73 }, // French/Equatorial Guinea
+ { 85, 85 }, // French/French Guiana
+ { 85, 86 }, // French/French Polynesia
+ { 85, 88 }, // French/Gabon
+ { 85, 97 }, // French/Guadeloupe
+ { 85, 102 }, // French/Guinea
+ { 85, 104 }, // French/Haiti
+ { 85, 118 }, // French/Ivory Coast
+ { 85, 138 }, // French/Luxembourg
+ { 85, 141 }, // French/Madagascar
+ { 85, 145 }, // French/Mali
+ { 85, 148 }, // French/Martinique
+ { 85, 149 }, // French/Mauritania
+ { 85, 150 }, // French/Mauritius
+ { 85, 151 }, // French/Mayotte
+ { 85, 155 }, // French/Monaco
+ { 85, 159 }, // French/Morocco
+ { 85, 166 }, // French/New Caledonia
+ { 85, 170 }, // French/Niger
+ { 85, 191 }, // French/Reunion
+ { 85, 194 }, // French/Rwanda
+ { 85, 195 }, // French/Saint Barthelemy
+ { 85, 199 }, // French/Saint Martin
+ { 85, 200 }, // French/Saint Pierre and Miquelon
+ { 85, 206 }, // French/Senegal
+ { 85, 208 }, // French/Seychelles
+ { 85, 226 }, // French/Switzerland
+ { 85, 227 }, // French/Syria
+ { 85, 233 }, // French/Togo
+ { 85, 238 }, // French/Tunisia
+ { 85, 252 }, // French/Vanuatu
+ { 85, 256 }, // French/Wallis and Futuna
+ { 86, 117 }, // Friulian/Italy
+ { 87, 206 }, // Fulah/Senegal
+ { 87, 37 }, // Fulah/Burkina Faso
+ { 87, 40 }, // Fulah/Cameroon
+ { 87, 89 }, // Fulah/Gambia
+ { 87, 92 }, // Fulah/Ghana
+ { 87, 101 }, // Fulah/Guinea-Bissau
+ { 87, 102 }, // Fulah/Guinea
+ { 87, 134 }, // Fulah/Liberia
+ { 87, 149 }, // Fulah/Mauritania
+ { 87, 169 }, // Fulah/Nigeria
+ { 87, 170 }, // Fulah/Niger
+ { 87, 209 }, // Fulah/Sierra Leone
+ { 88, 246 }, // Gaelic/United Kingdom
+ { 89, 92 }, // Ga/Ghana
+ { 90, 220 }, // Galician/Spain
+ { 91, 243 }, // Ganda/Uganda
+ { 92, 77 }, // Geez/Ethiopia
+ { 92, 74 }, // Geez/Eritrea
+ { 93, 90 }, // Georgian/Georgia
+ { 94, 91 }, // German/Germany
+ { 94, 16 }, // German/Austria
+ { 94, 23 }, // German/Belgium
+ { 94, 117 }, // German/Italy
+ { 94, 136 }, // German/Liechtenstein
+ { 94, 138 }, // German/Luxembourg
+ { 94, 226 }, // German/Switzerland
+ { 96, 94 }, // Greek/Greece
+ { 96, 63 }, // Greek/Cyprus
+ { 97, 183 }, // Guarani/Paraguay
+ { 98, 110 }, // Gujarati/India
+ { 99, 124 }, // Gusii/Kenya
+ { 101, 169 }, // Hausa/Nigeria
+ { 101, 222 }, // Hausa/Sudan
+ { 101, 92 }, // Hausa/Ghana
+ { 101, 170 }, // Hausa/Niger
+ { 102, 248 }, // Hawaiian/United States
+ { 103, 116 }, // Hebrew/Israel
+ { 105, 110 }, // Hindi/India
+ { 107, 108 }, // Hungarian/Hungary
+ { 108, 109 }, // Icelandic/Iceland
+ { 109, 258 }, // Ido/world
+ { 110, 169 }, // Igbo/Nigeria
+ { 111, 83 }, // Inari Sami/Finland
+ { 112, 111 }, // Indonesian/Indonesia
+ { 114, 258 }, // Interlingua/world
+ { 115, 75 }, // Interlingue/Estonia
+ { 116, 41 }, // Inuktitut/Canada
+ { 118, 114 }, // Irish/Ireland
+ { 118, 246 }, // Irish/United Kingdom
+ { 119, 117 }, // Italian/Italy
+ { 119, 203 }, // Italian/San Marino
+ { 119, 226 }, // Italian/Switzerland
+ { 119, 253 }, // Italian/Vatican City
+ { 120, 120 }, // Japanese/Japan
+ { 121, 111 }, // Javanese/Indonesia
+ { 122, 169 }, // Jju/Nigeria
+ { 123, 206 }, // Jola-Fonyi/Senegal
+ { 124, 43 }, // Kabuverdianu/Cape Verde
+ { 125, 4 }, // Kabyle/Algeria
+ { 126, 40 }, // Kako/Cameroon
+ { 127, 95 }, // Kalaallisut/Greenland
+ { 128, 124 }, // Kalenjin/Kenya
+ { 129, 124 }, // Kamba/Kenya
+ { 130, 110 }, // Kannada/India
+ { 132, 110 }, // Kashmiri/India
+ { 133, 123 }, // Kazakh/Kazakhstan
+ { 134, 40 }, // Kenyang/Cameroon
+ { 135, 39 }, // Khmer/Cambodia
+ { 136, 99 }, // Kiche/Guatemala
+ { 137, 124 }, // Kikuyu/Kenya
+ { 138, 194 }, // Kinyarwanda/Rwanda
+ { 141, 110 }, // Konkani/India
+ { 142, 218 }, // Korean/South Korea
+ { 142, 50 }, // Korean/China
+ { 142, 174 }, // Korean/North Korea
+ { 144, 145 }, // Koyraboro Senni/Mali
+ { 145, 145 }, // Koyra Chiini/Mali
+ { 146, 134 }, // Kpelle/Liberia
+ { 146, 102 }, // Kpelle/Guinea
+ { 148, 239 }, // Kurdish/Turkey
+ { 149, 40 }, // Kwasio/Cameroon
+ { 150, 128 }, // Kyrgyz/Kyrgyzstan
+ { 151, 248 }, // Lakota/United States
+ { 152, 230 }, // Langi/Tanzania
+ { 153, 129 }, // Lao/Laos
+ { 154, 253 }, // Latin/Vatican City
+ { 155, 131 }, // Latvian/Latvia
+ { 158, 57 }, // Lingala/Congo - Kinshasa
+ { 158, 7 }, // Lingala/Angola
+ { 158, 46 }, // Lingala/Central African Republic
+ { 158, 56 }, // Lingala/Congo - Brazzaville
+ { 160, 137 }, // Lithuanian/Lithuania
+ { 161, 258 }, // Lojban/world
+ { 162, 91 }, // Lower Sorbian/Germany
+ { 163, 91 }, // Low German/Germany
+ { 163, 165 }, // Low German/Netherlands
+ { 164, 57 }, // Luba-Katanga/Congo - Kinshasa
+ { 165, 225 }, // Lule Sami/Sweden
+ { 165, 175 }, // Lule Sami/Norway
+ { 166, 124 }, // Luo/Kenya
+ { 167, 138 }, // Luxembourgish/Luxembourg
+ { 168, 124 }, // Luyia/Kenya
+ { 169, 140 }, // Macedonian/Macedonia
+ { 170, 230 }, // Machame/Tanzania
+ { 171, 110 }, // Maithili/India
+ { 172, 160 }, // Makhuwa-Meetto/Mozambique
+ { 173, 230 }, // Makonde/Tanzania
+ { 174, 141 }, // Malagasy/Madagascar
+ { 175, 110 }, // Malayalam/India
+ { 176, 143 }, // Malay/Malaysia
+ { 176, 35 }, // Malay/Brunei
+ { 176, 111 }, // Malay/Indonesia
+ { 176, 210 }, // Malay/Singapore
+ { 177, 146 }, // Maltese/Malta
+ { 179, 110 }, // Manipuri/India
+ { 180, 115 }, // Manx/Isle of Man
+ { 181, 167 }, // Maori/New Zealand
+ { 182, 49 }, // Mapuche/Chile
+ { 183, 110 }, // Marathi/India
+ { 185, 124 }, // Masai/Kenya
+ { 185, 230 }, // Masai/Tanzania
+ { 186, 112 }, // Mazanderani/Iran
+ { 188, 124 }, // Meru/Kenya
+ { 189, 40 }, // Meta/Cameroon
+ { 190, 41 }, // Mohawk/Canada
+ { 191, 156 }, // Mongolian/Mongolia
+ { 191, 50 }, // Mongolian/China
+ { 192, 150 }, // Morisyen/Mauritius
+ { 193, 40 }, // Mundang/Cameroon
+ { 194, 248 }, // Muscogee/United States
+ { 195, 162 }, // Nama/Namibia
+ { 197, 248 }, // Navajo/United States
+ { 199, 164 }, // Nepali/Nepal
+ { 199, 110 }, // Nepali/India
+ { 201, 40 }, // Ngiemboon/Cameroon
+ { 202, 40 }, // Ngomba/Cameroon
+ { 203, 169 }, // Nigerian Pidgin/Nigeria
+ { 204, 102 }, // Nko/Guinea
+ { 205, 112 }, // Northern Luri/Iran
+ { 205, 113 }, // Northern Luri/Iraq
+ { 206, 175 }, // Northern Sami/Norway
+ { 206, 83 }, // Northern Sami/Finland
+ { 206, 225 }, // Northern Sami/Sweden
+ { 207, 216 }, // Northern Sotho/South Africa
+ { 208, 261 }, // North Ndebele/Zimbabwe
+ { 209, 175 }, // Norwegian Bokmal/Norway
+ { 209, 224 }, // Norwegian Bokmal/Svalbard and Jan Mayen
+ { 210, 175 }, // Norwegian Nynorsk/Norway
+ { 211, 219 }, // Nuer/South Sudan
+ { 212, 142 }, // Nyanja/Malawi
+ { 213, 243 }, // Nyankole/Uganda
+ { 214, 84 }, // Occitan/France
+ { 214, 220 }, // Occitan/Spain
+ { 215, 110 }, // Odia/India
+ { 220, 77 }, // Oromo/Ethiopia
+ { 220, 124 }, // Oromo/Kenya
+ { 221, 248 }, // Osage/United States
+ { 222, 90 }, // Ossetic/Georgia
+ { 222, 193 }, // Ossetic/Russia
+ { 226, 62 }, // Papiamento/Curacao
+ { 226, 13 }, // Papiamento/Aruba
+ { 227, 1 }, // Pashto/Afghanistan
+ { 227, 178 }, // Pashto/Pakistan
+ { 228, 112 }, // Persian/Iran
+ { 228, 1 }, // Persian/Afghanistan
+ { 230, 187 }, // Polish/Poland
+ { 231, 32 }, // Portuguese/Brazil
+ { 231, 7 }, // Portuguese/Angola
+ { 231, 43 }, // Portuguese/Cape Verde
+ { 231, 73 }, // Portuguese/Equatorial Guinea
+ { 231, 101 }, // Portuguese/Guinea-Bissau
+ { 231, 138 }, // Portuguese/Luxembourg
+ { 231, 139 }, // Portuguese/Macao
+ { 231, 160 }, // Portuguese/Mozambique
+ { 231, 188 }, // Portuguese/Portugal
+ { 231, 204 }, // Portuguese/Sao Tome and Principe
+ { 231, 226 }, // Portuguese/Switzerland
+ { 231, 232 }, // Portuguese/Timor-Leste
+ { 232, 187 }, // Prussian/Poland
+ { 233, 110 }, // Punjabi/India
+ { 233, 178 }, // Punjabi/Pakistan
+ { 234, 184 }, // Quechua/Peru
+ { 234, 28 }, // Quechua/Bolivia
+ { 234, 70 }, // Quechua/Ecuador
+ { 235, 192 }, // Romanian/Romania
+ { 235, 154 }, // Romanian/Moldova
+ { 236, 226 }, // Romansh/Switzerland
+ { 237, 230 }, // Rombo/Tanzania
+ { 238, 38 }, // Rundi/Burundi
+ { 239, 193 }, // Russian/Russia
+ { 239, 22 }, // Russian/Belarus
+ { 239, 123 }, // Russian/Kazakhstan
+ { 239, 128 }, // Russian/Kyrgyzstan
+ { 239, 154 }, // Russian/Moldova
+ { 239, 244 }, // Russian/Ukraine
+ { 240, 230 }, // Rwa/Tanzania
+ { 241, 74 }, // Saho/Eritrea
+ { 242, 193 }, // Sakha/Russia
+ { 243, 124 }, // Samburu/Kenya
+ { 245, 46 }, // Sango/Central African Republic
+ { 246, 230 }, // Sangu/Tanzania
+ { 247, 110 }, // Sanskrit/India
+ { 248, 110 }, // Santali/India
+ { 249, 117 }, // Sardinian/Italy
+ { 251, 160 }, // Sena/Mozambique
+ { 252, 207 }, // Serbian/Serbia
+ { 252, 29 }, // Serbian/Bosnia and Herzegovina
+ { 252, 126 }, // Serbian/Kosovo
+ { 252, 157 }, // Serbian/Montenegro
+ { 253, 230 }, // Shambala/Tanzania
+ { 254, 261 }, // Shona/Zimbabwe
+ { 255, 50 }, // Sichuan Yi/China
+ { 256, 117 }, // Sicilian/Italy
+ { 257, 77 }, // Sidamo/Ethiopia
+ { 258, 187 }, // Silesian/Poland
+ { 259, 178 }, // Sindhi/Pakistan
+ { 259, 110 }, // Sindhi/India
+ { 260, 221 }, // Sinhala/Sri Lanka
+ { 261, 83 }, // Skolt Sami/Finland
+ { 262, 212 }, // Slovak/Slovakia
+ { 263, 213 }, // Slovenian/Slovenia
+ { 264, 243 }, // Soga/Uganda
+ { 265, 215 }, // Somali/Somalia
+ { 265, 67 }, // Somali/Djibouti
+ { 265, 77 }, // Somali/Ethiopia
+ { 265, 124 }, // Somali/Kenya
+ { 266, 112 }, // Southern Kurdish/Iran
+ { 266, 113 }, // Southern Kurdish/Iraq
+ { 267, 225 }, // Southern Sami/Sweden
+ { 267, 175 }, // Southern Sami/Norway
+ { 268, 216 }, // Southern Sotho/South Africa
+ { 268, 133 }, // Southern Sotho/Lesotho
+ { 269, 216 }, // South Ndebele/South Africa
+ { 270, 220 }, // Spanish/Spain
+ { 270, 11 }, // Spanish/Argentina
+ { 270, 24 }, // Spanish/Belize
+ { 270, 28 }, // Spanish/Bolivia
+ { 270, 32 }, // Spanish/Brazil
+ { 270, 42 }, // Spanish/Canary Islands
+ { 270, 47 }, // Spanish/Ceuta and Melilla
+ { 270, 49 }, // Spanish/Chile
+ { 270, 54 }, // Spanish/Colombia
+ { 270, 59 }, // Spanish/Costa Rica
+ { 270, 61 }, // Spanish/Cuba
+ { 270, 69 }, // Spanish/Dominican Republic
+ { 270, 70 }, // Spanish/Ecuador
+ { 270, 72 }, // Spanish/El Salvador
+ { 270, 73 }, // Spanish/Equatorial Guinea
+ { 270, 99 }, // Spanish/Guatemala
+ { 270, 106 }, // Spanish/Honduras
+ { 270, 130 }, // Spanish/Latin America
+ { 270, 152 }, // Spanish/Mexico
+ { 270, 168 }, // Spanish/Nicaragua
+ { 270, 181 }, // Spanish/Panama
+ { 270, 183 }, // Spanish/Paraguay
+ { 270, 184 }, // Spanish/Peru
+ { 270, 185 }, // Spanish/Philippines
+ { 270, 189 }, // Spanish/Puerto Rico
+ { 270, 248 }, // Spanish/United States
+ { 270, 250 }, // Spanish/Uruguay
+ { 270, 254 }, // Spanish/Venezuela
+ { 271, 159 }, // Standard Moroccan Tamazight/Morocco
+ { 272, 111 }, // Sundanese/Indonesia
+ { 273, 230 }, // Swahili/Tanzania
+ { 273, 57 }, // Swahili/Congo - Kinshasa
+ { 273, 124 }, // Swahili/Kenya
+ { 273, 243 }, // Swahili/Uganda
+ { 274, 216 }, // Swati/South Africa
+ { 274, 76 }, // Swati/Eswatini
+ { 275, 225 }, // Swedish/Sweden
+ { 275, 2 }, // Swedish/Aland Islands
+ { 275, 83 }, // Swedish/Finland
+ { 276, 226 }, // Swiss German/Switzerland
+ { 276, 84 }, // Swiss German/France
+ { 276, 136 }, // Swiss German/Liechtenstein
+ { 277, 113 }, // Syriac/Iraq
+ { 277, 227 }, // Syriac/Syria
+ { 278, 159 }, // Tachelhit/Morocco
+ { 280, 255 }, // Tai Dam/Vietnam
+ { 281, 124 }, // Taita/Kenya
+ { 282, 229 }, // Tajik/Tajikistan
+ { 283, 110 }, // Tamil/India
+ { 283, 143 }, // Tamil/Malaysia
+ { 283, 210 }, // Tamil/Singapore
+ { 283, 221 }, // Tamil/Sri Lanka
+ { 284, 228 }, // Taroko/Taiwan
+ { 285, 170 }, // Tasawaq/Niger
+ { 286, 193 }, // Tatar/Russia
+ { 287, 110 }, // Telugu/India
+ { 288, 243 }, // Teso/Uganda
+ { 288, 124 }, // Teso/Kenya
+ { 289, 231 }, // Thai/Thailand
+ { 290, 50 }, // Tibetan/China
+ { 290, 110 }, // Tibetan/India
+ { 291, 74 }, // Tigre/Eritrea
+ { 292, 77 }, // Tigrinya/Ethiopia
+ { 292, 74 }, // Tigrinya/Eritrea
+ { 294, 182 }, // Tok Pisin/Papua New Guinea
+ { 295, 235 }, // Tongan/Tonga
+ { 296, 216 }, // Tsonga/South Africa
+ { 297, 216 }, // Tswana/South Africa
+ { 297, 30 }, // Tswana/Botswana
+ { 298, 239 }, // Turkish/Turkey
+ { 298, 63 }, // Turkish/Cyprus
+ { 299, 240 }, // Turkmen/Turkmenistan
+ { 301, 169 }, // Tyap/Nigeria
+ { 303, 244 }, // Ukrainian/Ukraine
+ { 304, 91 }, // Upper Sorbian/Germany
+ { 305, 178 }, // Urdu/Pakistan
+ { 305, 110 }, // Urdu/India
+ { 306, 50 }, // Uyghur/China
+ { 307, 251 }, // Uzbek/Uzbekistan
+ { 307, 1 }, // Uzbek/Afghanistan
+ { 308, 134 }, // Vai/Liberia
+ { 309, 216 }, // Venda/South Africa
+ { 310, 255 }, // Vietnamese/Vietnam
+ { 311, 258 }, // Volapuk/world
+ { 312, 230 }, // Vunjo/Tanzania
+ { 313, 23 }, // Walloon/Belgium
+ { 314, 226 }, // Walser/Switzerland
+ { 315, 15 }, // Warlpiri/Australia
+ { 316, 246 }, // Welsh/United Kingdom
+ { 317, 178 }, // Western Balochi/Pakistan
+ { 317, 1 }, // Western Balochi/Afghanistan
+ { 317, 112 }, // Western Balochi/Iran
+ { 317, 176 }, // Western Balochi/Oman
+ { 317, 245 }, // Western Balochi/United Arab Emirates
+ { 318, 165 }, // Western Frisian/Netherlands
+ { 319, 77 }, // Wolaytta/Ethiopia
+ { 320, 206 }, // Wolof/Senegal
+ { 321, 216 }, // Xhosa/South Africa
+ { 322, 40 }, // Yangben/Cameroon
+ { 323, 244 }, // Yiddish/Ukraine
+ { 324, 169 }, // Yoruba/Nigeria
+ { 324, 25 }, // Yoruba/Benin
+ { 325, 170 }, // Zarma/Niger
+ { 326, 50 }, // Zhuang/China
+ { 327, 216 }, // Zulu/South Africa
+ { 328, 32 }, // Kaingang/Brazil
+ { 329, 32 }, // Nheengatu/Brazil
+ { 329, 54 }, // Nheengatu/Colombia
+ { 329, 254 }, // Nheengatu/Venezuela
+ { 330, 110 }, // Haryanvi/India
+ { 331, 91 }, // Northern Frisian/Germany
+ { 332, 110 }, // Rajasthani/India
+ { 333, 193 }, // Moksha/Russia
+ { 334, 258 }, // Toki Pona/world
+ { 335, 214 }, // Pijin/Solomon Islands
+ { 336, 169 }, // Obolo/Nigeria
+ { 337, 178 }, // Baluchi/Pakistan
+ { 338, 117 }, // Ligurian/Italy
+ { 339, 161 }, // Rohingya/Myanmar
+ { 339, 20 }, // Rohingya/Bangladesh
+ { 340, 178 }, // Torwali/Pakistan
+ { 341, 25 }, // Anii/Benin
+ { 342, 110 }, // Kangri/India
+ { 343, 117 }, // Venetian/Italy
};
-static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
+
+// GENERATED PART ENDS HERE
+
+static const int g_locale_list_count = std::size(g_locale_list);
LocaleModel::LocaleModel(QObject *parent)
: QAbstractItemModel(parent)
@@ -283,7 +680,7 @@ LocaleModel::LocaleModel(QObject *parent)
QVariant LocaleModel::data(const QModelIndex &index, int role) const
{
if (!index.isValid()
- || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole
+ || (role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole)
|| index.column() >= g_model_cols
|| index.row() >= g_locale_list_count + 2)
return QVariant();
@@ -424,9 +821,9 @@ int LocaleModel::rowCount(const QModelIndex &parent) const
Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
{
if (!index.isValid())
- return 0;
+ return {};
if (index.row() == 0 && index.column() == g_model_cols - 1)
- return 0;
+ return {};
if (index.row() == 0)
return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
return QAbstractItemModel::flags(index);
@@ -438,7 +835,7 @@ bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int r
|| index.row() != 0
|| index.column() >= g_model_cols - 1
|| role != Qt::EditRole
- || m_data_list.at(index.column()).type() != value.type())
+ || m_data_list.at(index.column()).typeId() != value.typeId())
return false;
m_data_list[index.column()] = value;
diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h
index f35b984b4c..a0ba45bb15 100644
--- a/util/locale_database/testlocales/localemodel.h
+++ b/util/locale_database/testlocales/localemodel.h
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#ifndef LOCALEMODEL_H
#define LOCALEMODEL_H
@@ -38,17 +13,17 @@ class LocaleModel : public QAbstractItemModel
public:
LocaleModel(QObject *parent = nullptr);
- virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
- virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
- virtual QModelIndex index(int row, int column,
- const QModelIndex &parent = QModelIndex()) const;
- virtual QModelIndex parent(const QModelIndex &index) const;
- virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
- virtual QVariant headerData(int section, Qt::Orientation orientation,
- int role = Qt::DisplayRole ) const;
- virtual Qt::ItemFlags flags(const QModelIndex &index) const;
- virtual bool setData(const QModelIndex &index, const QVariant &value,
- int role = Qt::EditRole);
+ int columnCount(const QModelIndex &parent = QModelIndex()) const override;
+ QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override;
+ QModelIndex index(int row, int column,
+ const QModelIndex &parent = QModelIndex()) const override;
+ QModelIndex parent(const QModelIndex &index) const override;
+ int rowCount(const QModelIndex &parent = QModelIndex()) const override;
+ QVariant headerData(int section, Qt::Orientation orientation,
+ int role = Qt::DisplayRole ) const override;
+ Qt::ItemFlags flags(const QModelIndex &index) const override;
+ bool setData(const QModelIndex &index, const QVariant &value,
+ int role = Qt::EditRole) override;
private:
QList<QVariant> m_data_list;
};
diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp
index 3ff7f73a98..df8a3c28ab 100644
--- a/util/locale_database/testlocales/localewidget.cpp
+++ b/util/locale_database/testlocales/localewidget.cpp
@@ -1,33 +1,8 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QTableView>
#include <QVBoxLayout>
-#include <QItemDelegate>
+#include <QStyledItemDelegate>
#include <QItemEditorFactory>
#include <QDoubleSpinBox>
@@ -51,26 +26,26 @@ public:
class EditorFactory : public QItemEditorFactory
{
public:
- EditorFactory() {
- static DoubleEditorCreator double_editor_creator;
- registerEditor(QVariant::Double, &double_editor_creator);
+ EditorFactory()
+ {
+ // registerEditor() assumes ownership of the creator.
+ registerEditor(QVariant::Double, new DoubleEditorCreator);
}
};
LocaleWidget::LocaleWidget(QWidget *parent)
- : QWidget(parent)
+ : QWidget(parent),
+ m_model(new LocaleModel(this)),
+ m_view(new QTableView(this))
{
- m_model = new LocaleModel(this);
- m_view = new QTableView(this);
-
- QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
+ QStyledItemDelegate *delegate = qobject_cast<QStyledItemDelegate*>(m_view->itemDelegate());
Q_ASSERT(delegate != 0);
- static EditorFactory editor_factory;
- delegate->setItemEditorFactory(&editor_factory);
+ static EditorFactory editorFactory;
+ delegate->setItemEditorFactory(&editorFactory);
m_view->setModel(m_model);
QVBoxLayout *layout = new QVBoxLayout(this);
- layout->setMargin(0);
+ layout->setContentsMargins(0, 0, 0, 0);
layout->addWidget(m_view);
}
diff --git a/util/locale_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h
index c562da119b..36613135ea 100644
--- a/util/locale_database/testlocales/localewidget.h
+++ b/util/locale_database/testlocales/localewidget.h
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#ifndef LOCALEWIDGET_H
#define LOCALEWIDGET_H
diff --git a/util/locale_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp
index 0c3c45f989..d94726a2a9 100644
--- a/util/locale_database/testlocales/main.cpp
+++ b/util/locale_database/testlocales/main.cpp
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QApplication>
#include "localewidget.h"
diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro
index a9a6247f96..126c19589b 100644
--- a/util/locale_database/testlocales/testlocales.pro
+++ b/util/locale_database/testlocales/testlocales.pro
@@ -1,4 +1,5 @@
TARGET = testlocales
CONFIG += debug
+QT += widgets
SOURCES += localemodel.cpp localewidget.cpp main.cpp
-HEADERS += localemodel.h localewidget.h \ No newline at end of file
+HEADERS += localemodel.h localewidget.h