summary refs log tree commit diff stats
path: root/util/locale_database
diff options
context:
space:
mode:
Diffstat (limited to 'util/locale_database')
-rw-r--r-- util/locale_database/README 12
-rw-r--r-- util/locale_database/cldr.py 291
-rwxr-xr-x util/locale_database/cldr2qlocalexml.py 57
-rwxr-xr-x util/locale_database/cldr2qtimezone.py 361
-rw-r--r-- util/locale_database/dateconverter.py 270
-rw-r--r-- util/locale_database/enumdata.py 157
-rw-r--r-- util/locale_database/ldml.py 88
-rw-r--r-- util/locale_database/localetools.py 39
-rw-r--r-- util/locale_database/qlocalexml.py 393
-rw-r--r-- util/locale_database/qlocalexml.rnc 19
-rwxr-xr-x util/locale_database/qlocalexml2cpp.py 264
-rw-r--r-- util/locale_database/testlocales/localemodel.cpp 884
-rw-r--r-- util/locale_database/testlocales/localemodel.h 22
-rw-r--r-- util/locale_database/testlocales/localewidget.cpp 24
-rw-r--r-- util/locale_database/testlocales/testlocales.pro 3
-rw-r--r-- util/locale_database/zonedata.py 227
16 files changed, 1988 insertions, 1123 deletions
diff --git a/util/locale_database/README b/util/locale_database/README
index b910d36f2a..460f51993a 100644
--- a/util/locale_database/README
+++ b/util/locale_database/README
@@ -5,10 +5,8 @@ data (like date formats, country names etc). It is provided by the
Unicode consortium.
See cldr2qlocalexml.py for how to run it and qlocalexml2cpp.py to
-update the locale data tables (principally text/qlocale_data_p.h and
-time/q*calendar_data_p.h under src/corelib/). See enumdata.py for when
-and how to update the data it provides. You shall definitely need to
-pass --no-verify or -n to git commit for these changes.
-
-See cldr2qtimezone.py on how to update tables of Windows-specific
-names for zones and UTC-offset zone names.
+update the locale data tables (principally text/qlocale_data_p.h,
+time/q*calendar_data_p.h and time/qtimezone*_data_p.h under
+src/corelib/). See enumdata.py and zonedata.py for when and how to
+update the data they provide. You shall definitely need to pass
+--no-verify or -n to git commit for these changes.
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 91b46d6a01..75d687dd11 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
from pathlib import Path
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
from qlocalexml import Locale
class CldrReader (object):
@@ -73,10 +74,80 @@ class CldrReader (object):
# more out.
pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
+    def zoneData(self):
+        """Locale-independent timezone data.
+
+        Returns a triple (alias, defaults, winIds) in which:
+          * alias is a mapping from aliases for IANA zone IDs, that
+            have the form of IANA IDs, to actual current IANA IDs; in
+            particular, this maps each CLDR zone ID to its
+            corresponding IANA ID.
+          * defaults maps each Windows name for a zone to the IANA ID
+            to use for it by default (when no territory is specified,
+            or when no entry in winIds matches the given Windows name
+            and territory).
+          * winIds is a mapping {(winId, land): ianaList} from Windows
+            name and territory code to the space-joined list of IANA
+            IDs associated with the Windows name in the given
+            territory.
+
+        Reports, via self.whitter, on any Windows IDs given in
+        zonedata.windowsIdList that are no longer covered by the CLDR
+        data and on any (Windows ID, territory) pair that CLDR lists
+        more than once; the IANA lists of such duplicates are merged
+        into a single entry. Reports, via self.grumble, on any
+        territories found in CLDR timezone data that are not mentioned
+        in enumdata.territory_map; entries for those territories are
+        left out of winIds."""
+        alias, ignored = self.root.bcp47Aliases()
+        defaults, winIds = self.root.readWindowsTimeZones(alias)
+
+        from zonedata import windowsIdList
+        winUnused = {n for n, o in windowsIdList}.difference(
+            set(defaults).union(w for w, t, ids in winIds))
+        if winUnused:
+            # Sorted, so that the report is stable from run to run:
+            joined = "\n\t".join(sorted(winUnused))
+            self.whitter.write(
+                f'No Windows ID in\n\t{joined}\nis still in use.\n'
+                'They could be removed at the next major version.\n')
+
+        # Check for duplicate entries in winIds: collect every IANA
+        # list seen for each (Windows ID, territory) key, then keep
+        # the keys that were seen more than once.
+        grouped = {}
+        for w, t, ids in sorted(winIds):
+            grouped.setdefault((w, t), []).append(ids)
+        winDup = {key: seq for key, seq in grouped.items() if len(seq) > 1}
+        if winDup:
+            joined = '\n\t'.join(f'{t}, {w}: {", ".join(seq)}'
+                                 for (w, t), seq in winDup.items())
+            self.whitter.write(
+                f'Duplicated (territory, Windows ID) entries:\n\t{joined}\n')
+            # Replace each set of duplicates with one merged entry,
+            # preserving the order in which IANA IDs are first seen:
+            winIds = [trip for trip in winIds if trip[:2] not in winDup]
+            for (w, t), seq in winDup.items():
+                ianaList = []
+                for ids in seq:
+                    for iana in ids.split():
+                        if iana not in ianaList:
+                            ianaList.append(iana)
+                winIds.append((w, t, ' '.join(ianaList)))
+
+        from enumdata import territory_map
+        unLand = {t for w, t, ids in winIds}.difference(
+            v[1] for k, v in territory_map.items())
+        if unLand:
+            self.grumble.write(
+                'Unknown territory codes in timezone data: '
+                f'{", ".join(sorted(unLand))}\n'
+                'Skipping Windows zone mappings for these territories\n')
+            winIds = [(w, t, ids) for w, t, ids in winIds if t not in unLand]
+
+        # Convert list of triples to mapping:
+        winIds = {(w, t): ids for w, t, ids in winIds}
+        return alias, defaults, winIds
+
def readLocales(self, calendars = ('gregorian',)):
- locales = tuple(self.__allLocales(calendars))
- return dict(((k.language_id, k.script_id, k.territory_id, k.variant_code),
- k) for k in locales)
+ return {(k.language_id, k.script_id, k.territory_id, k.variant_id): k
+ for k in self.__allLocales(calendars)}
def __allLocales(self, calendars):
def skip(locale, reason):
@@ -193,7 +264,7 @@ class CldrReader (object):
language = names[0], language_code = language, language_id = ids[0],
script = names[1], script_code = script, script_id = ids[1],
territory = names[2], territory_code = territory, territory_id = ids[2],
- variant_code = variant)
+ variant_code = variant, variant_id = ids[3])
firstDay, weStart, weEnd = self.root.weekData(territory)
assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
@@ -253,6 +324,9 @@ class CldrAccess (object):
inheritance, where relevant."""
return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
+    def englishNaming(self, tag):
+        """Look-up function for the names of codes of the given kind.
+
+        Fetches the code map for tag (e.g. 'language', 'script' or
+        'territory') and returns its get() method, so the caller can
+        map a code to its name, getting None (or a supplied default)
+        for a code the map lacks. See QLocaleXmlWriter.enumData(),
+        to which this is passed."""
+        codes = self.__codeMap(tag)
+        return codes.get
+
@property
def fileLocales(self) -> Iterable[str]:
"""Generator for locale IDs seen in file-names.
@@ -348,16 +422,16 @@ class CldrAccess (object):
parts.append(text)
if len(parts) > 1:
parts[-1] = 'and ' + parts[-1]
- assert parts
+ else:
+ assert parts
+ if parts[0].startswith('variant'):
+ raise Error(f'No support for {parts[0]}',
+ language, script, territory, variant)
raise Error('Unknown ' + ', '.join(parts),
language, script, territory, variant)
@staticmethod
- def __checkEnum(given, proper, scraps,
- remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
- prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
- suffixes = ( 'Han', ),
- skip = '\u02bc'):
+ def __checkEnum(given, proper, scraps):
# Each is a { code: full name } mapping
for code, name in given.items():
try: right = proper[code]
@@ -367,21 +441,9 @@ class CldrAccess (object):
if code not in scraps:
yield name, f'[Found no CLDR name for code {code}]'
continue
- if name == right: continue
- ok = right.replace('&', 'And')
- for k, v in prefix.items():
- if ok.startswith(k + ' '):
- ok = v + ok[len(k):]
- while '(' in ok:
- try: f, t = ok.index('('), ok.index(')')
- except ValueError: break
- ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
- if any(name == ok + ' ' + s for s in suffixes):
- continue
- if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
- remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
- continue
- yield name, ok
+ cleaned = names_clash(right, name)
+ if cleaned:
+ yield name, cleaned
def checkEnumData(self, grumble):
scraps = set()
@@ -389,9 +451,9 @@ class CldrAccess (object):
for f in k.split('_'):
scraps.add(f)
from enumdata import language_map, territory_map, script_map
- language = dict((v, k) for k, v in language_map.values() if not v.isspace())
- territory = dict((v, k) for k, v in territory_map.values() if v != 'ZZ')
- script = dict((v, k) for k, v in script_map.values() if v != 'Zzzz')
+ language = {v: k for k, v in language_map.values() if not v.isspace()}
+ territory = {v: k for k, v in territory_map.values() if v != 'ZZ'}
+ script = {v: k for k, v in script_map.values() if v != 'Zzzz'}
lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
@@ -414,69 +476,115 @@ enumdata.py (keeping the old name as an alias):
+ '\n')
grumble('\n')
- def readWindowsTimeZones(self, lookup): # For use by cldr2qtimezone.py
+    def bcp47Aliases(self):
+        """Read CLDR's aliasing among time-zone IDs.
+
+        CLDR's standard name for a zone, its CLDR ID, looks like an
+        IANA ID and stays fixed once assigned; IANA may later rename
+        the zone (Asia/Calcutta became Asia/Kolkata, for example), so
+        a CLDR ID can be an out-of-date IANA ID. The file
+        common/bcp47/timezone.xml records the aliasing needed to get
+        from one to the other, in keyword/key/type elements whose
+        attributes include:
+
+          name -- zone code (ignored here)
+          description -- long name for exemplar location, including
+                         territory
+
+        along with some of:
+
+          deprecated -- entries with this are skipped (they have no alias)
+          preferred -- only present on deprecated entries
+          since -- version at which the entry was added (ignored here)
+          alias -- space-joined IANA-form IDs, the first being the CLDR ID
+          iana -- when present, the alias entry that is the modern IANA ID
+
+        Returns a pair (alias, naming): alias maps every IANA-form ID
+        listed in an entry to the ID IANA currently uses for that
+        zone; naming maps each such current IANA ID to the
+        description given by its entry."""
+        # The file is laid out like the supplemental data files:
+        document = Node(self.__xml('common/bcp47/timezone.xml'))
+        zoneTypes = Supplement(document)
+
+        # Should a mapping back to CLDR ID ever be wanted, the values
+        # of the second mapping could become (description, alias list).
+        toIana, described = {}, {} # { alias: iana }, { iana: description }
+        for item, attrs in zoneTypes.find('keyword/key/type', exclude=('deprecated',)):
+            assert 'description' in attrs, item
+            assert 'alias' in attrs, item
+            candidates = attrs['alias'].split()
+            assert all(name not in toIana for name in candidates), item
+            # The first candidate is the CLDR ID; the iana attribute,
+            # when given, says what IANA now calls the zone instead.
+            current = attrs.get('iana', candidates[0])
+            description = attrs['description']
+            for name in candidates:
+                toIana[name] = current
+            assert current not in described
+            described[current] = description
+
+        return toIana, described
+
+ def readWindowsTimeZones(self, alias):
"""Digest CLDR's MS-Win time-zone name mapping.
- MS-Win have their own eccentric names for time-zones. CLDR
- helpfully provides a translation to more orthodox names.
-
- Single argument, lookup, is a mapping from known MS-Win names
- for locales to a unique integer index (starting at 1).
-
- The XML structure we read has the form:
-
- <supplementalData>
- <windowsZones>
- <mapTimezones otherVersion="..." typeVersion="...">
- <!-- (UTC-08:00) Pacific Time (US & Canada) -->
- <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
- <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/>
- <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles America/Metlakatla"/>
- <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/>
- </mapTimezones>
- </windowsZones>
- </supplementalData>
-"""
+ Single argument, alias, should be the first part of the pair
+ returned by a call to bcp47Aliases(); it shall be used to
+ transform CLDR IDs into IANA IDs.
+
+ MS-Win have their own eccentric names for time-zones. CLDR
+ helpfully provides a translation to more orthodox names,
+ albeit these are CLDR IDs - see bcp47Aliases() - rather than
+ (up to date) IANA IDs. The windowsZones.xml supplement has
+ supplementalData/windowsZones/mapTimezones/mapZone nodes with
+ attributes
+
+ territory -- ISO code
+ type -- space-joined sequence of CLDR IDs of zones
+ other -- Windows name of these zones in the given territory
+
+ When 'territory' is '001', type is always just a single CLDR
+ zone ID. This is the default zone for the given Windows name.
+
+ For each mapZone node, its type is split on spacing and
+ cleaned up as follows. Those entries that are keys of alias
+ are mapped thereby to their canonical IANA IDs; all others are
+ presumed to be canonical IANA IDs and left unchanged. Any
+ later duplicates of earlier entries are omitted. The result
+ list of IANA IDs is joined with single spaces between to give
+ a string s.
+
+ Returns a twople (defaults, windows) in which defaults is a
+ mapping, from Windows ID to IANA ID (derived from the mapZone
+ nodes with territory='001'), and windows is a list of triples
+ (Windows ID, territory code, IANA ID list) in which the first
+ two entries are the 'other' and 'territory' fields of a
+ mapZone element and the last is s, its cleaned-up list of IANA
+ IDs."""
+
+ defaults, windows = {}, []
zones = self.supplement('windowsZones.xml')
- enum = self.__enumMap('territory')
- badZones, unLands, defaults, windows = set(), set(), {}, {}
-
for name, attrs in zones.find('windowsZones/mapTimezones'):
if name != 'mapZone':
continue
- wid, code = attrs['other'], attrs['territory']
- data = dict(windowsId = wid,
- territoryCode = code,
- ianaList = attrs['type'])
-
- try:
- key = lookup[wid]
- except KeyError:
- badZones.add(wid)
- key = 0
- data['windowsKey'] = key
+ wid, code, ianas = attrs['other'], attrs['territory'], []
+ for cldr in attrs['type'].split():
+ iana = alias.get(cldr, cldr)
+ if iana not in ianas:
+ ianas.append(iana)
if code == '001':
- defaults[key] = data['ianaList']
+ assert len(ianas) == 1, (wid, *ianas)
+ defaults[wid] = ianas[0]
else:
- try:
- cid, name = enum[code]
- except KeyError:
- unLands.append(code)
- continue
- data.update(territoryId = cid, territory = name)
- windows[key, cid] = data
-
- if unLands:
- raise Error('Unknown territory codes, please add to enumdata.py: '
- + ', '.join(sorted(unLands)))
-
- if badZones:
- raise Error('Unknown Windows IDs, please add to cldr2qtimezone.py: '
- + ', '.join(sorted(badZones)))
+ windows.append((wid, code, ' '.join(ianas)))
- return self.cldrVersion, defaults, windows
+ return defaults, windows
@property
def cldrVersion(self):
@@ -557,6 +665,8 @@ enumdata.py (keeping the old name as an alias):
source = self.__supplementalData
for elt in source.findNodes('currencyData/region'):
iso, digits, rounding = '', 2, 1
+ # TODO: fractions/info[iso4217=DEFAULT] has rounding=0 - why do we differ ?
+ # Also: some fractions/info have cashDigits and cashRounding - should we use them ?
try:
territory = elt.dom.attributes['iso3166'].nodeValue
except KeyError:
@@ -648,15 +758,15 @@ enumdata.py (keeping the old name as an alias):
def __enumMap(self, key, cache = {}):
if not cache:
cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
- # They're not actually lists: mappings from numeric value
- # to pairs of full name and short code. What we want, in
- # each case, is a mapping from code to the other two.
+ # They're mappings from numeric value to pairs of full
+ # name and short code. What we want, in each case, is a
+ # mapping from code to the other two.
from enumdata import language_map, script_map, territory_map
for form, book, empty in (('language', language_map, 'AnyLanguage'),
('script', script_map, 'AnyScript'),
('territory', territory_map, 'AnyTerritory')):
- cache[form] = dict((pair[1], (num, pair[0]))
- for num, pair in book.items() if pair[0] != 'C')
+ cache[form] = {pair[1]: (num, pair[0])
+ for num, pair in book.items() if pair[0] != 'C'}
# (Have to filter out the C locale, as we give it the
# same (all space) code as AnyLanguage, whose code
# should probably be 'und' instead.)
@@ -699,7 +809,13 @@ enumdata.py (keeping the old name as an alias):
except (KeyError, ValueError, TypeError):
pass
else:
- if key not in seen or 'alt' not in elt.attributes:
+ # Prefer stand-alone forms of names when present, ignore other
+ # alt="..." entries. For example, Traditional and Simplified
+ # Han omit "Han" in the plain form, but include it for
+ # stand-alone. As the stand-alone version appears later, it
+ # over-writes the plain one.
+ if (key not in seen or 'alt' not in elt.attributes
+ or elt.attributes['alt'].nodeValue == 'stand-alone'):
yield key, value
seen.add(key)
@@ -708,7 +824,8 @@ enumdata.py (keeping the old name as an alias):
def __parentLocale(self, cache = {}):
# see http://www.unicode.org/reports/tr35/#Parent_Locales
if not cache:
- for tag, attrs in self.__supplementalData.find('parentLocales'):
+ for tag, attrs in self.__supplementalData.find('parentLocales',
+ ('component',)):
parent = attrs.get('parent', '')
for child in attrs['locales'].split():
cache[child] = parent
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index d5a7fbbb5c..ac0b44789b 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -3,16 +3,21 @@
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Convert CLDR data to QLocaleXML
-The CLDR data can be downloaded from CLDR_, which has a sub-directory
-for each version; you need the ``core.zip`` file for your version of
-choice (typically the latest). This script has had updates to cope up
-to v38.1; for later versions, we may need adaptations. Unpack the
-downloaded ``core.zip`` and check it has a common/main/ sub-directory:
-pass the path of that root of the download to this script as its first
-command-line argument. Pass the name of the file in which to write
-output as the second argument; either omit it or use '-' to select the
-standard output. This file is the input needed by
-``./qlocalexml2cpp.py``
+The CLDR data can be downloaded as a zip-file from CLDR_, which has a
+sub-directory for each version; you need the ``core.zip`` file for
+your version of choice (typically the latest), which you should then
+unpack. Alternatively, you can clone the git repo from github_, which
+has a tag for each release and a maint/maint-$ver branch for each
+major version. Either way, the CLDR top-level directory should have a
+subdirectory called common/ which contains (among other things)
+subdirectories main/ and supplemental/.
+
+This script has had updates to cope up to v44.1; for later versions,
+we may need adaptations. Pass the path of the CLDR top-level directory
+to this script as its first command-line argument. Pass the name of
+the file in which to write output as the second argument; either omit
+it or use '-' to select the standard output. This file is the input
+needed by ``./qlocalexml2cpp.py``
When you update the CLDR data, be sure to also update
src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
@@ -22,26 +27,33 @@ append new entries to enumdata.py's lists and update documentation in
src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
order.
-While updating the locale data, check also for updates to MS-Win's
-time zone names; see cldr2qtimezone.py for details.
-
-All the scripts mentioned support --help to tell you how to use them.
+Both of the scripts mentioned support --help to tell you how to use
+them.
.. _CLDR: https://unicode.org/Public/cldr/
+.. _github: https://github.com/unicode-org/cldr
"""
from pathlib import Path
-import sys
import argparse
from cldr import CldrReader
from qlocalexml import QLocaleXmlWriter
-def main(out, err):
- all_calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
+def main(argv, out, err):
+ """Generate a QLocaleXML file from CLDR data.
+
+ Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+ arguments. In argv[1:], it expects the root of the CLDR data
+ directory as first parameter and the name of the file in which to
+ save QLocaleXML data as second parameter. It accepts a --calendars
+ option to select which calendars to support (all available by
+ default)."""
+ all_calendars = ['gregorian', 'persian', 'islamic']
parser = argparse.ArgumentParser(
+ prog=Path(argv[0]).name,
description='Generate QLocaleXML from CLDR data.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
@@ -51,7 +63,7 @@ def main(out, err):
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
- args = parser.parse_args()
+ args = parser.parse_args(argv[1:])
root = Path(args.cldr_path)
root_xml_path = 'common/main/root.xml'
@@ -76,12 +88,15 @@ def main(out, err):
writer = QLocaleXmlWriter(emit.write)
writer.version(reader.root.cldrVersion)
- writer.enumData()
+ writer.enumData(reader.root.englishNaming)
writer.likelySubTags(reader.likelySubTags())
- writer.locales(reader.readLocales(args.calendars), args.calendars)
+ writer.zoneData(*reader.zoneData()) # Locale-independent zone data.
+ en_US = tuple(id for id, name in reader.root.codesToIdName('en', '', 'US'))
+ writer.locales(reader.readLocales(args.calendars), args.calendars, en_US)
writer.close(err.write)
return 0
if __name__ == '__main__':
- sys.exit(main(sys.stdout, sys.stderr))
+ import sys
+ sys.exit(main(sys.argv, sys.stdout, sys.stderr))
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
deleted file mode 100755
index a2e43d03b7..0000000000
--- a/util/locale_database/cldr2qtimezone.py
+++ /dev/null
@@ -1,361 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (C) 2021 The Qt Company Ltd.
-# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-"""Parse CLDR data for QTimeZone use with MS-Windows
-
-Script to parse the CLDR common/supplemental/windowsZones.xml file and
-prepare its data for use in QTimeZone. See ``./cldr2qlocalexml.py`` for
-where to get the CLDR data. Pass its root directory as first parameter
-to this script. You can optionally pass the qtbase root directory as
-second parameter; it defaults to the root of the checkout containing
-this script. This script updates qtbase's
-src/corelib/time/qtimezoneprivate_data_p.h with the new data.
-"""
-
-import datetime
-from pathlib import Path
-import textwrap
-import argparse
-
-from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
-from cldr import CldrAccess
-
-### Data that may need updates in response to new entries in the CLDR file ###
-
-# This script shall report the updates you need to make, if any arise.
-# However, you may need to research the relevant zone's standard offset.
-
-# List of currently known Windows IDs.
-# If this script reports missing IDs, please add them here.
-# Look up the offset using (google and) timeanddate.com.
-# Not public so may safely be changed. Please keep in alphabetic order by ID.
-# ( Windows Id, Offset Seconds )
-windowsIdList = (
- ('Afghanistan Standard Time', 16200),
- ('Alaskan Standard Time', -32400),
- ('Aleutian Standard Time', -36000),
- ('Altai Standard Time', 25200),
- ('Arab Standard Time', 10800),
- ('Arabian Standard Time', 14400),
- ('Arabic Standard Time', 10800),
- ('Argentina Standard Time', -10800),
- ('Astrakhan Standard Time', 14400),
- ('Atlantic Standard Time', -14400),
- ('AUS Central Standard Time', 34200),
- ('Aus Central W. Standard Time', 31500),
- ('AUS Eastern Standard Time', 36000),
- ('Azerbaijan Standard Time', 14400),
- ('Azores Standard Time', -3600),
- ('Bahia Standard Time', -10800),
- ('Bangladesh Standard Time', 21600),
- ('Belarus Standard Time', 10800),
- ('Bougainville Standard Time', 39600),
- ('Canada Central Standard Time', -21600),
- ('Cape Verde Standard Time', -3600),
- ('Caucasus Standard Time', 14400),
- ('Cen. Australia Standard Time', 34200),
- ('Central America Standard Time', -21600),
- ('Central Asia Standard Time', 21600),
- ('Central Brazilian Standard Time', -14400),
- ('Central Europe Standard Time', 3600),
- ('Central European Standard Time', 3600),
- ('Central Pacific Standard Time', 39600),
- ('Central Standard Time (Mexico)', -21600),
- ('Central Standard Time', -21600),
- ('China Standard Time', 28800),
- ('Chatham Islands Standard Time', 45900),
- ('Cuba Standard Time', -18000),
- ('Dateline Standard Time', -43200),
- ('E. Africa Standard Time', 10800),
- ('E. Australia Standard Time', 36000),
- ('E. Europe Standard Time', 7200),
- ('E. South America Standard Time', -10800),
- ('Easter Island Standard Time', -21600),
- ('Eastern Standard Time', -18000),
- ('Eastern Standard Time (Mexico)', -18000),
- ('Egypt Standard Time', 7200),
- ('Ekaterinburg Standard Time', 18000),
- ('Fiji Standard Time', 43200),
- ('FLE Standard Time', 7200),
- ('Georgian Standard Time', 14400),
- ('GMT Standard Time', 0),
- ('Greenland Standard Time', -10800),
- ('Greenwich Standard Time', 0),
- ('GTB Standard Time', 7200),
- ('Haiti Standard Time', -18000),
- ('Hawaiian Standard Time', -36000),
- ('India Standard Time', 19800),
- ('Iran Standard Time', 12600),
- ('Israel Standard Time', 7200),
- ('Jordan Standard Time', 7200),
- ('Kaliningrad Standard Time', 7200),
- ('Korea Standard Time', 32400),
- ('Libya Standard Time', 7200),
- ('Line Islands Standard Time', 50400),
- ('Lord Howe Standard Time', 37800),
- ('Magadan Standard Time', 36000),
- ('Magallanes Standard Time', -10800), # permanent DST
- ('Marquesas Standard Time', -34200),
- ('Mauritius Standard Time', 14400),
- ('Middle East Standard Time', 7200),
- ('Montevideo Standard Time', -10800),
- ('Morocco Standard Time', 0),
- ('Mountain Standard Time (Mexico)', -25200),
- ('Mountain Standard Time', -25200),
- ('Myanmar Standard Time', 23400),
- ('N. Central Asia Standard Time', 21600),
- ('Namibia Standard Time', 3600),
- ('Nepal Standard Time', 20700),
- ('New Zealand Standard Time', 43200),
- ('Newfoundland Standard Time', -12600),
- ('Norfolk Standard Time', 39600),
- ('North Asia East Standard Time', 28800),
- ('North Asia Standard Time', 25200),
- ('North Korea Standard Time', 30600),
- ('Omsk Standard Time', 21600),
- ('Pacific SA Standard Time', -10800),
- ('Pacific Standard Time', -28800),
- ('Pacific Standard Time (Mexico)', -28800),
- ('Pakistan Standard Time', 18000),
- ('Paraguay Standard Time', -14400),
- ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
- ('Romance Standard Time', 3600),
- ('Russia Time Zone 3', 14400),
- ('Russia Time Zone 10', 39600),
- ('Russia Time Zone 11', 43200),
- ('Russian Standard Time', 10800),
- ('SA Eastern Standard Time', -10800),
- ('SA Pacific Standard Time', -18000),
- ('SA Western Standard Time', -14400),
- ('Saint Pierre Standard Time', -10800), # New France
- ('Sakhalin Standard Time', 39600),
- ('Samoa Standard Time', 46800),
- ('Sao Tome Standard Time', 0),
- ('Saratov Standard Time', 14400),
- ('SE Asia Standard Time', 25200),
- ('Singapore Standard Time', 28800),
- ('South Africa Standard Time', 7200),
- ('South Sudan Standard Time', 7200),
- ('Sri Lanka Standard Time', 19800),
- ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
- ('Syria Standard Time', 7200),
- ('Taipei Standard Time', 28800),
- ('Tasmania Standard Time', 36000),
- ('Tocantins Standard Time', -10800),
- ('Tokyo Standard Time', 32400),
- ('Tomsk Standard Time', 25200),
- ('Tonga Standard Time', 46800),
- ('Transbaikal Standard Time', 32400), # Yakutsk
- ('Turkey Standard Time', 7200),
- ('Turks And Caicos Standard Time', -14400),
- ('Ulaanbaatar Standard Time', 28800),
- ('US Eastern Standard Time', -18000),
- ('US Mountain Standard Time', -25200),
- ('UTC-11', -39600),
- ('UTC-09', -32400),
- ('UTC-08', -28800),
- ('UTC-02', -7200),
- ('UTC', 0),
- ('UTC+12', 43200),
- ('UTC+13', 46800),
- ('Venezuela Standard Time', -16200),
- ('Vladivostok Standard Time', 36000),
- ('Volgograd Standard Time', 14400),
- ('W. Australia Standard Time', 28800),
- ('W. Central Africa Standard Time', 3600),
- ('W. Europe Standard Time', 3600),
- ('W. Mongolia Standard Time', 25200), # Hovd
- ('West Asia Standard Time', 18000),
- ('West Bank Standard Time', 7200),
- ('West Pacific Standard Time', 36000),
- ('Yakutsk Standard Time', 32400),
- ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
-)
-
-# List of standard UTC IDs to use. Not public so may be safely changed.
-# Do not remove IDs, as each entry is part of the API/behavior guarantee.
-# ( UTC Id, Offset Seconds )
-utcIdList = (
- ('UTC', 0), # Goes first so is default
- ('UTC-14:00', -50400),
- ('UTC-13:00', -46800),
- ('UTC-12:00', -43200),
- ('UTC-11:00', -39600),
- ('UTC-10:00', -36000),
- ('UTC-09:00', -32400),
- ('UTC-08:00', -28800),
- ('UTC-07:00', -25200),
- ('UTC-06:00', -21600),
- ('UTC-05:00', -18000),
- ('UTC-04:30', -16200),
- ('UTC-04:00', -14400),
- ('UTC-03:30', -12600),
- ('UTC-03:00', -10800),
- ('UTC-02:00', -7200),
- ('UTC-01:00', -3600),
- ('UTC-00:00', 0),
- ('UTC+00:00', 0),
- ('UTC+01:00', 3600),
- ('UTC+02:00', 7200),
- ('UTC+03:00', 10800),
- ('UTC+03:30', 12600),
- ('UTC+04:00', 14400),
- ('UTC+04:30', 16200),
- ('UTC+05:00', 18000),
- ('UTC+05:30', 19800),
- ('UTC+05:45', 20700),
- ('UTC+06:00', 21600),
- ('UTC+06:30', 23400),
- ('UTC+07:00', 25200),
- ('UTC+08:00', 28800),
- ('UTC+08:30', 30600),
- ('UTC+09:00', 32400),
- ('UTC+09:30', 34200),
- ('UTC+10:00', 36000),
- ('UTC+11:00', 39600),
- ('UTC+12:00', 43200),
- ('UTC+13:00', 46800),
- ('UTC+14:00', 50400),
-)
-
-### End of data that may need updates in response to CLDR ###
-
-class ByteArrayData:
- def __init__(self):
- self.data = []
- self.hash = {}
-
- def append(self, s):
- s = s + '\0'
- if s in self.hash:
- return self.hash[s]
-
- lst = unicode2hex(s)
- index = len(self.data)
- if index > 0xffff:
- raise Error(f'Index ({index}) outside the uint16 range !')
- self.hash[s] = index
- self.data += lst
- return index
-
- def write(self, out, name):
- out(f'\nstatic constexpr char {name}[] = {{\n')
- out(wrap_list(self.data))
- out('\n};\n')
-
-class ZoneIdWriter (SourceFileEditor):
- def write(self, version, defaults, windowsIds):
- self.__writeWarning(version)
- windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
- windows.write(self.writer.write, 'windowsIdData')
- iana.write(self.writer.write, 'ianaIdData')
-
- def __writeWarning(self, version):
- self.writer.write(f"""
-/*
- This part of the file was generated on {datetime.date.today()} from the
- Common Locale Data Repository v{version} file supplemental/windowsZones.xml
-
- http://www.unicode.org/cldr/
-
- Do not edit this code: run cldr2qtimezone.py on updated (or
- edited) CLDR data; see qtbase/util/locale_database/.
-*/
-
-""")
-
- @staticmethod
- def __writeTables(out, defaults, windowsIds):
- windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()
-
- # Write Windows/IANA table
- out('// Windows ID Key, Territory Enum, IANA ID Index\n')
- out('static constexpr QZoneData zoneDataTable[] = {\n')
- for index, data in sorted(windowsIds.items()):
- out(' {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
- data['windowsKey'], data['territoryId'],
- ianaIdData.append(data['ianaList']),
- data['windowsId'], data['territory']))
- out('};\n\n')
-
- # Write Windows ID key table
- out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
- out('static constexpr QWindowsData windowsDataTable[] = {\n')
- for index, pair in enumerate(windowsIdList, 1):
- out(' {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
- index,
- windowsIdData.append(pair[0]),
- ianaIdData.append(defaults[index]),
- pair[1], pair[0]))
- out('};\n\n')
-
- # Write UTC ID key table
- out('// IANA ID Index, UTC Offset\n')
- out('static constexpr QUtcData utcDataTable[] = {\n')
- for pair in utcIdList:
- out(' {{ {:6d},{:6d} }}, // {}\n'.format(
- ianaIdData.append(pair[0]), pair[1], pair[0]))
- out('};\n')
-
- return windowsIdData, ianaIdData
-
-
-def main(out, err):
- """Parses CLDR's data and updates Qt's representation of it.
-
- Takes sys.stdout, sys.stderr (or equivalents) as
- arguments. Expects two command-line options: the root of the
- unpacked CLDR data-file tree and the root of the qtbase module's
- checkout. Updates QTimeZone's private data about Windows time-zone
- IDs."""
- parser = argparse.ArgumentParser(
- description="Update Qt's CLDR-derived timezone data.")
- parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
- parser.add_argument('qtbase_path',
- help='path to the root of the qtbase source tree',
- nargs='?', default=qtbase_root)
-
- args = parser.parse_args()
-
- cldrPath = Path(args.cldr_path)
- qtPath = Path(args.qtbase_path)
-
- if not qtPath.is_dir():
- parser.error(f"No such Qt directory: {qtPath}")
-
- if not cldrPath.is_dir():
- parser.error(f"No such CLDR directory: {cldrPath}")
-
- dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
-
- if not dataFilePath.is_file():
- parser.error(f'No such file: {dataFilePath}')
-
- try:
- version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
- dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
- except IOError as e:
- parser.error(
- f'Failed to open common/supplemental/windowsZones.xml: {e}')
- return 1
- except Error as e:
- err.write('\n'.join(textwrap.wrap(
- f'Failed to read windowsZones.xml: {e}',
- subsequent_indent=' ', width=80)) + '\n')
- return 1
-
- out.write('Input file parsed, now writing data\n')
-
- try:
- with ZoneIdWriter(dataFilePath, qtPath) as writer:
- writer.write(version, defaults, winIds)
- except Exception as e:
- err.write(f'\nError while updating timezone data: {e}\n')
- return 1
-
- out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
- return 0
-
-if __name__ == '__main__':
- import sys
- sys.exit(main(sys.stdout, sys.stderr))
diff --git a/util/locale_database/dateconverter.py b/util/locale_database/dateconverter.py
index e3dcb9fc47..8ca15405f7 100644
--- a/util/locale_database/dateconverter.py
+++ b/util/locale_database/dateconverter.py
@@ -1,81 +1,195 @@
# Copyright (C) 2016 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-import re
-
-def _convert_pattern(pattern):
- # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
- qt_regexps = {
- r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
- r"L" : "M", # stand-alone month names. not supported.
- r"g{1,}": "", # modified julian day. not supported.
- r"S{1,}" : "", # fractional seconds. not supported.
- r"A{1,}" : "" # milliseconds in day. not supported.
- }
- qt_patterns = {
- "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
- "y" : "yyyy", # four-digit year without leading zeroes
- "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
- "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
- "MMMMM" : "MMM", # narrow month name.
- "LLLLL" : "MMM", # stand-alone narrow month name.
- "l" : "", # special symbol for chinese leap month. not supported.
- "w" : "", "W" : "", # week of year/month. not supported.
- "D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
- "F" : "", # day of week in month. not supported.
- "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
- "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
- "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
- "a" : "AP", # AM/PM
- "K" : "h", # Hour 0-11
- "k" : "H", # Hour 1-24
- "j" : "", # special reserved symbol.
- "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
- "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
- "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
- "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t" # timezone
- }
- if pattern in qt_patterns:
- return qt_patterns[pattern]
- for r,v in qt_regexps.items():
- pattern = re.sub(r, v, pattern)
- return pattern
-
-def convert_date(input):
- result = ""
- patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
- last = ""
- inquote = 0
- chars_to_strip = " -"
- for c in input:
- if c == "'":
- inquote = inquote + 1
- if inquote % 2 == 0:
- if c in patterns:
- if not last:
- last = c
- else:
- if c in last:
- last += c
- else:
- # pattern changed
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- last = c
- continue
- if last:
- # pattern ended
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- last = ""
- result += c
- if last:
- converted = _convert_pattern(last)
- result += converted
- if not converted:
- result = result.rstrip(chars_to_strip)
- return result.lstrip(chars_to_strip)
+class Converter (object):
+ """Conversion between CLDR and Qt datetime formats.
+
+ Keep in sync with qlocale_mac.mm's macToQtFormat().
+ The definitive source of truth is:
+ https://www.unicode.org/reports/tr35/tr35-68/tr35-dates.html#Date_Field_Symbol_Table
+
+ See convert() for explanation of the approach taken. Each method
+ with a single-letter name is used to scan a prefix of a text,
+ presumed to begin with that letter (or one Qt treats as equivalent
+ to it) and returns a pair (Qt format, length), to use the given Qt
+ format in place of text[:length]. In all cases, length must be
+ positive."""
+
+ @staticmethod
+ def __is_reserved(ch):
+ """Every ASCII letter is a reserved symbol in CLDR datetime formats"""
+ assert len(ch) == 1, ch
+ return ch.isascii() and ch.isalpha();
+ @staticmethod
+ def __count_first(text):
+ """How many of text[0] appear at the start of text ?"""
+ assert text
+ return len(text) - len(text.lstrip(text[0]))
+ @classmethod
+ def __verbatim(cls, text):
+ # Used where our format coincides with LDML's, including on length.
+ n = cls.__count_first(text)
+ return text[:n], n
+ @classmethod
+ def __treat_as(cls, mimic, text):
+ # Helper for aliases
+ n = cls.__count_first(text)
+ return mimic * n, n
+
+ # Please follow alphabetic order, with two cases of the same
+ # letter adjacent, lower before upper.
+ @classmethod
+ def a(cls, text): # AM/PM indicator; use locale-appropriate case
+ return 'Ap', cls.__count_first(text)
+
+ # A: Milliseconds in day. Not supported.
+ b = a # AM/PM/noon/midnight
+ B = a # "Flexible day period" (e.g. "at night" / "in the day")
+ # (Only zh_Hant_TW affected; zh_Hant_{HK,MO} use 'ah', mapped to
+ # 'APh', so do the same here.)
+
+ @classmethod
+ def c(cls, text): # Stand-alone local day of week
+ # Has length-variants for several cases Qt doesn't support, as
+ # do 'e' and 'E': just map all simply to weekday, abbreviated
+ # or full.
+ n = cls.__count_first(text)
+ return ('dddd' if n == 4 else 'ddd'), n
+
+ # C: Input skeleton symbol
+ d = __verbatim # day (of month or of week, depends on length)
+ # D: Day of year. Not supported.
+ e = c # Local day of week
+ E = c # Just plain day of week
+ # F: Day of week in month. Not supported.
+ # g: Modified julian day. Not supported.
+ # G: Era. Not supported.
+ h = __verbatim # Hour 1-12, treat as 0-11
+ H = __verbatim # Hour 0-23
+ # j: Input skeleton symbol
+ # J: Input skeleton symbol
+
+ @classmethod
+ def k(cls, text): # Hour 1-24, treat as 0-23
+ return cls.__treat_as('H', text)
+ @classmethod
+ def K(cls, text): # Hour 0-11
+ return cls.__treat_as('h', text)
+
+ # l: Deprecated Chinese leap month indicator.
+ @classmethod
+ def L(cls, text): # Stand-alone month names: treat as plain month names.
+ n = cls.__count_first(text)
+ # Length five is narrow; treat same as abbreviated; anything
+ # shorter matches Qt's month forms.
+ return ('MMM' if n > 4 else 'M' * n), n
+
+ m = __verbatim # Minute within the hour.
+ M = L # Plain month names, possibly abbreviated, and numbers.
+
+ @classmethod
+ def O(cls, text): # Localized GMT±offset formats. Map to Z-or-UTC±HH:mm
+ return 't', cls.__count_first(text)
+
+ # q: Quarter. Not supported.
+ # Q: Quarter. Not supported.
+
+ s = __verbatim # Seconds within the minute.
+ @classmethod
+ def S(cls, text): # Fractional seconds. Only milliseconds supported.
+ # FIXME: spec is unclear, do we need to include the leading
+ # dot or not ? For now, no known locale actually exercises
+ # this, so stick with what we've done on Darwin since long
+ # before adding support here.
+ n = cls.__count_first(text)
+ return ('z' if n < 3 else 'zzz'), n
+
+ @classmethod
+ def u(cls, text): # Extended year (numeric)
+ # Officially, 'u' is simply the full year number, zero-padded
+ # to the length of the field. Qt's closest to that is four-digit.
+ # It explicitly has no special case for two-digit year.
+ return 'yyyy', cls.__count_first(text)
+
+ # U: Cyclic Year Name. Not supported
+ @classmethod
+ def v(cls, text): # Generic non-location format. Map to name.
+ return 'tttt', cls.__count_first(text)
+
+ V = v # Zone ID in various forms; VV is IANA ID. Map to name.
+ # w: Week of year. Not supported.
+ # W: Week of month. Not supported.
+
+ @classmethod
+ def x(cls, text): # Variations on offset format.
+ n = cls.__count_first(text)
+ # Ignore: n == 1 may omit minutes, n > 3 may include seconds.
+ return ('ttt' if n > 1 and n & 1 else 'tt'), n
+ X = x # Should use Z for zero offset.
+
+ @classmethod
+ def y(cls, text): # Year number.
+ n = cls.__count_first(text)
+ return ('yy' if n == 2 else 'yyyy'), n
+ # Y: Year for Week-of-year calendars
+
+ z = v # Specific (i.e. distinguish standard from DST) non-location format.
+ @classmethod
+ def Z(cls, text): # Offset format, optionally with GMT (Qt uses UTC) prefix.
+ n = cls.__count_first(text)
+ return ('tt' if n < 4 else 'ttt' if n > 4 else 't'), n
+
+ @staticmethod
+ def scanQuote(text): # Can't have ' as a method name, so handle specially
+ assert text.startswith("'")
+ i = text.find("'", 1) # Find the next; -1 if not present.
+ i = len(text) if i < 0 else i + 1 # Include the close-quote.
+ return text[:i], i
+
+ # Now put all of those to use:
+ @classmethod
+ def convert(cls, text):
+ """Convert a CLDR datetime format string into a Qt one.
+
+ Presumes that the caller will ''.join() the fragments it
+ yields. Each sequence of CLDR field symbols that corresponds
+ to a Qt format token is converted to it; all other CLDR field
+ symbols are discarded; the literals in between fields are
+ preserved verbatim, except that space and hyphen separators
+ immediately before a discarded field are discarded with it.
+
+ The approach is to look at the first symbol of the remainder
+ of the text, at each iteration, and use that first symbol to
+ select a function that will identify how much of the text to
+ consume and what to replace it with."""
+ sep = ''
+ while text:
+ ch = text[0]
+ if ch == "'":
+ quoted, length = cls.scanQuote(text)
+ text = text[length:]
+ sep += quoted
+ elif hasattr(cls, ch):
+ qtform, length = getattr(cls, ch)(text)
+ assert qtform and length > 0, (ch, text, qtform, length)
+ text = text[length:]
+ if sep:
+ yield sep
+ sep = ''
+ yield qtform
+ elif cls.__is_reserved(ch):
+ text = text[cls.__count_first(text):]
+ # Discard space or dash separator that was only there
+ # for the sake of the unsupported field:
+ sep = sep.rstrip(' -')
+ # TODO: should we also strip [ -]* from text
+ # immediately following unsupported forms ?
+ else:
+ sep += ch
+ text = text[1:]
+ if sep:
+ yield sep
+
+def convert_date(text):
+ # See Converter.convert()
+ return ''.join(Converter.convert(text))
diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py
index 5123065ef7..66b8840cb1 100644
--- a/util/locale_database/enumdata.py
+++ b/util/locale_database/enumdata.py
@@ -1,30 +1,60 @@
# Copyright (C) 2021 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-# A run of cldr2qlocalexml.py will produce output reporting any
-# language, script and territory codes it sees, in data, for which it
-# can find a name (taken always from en.xml) that could potentially be
-# used. There is no point adding a mapping for such a code unless the
-# CLDR's common/main/ contains an XML file for at least one locale
-# that exerciss it.
+"""Assorted enumerations implicated in public API.
-# Each *_list reflects the current values of its enums in qlocale.h;
-# if new xml language files are available in CLDR, these languages and
-# territories need to be *appended* to this list (for compatibility
-# between versions). Include any spaces present in names (scripts
-# shall squish them out for the enum entries) in *_list, but use the
-# squished forms of names in the *_aliases mappings.
+The numberings of these enumerations can only change at major
+versions. When new CLDR data implies adding entries, the new ones must
+go after all existing ones. See also zonedata.py for enumerations
+related to timezones and CLDR, which can more freely be changed
+between versions.
-# For a new major version (and only then), we can change the
-# numbering, so re-sort each list into alphabetic order (e.g. using
-# sort -k2); but keep the Any and C entries first. That's why those
-# are offset with a blank line, below. After doing that, regenerate
-# locale data as usual; this will cause a binary-incompatible change.
+A run of cldr2qlocalexml.py will produce output reporting any
+language, script and territory codes it sees, in data, for which it
+can find a name (taken always from en.xml) that could potentially be
+used. There is no point adding a mapping for such a code unless the
+CLDR's common/main/ contains an XML file for at least one locale that
+exercises it (and little point, even then, absent substantial data,
+ignoring draft='unconfirmed' entries).
-# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
-# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
-# languages so closely related to one another that they could also be
-# regarded as divergent dialects of the macrolanguage.
+Each *_map reflects the current values of its enums in qlocale.h; if
+new xml language files are available in CLDR, these languages and
+territories need to be *appended* to this list (for compatibility
+between versions). Include any spaces and dashes present in names
+(they'll be squished out for the enum entries) in *_map, but use the
+squished forms of names in the *_aliases mappings. The squishing also
+turns the first letter of each word into a capital so you can safely
+preserve the case of en.xml's name; but omit (or replace with space)
+any punctuation aside from dashes and map any accented letters to
+their un-accented plain ASCII. The two tables, for each enum, have
+the forms:
+* map { Numeric value: ("Proper name", "ISO code") }
+* alias { "OldName": "CurrentName" }
+
+TODO: add support for marking entries as deprecated from a specified
+version. For aliases, that merely deprecates the name. Where we have a
+name for which CLDR offers no data, we may also want to deprecate
+entries in the map - although they may be worth keeping for the
+benefit of QLocaleSelector (see QTBUG-112765), if other
+locale-specific resources might have use of them.
+
+For a new major version (and only then), we can change the numbering,
+so re-sort each list into alphabetic order (e.g. using sort -k2); but
+keep the Any and C entries first. That's why those are offset with a
+blank line, below. After doing that, regenerate locale data as usual;
+this will cause a binary-incompatible change.
+
+Note on 'macrolanguage' comments: see QTBUG-107781 and 'ISO 639
+macrolanguage' on Wikipedia. A 'macrolanguage' is (loosely-speaking) a
+group of languages so closely related to one another that they could
+also be regarded as divergent dialects of the macrolanguage. In some
+cases this may mean a resource (such as translation or text-to-speech
+data) may describe itself as pertaining to the macrolanguage, implying
+its suitability for use in any of the languages within the
+macrolanguage. For example, no_NO might be used for a generic
+Norwegian resource, embracing both nb_NO and nn_NO.
+
+"""
language_map = {
0: ("AnyLanguage", " "),
@@ -151,7 +181,7 @@ language_map = {
120: ("Japanese", "ja"),
121: ("Javanese", "jv"),
122: ("Jju", "kaj"),
- 123: ("Jola Fonyi", "dyo"),
+ 123: ("Jola-Fonyi", "dyo"),
124: ("Kabuverdianu", "kea"),
125: ("Kabyle", "kab"),
126: ("Kako", "kkj"),
@@ -192,7 +222,7 @@ language_map = {
161: ("Lojban", "jbo"),
162: ("Lower Sorbian", "dsb"),
163: ("Low German", "nds"),
- 164: ("Luba Katanga", "lu"),
+ 164: ("Luba-Katanga", "lu"),
165: ("Lule Sami", "smj"),
166: ("Luo", "luo"),
167: ("Luxembourgish", "lb"),
@@ -200,7 +230,7 @@ language_map = {
169: ("Macedonian", "mk"),
170: ("Machame", "jmc"),
171: ("Maithili", "mai"),
- 172: ("Makhuwa Meetto", "mgh"),
+ 172: ("Makhuwa-Meetto", "mgh"),
173: ("Makonde", "kde"),
174: ("Malagasy", "mg"), # macrolanguage
175: ("Malayalam", "ml"),
@@ -361,13 +391,26 @@ language_map = {
329: ("Nheengatu", "yrl"),
# added in CLDR v42
330: ("Haryanvi", "bgc"),
- 331: ("Moksha", "mdf"),
- 332: ("Northern Frisian", "frr"),
- 333: ("Obolo", "ann"),
- 334: ("Pijin", "pis"),
- 335: ("Rajasthani", "raj"),
- 336: ("Toki Pona", "tok"),
+ 331: ("Northern Frisian", "frr"),
+ 332: ("Rajasthani", "raj"),
+ 333: ("Moksha", "mdf"),
+ 334: ("Toki Pona", "tok"),
+ 335: ("Pijin", "pis"),
+ 336: ("Obolo", "ann"),
+ # added in CLDR v43
+ 337: ("Baluchi", "bal"),
+ 338: ("Ligurian", "lij"),
+ 339: ("Rohingya", "rhg"),
+ 340: ("Torwali", "trw"),
+ # added in CLDR v44
+ 341: ("Anii", "blo"),
+ 342: ("Kangri", "xnr"),
+ 343: ("Venetian", "vec"),
}
+# Don't add languages just because they exist; check CLDR does provide
+# substantial data for locales using it; and check, once added, they
+# don't show up in cldr2qlocalexml.py's unused listing. Do also check
+# the data's draft status; if it's (nearly) all unconfirmed, leave it.
language_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
@@ -391,7 +434,7 @@ language_aliases = {
'Navaho': 'Navajo',
'Oriya': 'Odia',
'Kirghiz': 'Kyrgyz'
- }
+}
territory_map = {
0: ("AnyTerritory", "ZZ"),
@@ -405,7 +448,7 @@ territory_map = {
7: ("Angola", "AO"),
8: ("Anguilla", "AI"),
9: ("Antarctica", "AQ"),
- 10: ("Antigua And Barbuda", "AG"),
+ 10: ("Antigua and Barbuda", "AG"),
11: ("Argentina", "AR"),
12: ("Armenia", "AM"),
13: ("Aruba", "AW"),
@@ -424,7 +467,7 @@ territory_map = {
26: ("Bermuda", "BM"),
27: ("Bhutan", "BT"),
28: ("Bolivia", "BO"),
- 29: ("Bosnia And Herzegovina", "BA"),
+ 29: ("Bosnia and Herzegovina", "BA"),
30: ("Botswana", "BW"),
31: ("Bouvet Island", "BV"),
32: ("Brazil", "BR"),
@@ -442,7 +485,7 @@ territory_map = {
44: ("Caribbean Netherlands", "BQ"),
45: ("Cayman Islands", "KY"),
46: ("Central African Republic", "CF"),
- 47: ("Ceuta And Melilla", "EA"),
+ 47: ("Ceuta and Melilla", "EA"),
48: ("Chad", "TD"),
49: ("Chile", "CL"),
50: ("China", "CN"),
@@ -451,8 +494,8 @@ territory_map = {
53: ("Cocos Islands", "CC"),
54: ("Colombia", "CO"),
55: ("Comoros", "KM"),
- 56: ("Congo Brazzaville", "CG"),
- 57: ("Congo Kinshasa", "CD"),
+ 56: ("Congo - Brazzaville", "CG"),
+ 57: ("Congo - Kinshasa", "CD"),
58: ("Cook Islands", "CK"),
59: ("Costa Rica", "CR"),
60: ("Croatia", "HR"),
@@ -496,11 +539,11 @@ territory_map = {
98: ("Guam", "GU"),
99: ("Guatemala", "GT"),
100: ("Guernsey", "GG"),
- 101: ("Guinea Bissau", "GW"),
+ 101: ("Guinea-Bissau", "GW"),
102: ("Guinea", "GN"),
103: ("Guyana", "GY"),
104: ("Haiti", "HT"),
- 105: ("Heard And McDonald Islands", "HM"),
+ 105: ("Heard and McDonald Islands", "HM"),
106: ("Honduras", "HN"),
107: ("Hong Kong", "HK"),
108: ("Hungary", "HU"),
@@ -510,12 +553,12 @@ territory_map = {
112: ("Iran", "IR"),
113: ("Iraq", "IQ"),
114: ("Ireland", "IE"),
- 115: ("Isle Of Man", "IM"),
+ 115: ("Isle of Man", "IM"),
116: ("Israel", "IL"),
117: ("Italy", "IT"),
- # Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
- # or CoteDIvoire, either failing to make the d' separate from
- # Cote or messing with its case. So stick with Ivory Coast:
+ # Officially Côte d’Ivoire, which we'd need to map to CotedIvoire
+ # or CoteDIvoire, either failing to make the d' separate from Cote
+ # or messing with its case. So stick with Ivory Coast:
118: ("Ivory Coast", "CI"),
119: ("Jamaica", "JM"),
120: ("Japan", "JP"),
@@ -595,14 +638,14 @@ territory_map = {
194: ("Rwanda", "RW"),
195: ("Saint Barthelemy", "BL"),
196: ("Saint Helena", "SH"),
- 197: ("Saint Kitts And Nevis", "KN"),
+ 197: ("Saint Kitts and Nevis", "KN"),
198: ("Saint Lucia", "LC"),
199: ("Saint Martin", "MF"),
- 200: ("Saint Pierre And Miquelon", "PM"),
- 201: ("Saint Vincent And Grenadines", "VC"),
+ 200: ("Saint Pierre and Miquelon", "PM"),
+ 201: ("Saint Vincent and Grenadines", "VC"),
202: ("Samoa", "WS"),
203: ("San Marino", "SM"),
- 204: ("Sao Tome And Principe", "ST"),
+ 204: ("Sao Tome and Principe", "ST"),
205: ("Saudi Arabia", "SA"),
206: ("Senegal", "SN"),
207: ("Serbia", "RS"),
@@ -615,14 +658,14 @@ territory_map = {
214: ("Solomon Islands", "SB"),
215: ("Somalia", "SO"),
216: ("South Africa", "ZA"),
- 217: ("South Georgia And South Sandwich Islands", "GS"),
+ 217: ("South Georgia and South Sandwich Islands", "GS"),
218: ("South Korea", "KR"),
219: ("South Sudan", "SS"),
220: ("Spain", "ES"),
221: ("Sri Lanka", "LK"),
222: ("Sudan", "SD"),
223: ("Suriname", "SR"),
- 224: ("Svalbard And Jan Mayen", "SJ"),
+ 224: ("Svalbard and Jan Mayen", "SJ"),
225: ("Sweden", "SE"),
226: ("Switzerland", "CH"),
227: ("Syria", "SY"),
@@ -634,12 +677,12 @@ territory_map = {
233: ("Togo", "TG"),
234: ("Tokelau", "TK"),
235: ("Tonga", "TO"),
- 236: ("Trinidad And Tobago", "TT"),
- 237: ("Tristan Da Cunha", "TA"),
+ 236: ("Trinidad and Tobago", "TT"),
+ 237: ("Tristan da Cunha", "TA"),
238: ("Tunisia", "TN"),
239: ("Turkey", "TR"),
240: ("Turkmenistan", "TM"),
- 241: ("Turks And Caicos Islands", "TC"),
+ 241: ("Turks and Caicos Islands", "TC"),
242: ("Tuvalu", "TV"),
243: ("Uganda", "UG"),
244: ("Ukraine", "UA"),
@@ -654,9 +697,9 @@ territory_map = {
253: ("Vatican City", "VA"),
254: ("Venezuela", "VE"),
255: ("Vietnam", "VN"),
- 256: ("Wallis And Futuna", "WF"),
+ 256: ("Wallis and Futuna", "WF"),
257: ("Western Sahara", "EH"),
- 258: ("World", "001"),
+ 258: ("world", "001"),
259: ("Yemen", "YE"),
260: ("Zambia", "ZM"),
261: ("Zimbabwe", "ZW"),
@@ -726,7 +769,7 @@ script_map = {
28: ("Deseret", "Dsrt"),
29: ("Devanagari", "Deva"),
30: ("Duployan", "Dupl"),
- 31: ("Egyptian Hieroglyphs", "Egyp"),
+ 31: ("Egyptian hieroglyphs", "Egyp"),
32: ("Elbasan", "Elba"),
33: ("Ethiopic", "Ethi"),
34: ("Fraser", "Lisu"),
@@ -801,7 +844,7 @@ script_map = {
103: ("Pahawh Hmong", "Hmng"),
104: ("Palmyrene", "Palm"),
105: ("Pau Cin Hau", "Pauc"),
- 106: ("Phags Pa", "Phag"),
+ 106: ("Phags-pa", "Phag"),
107: ("Phoenician", "Phnx"),
108: ("Pollard Phonetic", "Plrd"),
109: ("Psalter Pahlavi", "Phlp"),
@@ -812,7 +855,7 @@ script_map = {
114: ("Sharada", "Shrd"),
115: ("Shavian", "Shaw"),
116: ("Siddham", "Sidd"),
- 117: ("Sign Writing", "Sgnw"),
+ 117: ("SignWriting", "Sgnw"), # Oddly, en.xml leaves no space in it.
118: ("Simplified Han", "Hans"),
119: ("Sinhala", "Sinh"),
120: ("Sora Sompeng", "Sora"),
@@ -837,6 +880,8 @@ script_map = {
139: ("Vai", "Vaii"),
140: ("Varang Kshiti", "Wara"),
141: ("Yi", "Yiii"),
+ # Added at CLDR v43
+ 142: ("Hanifi", "Rohg"), # Used for Rohingya
}
script_aliases = {
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index f292235fb4..219d1f7145 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -21,6 +21,13 @@ See individual classes for further detail.
from localetools import Error
from dateconverter import convert_date
+# The github version of CLDR uses '↑↑↑' to indicate "inherit"
+INHERIT = '↑↑↑'
+
+def _attrsFromDom(dom):
+ return { k: (v if isinstance(v, str) else v.nodeValue)
+ for k, v in dom.attributes.items() }
+
class Node (object):
"""Wrapper for an arbitrary DOM node.
@@ -50,6 +57,9 @@ class Node (object):
else:
self.draft = max(draft, self.draftScore(attr))
+ def attributes(self):
+ return _attrsFromDom(self.dom)
+
def findAllChildren(self, tag, wanted = None, allDull = False):
"""All children that do have the given tag and attributes.
@@ -166,17 +176,35 @@ class XmlScanner (object):
return elts
class Supplement (XmlScanner):
- def find(self, xpath):
+ def find(self, xpath, exclude=()):
+ """Finds nodes by matching a specified xpath.
+
+ If exclude is passed, it should be a sequence of attribute names (its
+ default is empty). Any matches to the given xpath that also have any
+ attribute in this sequence will be excluded.
+
+ For each childless node matching the xpath, or child of a node matching
+ the xpath, this yields a pair (name, attrs) where name is the
+ nodeName and attrs is a dict mapping the node's attribute's names to
+ their values. For attribute values that are not simple strings, the
+ nodeValue of the attribute node is used."""
elts = self.findNodes(xpath)
- for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,)
- for e in elts):
+ for elt in _iterateEach(e.dom.childNodes or (e.dom,)
+ for e in elts
+ if not any(a in e.dom.attributes
+ for a in exclude)):
if elt.attributes:
- yield (elt.nodeName,
- dict((k, v if isinstance(v, str) else v.nodeValue)
- for k, v in elt.attributes.items()))
+ yield elt.nodeName, _attrsFromDom(elt)
class LocaleScanner (object):
def __init__(self, name, nodes, root):
+ """Set up to scan data for a specified locale.
+
+ First parameter is the name of the locale; it will be used in
+ error messages. Second is a tuple of DOM root-nodes of files
+ with locale data, later ones serving as fall-backs for data
+ missing in earlier ones. Third parameter is the root locale's
+ DOM node."""
self.name, self.nodes, self.base = name, nodes, root
def find(self, xpath, default = None, draft = None):
@@ -257,7 +285,13 @@ class LocaleScanner (object):
stem = f'numbers/symbols[numberSystem={system}]/'
decimal = self.find(f'{stem}decimal')
group = self.find(f'{stem}group')
- assert decimal != group, (self.name, system, decimal)
+ if decimal == group:
+ # mn_Mong_MN @v43 :-(
+ clean = Node.draftScore('approved')
+ decimal = self.find(f'{stem}decimal', draft=clean)
+ group = self.find(f'{stem}group', draft=clean)
+ assert decimal != group, (self.name, system, decimal)
+
yield 'decimal', decimal
yield 'group', group
yield 'percent', self.find(f'{stem}percentSign')
@@ -324,6 +358,7 @@ class LocaleScanner (object):
def endonyms(self, language, script, territory, variant):
# TODO: take variant into account ?
+ # TODO: QTBUG-47892, support query for all combinations
for seq in ((language, script, territory),
(language, script), (language, territory), (language,)):
if not all(seq):
@@ -383,10 +418,10 @@ class LocaleScanner (object):
('long', 'format', 'wide'),
('short', 'format', 'abbreviated'),
('narrow', 'format', 'narrow'),
- ) # Used for month and day names
+ ) # Used for month and day names
def __find(self, xpath):
- retries = [ xpath.split('/') ]
+ retries, foundNone = [ xpath.split('/') ], True
while retries:
tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
for selector in tags:
@@ -396,6 +431,9 @@ class LocaleScanner (object):
break
else: # Found matching elements
+ elts = tuple(self.__skipInheritors(elts))
+ if elts:
+ foundNone = False
# Possibly filter elts to prefer the least drafty ?
for elt in elts:
yield elt
@@ -415,26 +453,40 @@ class LocaleScanner (object):
if not roots:
if retries: # Let outer loop fall back on an alias path:
break
- sought = '/'.join(tags)
- if sought != xpath:
- sought += f' (for {xpath})'
- raise Error(f'All lack child {selector} for {sought} in {self.name}')
+ if foundNone:
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += f' (for {xpath})'
+ raise Error(f'All lack child {selector} for {sought} in {self.name}')
else: # Found matching elements
+ roots = tuple(self.__skipInheritors(roots))
+ if roots:
+ foundNone = False
for elt in roots:
yield elt
- sought = '/'.join(tags)
- if sought != xpath:
- sought += f' (for {xpath})'
- raise Error(f'No {sought} in {self.name}')
+ if foundNone:
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += f' (for {xpath})'
+ raise Error(f'No {sought} in {self.name}')
+
+ @staticmethod
+ def __skipInheritors(elts):
+ for elt in elts:
+ try:
+ if elt.dom.firstChild.nodeValue != INHERIT:
+ yield elt
+ except (AttributeError, KeyError):
+ yield elt
def __currencyDisplayName(self, stem):
try:
return self.find(stem + 'displayName')
except Error:
pass
- for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
+ for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
try:
return self.find(f'{stem}displayName[count={x}]')
except Error:
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index a33ace4eb1..02ec7cafc7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -41,12 +41,47 @@ def unicode2hex(s):
lst.append(hex(v))
return lst
-def wrap_list(lst):
+def wrap_list(lst, perline=20):
def split(lst, size):
while lst:
head, lst = lst[:size], lst[size:]
yield head
- return ",\n".join(", ".join(x) for x in split(lst, 20))
+ return ",\n".join(", ".join(x) for x in split(lst, perline))
+
+def names_clash(cldr, enum):
+ """True if the reader might not recognize cldr as the name of enum
+
+ First argument, cldr, is the name CLDR gives for some language,
+ script or territory; second, enum, is the name enumdata.py gives
+ for it. If these are enough alike, returns None; otherwise, a
+ non-empty string that results from adapting cldr to be more like
+ how enumdata.py would express it."""
+ if cldr == enum:
+ return None
+
+ # Some common substitutions:
+ cldr = cldr.replace('&', 'And')
+ prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+ for k, v in prefix.items():
+ if cldr.startswith(k + ' '):
+ cldr = v + cldr[len(k):]
+
+ # Chop out any parenthesised part, e.g. (Burma):
+ while '(' in cldr:
+ try:
+ f, t = cldr.index('('), cldr.rindex(')')
+ except ValueError:
+ break
+ cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+ # Various accented letters:
+ remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+ skip = '\u02bc' # Punctuation for which .isalpha() is true.
+ # Let enum match (ignoring non-letters and case) as any substring of cldr:
+ if ''.join(enum.lower().split()) in ''.join(
+ remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+ return None
+ return cldr
@contextmanager
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index a3e2fb9d2b..dae1894cd2 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -44,77 +44,28 @@ def startCount(c, text): # strspn
except StopIteration:
return len(text)
-def convertFormat(format):
- """Convert date/time format-specier from CLDR to Qt
-
- Match up (as best we can) the differences between:
- * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
- * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
- """
- # Compare and contrast dateconverter.py's convert_date().
- # Need to (check consistency and) reduce redundancy !
- result = ""
- i = 0
- while i < len(format):
- if format[i] == "'":
- result += "'"
- i += 1
- while i < len(format) and format[i] != "'":
- result += format[i]
- i += 1
- if i < len(format):
- result += "'"
- i += 1
- else:
- s = format[i:]
- if s.startswith('E'): # week-day
- n = startCount('E', s)
- if n < 3:
- result += 'ddd'
- elif n == 4:
- result += 'dddd'
- else: # 5: narrow, 6 short; but should be name, not number :-(
- result += 'd' if n < 6 else 'dd'
- i += n
- elif s[0] in 'ab': # am/pm
- # 'b' should distinguish noon/midnight, too :-(
- result += "AP"
- i += startCount('ab', s)
- elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
- result += 'z'
- i += startCount('S', s)
- elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
- result += 't'
- i += startCount('V', s)
- elif s[0] in 'zv': # zone
- # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
- # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
- result += "t"
- i += startCount('zv', s)
- else:
- result += format[i]
- i += 1
-
- return result
-
class QLocaleXmlReader (object):
def __init__(self, filename):
self.root = self.__parse(filename)
- # Lists of (id, name, code) triples:
- languages = tuple(self.__loadMap('language'))
- scripts = tuple(self.__loadMap('script'))
- territories = tuple(self.__loadMap('territory'))
+
+ from enumdata import language_map, script_map, territory_map
+ # Lists of (id, enum name, code, en.xml name) tuples:
+ languages = tuple(self.__loadMap('language', language_map))
+ scripts = tuple(self.__loadMap('script', script_map))
+ territories = tuple(self.__loadMap('territory', territory_map))
self.__likely = tuple(self.__likelySubtagsMap())
- # Mappings {ID: (name, code)}
- self.languages = dict((v[0], v[1:]) for v in languages)
- self.scripts = dict((v[0], v[1:]) for v in scripts)
- self.territories = dict((v[0], v[1:]) for v in territories)
- # Private mappings {name: (ID, code)}
- self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
- self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
- self.__landByName = dict((v[1], (v[0], v[2])) for v in territories)
+
+ # Mappings {ID: (enum name, code, en.xml name)}
+ self.languages = {v[0]: v[1:] for v in languages}
+ self.scripts = {v[0]: v[1:] for v in scripts}
+ self.territories = {v[0]: v[1:] for v in territories}
+
+ # Private mappings {enum name: (ID, code)}
+ self.__langByName = {v[1]: (v[0], v[2]) for v in languages}
+ self.__textByName = {v[1]: (v[0], v[2]) for v in scripts}
+ self.__landByName = {v[1]: (v[0], v[2]) for v in territories}
# Other properties:
- self.dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
+ self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
self.cldrVersion = self.__firstChildText(self.root, "version")
def loadLocaleMap(self, calendars, grumble = lambda text: None):
@@ -146,6 +97,21 @@ class QLocaleXmlReader (object):
yield (language, script, territory), locale
+ def aliasToIana(self):
+ kid = self.__firstChildText
+ for elt in self.__eachEltInGroup(self.root, 'zoneAliases', 'zoneAlias'):
+ yield kid(elt, 'alias'), kid(elt, 'iana')
+
+ def msToIana(self):
+ kid = self.__firstChildText
+ for elt in self.__eachEltInGroup(self.root, 'windowsZone', 'msZoneIana'):
+ yield kid(elt, 'msid'), kid(elt, 'iana')
+
+ def msLandIanas(self):
+ kid = self.__firstChildText
+ for elt in self.__eachEltInGroup(self.root, 'windowsZone', 'msLandZones'):
+ yield kid(elt, 'msid'), kid(elt, 'territorycode'), kid(elt, 'ianaids')
+
def languageIndices(self, locales):
index = 0
for key, value in self.languages.items():
@@ -184,11 +150,38 @@ class QLocaleXmlReader (object):
self.__textByName[give[1]][0]),
self.__landByName[give[2]][0])
+ def enumify(self, name, suffix):
+ """Stick together the parts of an enumdata.py name.
+
+ Names given in enumdata.py include spaces and hyphens that we
+ can't include in an identifier, such as the name of a member
+ of an enum type. Removing those would lose the word
+ boundaries, so make sure each word starts with a capital (but
+ don't simply capitalize() as some names contain words,
+ e.g. McDonald, that have later capitals in them).
+
+ We also need to resolve duplication between languages and
+ territories (by adding a suffix to each) and add Script to the
+ ends of script-names that don't already end in it."""
+ name = name.replace('-', ' ')
+ # Don't .capitalize() as McDonald is already camel-case (see enumdata.py):
+ name = ''.join(word[0].upper() + word[1:] for word in name.split())
+ if suffix != 'Script':
+ assert not(name in self.__dupes and name.endswith(suffix))
+ return name + suffix if name in self.__dupes else name
+
+ if not name.endswith(suffix):
+ name += suffix
+ if name in self.__dupes:
+ raise Error(f'The script name "{name}" is messy')
+ return name
+
# Implementation details:
- def __loadMap(self, category):
+ def __loadMap(self, category, enum):
kid = self.__firstChildText
for element in self.__eachEltInGroup(self.root, f'{category}List', category):
- yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
+ key = int(kid(element, 'id'))
+ yield key, enum[key][0], kid(element, 'code'), kid(element, 'name')
def __likelySubtagsMap(self):
def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
@@ -217,6 +210,8 @@ class QLocaleXmlReader (object):
child = elt.firstChild
while child:
if child.nodeType == elt.TEXT_NODE:
+ # Note: do not strip(), as some group separators are
+ # non-breaking spaces, that strip() will discard.
yield child.nodeValue
child = child.nextSibling
@@ -256,17 +251,16 @@ class Spacer (object):
First argument, indent, is either None (its default, for
'minifying'), an integer (number of spaces) or the unit of
text that is to be used for each indentation level (e.g. '\t'
- to use tabs). If indent is None, no indentation is added, nor
+ to use tabs). If indent is None, no indentation is added, nor
are line-breaks; otherwise, self(text), for non-empty text,
shall end with a newline and begin with indentation.
Second argument, initial, is the initial indentation; it is
- ignored if indent is None. Indentation increases after each
+ ignored if indent is None. Indentation increases after each
call to self(text) in which text starts with a tag and doesn't
include its end-tag; indentation decreases if text starts with
- an end-tag. The text is not parsed any more carefully than
- just described.
- """
+ an end-tag. The text is not parsed any more carefully than
+ just described."""
if indent is None:
self.__call = lambda x: x
else:
@@ -292,6 +286,10 @@ class Spacer (object):
return self.__call(line)
class QLocaleXmlWriter (object):
+ """Save the full set of locale data to a QLocaleXML file.
+
+ The output saved by this should conform to qlocalexml.rnc's
+ schema."""
def __init__(self, save = None, space = Spacer(4)):
"""Set up to write digested CLDR data as QLocale XML.
@@ -315,18 +313,28 @@ class QLocaleXmlWriter (object):
self.__write('<localeDatabase>')
# Output of various sections, in their usual order:
- def enumData(self):
+ def enumData(self, code2name):
+ """Output name/id/code tables for language, script and territory.
+
+ Parameter, code2name, is a function taking 'language',
+ 'script' or 'territory' and returning a lookup function that
+ maps codes, of the relevant type, to their English names. This
+ lookup function is passed a code and the name, both taken from
+ enumdata.py, that QLocale uses, so the .get() of a dict will
+ work. The English name from this lookup will be used by
+ QLocale::*ToString() for the enum member whose name is based
+ on the enumdata.py name passed as fallback to the lookup."""
from enumdata import language_map, script_map, territory_map
- self.__enumTable('language', language_map)
- self.__enumTable('script', script_map)
- self.__enumTable('territory', territory_map)
+ self.__enumTable('language', language_map, code2name)
+ self.__enumTable('script', script_map, code2name)
+ self.__enumTable('territory', territory_map, code2name)
# Prepare to detect any unused codes (see __writeLocale(), close()):
self.__languages = set(p[1] for p in language_map.values()
if not p[1].isspace())
self.__scripts = set(p[1] for p in script_map.values()
- if p[1] != 'ZZ')
+ if p[1] != 'Zzzz')
self.__territories = set(p[1] for p in territory_map.values()
- if p[1] != 'Zzzz')
+ if p[1] != 'ZZ')
def likelySubTags(self, entries):
self.__openTag('likelySubtags')
@@ -337,10 +345,48 @@ class QLocaleXmlWriter (object):
self.__closeTag('likelySubtag')
self.__closeTag('likelySubtags')
- def locales(self, locales, calendars):
+ def zoneData(self, alias, defaults, windowsIds):
+ self.__openTag('zoneAliases')
+ # iana is a single IANA ID
+ # name has the same form, but has been made redundant
+ for name, iana in sorted(alias.items()):
+ if name == iana:
+ continue
+ self.__openTag('zoneAlias')
+ self.inTag('alias', name)
+ self.inTag('iana', iana)
+ self.__closeTag('zoneAlias')
+ self.__closeTag('zoneAliases')
+
+ self.__openTag('windowsZone')
+ for (msid, code), ids in windowsIds.items():
+ # ianaids is a space-joined sequence of IANA IDs
+ self.__openTag('msLandZones')
+ self.inTag('msid', msid)
+ self.inTag('territorycode', code)
+ self.inTag('ianaids', ids)
+ self.__closeTag('msLandZones')
+
+ for winid, iana in defaults.items():
+ self.__openTag('msZoneIana')
+ self.inTag('msid', winid)
+ self.inTag('iana', iana)
+ self.__closeTag('msZoneIana')
+ self.__closeTag('windowsZone')
+
+ def locales(self, locales, calendars, en_US):
+ """Write the data for each locale.
+
+ First argument, locales, is the mapping whose values are the
+ Locale objects, with each key being the matching tuple of
+ numeric IDs for language, script, territory and variant.
+ Second argument is a tuple of calendar names. Third is the
+ tuple of numeric IDs that corresponds to en_US (needed to
+ provide fallbacks for the C locale)."""
+
self.__openTag('localeList')
self.__openTag('locale')
- self.__writeLocale(Locale.C(calendars), calendars)
+ self.__writeLocale(Locale.C(locales[en_US]), calendars)
self.__closeTag('locale')
for key in sorted(locales.keys()):
self.__openTag('locale')
@@ -355,12 +401,12 @@ class QLocaleXmlWriter (object):
self.__write(f'<{tag}>{text}</{tag}>')
def close(self, grumble):
- """Finish writing and grumble any issues discovered."""
+ """Finish writing and grumble about any issues discovered."""
if self.__rawOutput != self.__complain:
self.__write('</localeDatabase>')
self.__rawOutput = self.__complain
- if self.__languages or self.__scripts or self.territories:
+ if self.__languages or self.__scripts or self.__territories:
grumble('Some enum members are unused, corresponding to these tags:\n')
import textwrap
def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
@@ -381,13 +427,18 @@ class QLocaleXmlWriter (object):
def __complain(text):
raise Error('Attempted to write data after closing :-(')
- def __enumTable(self, tag, table):
+ @staticmethod
+ def __xmlSafe(text):
+ return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+ def __enumTable(self, tag, table, code2name):
self.__openTag(f'{tag}List')
- for key, value in table.items():
+ enname, safe = code2name(tag), self.__xmlSafe
+ for key, (name, code) in table.items():
self.__openTag(tag)
- self.inTag('name', value[0])
+ self.inTag('name', safe(enname(code, name)))
self.inTag('id', key)
- self.inTag('code', value[1])
+ self.inTag('code', code)
self.__closeTag(tag)
self.__closeTag(f'{tag}List')
@@ -405,7 +456,10 @@ class QLocaleXmlWriter (object):
self.__scripts.discard(locale.script_code)
self.__territories.discard(locale.territory_code)
- def __openTag(self, tag):
+ def __openTag(self, tag, **attrs):
+ """Write the opening tag of an element.
+
+ First argument, tag, is the element name. Any keyword
+ arguments are written as attributes of the element, each as
+ key="value" (the value is inserted verbatim, without any
+ escaping)."""
+ if attrs:
+ # XML separates attributes with white-space, not commas:
+ text = ' '.join(f'{k}="{v}"' for k, v in attrs.items())
+ tag = f'{tag} {text}'
self.__write(f'<{tag}>')
def __closeTag(self, tag):
self.__write(f'</{tag}>')
@@ -440,8 +494,6 @@ class Locale (object):
__asint = ("currencyDigits", "currencyRounding")
# Convert day-name to Qt day-of-week number:
__asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
- # Convert from CLDR format-strings to QDateTimeParser ones:
- __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
# Just use the raw text:
__astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
"decimal", "group", "zero",
@@ -450,9 +502,12 @@ class Locale (object):
"alternateQuotationStart", "alternateQuotationEnd",
"listPatternPartStart", "listPatternPartMiddle",
"listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+ "longDateFormat", "shortDateFormat",
+ "longTimeFormat", "shortTimeFormat",
'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
"currencyIsoCode", "currencySymbol", "currencyDisplayName",
- "currencyFormat", "currencyNegativeFormat")
+ "currencyFormat", "currencyNegativeFormat",
+ )
# Day-of-Week numbering used by Qt:
__qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
@@ -461,12 +516,15 @@ class Locale (object):
def fromXmlData(cls, lookup, calendars=('gregorian',)):
"""Constructor from the contents of XML elements.
- Single parameter, lookup, is called with the names of XML
- elements that should contain the relevant data, within a CLDR
- locale element (within a localeList element); these names are
- used for the attributes of the object constructed. Attribute
- values are obtained by suitably digesting the returned element
- texts.\n"""
+ First parameter, lookup, is called with the names of XML elements that
+ should contain the relevant data, within a QLocaleXML locale element
+ (within a localeList element); these names mostly match the attributes
+ of the object constructed. Its return must be the full text of the
+ first child DOM node element with the given name. Attribute values are
+ obtained by suitably digesting the returned element texts.
+
+ Optional second parameter, calendars, is a sequence of calendars for
+ which data is to be retrieved."""
data = {}
for k in cls.__asint:
data[k] = int(lookup(k))
@@ -474,14 +532,11 @@ class Locale (object):
for k in cls.__asdow:
data[k] = cls.__qDoW[lookup(k)]
- for k in cls.__asfmt:
- data[k] = convertFormat(lookup(k))
-
for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
data['listDelim' if k == 'list' else k] = lookup(k)
for k in cls.propsMonthDay('months'):
- data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
+ data[k] = {cal: lookup('_'.join((k, cal))) for cal in calendars}
grouping = lookup('groupSizes').split(';')
data.update(groupLeast = int(grouping[0]),
@@ -520,7 +575,7 @@ class Locale (object):
'longDateFormat', 'shortDateFormat',
'longTimeFormat', 'shortTimeFormat',
'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
- 'currencyFormat', 'currencyNegativeFormat'
+ 'currencyFormat', 'currencyNegativeFormat',
) + tuple(self.propsMonthDay('days')) + tuple(
'_'.join((k, cal))
for k in self.propsMonthDay('months')
@@ -531,97 +586,49 @@ class Locale (object):
for key in ('currencyDigits', 'currencyRounding'):
write(key, get(key))
- # Tools used by __monthNames:
- def fullName(i, name): return name
- def firstThree(i, name): return name[:3]
- def initial(i, name): return name[:1]
- def number(i, name): return str(i + 1)
- def islamicShort(i, name):
- if not name: return name
- if name == 'Shawwal': return 'Shaw.'
- words = name.split()
- if words[0].startswith('Dhu'):
- words[0] = words[0][:7] + '.'
- elif len(words[0]) > 3:
- words[0] = words[0][:3] + '.'
- return ' '.join(words)
- @staticmethod
- def __monthNames(calendars,
- known={ # Map calendar to (names, extractors...):
- # TODO: do we even need these ? CLDR's root.xml seems to
- # have them, complete with yeartype="leap" handling for
- # Hebrew's extra.
- 'gregorian': (('January', 'February', 'March', 'April', 'May', 'June', 'July',
- 'August', 'September', 'October', 'November', 'December'),
- # Extractor pairs, (plain, standalone)
- (fullName, fullName), # long
- (firstThree, firstThree), # short
- (number, initial)), # narrow
- 'persian': (('Farvardin', 'Ordibehesht', 'Khordad', 'Tir', 'Mordad',
- 'Shahrivar', 'Mehr', 'Aban', 'Azar', 'Dey', 'Bahman', 'Esfand'),
- (fullName, fullName),
- (firstThree, firstThree),
- (number, initial)),
- 'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
- 'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
- 'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
- (fullName, fullName),
- (islamicShort, islamicShort),
- (number, number)),
- 'hebrew': (('Tishri', 'Heshvan', 'Kislev', 'Tevet', 'Shevat', 'Adar I',
- 'Adar', 'Nisan', 'Iyar', 'Sivan', 'Tamuz', 'Av'),
- (fullName, fullName),
- (fullName, fullName),
- (number, number)),
- },
- sizes=('long', 'short', 'narrow')):
- for cal in calendars:
- try:
- data = known[cal]
- except KeyError as e: # Need to add an entry to known, above.
- e.args += ('Unsupported calendar:', cal)
- raise
- names, get = data[0], data[1:]
- for n, size in enumerate(sizes):
- yield ('_'.join((camelCase((size, 'months')), cal)),
- ';'.join(get[n][0](i, x) for i, x in enumerate(names)))
- yield ('_'.join((camelCase(('standalone', size, 'months')), cal)),
- ';'.join(get[n][1](i, x) for i, x in enumerate(names)))
- del fullName, firstThree, initial, number, islamicShort
-
@classmethod
- def C(cls, calendars=('gregorian',),
- days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
- 'Thursday', 'Friday', 'Saturday'),
- quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
- """Returns an object representing the C locale."""
- return cls(cls.__monthNames(calendars),
- language='C', language_code='0', languageEndonym='',
- script='AnyScript', script_code='0',
- territory='AnyTerritory', territory_code='0', territoryEndonym='',
- groupSizes=(3, 3, 1),
- decimal='.', group=',', list=';', percent='%',
- zero='0', minus='-', plus='+', exp='e',
+ def C(cls, en_US):
+ """Returns an object representing the C locale.
+
+ Required argument, en_US, is the corresponding object for the
+ en_US locale (or the en_US_POSIX one if we ever support
+ variants). The C locale inherits from this, overriding what it
+ may need to."""
+ base = en_US.__dict__.copy()
+ # Soroush's original contribution shortened Jalali month names
+ # - contrary to CLDR, which doesn't abbreviate these in
+ # root.xml or en.xml, although some locales do, e.g. fr_CA.
+ # For compatibility with that, cut each short Persian month name to three letters:
+ for k in ('shortMonths_persian', 'standaloneShortMonths_persian'):
+ base[k] = ';'.join(x[:3] for x in base[k].split(';'))
+
+ return cls(base,
+ language='C', language_code='',
+ language_id=0, languageEndonym='',
+ script='AnyScript', script_code='', script_id=0,
+ territory='AnyTerritory', territory_code='',
+ territory_id=0, territoryEndonym='',
+ variant='', variant_code='', variant_id=0,
+ # CLDR has non-ASCII versions of these:
quotationStart='"', quotationEnd='"',
- alternateQuotationStart='\'', alternateQuotationEnd='\'',
- listPatternPartStart='%1, %2',
- listPatternPartMiddle='%1, %2',
- listPatternPartEnd='%1, %2',
- listPatternPartTwo='%1, %2',
- byte_unit='bytes',
- byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
- byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
- am='AM', pm='PM', firstDayOfWeek='mon',
- weekendStart='sat', weekendEnd='sun',
- longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
- longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
- longDays=';'.join(days),
- shortDays=';'.join(d[:3] for d in days),
- narrowDays='7;1;2;3;4;5;6',
- standaloneLongDays=';'.join(days),
- standaloneShortDays=';'.join(d[:3] for d in days),
- standaloneNarrowDays=';'.join(d[:1] for d in days),
- currencyIsoCode='', currencySymbol='',
- currencyDisplayName='',
+ alternateQuotationStart="'", alternateQuotationEnd="'",
+ # CLDR gives 'dddd, MMMM d, yyyy', 'M/d/yy', 'h:mm:ss Ap tttt',
+ # 'h:mm Ap' with non-breaking space before Ap.
+ longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
+ longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
+ # CLDR has US-$ and US-style formats:
+ currencyIsoCode='', currencySymbol='', currencyDisplayName='',
currencyDigits=2, currencyRounding=1,
- currencyFormat='%1%2', currencyNegativeFormat='')
+ currencyFormat='%1%2', currencyNegativeFormat='',
+ # We may want to fall back to CLDR for some of these:
+ firstDayOfWeek='mon', # CLDR has 'sun'
+ exp='e', # CLDR has 'E'
+ listPatternPartEnd='%1, %2', # CLDR has '%1, and %2'
+ listPatternPartTwo='%1, %2', # CLDR has '%1 and %2'
+ narrowDays='7;1;2;3;4;5;6', # CLDR has letters
+ narrowMonths_gregorian='1;2;3;4;5;6;7;8;9;10;11;12', # CLDR has letters
+ standaloneNarrowMonths_persian='F;O;K;T;M;S;M;A;A;D;B;E', # CLDR has digits
+ # Keep these explicit, despite matching CLDR:
+ decimal='.', group=',', percent='%',
+ zero='0', minus='-', plus='+',
+ am='AM', pm='PM', weekendStart='sat', weekendEnd='sun')
diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc
index 818aa8f9c3..f8efe9204f 100644
--- a/util/locale_database/qlocalexml.rnc
+++ b/util/locale_database/qlocalexml.rnc
@@ -16,6 +16,8 @@ start = element localeDatabase {
element scriptList { Script+ },
element territoryList { Territory+ },
element likelySubtags { LikelySubtag+ },
+ element zoneAliases { ZoneAlias+ },
+ element windowsZone { MsLandZones+, MsZoneIana+ },
element localeList { Locale+ }
}
@@ -39,6 +41,23 @@ LocaleTriplet = (
element territory { text }
)
+# TODO: xsd patterns for IANA IDs and space-joined lists of them
+ZoneAlias = element zoneAlias {
+ element alias { text },
+ element iana { text }
+}
+
+MsLandZones = element msLandZones {
+ element msid { text },
+ element territorycode { text },
+ element ianaids { text }
+}
+
+MsZoneIana = element msZoneIana {
+ element msid { text },
+ element iana { text }
+}
+
WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
Digit = xsd:string { pattern = "\d" }
Punctuation = xsd:string { pattern = "\p{P}" }
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 717c567111..5bc9dd92f2 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -20,8 +20,25 @@ from pathlib import Path
from typing import Optional
from qlocalexml import QLocaleXmlReader
-from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor, qtbase_root
+from localetools import *
from iso639_3 import LanguageCodeData
+from zonedata import utcIdList, windowsIdList
+
+
+# Sanity check the zone data: every whole-minute, non-zero offset used by
+# a Windows zone must have a matching UTC±HH:mm entry in utcIdList.
+
+# Offsets from the Windows table, converted to minutes (assuming
+# windowsIdList gives them in seconds - TODO confirm), skipping any that
+# are not a whole number of minutes:
+winOff = set(m for m, s in (divmod(v, 60) for k, v in windowsIdList) if s == 0)
+# The UTC±HH:mm forms of the non-zero offsets:
+winUtc = set(f'UTC-{h:02}:{m:02}'
+ for h, m in (divmod(-o, 60) for o in winOff if o < 0)
+ ).union(f'UTC+{h:02}:{m:02}'
+ for h, m in (divmod(o, 60) for o in winOff if o > 0))
+# All such offsets should be represented by entries in utcIdList:
+newUtc = winUtc.difference(utcIdList)
+assert not newUtc, (
+ 'Please add missing UTC-offset zones to zonedata.utcIdList', newUtc)
+
class LocaleKeySorter:
"""Sort-ordering representation of a locale key.
@@ -47,39 +64,64 @@ class LocaleKeySorter:
# TODO: should we compare territory before or after script ?
return (key[0], self.foreign(key)) + key[1:]
-class StringDataToken:
- def __init__(self, index, length, bits):
+class ByteArrayData:
+ # Only for use with ASCII data, e.g. IANA IDs.
+ def __init__(self):
+ self.data, self.hash = [], {}
+
+ def append(self, s):
+ assert s.isascii(), s
+ s += '\0'
+ if s in self.hash:
+ return self.hash[s]
+
+ index = len(self.data)
if index > 0xffff:
- raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
- if length >= (1 << bits):
- raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')
+ raise Error(f'Index ({index}) outside the uint16 range !')
+ self.hash[s] = index
+ self.data += unicode2hex(s)
+ return index
+
+ def write(self, out, name):
+ out(f'\nstatic constexpr char {name}[] = {{\n')
+ out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
+ # All data is ASCII, so only two-digit hex is ever needed.
+ out('\n};\n')
+
+class StringDataToken:
+ def __init__(self, index, length, lenbits, indbits):
+ if index >= (1 << indbits):
+ raise ValueError(f'Start-index ({index}) exceeds the {indbits}-bit range!')
+ if length >= (1 << lenbits):
+ raise ValueError(f'Data size ({length}) exceeds the {lenbits}-bit range!')
self.index = index
self.length = length
class StringData:
- def __init__(self, name):
+ def __init__(self, name, lenbits = 8, indbits = 16):
self.data = []
self.hash = {}
self.name = name
self.text = '' # Used in quick-search for matches in data
+ self.__bits = lenbits, indbits
- def append(self, s, bits = 8):
+ def append(self, s):
try:
token = self.hash[s]
except KeyError:
- token = self.__store(s, bits)
+ token = self.__store(s)
self.hash[s] = token
return token
- def __store(self, s, bits):
+ def __store(self, s):
"""Add string s to known data.
Seeks to avoid duplication, where possible.
For example, short-forms may be prefixes of long-forms.
"""
if not s:
- return StringDataToken(0, 0, bits)
+ return StringDataToken(0, 0, *self.__bits)
ucs2 = unicode2hex(s)
try:
index = self.text.index(s) - 1
@@ -97,17 +139,19 @@ class StringData:
assert index >= 0
try:
- return StringDataToken(index, len(ucs2), bits)
+ return StringDataToken(index, len(ucs2), *self.__bits)
except ValueError as e:
e.args += (self.name, s)
raise
def write(self, fd):
- if len(self.data) > 0xffff:
- raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
+ indbits = self.__bits[1]
+ if len(self.data) >= (1 << indbits):
+ raise ValueError(f'Data is too big ({len(self.data)}) '
+ f'for {indbits}-bit index to its end!',
self.name)
fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
- fd.write(wrap_list(self.data))
+ fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
fd.write("\n};\n")
def currencyIsoCodeData(s):
@@ -136,6 +180,92 @@ class LocaleSourceEditor (SourceFileEditor):
""")
+class TimeZoneDataWriter (LocaleSourceEditor):
+ def __init__(self, path: Path, temp: Path, version: str):
+ super().__init__(path, temp, version)
+ self.__ianaTable = ByteArrayData() # Single IANA IDs
+ self.__ianaListTable = ByteArrayData() # Space-joined lists of IDs
+ self.__windowsTable = ByteArrayData() # Windows names for zones
+ self.__windowsList = sorted(windowsIdList,
+ key=lambda p: p[0].lower())
+ self.windowsKey = {name: (key, off) for key, (name, off)
+ in enumerate(self.__windowsList, 1)}
+
+ def utcTable(self):
+ """Write the table of UTC-offset zones.
+
+ Groups the names in utcIdList by the offset each denotes (as
+ computed by __offsetOf()), stores each group's space-joined
+ names in the IANA list table and writes one utcDataTable row
+ per offset, giving the index of that list and the offset. The
+ trailing comment on each row is the first name in its group."""
+ offsetMap, out = {}, self.writer.write
+ for name in utcIdList:
+ offset = self.__offsetOf(name)
+ offsetMap[offset] = offsetMap.get(offset, ()) + (name,)
+
+ # Write UTC ID key table
+ out('// IANA ID Index, UTC Offset\n')
+ out('static constexpr UtcData utcDataTable[] = {\n')
+ for offset in sorted(offsetMap): # Sort so C++ can binary-chop.
+ names = offsetMap[offset]
+ joined = self.__ianaListTable.append(' '.join(names))
+ out(f' {{ {joined:6d},{offset:6d} }}, // {names[0]}\n')
+ out('};\n')
+
+ def aliasToIana(self, pairs):
+ """Write the table mapping zone aliases to IANA IDs.
+
+ Single parameter, pairs, is an iterable of (alias, IANA ID)
+ pairs, expected to be already sorted and to have no alias
+ equal to its IANA ID. Both names of each pair are stored in
+ the single-ID table and their indices are written as one row
+ of aliasMappingTable."""
+ out, store = self.writer.write, self.__ianaTable.append
+
+ out('// Alias ID Index, Alias ID Index\n')
+ out('static constexpr AliasData aliasMappingTable[] = {\n')
+ for name, iana in pairs: # They're ready-sorted
+ # Report the offending pair; self-aliases are filtered out in QLocaleXmlWriter
+ assert name != iana, (name, iana)
+ out(f' {{ {store(name):6d},{store(iana):6d} }},'
+ f' // {name} -> {iana}\n')
+ out('};\n\n')
+
+ def msToIana(self, pairs):
+ out, winStore = self.writer.write, self.__windowsTable.append
+ ianaStore = self.__ianaListTable.append # TODO: Should be __ianaTable
+ alias = dict(pairs) # {MS name: IANA ID}
+
+ out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
+ out('static constexpr WindowsData windowsDataTable[] = {\n')
+ # Sorted by Windows ID key:
+
+ for index, (name, offset) in enumerate(self.__windowsList, 1):
+ out(f' {{ {index:6d},{winStore(name):6d},'
+ f'{ianaStore(alias[name]):6d},{offset:6d} }}, // {name}\n')
+ out('};\n\n')
+
+ def msLandIanas(self, triples): # (MS name, territory code, IANA list)
+ out, store = self.writer.write, self.__ianaListTable.append
+ from enumdata import territory_map
+ landKey = {code: (i, name) for i, (name, code) in territory_map.items()}
+ seq = sorted((self.windowsKey[name][0], landKey[land][0], name, landKey[land][1], ianas)
+ for name, land, ianas in triples)
+
+ out('// Windows ID Key, Territory Enum, IANA ID Index\n')
+ out('static constexpr ZoneData zoneDataTable[] = {\n')
+ # Sorted by (Windows ID Key, territory enum)
+ for winId, landId, name, land, ianas in seq:
+ out(f' {{ {winId:6d},{landId:6d},{store(ianas):6d} }},'
+ f' // {name} / {land}\n')
+ out('};\n\n')
+
+ def writeTables(self):
+ self.__windowsTable.write(self.writer.write, 'windowsIdData')
+ # TODO: these are misnamed, entries in the first are lists,
+ # those in the next are single IANA IDs
+ self.__ianaListTable.write(self.writer.write, 'ianaIdData')
+ self.__ianaTable.write(self.writer.write, 'aliasIdData')
+
+ # Implementation details:
+ @staticmethod
+ def __offsetOf(utcName):
+ "Maps a UTC±HH:mm name to its offset in seconds"
+ assert utcName.startswith('UTC')
+ if len(utcName) == 3:
+ return 0
+ assert utcName[3] in '+-', utcName
+ sign = -1 if utcName[3] == '-' else 1
+ assert len(utcName) == 9 and utcName[6] == ':', utcName
+ hour, mins = int(utcName[4:6]), int(utcName[-2:])
+ return sign * (hour * 60 + mins) * 60
+
class LocaleDataWriter (LocaleSourceEditor):
def likelySubtags(self, likely):
# First sort likely, so that we can use binary search in C++
@@ -283,7 +413,7 @@ class LocaleDataWriter (LocaleSourceEditor):
locale.minus, locale.plus, locale.exp,
locale.quotationStart, locale.quotationEnd,
locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
- tuple (date_format_data.append(f) for f in # 2 entries:
+ tuple(date_format_data.append(f) for f in # 2 entries:
(locale.longDateFormat, locale.shortDateFormat)) +
tuple(time_format_data.append(f) for f in # 2 entries:
(locale.longTimeFormat, locale.shortTimeFormat)) +
@@ -337,7 +467,11 @@ class LocaleDataWriter (LocaleSourceEditor):
for key, value in book.items():
if key == 0:
continue
- out(f'"{value[0]}\\0"\n')
+ enum, name = value[0], value[-1]
+ if names_clash(name, enum):
+ out(f'"{name}\\0" // {enum}\n')
+ else:
+ out(f'"{name}\\0"\n') # Automagically utf-8 encoded
out(';\n\n')
out(f'static constexpr quint16 {form}_name_index[] = {{\n')
@@ -346,9 +480,8 @@ class LocaleDataWriter (LocaleSourceEditor):
for key, value in book.items():
if key == 0:
continue
- name = value[0]
- out(f'{index:6d}, // {name}\n')
- index += len(name) + 1
+ out(f'{index:6d}, // {value[0]}\n')
+ index += len(value[-1].encode('utf-8')) + 1
out('};\n\n')
@staticmethod
@@ -411,7 +544,7 @@ class CalendarDataWriter (LocaleSourceEditor):
+ ','.join(('{:6d}',) * 3 + ('{:5d}',) * 6 + ('{:3d}',) * 6)
+ ' }},').format
def write(self, calendar, locales, names):
- months_data = StringData('months_data')
+ months_data = StringData('months_data', 16)
self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
self.writer.write(
@@ -435,11 +568,10 @@ class CalendarDataWriter (LocaleSourceEditor):
# Sequence of StringDataToken:
try:
# Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264)
- ranges = (tuple(months_data.append(m[calendar], 16) for m in
- (locale.standaloneLongMonths, locale.longMonths)) +
- tuple(months_data.append(m[calendar]) for m in
- (locale.standaloneShortMonths, locale.shortMonths,
- locale.standaloneNarrowMonths, locale.narrowMonths)))
+ ranges = tuple(months_data.append(m[calendar]) for m in
+ (locale.standaloneLongMonths, locale.longMonths,
+ locale.standaloneShortMonths, locale.shortMonths,
+ locale.standaloneNarrowMonths, locale.narrowMonths))
except ValueError as e:
e.args += (locale.language, locale.script, locale.territory)
raise
@@ -455,10 +587,27 @@ class CalendarDataWriter (LocaleSourceEditor):
self.writer.write('};\n')
months_data.write(self.writer)
+
+class TestLocaleWriter (LocaleSourceEditor):
+ def localeList(self, locales):
+ self.writer.write('const LocaleListItem g_locale_list[] = {\n')
+ from enumdata import language_map, territory_map
+ # TODO: update testlocales/ to include script.
+ # For now, only mention each (lang, land) pair once:
+ pairs = set((lang, land) for lang, script, land in locales)
+ for lang, script, land in locales:
+ if (lang, land) in pairs:
+ pairs.discard((lang, land))
+ langName = language_map[lang][0]
+ landName = territory_map[land][0]
+ self.writer.write(f' {{ {lang:6d},{land:6d} }}, // {langName}/{landName}\n')
+ self.writer.write('};\n\n')
+
+
class LocaleHeaderWriter (SourceFileEditor):
- def __init__(self, path, temp, dupes):
+ def __init__(self, path, temp, enumify):
super().__init__(path, temp)
- self.__dupes = dupes
+ self.__enumify = enumify
def languages(self, languages):
self.__enum('Language', languages, self.__language)
@@ -483,20 +632,10 @@ class LocaleHeaderWriter (SourceFileEditor):
if suffix is None:
suffix = name
- out, dupes = self.writer.write, self.__dupes
+ out, enumify = self.writer.write, self.__enumify
out(f' enum {name} : ushort {{\n')
for key, value in book.items():
- member = value[0].replace('-', ' ')
- if name == 'Script':
- # Don't .capitalize() as some names are already camel-case (see enumdata.py):
- member = ''.join(word[0].upper() + word[1:] for word in member.split())
- if not member.endswith('Script'):
- member += 'Script'
- if member in dupes:
- raise Error(f'The script name "{member}" is messy')
- else:
- member = ''.join(member.split())
- member = member + suffix if member in dupes else member
+ member = enumify(value[0], suffix)
out(f' {member} = {key},\n')
out('\n '
@@ -511,17 +650,29 @@ class LocaleHeaderWriter (SourceFileEditor):
out('\n };\n')
-def main(out, err):
+def main(argv, out, err):
+ """Updates QLocale's CLDR data from a QLocaleXML file.
+
+ Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+ arguments. In argv[1:] it expects the QLocaleXML file as first
+ parameter and the ISO 639-3 data table as second
+ parameter. Accepts the root of the qtbase checkout as third
+ parameter (default is inferred from this script's path) and a
+ --calendars option to select which calendars to support (all
+ available by default).
+
+ Updates various src/corelib/t*/q*_data_p.h files within the qtbase
+ checkout to contain data extracted from the QLocaleXML file."""
calendars_map = {
# CLDR name: Qt file name fragment
'gregorian': 'roman',
'persian': 'jalali',
'islamic': 'hijri',
- # 'hebrew': 'hebrew'
}
all_calendars = list(calendars_map.keys())
parser = argparse.ArgumentParser(
+ prog=Path(argv[0]).name,
description='Generate C++ code from CLDR data in QLocaleXML form.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('input_file', help='input XML file name',
@@ -533,7 +684,7 @@ def main(out, err):
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
choices=all_calendars, default=all_calendars)
- args = parser.parse_args()
+ args = parser.parse_args(argv[1:])
qlocalexml = args.input_file
qtsrcdir = Path(args.qtbase_path)
@@ -581,7 +732,7 @@ def main(out, err):
# qlocale.h
try:
with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
- qtsrcdir, reader.dupes) as writer:
+ qtsrcdir, reader.enumify) as writer:
writer.languages(reader.languages)
writer.scripts(reader.scripts)
writer.territories(reader.territories)
@@ -601,8 +752,31 @@ def main(out, err):
err.write(f'\nError updating qlocale.h: {e}\n')
return 1
+ # Locale-independent timezone data
+ try:
+ with TimeZoneDataWriter(qtsrcdir.joinpath(
+ 'src/corelib/time/qtimezoneprivate_data_p.h'),
+ qtsrcdir, reader.cldrVersion) as writer:
+ writer.aliasToIana(reader.aliasToIana())
+ writer.msLandIanas(reader.msLandIanas())
+ writer.msToIana(reader.msToIana())
+ writer.utcTable()
+ writer.writeTables()
+ except Exception as e:
+ err.write(f'\nError updating qtimezoneprivate_data_p.h: {e}\n')
+ return 1
+
+ # ./testlocales/localemodel.cpp
+ try:
+ path = 'util/locale_database/testlocales/localemodel.cpp'
+ with TestLocaleWriter(qtsrcdir.joinpath(path), qtsrcdir,
+ reader.cldrVersion) as test:
+ test.localeList(locale_keys)
+ except Exception as e:
+ err.write(f'\nError updating localemodel.cpp: {e}\n')
+
return 0
if __name__ == "__main__":
import sys
- sys.exit(main(sys.stdout, sys.stderr))
+ sys.exit(main(sys.argv, sys.stdout, sys.stderr))
diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp
index 9642bb36fa..7f0150c7e0 100644
--- a/util/locale_database/testlocales/localemodel.cpp
+++ b/util/locale_database/testlocales/localemodel.cpp
@@ -14,236 +14,658 @@ struct LocaleListItem
int territory;
};
+// GENERATED PART STARTS HERE
+
+/*
+ This part of the file was generated on 2024-04-22 from the
+ Common Locale Data Repository v44.1
+
+ http://www.unicode.org/cldr/
+
+ Do not edit this section: instead regenerate it using
+ cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
+ edited) CLDR data; see qtbase/util/locale_database/.
+*/
+
const LocaleListItem g_locale_list[] = {
{ 1, 0 }, // C/AnyTerritory
- { 3, 69 }, // Afan/Ethiopia
- { 3, 111 }, // Afan/Kenya
- { 4, 59 }, // Afar/Djibouti
- { 4, 67 }, // Afar/Eritrea
- { 4, 69 }, // Afar/Ethiopia
- { 5, 195 }, // Afrikaans/SouthAfrica
- { 5, 148 }, // Afrikaans/Namibia
- { 6, 2 }, // Albanian/Albania
- { 7, 69 }, // Amharic/Ethiopia
- { 8, 186 }, // Arabic/SaudiArabia
- { 8, 3 }, // Arabic/Algeria
- { 8, 17 }, // Arabic/Bahrain
- { 8, 64 }, // Arabic/Egypt
- { 8, 103 }, // Arabic/Iraq
- { 8, 109 }, // Arabic/Jordan
- { 8, 115 }, // Arabic/Kuwait
- { 8, 119 }, // Arabic/Lebanon
- { 8, 122 }, // Arabic/LibyanArabJamahiriya
- { 8, 145 }, // Arabic/Morocco
- { 8, 162 }, // Arabic/Oman
- { 8, 175 }, // Arabic/Qatar
- { 8, 201 }, // Arabic/Sudan
- { 8, 207 }, // Arabic/SyrianArabRepublic
- { 8, 216 }, // Arabic/Tunisia
- { 8, 223 }, // Arabic/UnitedArabEmirates
- { 8, 237 }, // Arabic/Yemen
- { 9, 11 }, // Armenian/Armenia
- { 10, 100 }, // Assamese/India
- { 12, 15 }, // Azerbaijani/Azerbaijan
- { 14, 197 }, // Basque/Spain
- { 15, 18 }, // Bengali/Bangladesh
- { 15, 100 }, // Bengali/India
- { 16, 25 }, // Bhutani/Bhutan
- { 20, 33 }, // Bulgarian/Bulgaria
- { 22, 20 }, // Byelorussian/Belarus
- { 23, 36 }, // Cambodian/Cambodia
- { 24, 197 }, // Catalan/Spain
- { 25, 44 }, // Chinese/China
- { 25, 97 }, // Chinese/HongKong
- { 25, 126 }, // Chinese/Macau
- { 25, 190 }, // Chinese/Singapore
- { 25, 208 }, // Chinese/Taiwan
- { 27, 54 }, // Croatian/Croatia
- { 28, 57 }, // Czech/CzechRepublic
- { 29, 58 }, // Danish/Denmark
- { 30, 151 }, // Dutch/Netherlands
- { 30, 21 }, // Dutch/Belgium
- { 31, 225 }, // English/UnitedStates
- { 31, 4 }, // English/AmericanSamoa
- { 31, 13 }, // English/Australia
- { 31, 21 }, // English/Belgium
- { 31, 22 }, // English/Belize
- { 31, 28 }, // English/Botswana
- { 31, 38 }, // English/Canada
- { 31, 89 }, // English/Guam
- { 31, 97 }, // English/HongKong
- { 31, 100 }, // English/India
- { 31, 104 }, // English/Ireland
- { 31, 107 }, // English/Jamaica
- { 31, 133 }, // English/Malta
- { 31, 134 }, // English/MarshallIslands
- { 31, 148 }, // English/Namibia
- { 31, 154 }, // English/NewZealand
- { 31, 160 }, // English/NorthernMarianaIslands
- { 31, 163 }, // English/Pakistan
- { 31, 170 }, // English/Philippines
- { 31, 190 }, // English/Singapore
- { 31, 195 }, // English/SouthAfrica
- { 31, 215 }, // English/TrinidadAndTobago
- { 31, 224 }, // English/UnitedKingdom
- { 31, 226 }, // English/UnitedStatesMinorOutlyingIslands
- { 31, 234 }, // English/USVirginIslands
- { 31, 240 }, // English/Zimbabwe
- { 33, 68 }, // Estonian/Estonia
- { 34, 71 }, // Faroese/FaroeIslands
- { 36, 73 }, // Finnish/Finland
- { 37, 74 }, // French/France
- { 37, 21 }, // French/Belgium
- { 37, 38 }, // French/Canada
- { 37, 125 }, // French/Luxembourg
- { 37, 142 }, // French/Monaco
- { 37, 206 }, // French/Switzerland
- { 40, 197 }, // Galician/Spain
- { 41, 81 }, // Georgian/Georgia
- { 42, 82 }, // German/Germany
- { 42, 14 }, // German/Austria
- { 42, 21 }, // German/Belgium
- { 42, 123 }, // German/Liechtenstein
- { 42, 125 }, // German/Luxembourg
- { 42, 206 }, // German/Switzerland
- { 43, 85 }, // Greek/Greece
- { 43, 56 }, // Greek/Cyprus
- { 44, 86 }, // Greenlandic/Greenland
- { 46, 100 }, // Gujarati/India
- { 47, 83 }, // Hausa/Ghana
- { 47, 156 }, // Hausa/Niger
- { 47, 157 }, // Hausa/Nigeria
- { 48, 105 }, // Hebrew/Israel
- { 49, 100 }, // Hindi/India
- { 50, 98 }, // Hungarian/Hungary
- { 51, 99 }, // Icelandic/Iceland
- { 52, 101 }, // Indonesian/Indonesia
- { 57, 104 }, // Irish/Ireland
- { 58, 106 }, // Italian/Italy
- { 58, 206 }, // Italian/Switzerland
- { 59, 108 }, // Japanese/Japan
- { 61, 100 }, // Kannada/India
- { 63, 110 }, // Kazakh/Kazakhstan
- { 64, 179 }, // Kinyarwanda/Rwanda
- { 65, 116 }, // Kirghiz/Kyrgyzstan
- { 66, 114 }, // Korean/RepublicOfKorea
- { 67, 102 }, // Kurdish/Iran
- { 67, 103 }, // Kurdish/Iraq
- { 67, 207 }, // Kurdish/SyrianArabRepublic
- { 67, 217 }, // Kurdish/Turkey
- { 69, 117 }, // Laothian/Lao
- { 71, 118 }, // Latvian/Latvia
- { 72, 49 }, // Lingala/DemocraticRepublicOfCongo
- { 72, 50 }, // Lingala/PeoplesRepublicOfCongo
- { 73, 124 }, // Lithuanian/Lithuania
- { 74, 127 }, // Macedonian/Macedonia
- { 76, 130 }, // Malay/Malaysia
- { 76, 32 }, // Malay/BruneiDarussalam
- { 77, 100 }, // Malayalam/India
- { 78, 133 }, // Maltese/Malta
- { 80, 100 }, // Marathi/India
- { 82, 143 }, // Mongolian/Mongolia
- { 84, 150 }, // Nepali/Nepal
- { 85, 161 }, // Norwegian/Norway
- { 87, 100 }, // Oriya/India
- { 88, 1 }, // Pashto/Afghanistan
- { 89, 102 }, // Persian/Iran
- { 89, 1 }, // Persian/Afghanistan
- { 90, 172 }, // Polish/Poland
- { 91, 173 }, // Portuguese/Portugal
- { 91, 30 }, // Portuguese/Brazil
- { 92, 100 }, // Punjabi/India
- { 92, 163 }, // Punjabi/Pakistan
- { 95, 177 }, // Romanian/Romania
- { 96, 178 }, // Russian/RussianFederation
- { 96, 222 }, // Russian/Ukraine
- { 99, 100 }, // Sanskrit/India
- { 100, 241 }, // Serbian/SerbiaAndMontenegro
- { 100, 27 }, // Serbian/BosniaAndHerzegowina
- { 100, 238 }, // Serbian/Yugoslavia
- { 101, 241 }, // SerboCroatian/SerbiaAndMontenegro
- { 101, 27 }, // SerboCroatian/BosniaAndHerzegowina
- { 101, 238 }, // SerboCroatian/Yugoslavia
- { 102, 195 }, // Sesotho/SouthAfrica
- { 103, 195 }, // Setswana/SouthAfrica
- { 107, 195 }, // Siswati/SouthAfrica
- { 108, 191 }, // Slovak/Slovakia
- { 109, 192 }, // Slovenian/Slovenia
- { 110, 194 }, // Somali/Somalia
- { 110, 59 }, // Somali/Djibouti
- { 110, 69 }, // Somali/Ethiopia
- { 110, 111 }, // Somali/Kenya
- { 111, 197 }, // Spanish/Spain
- { 111, 10 }, // Spanish/Argentina
- { 111, 26 }, // Spanish/Bolivia
- { 111, 43 }, // Spanish/Chile
- { 111, 47 }, // Spanish/Colombia
- { 111, 52 }, // Spanish/CostaRica
- { 111, 61 }, // Spanish/DominicanRepublic
- { 111, 63 }, // Spanish/Ecuador
- { 111, 65 }, // Spanish/ElSalvador
- { 111, 90 }, // Spanish/Guatemala
- { 111, 96 }, // Spanish/Honduras
- { 111, 139 }, // Spanish/Mexico
- { 111, 155 }, // Spanish/Nicaragua
- { 111, 166 }, // Spanish/Panama
- { 111, 168 }, // Spanish/Paraguay
- { 111, 169 }, // Spanish/Peru
- { 111, 174 }, // Spanish/PuertoRico
- { 111, 225 }, // Spanish/UnitedStates
- { 111, 227 }, // Spanish/Uruguay
- { 111, 231 }, // Spanish/Venezuela
- { 113, 111 }, // Swahili/Kenya
- { 113, 210 }, // Swahili/Tanzania
- { 114, 205 }, // Swedish/Sweden
- { 114, 73 }, // Swedish/Finland
- { 116, 209 }, // Tajik/Tajikistan
- { 117, 100 }, // Tamil/India
- { 118, 178 }, // Tatar/RussianFederation
- { 119, 100 }, // Telugu/India
- { 120, 211 }, // Thai/Thailand
- { 122, 67 }, // Tigrinya/Eritrea
- { 122, 69 }, // Tigrinya/Ethiopia
- { 124, 195 }, // Tsonga/SouthAfrica
- { 125, 217 }, // Turkish/Turkey
- { 129, 222 }, // Ukrainian/Ukraine
- { 130, 100 }, // Urdu/India
- { 130, 163 }, // Urdu/Pakistan
- { 131, 228 }, // Uzbek/Uzbekistan
- { 131, 1 }, // Uzbek/Afghanistan
- { 132, 232 }, // Vietnamese/VietNam
- { 134, 224 }, // Welsh/UnitedKingdom
- { 136, 195 }, // Xhosa/SouthAfrica
- { 138, 157 }, // Yoruba/Nigeria
- { 140, 195 }, // Zulu/SouthAfrica
- { 141, 161 }, // Nynorsk/Norway
- { 142, 27 }, // Bosnian/BosniaAndHerzegowina
- { 143, 131 }, // Divehi/Maldives
- { 144, 224 }, // Manx/UnitedKingdom
- { 145, 224 }, // Cornish/UnitedKingdom
- { 146, 83 }, // Akan/Ghana
- { 147, 100 }, // Konkani/India
- { 148, 83 }, // Ga/Ghana
- { 149, 157 }, // Igbo/Nigeria
- { 150, 111 }, // Kamba/Kenya
- { 151, 207 }, // Syriac/SyrianArabRepublic
- { 152, 67 }, // Blin/Eritrea
- { 153, 67 }, // Geez/Eritrea
- { 153, 69 }, // Geez/Ethiopia
- { 154, 157 }, // Koro/Nigeria
- { 155, 69 }, // Sidamo/Ethiopia
- { 156, 157 }, // Atsam/Nigeria
- { 157, 67 }, // Tigre/Eritrea
- { 158, 157 }, // Jju/Nigeria
- { 159, 106 }, // Friulian/Italy
- { 160, 195 }, // Venda/SouthAfrica
- { 161, 83 }, // Ewe/Ghana
- { 161, 212 }, // Ewe/Togo
- { 163, 225 }, // Hawaiian/UnitedStates
- { 164, 157 }, // Tyap/Nigeria
- { 165, 129 }, // Chewa/Malawi
+ { 2, 90 }, // Abkhazian/Georgia
+ { 3, 77 }, // Afar/Ethiopia
+ { 3, 67 }, // Afar/Djibouti
+ { 3, 74 }, // Afar/Eritrea
+ { 4, 216 }, // Afrikaans/South Africa
+ { 4, 162 }, // Afrikaans/Namibia
+ { 5, 40 }, // Aghem/Cameroon
+ { 6, 92 }, // Akan/Ghana
+ { 8, 40 }, // Akoose/Cameroon
+ { 9, 3 }, // Albanian/Albania
+ { 9, 126 }, // Albanian/Kosovo
+ { 9, 140 }, // Albanian/Macedonia
+ { 11, 77 }, // Amharic/Ethiopia
+ { 14, 71 }, // Arabic/Egypt
+ { 14, 4 }, // Arabic/Algeria
+ { 14, 19 }, // Arabic/Bahrain
+ { 14, 48 }, // Arabic/Chad
+ { 14, 55 }, // Arabic/Comoros
+ { 14, 67 }, // Arabic/Djibouti
+ { 14, 74 }, // Arabic/Eritrea
+ { 14, 113 }, // Arabic/Iraq
+ { 14, 116 }, // Arabic/Israel
+ { 14, 122 }, // Arabic/Jordan
+ { 14, 127 }, // Arabic/Kuwait
+ { 14, 132 }, // Arabic/Lebanon
+ { 14, 135 }, // Arabic/Libya
+ { 14, 149 }, // Arabic/Mauritania
+ { 14, 159 }, // Arabic/Morocco
+ { 14, 176 }, // Arabic/Oman
+ { 14, 180 }, // Arabic/Palestinian Territories
+ { 14, 190 }, // Arabic/Qatar
+ { 14, 205 }, // Arabic/Saudi Arabia
+ { 14, 215 }, // Arabic/Somalia
+ { 14, 219 }, // Arabic/South Sudan
+ { 14, 222 }, // Arabic/Sudan
+ { 14, 227 }, // Arabic/Syria
+ { 14, 238 }, // Arabic/Tunisia
+ { 14, 245 }, // Arabic/United Arab Emirates
+ { 14, 257 }, // Arabic/Western Sahara
+ { 14, 258 }, // Arabic/world
+ { 14, 259 }, // Arabic/Yemen
+ { 15, 220 }, // Aragonese/Spain
+ { 17, 12 }, // Armenian/Armenia
+ { 18, 110 }, // Assamese/India
+ { 19, 220 }, // Asturian/Spain
+ { 20, 230 }, // Asu/Tanzania
+ { 21, 169 }, // Atsam/Nigeria
+ { 25, 17 }, // Azerbaijani/Azerbaijan
+ { 25, 112 }, // Azerbaijani/Iran
+ { 25, 113 }, // Azerbaijani/Iraq
+ { 25, 239 }, // Azerbaijani/Turkey
+ { 26, 40 }, // Bafia/Cameroon
+ { 28, 145 }, // Bambara/Mali
+ { 30, 20 }, // Bangla/Bangladesh
+ { 30, 110 }, // Bangla/India
+ { 31, 40 }, // Basaa/Cameroon
+ { 32, 193 }, // Bashkir/Russia
+ { 33, 220 }, // Basque/Spain
+ { 35, 22 }, // Belarusian/Belarus
+ { 36, 260 }, // Bemba/Zambia
+ { 37, 230 }, // Bena/Tanzania
+ { 38, 110 }, // Bhojpuri/India
+ { 40, 74 }, // Blin/Eritrea
+ { 41, 110 }, // Bodo/India
+ { 42, 29 }, // Bosnian/Bosnia and Herzegovina
+ { 43, 84 }, // Breton/France
+ { 45, 36 }, // Bulgarian/Bulgaria
+ { 46, 161 }, // Burmese/Myanmar
+ { 47, 107 }, // Cantonese/Hong Kong
+ { 47, 50 }, // Cantonese/China
+ { 48, 220 }, // Catalan/Spain
+ { 48, 6 }, // Catalan/Andorra
+ { 48, 84 }, // Catalan/France
+ { 48, 117 }, // Catalan/Italy
+ { 49, 185 }, // Cebuano/Philippines
+ { 50, 159 }, // Central Atlas Tamazight/Morocco
+ { 51, 113 }, // Central Kurdish/Iraq
+ { 51, 112 }, // Central Kurdish/Iran
+ { 52, 20 }, // Chakma/Bangladesh
+ { 52, 110 }, // Chakma/India
+ { 54, 193 }, // Chechen/Russia
+ { 55, 248 }, // Cherokee/United States
+ { 56, 248 }, // Chickasaw/United States
+ { 57, 243 }, // Chiga/Uganda
+ { 58, 50 }, // Chinese/China
+ { 58, 107 }, // Chinese/Hong Kong
+ { 58, 139 }, // Chinese/Macao
+ { 58, 210 }, // Chinese/Singapore
+ { 58, 228 }, // Chinese/Taiwan
+ { 59, 193 }, // Church/Russia
+ { 60, 193 }, // Chuvash/Russia
+ { 61, 91 }, // Colognian/Germany
+ { 63, 246 }, // Cornish/United Kingdom
+ { 64, 84 }, // Corsican/France
+ { 66, 60 }, // Croatian/Croatia
+ { 66, 29 }, // Croatian/Bosnia and Herzegovina
+ { 67, 64 }, // Czech/Czechia
+ { 68, 65 }, // Danish/Denmark
+ { 68, 95 }, // Danish/Greenland
+ { 69, 144 }, // Divehi/Maldives
+ { 70, 110 }, // Dogri/India
+ { 71, 40 }, // Duala/Cameroon
+ { 72, 165 }, // Dutch/Netherlands
+ { 72, 13 }, // Dutch/Aruba
+ { 72, 23 }, // Dutch/Belgium
+ { 72, 44 }, // Dutch/Caribbean Netherlands
+ { 72, 62 }, // Dutch/Curacao
+ { 72, 211 }, // Dutch/Sint Maarten
+ { 72, 223 }, // Dutch/Suriname
+ { 73, 27 }, // Dzongkha/Bhutan
+ { 74, 124 }, // Embu/Kenya
+ { 75, 248 }, // English/United States
+ { 75, 5 }, // English/American Samoa
+ { 75, 8 }, // English/Anguilla
+ { 75, 10 }, // English/Antigua and Barbuda
+ { 75, 15 }, // English/Australia
+ { 75, 16 }, // English/Austria
+ { 75, 18 }, // English/Bahamas
+ { 75, 21 }, // English/Barbados
+ { 75, 23 }, // English/Belgium
+ { 75, 24 }, // English/Belize
+ { 75, 26 }, // English/Bermuda
+ { 75, 30 }, // English/Botswana
+ { 75, 33 }, // English/British Indian Ocean Territory
+ { 75, 34 }, // English/British Virgin Islands
+ { 75, 38 }, // English/Burundi
+ { 75, 40 }, // English/Cameroon
+ { 75, 41 }, // English/Canada
+ { 75, 45 }, // English/Cayman Islands
+ { 75, 51 }, // English/Christmas Island
+ { 75, 53 }, // English/Cocos Islands
+ { 75, 58 }, // English/Cook Islands
+ { 75, 63 }, // English/Cyprus
+ { 75, 65 }, // English/Denmark
+ { 75, 66 }, // English/Diego Garcia
+ { 75, 68 }, // English/Dominica
+ { 75, 74 }, // English/Eritrea
+ { 75, 76 }, // English/Eswatini
+ { 75, 78 }, // English/Europe
+ { 75, 80 }, // English/Falkland Islands
+ { 75, 82 }, // English/Fiji
+ { 75, 83 }, // English/Finland
+ { 75, 89 }, // English/Gambia
+ { 75, 91 }, // English/Germany
+ { 75, 92 }, // English/Ghana
+ { 75, 93 }, // English/Gibraltar
+ { 75, 96 }, // English/Grenada
+ { 75, 98 }, // English/Guam
+ { 75, 100 }, // English/Guernsey
+ { 75, 103 }, // English/Guyana
+ { 75, 107 }, // English/Hong Kong
+ { 75, 110 }, // English/India
+ { 75, 111 }, // English/Indonesia
+ { 75, 114 }, // English/Ireland
+ { 75, 115 }, // English/Isle of Man
+ { 75, 116 }, // English/Israel
+ { 75, 119 }, // English/Jamaica
+ { 75, 121 }, // English/Jersey
+ { 75, 124 }, // English/Kenya
+ { 75, 125 }, // English/Kiribati
+ { 75, 133 }, // English/Lesotho
+ { 75, 134 }, // English/Liberia
+ { 75, 139 }, // English/Macao
+ { 75, 141 }, // English/Madagascar
+ { 75, 142 }, // English/Malawi
+ { 75, 143 }, // English/Malaysia
+ { 75, 144 }, // English/Maldives
+ { 75, 146 }, // English/Malta
+ { 75, 147 }, // English/Marshall Islands
+ { 75, 150 }, // English/Mauritius
+ { 75, 153 }, // English/Micronesia
+ { 75, 158 }, // English/Montserrat
+ { 75, 162 }, // English/Namibia
+ { 75, 163 }, // English/Nauru
+ { 75, 165 }, // English/Netherlands
+ { 75, 167 }, // English/New Zealand
+ { 75, 169 }, // English/Nigeria
+ { 75, 171 }, // English/Niue
+ { 75, 172 }, // English/Norfolk Island
+ { 75, 173 }, // English/Northern Mariana Islands
+ { 75, 178 }, // English/Pakistan
+ { 75, 179 }, // English/Palau
+ { 75, 182 }, // English/Papua New Guinea
+ { 75, 185 }, // English/Philippines
+ { 75, 186 }, // English/Pitcairn
+ { 75, 189 }, // English/Puerto Rico
+ { 75, 194 }, // English/Rwanda
+ { 75, 196 }, // English/Saint Helena
+ { 75, 197 }, // English/Saint Kitts and Nevis
+ { 75, 198 }, // English/Saint Lucia
+ { 75, 201 }, // English/Saint Vincent and Grenadines
+ { 75, 202 }, // English/Samoa
+ { 75, 208 }, // English/Seychelles
+ { 75, 209 }, // English/Sierra Leone
+ { 75, 210 }, // English/Singapore
+ { 75, 211 }, // English/Sint Maarten
+ { 75, 213 }, // English/Slovenia
+ { 75, 214 }, // English/Solomon Islands
+ { 75, 216 }, // English/South Africa
+ { 75, 219 }, // English/South Sudan
+ { 75, 222 }, // English/Sudan
+ { 75, 225 }, // English/Sweden
+ { 75, 226 }, // English/Switzerland
+ { 75, 230 }, // English/Tanzania
+ { 75, 234 }, // English/Tokelau
+ { 75, 235 }, // English/Tonga
+ { 75, 236 }, // English/Trinidad and Tobago
+ { 75, 241 }, // English/Turks and Caicos Islands
+ { 75, 242 }, // English/Tuvalu
+ { 75, 243 }, // English/Uganda
+ { 75, 245 }, // English/United Arab Emirates
+ { 75, 246 }, // English/United Kingdom
+ { 75, 247 }, // English/United States Outlying Islands
+ { 75, 249 }, // English/United States Virgin Islands
+ { 75, 252 }, // English/Vanuatu
+ { 75, 258 }, // English/world
+ { 75, 260 }, // English/Zambia
+ { 75, 261 }, // English/Zimbabwe
+ { 76, 193 }, // Erzya/Russia
+ { 77, 258 }, // Esperanto/world
+ { 78, 75 }, // Estonian/Estonia
+ { 79, 92 }, // Ewe/Ghana
+ { 79, 233 }, // Ewe/Togo
+ { 80, 40 }, // Ewondo/Cameroon
+ { 81, 81 }, // Faroese/Faroe Islands
+ { 81, 65 }, // Faroese/Denmark
+ { 83, 185 }, // Filipino/Philippines
+ { 84, 83 }, // Finnish/Finland
+ { 85, 84 }, // French/France
+ { 85, 4 }, // French/Algeria
+ { 85, 23 }, // French/Belgium
+ { 85, 25 }, // French/Benin
+ { 85, 37 }, // French/Burkina Faso
+ { 85, 38 }, // French/Burundi
+ { 85, 40 }, // French/Cameroon
+ { 85, 41 }, // French/Canada
+ { 85, 46 }, // French/Central African Republic
+ { 85, 48 }, // French/Chad
+ { 85, 55 }, // French/Comoros
+ { 85, 56 }, // French/Congo - Brazzaville
+ { 85, 57 }, // French/Congo - Kinshasa
+ { 85, 67 }, // French/Djibouti
+ { 85, 73 }, // French/Equatorial Guinea
+ { 85, 85 }, // French/French Guiana
+ { 85, 86 }, // French/French Polynesia
+ { 85, 88 }, // French/Gabon
+ { 85, 97 }, // French/Guadeloupe
+ { 85, 102 }, // French/Guinea
+ { 85, 104 }, // French/Haiti
+ { 85, 118 }, // French/Ivory Coast
+ { 85, 138 }, // French/Luxembourg
+ { 85, 141 }, // French/Madagascar
+ { 85, 145 }, // French/Mali
+ { 85, 148 }, // French/Martinique
+ { 85, 149 }, // French/Mauritania
+ { 85, 150 }, // French/Mauritius
+ { 85, 151 }, // French/Mayotte
+ { 85, 155 }, // French/Monaco
+ { 85, 159 }, // French/Morocco
+ { 85, 166 }, // French/New Caledonia
+ { 85, 170 }, // French/Niger
+ { 85, 191 }, // French/Reunion
+ { 85, 194 }, // French/Rwanda
+ { 85, 195 }, // French/Saint Barthelemy
+ { 85, 199 }, // French/Saint Martin
+ { 85, 200 }, // French/Saint Pierre and Miquelon
+ { 85, 206 }, // French/Senegal
+ { 85, 208 }, // French/Seychelles
+ { 85, 226 }, // French/Switzerland
+ { 85, 227 }, // French/Syria
+ { 85, 233 }, // French/Togo
+ { 85, 238 }, // French/Tunisia
+ { 85, 252 }, // French/Vanuatu
+ { 85, 256 }, // French/Wallis and Futuna
+ { 86, 117 }, // Friulian/Italy
+ { 87, 206 }, // Fulah/Senegal
+ { 87, 37 }, // Fulah/Burkina Faso
+ { 87, 40 }, // Fulah/Cameroon
+ { 87, 89 }, // Fulah/Gambia
+ { 87, 92 }, // Fulah/Ghana
+ { 87, 101 }, // Fulah/Guinea-Bissau
+ { 87, 102 }, // Fulah/Guinea
+ { 87, 134 }, // Fulah/Liberia
+ { 87, 149 }, // Fulah/Mauritania
+ { 87, 169 }, // Fulah/Nigeria
+ { 87, 170 }, // Fulah/Niger
+ { 87, 209 }, // Fulah/Sierra Leone
+ { 88, 246 }, // Gaelic/United Kingdom
+ { 89, 92 }, // Ga/Ghana
+ { 90, 220 }, // Galician/Spain
+ { 91, 243 }, // Ganda/Uganda
+ { 92, 77 }, // Geez/Ethiopia
+ { 92, 74 }, // Geez/Eritrea
+ { 93, 90 }, // Georgian/Georgia
+ { 94, 91 }, // German/Germany
+ { 94, 16 }, // German/Austria
+ { 94, 23 }, // German/Belgium
+ { 94, 117 }, // German/Italy
+ { 94, 136 }, // German/Liechtenstein
+ { 94, 138 }, // German/Luxembourg
+ { 94, 226 }, // German/Switzerland
+ { 96, 94 }, // Greek/Greece
+ { 96, 63 }, // Greek/Cyprus
+ { 97, 183 }, // Guarani/Paraguay
+ { 98, 110 }, // Gujarati/India
+ { 99, 124 }, // Gusii/Kenya
+ { 101, 169 }, // Hausa/Nigeria
+ { 101, 222 }, // Hausa/Sudan
+ { 101, 92 }, // Hausa/Ghana
+ { 101, 170 }, // Hausa/Niger
+ { 102, 248 }, // Hawaiian/United States
+ { 103, 116 }, // Hebrew/Israel
+ { 105, 110 }, // Hindi/India
+ { 107, 108 }, // Hungarian/Hungary
+ { 108, 109 }, // Icelandic/Iceland
+ { 109, 258 }, // Ido/world
+ { 110, 169 }, // Igbo/Nigeria
+ { 111, 83 }, // Inari Sami/Finland
+ { 112, 111 }, // Indonesian/Indonesia
+ { 114, 258 }, // Interlingua/world
+ { 115, 75 }, // Interlingue/Estonia
+ { 116, 41 }, // Inuktitut/Canada
+ { 118, 114 }, // Irish/Ireland
+ { 118, 246 }, // Irish/United Kingdom
+ { 119, 117 }, // Italian/Italy
+ { 119, 203 }, // Italian/San Marino
+ { 119, 226 }, // Italian/Switzerland
+ { 119, 253 }, // Italian/Vatican City
+ { 120, 120 }, // Japanese/Japan
+ { 121, 111 }, // Javanese/Indonesia
+ { 122, 169 }, // Jju/Nigeria
+ { 123, 206 }, // Jola-Fonyi/Senegal
+ { 124, 43 }, // Kabuverdianu/Cape Verde
+ { 125, 4 }, // Kabyle/Algeria
+ { 126, 40 }, // Kako/Cameroon
+ { 127, 95 }, // Kalaallisut/Greenland
+ { 128, 124 }, // Kalenjin/Kenya
+ { 129, 124 }, // Kamba/Kenya
+ { 130, 110 }, // Kannada/India
+ { 132, 110 }, // Kashmiri/India
+ { 133, 123 }, // Kazakh/Kazakhstan
+ { 134, 40 }, // Kenyang/Cameroon
+ { 135, 39 }, // Khmer/Cambodia
+ { 136, 99 }, // Kiche/Guatemala
+ { 137, 124 }, // Kikuyu/Kenya
+ { 138, 194 }, // Kinyarwanda/Rwanda
+ { 141, 110 }, // Konkani/India
+ { 142, 218 }, // Korean/South Korea
+ { 142, 50 }, // Korean/China
+ { 142, 174 }, // Korean/North Korea
+ { 144, 145 }, // Koyraboro Senni/Mali
+ { 145, 145 }, // Koyra Chiini/Mali
+ { 146, 134 }, // Kpelle/Liberia
+ { 146, 102 }, // Kpelle/Guinea
+ { 148, 239 }, // Kurdish/Turkey
+ { 149, 40 }, // Kwasio/Cameroon
+ { 150, 128 }, // Kyrgyz/Kyrgyzstan
+ { 151, 248 }, // Lakota/United States
+ { 152, 230 }, // Langi/Tanzania
+ { 153, 129 }, // Lao/Laos
+ { 154, 253 }, // Latin/Vatican City
+ { 155, 131 }, // Latvian/Latvia
+ { 158, 57 }, // Lingala/Congo - Kinshasa
+ { 158, 7 }, // Lingala/Angola
+ { 158, 46 }, // Lingala/Central African Republic
+ { 158, 56 }, // Lingala/Congo - Brazzaville
+ { 160, 137 }, // Lithuanian/Lithuania
+ { 161, 258 }, // Lojban/world
+ { 162, 91 }, // Lower Sorbian/Germany
+ { 163, 91 }, // Low German/Germany
+ { 163, 165 }, // Low German/Netherlands
+ { 164, 57 }, // Luba-Katanga/Congo - Kinshasa
+ { 165, 225 }, // Lule Sami/Sweden
+ { 165, 175 }, // Lule Sami/Norway
+ { 166, 124 }, // Luo/Kenya
+ { 167, 138 }, // Luxembourgish/Luxembourg
+ { 168, 124 }, // Luyia/Kenya
+ { 169, 140 }, // Macedonian/Macedonia
+ { 170, 230 }, // Machame/Tanzania
+ { 171, 110 }, // Maithili/India
+ { 172, 160 }, // Makhuwa-Meetto/Mozambique
+ { 173, 230 }, // Makonde/Tanzania
+ { 174, 141 }, // Malagasy/Madagascar
+ { 175, 110 }, // Malayalam/India
+ { 176, 143 }, // Malay/Malaysia
+ { 176, 35 }, // Malay/Brunei
+ { 176, 111 }, // Malay/Indonesia
+ { 176, 210 }, // Malay/Singapore
+ { 177, 146 }, // Maltese/Malta
+ { 179, 110 }, // Manipuri/India
+ { 180, 115 }, // Manx/Isle of Man
+ { 181, 167 }, // Maori/New Zealand
+ { 182, 49 }, // Mapuche/Chile
+ { 183, 110 }, // Marathi/India
+ { 185, 124 }, // Masai/Kenya
+ { 185, 230 }, // Masai/Tanzania
+ { 186, 112 }, // Mazanderani/Iran
+ { 188, 124 }, // Meru/Kenya
+ { 189, 40 }, // Meta/Cameroon
+ { 190, 41 }, // Mohawk/Canada
+ { 191, 156 }, // Mongolian/Mongolia
+ { 191, 50 }, // Mongolian/China
+ { 192, 150 }, // Morisyen/Mauritius
+ { 193, 40 }, // Mundang/Cameroon
+ { 194, 248 }, // Muscogee/United States
+ { 195, 162 }, // Nama/Namibia
+ { 197, 248 }, // Navajo/United States
+ { 199, 164 }, // Nepali/Nepal
+ { 199, 110 }, // Nepali/India
+ { 201, 40 }, // Ngiemboon/Cameroon
+ { 202, 40 }, // Ngomba/Cameroon
+ { 203, 169 }, // Nigerian Pidgin/Nigeria
+ { 204, 102 }, // Nko/Guinea
+ { 205, 112 }, // Northern Luri/Iran
+ { 205, 113 }, // Northern Luri/Iraq
+ { 206, 175 }, // Northern Sami/Norway
+ { 206, 83 }, // Northern Sami/Finland
+ { 206, 225 }, // Northern Sami/Sweden
+ { 207, 216 }, // Northern Sotho/South Africa
+ { 208, 261 }, // North Ndebele/Zimbabwe
+ { 209, 175 }, // Norwegian Bokmal/Norway
+ { 209, 224 }, // Norwegian Bokmal/Svalbard and Jan Mayen
+ { 210, 175 }, // Norwegian Nynorsk/Norway
+ { 211, 219 }, // Nuer/South Sudan
+ { 212, 142 }, // Nyanja/Malawi
+ { 213, 243 }, // Nyankole/Uganda
+ { 214, 84 }, // Occitan/France
+ { 214, 220 }, // Occitan/Spain
+ { 215, 110 }, // Odia/India
+ { 220, 77 }, // Oromo/Ethiopia
+ { 220, 124 }, // Oromo/Kenya
+ { 221, 248 }, // Osage/United States
+ { 222, 90 }, // Ossetic/Georgia
+ { 222, 193 }, // Ossetic/Russia
+ { 226, 62 }, // Papiamento/Curacao
+ { 226, 13 }, // Papiamento/Aruba
+ { 227, 1 }, // Pashto/Afghanistan
+ { 227, 178 }, // Pashto/Pakistan
+ { 228, 112 }, // Persian/Iran
+ { 228, 1 }, // Persian/Afghanistan
+ { 230, 187 }, // Polish/Poland
+ { 231, 32 }, // Portuguese/Brazil
+ { 231, 7 }, // Portuguese/Angola
+ { 231, 43 }, // Portuguese/Cape Verde
+ { 231, 73 }, // Portuguese/Equatorial Guinea
+ { 231, 101 }, // Portuguese/Guinea-Bissau
+ { 231, 138 }, // Portuguese/Luxembourg
+ { 231, 139 }, // Portuguese/Macao
+ { 231, 160 }, // Portuguese/Mozambique
+ { 231, 188 }, // Portuguese/Portugal
+ { 231, 204 }, // Portuguese/Sao Tome and Principe
+ { 231, 226 }, // Portuguese/Switzerland
+ { 231, 232 }, // Portuguese/Timor-Leste
+ { 232, 187 }, // Prussian/Poland
+ { 233, 110 }, // Punjabi/India
+ { 233, 178 }, // Punjabi/Pakistan
+ { 234, 184 }, // Quechua/Peru
+ { 234, 28 }, // Quechua/Bolivia
+ { 234, 70 }, // Quechua/Ecuador
+ { 235, 192 }, // Romanian/Romania
+ { 235, 154 }, // Romanian/Moldova
+ { 236, 226 }, // Romansh/Switzerland
+ { 237, 230 }, // Rombo/Tanzania
+ { 238, 38 }, // Rundi/Burundi
+ { 239, 193 }, // Russian/Russia
+ { 239, 22 }, // Russian/Belarus
+ { 239, 123 }, // Russian/Kazakhstan
+ { 239, 128 }, // Russian/Kyrgyzstan
+ { 239, 154 }, // Russian/Moldova
+ { 239, 244 }, // Russian/Ukraine
+ { 240, 230 }, // Rwa/Tanzania
+ { 241, 74 }, // Saho/Eritrea
+ { 242, 193 }, // Sakha/Russia
+ { 243, 124 }, // Samburu/Kenya
+ { 245, 46 }, // Sango/Central African Republic
+ { 246, 230 }, // Sangu/Tanzania
+ { 247, 110 }, // Sanskrit/India
+ { 248, 110 }, // Santali/India
+ { 249, 117 }, // Sardinian/Italy
+ { 251, 160 }, // Sena/Mozambique
+ { 252, 207 }, // Serbian/Serbia
+ { 252, 29 }, // Serbian/Bosnia and Herzegovina
+ { 252, 126 }, // Serbian/Kosovo
+ { 252, 157 }, // Serbian/Montenegro
+ { 253, 230 }, // Shambala/Tanzania
+ { 254, 261 }, // Shona/Zimbabwe
+ { 255, 50 }, // Sichuan Yi/China
+ { 256, 117 }, // Sicilian/Italy
+ { 257, 77 }, // Sidamo/Ethiopia
+ { 258, 187 }, // Silesian/Poland
+ { 259, 178 }, // Sindhi/Pakistan
+ { 259, 110 }, // Sindhi/India
+ { 260, 221 }, // Sinhala/Sri Lanka
+ { 261, 83 }, // Skolt Sami/Finland
+ { 262, 212 }, // Slovak/Slovakia
+ { 263, 213 }, // Slovenian/Slovenia
+ { 264, 243 }, // Soga/Uganda
+ { 265, 215 }, // Somali/Somalia
+ { 265, 67 }, // Somali/Djibouti
+ { 265, 77 }, // Somali/Ethiopia
+ { 265, 124 }, // Somali/Kenya
+ { 266, 112 }, // Southern Kurdish/Iran
+ { 266, 113 }, // Southern Kurdish/Iraq
+ { 267, 225 }, // Southern Sami/Sweden
+ { 267, 175 }, // Southern Sami/Norway
+ { 268, 216 }, // Southern Sotho/South Africa
+ { 268, 133 }, // Southern Sotho/Lesotho
+ { 269, 216 }, // South Ndebele/South Africa
+ { 270, 220 }, // Spanish/Spain
+ { 270, 11 }, // Spanish/Argentina
+ { 270, 24 }, // Spanish/Belize
+ { 270, 28 }, // Spanish/Bolivia
+ { 270, 32 }, // Spanish/Brazil
+ { 270, 42 }, // Spanish/Canary Islands
+ { 270, 47 }, // Spanish/Ceuta and Melilla
+ { 270, 49 }, // Spanish/Chile
+ { 270, 54 }, // Spanish/Colombia
+ { 270, 59 }, // Spanish/Costa Rica
+ { 270, 61 }, // Spanish/Cuba
+ { 270, 69 }, // Spanish/Dominican Republic
+ { 270, 70 }, // Spanish/Ecuador
+ { 270, 72 }, // Spanish/El Salvador
+ { 270, 73 }, // Spanish/Equatorial Guinea
+ { 270, 99 }, // Spanish/Guatemala
+ { 270, 106 }, // Spanish/Honduras
+ { 270, 130 }, // Spanish/Latin America
+ { 270, 152 }, // Spanish/Mexico
+ { 270, 168 }, // Spanish/Nicaragua
+ { 270, 181 }, // Spanish/Panama
+ { 270, 183 }, // Spanish/Paraguay
+ { 270, 184 }, // Spanish/Peru
+ { 270, 185 }, // Spanish/Philippines
+ { 270, 189 }, // Spanish/Puerto Rico
+ { 270, 248 }, // Spanish/United States
+ { 270, 250 }, // Spanish/Uruguay
+ { 270, 254 }, // Spanish/Venezuela
+ { 271, 159 }, // Standard Moroccan Tamazight/Morocco
+ { 272, 111 }, // Sundanese/Indonesia
+ { 273, 230 }, // Swahili/Tanzania
+ { 273, 57 }, // Swahili/Congo - Kinshasa
+ { 273, 124 }, // Swahili/Kenya
+ { 273, 243 }, // Swahili/Uganda
+ { 274, 216 }, // Swati/South Africa
+ { 274, 76 }, // Swati/Eswatini
+ { 275, 225 }, // Swedish/Sweden
+ { 275, 2 }, // Swedish/Aland Islands
+ { 275, 83 }, // Swedish/Finland
+ { 276, 226 }, // Swiss German/Switzerland
+ { 276, 84 }, // Swiss German/France
+ { 276, 136 }, // Swiss German/Liechtenstein
+ { 277, 113 }, // Syriac/Iraq
+ { 277, 227 }, // Syriac/Syria
+ { 278, 159 }, // Tachelhit/Morocco
+ { 280, 255 }, // Tai Dam/Vietnam
+ { 281, 124 }, // Taita/Kenya
+ { 282, 229 }, // Tajik/Tajikistan
+ { 283, 110 }, // Tamil/India
+ { 283, 143 }, // Tamil/Malaysia
+ { 283, 210 }, // Tamil/Singapore
+ { 283, 221 }, // Tamil/Sri Lanka
+ { 284, 228 }, // Taroko/Taiwan
+ { 285, 170 }, // Tasawaq/Niger
+ { 286, 193 }, // Tatar/Russia
+ { 287, 110 }, // Telugu/India
+ { 288, 243 }, // Teso/Uganda
+ { 288, 124 }, // Teso/Kenya
+ { 289, 231 }, // Thai/Thailand
+ { 290, 50 }, // Tibetan/China
+ { 290, 110 }, // Tibetan/India
+ { 291, 74 }, // Tigre/Eritrea
+ { 292, 77 }, // Tigrinya/Ethiopia
+ { 292, 74 }, // Tigrinya/Eritrea
+ { 294, 182 }, // Tok Pisin/Papua New Guinea
+ { 295, 235 }, // Tongan/Tonga
+ { 296, 216 }, // Tsonga/South Africa
+ { 297, 216 }, // Tswana/South Africa
+ { 297, 30 }, // Tswana/Botswana
+ { 298, 239 }, // Turkish/Turkey
+ { 298, 63 }, // Turkish/Cyprus
+ { 299, 240 }, // Turkmen/Turkmenistan
+ { 301, 169 }, // Tyap/Nigeria
+ { 303, 244 }, // Ukrainian/Ukraine
+ { 304, 91 }, // Upper Sorbian/Germany
+ { 305, 178 }, // Urdu/Pakistan
+ { 305, 110 }, // Urdu/India
+ { 306, 50 }, // Uyghur/China
+ { 307, 251 }, // Uzbek/Uzbekistan
+ { 307, 1 }, // Uzbek/Afghanistan
+ { 308, 134 }, // Vai/Liberia
+ { 309, 216 }, // Venda/South Africa
+ { 310, 255 }, // Vietnamese/Vietnam
+ { 311, 258 }, // Volapuk/world
+ { 312, 230 }, // Vunjo/Tanzania
+ { 313, 23 }, // Walloon/Belgium
+ { 314, 226 }, // Walser/Switzerland
+ { 315, 15 }, // Warlpiri/Australia
+ { 316, 246 }, // Welsh/United Kingdom
+ { 317, 178 }, // Western Balochi/Pakistan
+ { 317, 1 }, // Western Balochi/Afghanistan
+ { 317, 112 }, // Western Balochi/Iran
+ { 317, 176 }, // Western Balochi/Oman
+ { 317, 245 }, // Western Balochi/United Arab Emirates
+ { 318, 165 }, // Western Frisian/Netherlands
+ { 319, 77 }, // Wolaytta/Ethiopia
+ { 320, 206 }, // Wolof/Senegal
+ { 321, 216 }, // Xhosa/South Africa
+ { 322, 40 }, // Yangben/Cameroon
+ { 323, 244 }, // Yiddish/Ukraine
+ { 324, 169 }, // Yoruba/Nigeria
+ { 324, 25 }, // Yoruba/Benin
+ { 325, 170 }, // Zarma/Niger
+ { 326, 50 }, // Zhuang/China
+ { 327, 216 }, // Zulu/South Africa
+ { 328, 32 }, // Kaingang/Brazil
+ { 329, 32 }, // Nheengatu/Brazil
+ { 329, 54 }, // Nheengatu/Colombia
+ { 329, 254 }, // Nheengatu/Venezuela
+ { 330, 110 }, // Haryanvi/India
+ { 331, 91 }, // Northern Frisian/Germany
+ { 332, 110 }, // Rajasthani/India
+ { 333, 193 }, // Moksha/Russia
+ { 334, 258 }, // Toki Pona/world
+ { 335, 214 }, // Pijin/Solomon Islands
+ { 336, 169 }, // Obolo/Nigeria
+ { 337, 178 }, // Baluchi/Pakistan
+ { 338, 117 }, // Ligurian/Italy
+ { 339, 161 }, // Rohingya/Myanmar
+ { 339, 20 }, // Rohingya/Bangladesh
+ { 340, 178 }, // Torwali/Pakistan
+ { 341, 25 }, // Anii/Benin
+ { 342, 110 }, // Kangri/India
+ { 343, 117 }, // Venetian/Italy
};
-static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
+
+// GENERATED PART ENDS HERE
+
+static const int g_locale_list_count = std::size(g_locale_list);
LocaleModel::LocaleModel(QObject *parent)
: QAbstractItemModel(parent)
@@ -258,7 +680,7 @@ LocaleModel::LocaleModel(QObject *parent)
QVariant LocaleModel::data(const QModelIndex &index, int role) const
{
if (!index.isValid()
- || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole
+ || (role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole)
|| index.column() >= g_model_cols
|| index.row() >= g_locale_list_count + 2)
return QVariant();
@@ -399,9 +821,9 @@ int LocaleModel::rowCount(const QModelIndex &parent) const
Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
{
if (!index.isValid())
- return 0;
+ return {};
if (index.row() == 0 && index.column() == g_model_cols - 1)
- return 0;
+ return {};
if (index.row() == 0)
return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
return QAbstractItemModel::flags(index);
@@ -413,7 +835,7 @@ bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int r
|| index.row() != 0
|| index.column() >= g_model_cols - 1
|| role != Qt::EditRole
- || m_data_list.at(index.column()).type() != value.type())
+ || m_data_list.at(index.column()).typeId() != value.typeId())
return false;
m_data_list[index.column()] = value;
diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h
index 666ea8493d..a0ba45bb15 100644
--- a/util/locale_database/testlocales/localemodel.h
+++ b/util/locale_database/testlocales/localemodel.h
@@ -13,17 +13,17 @@ class LocaleModel : public QAbstractItemModel
public:
LocaleModel(QObject *parent = nullptr);
- virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
- virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
- virtual QModelIndex index(int row, int column,
- const QModelIndex &parent = QModelIndex()) const;
- virtual QModelIndex parent(const QModelIndex &index) const;
- virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
- virtual QVariant headerData(int section, Qt::Orientation orientation,
- int role = Qt::DisplayRole ) const;
- virtual Qt::ItemFlags flags(const QModelIndex &index) const;
- virtual bool setData(const QModelIndex &index, const QVariant &value,
- int role = Qt::EditRole);
+ int columnCount(const QModelIndex &parent = QModelIndex()) const override;
+ QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override;
+ QModelIndex index(int row, int column,
+ const QModelIndex &parent = QModelIndex()) const override;
+ QModelIndex parent(const QModelIndex &index) const override;
+ int rowCount(const QModelIndex &parent = QModelIndex()) const override;
+ QVariant headerData(int section, Qt::Orientation orientation,
+ int role = Qt::DisplayRole ) const override;
+ Qt::ItemFlags flags(const QModelIndex &index) const override;
+ bool setData(const QModelIndex &index, const QVariant &value,
+ int role = Qt::EditRole) override;
private:
QList<QVariant> m_data_list;
};
diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp
index 50b9b81594..df8a3c28ab 100644
--- a/util/locale_database/testlocales/localewidget.cpp
+++ b/util/locale_database/testlocales/localewidget.cpp
@@ -2,7 +2,7 @@
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
#include <QTableView>
#include <QVBoxLayout>
-#include <QItemDelegate>
+#include <QStyledItemDelegate>
#include <QItemEditorFactory>
#include <QDoubleSpinBox>
@@ -26,26 +26,26 @@ public:
class EditorFactory : public QItemEditorFactory
{
public:
- EditorFactory() {
- static DoubleEditorCreator double_editor_creator;
- registerEditor(QVariant::Double, &double_editor_creator);
+ EditorFactory()
+ {
+ // registerEditor() assumes ownership of the creator.
+ registerEditor(QVariant::Double, new DoubleEditorCreator);
}
};
LocaleWidget::LocaleWidget(QWidget *parent)
- : QWidget(parent)
+ : QWidget(parent),
+ m_model(new LocaleModel(this)),
+ m_view(new QTableView(this))
{
- m_model = new LocaleModel(this);
- m_view = new QTableView(this);
-
- QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
+ QStyledItemDelegate *delegate = qobject_cast<QStyledItemDelegate*>(m_view->itemDelegate());
Q_ASSERT(delegate != 0);
- static EditorFactory editor_factory;
- delegate->setItemEditorFactory(&editor_factory);
+ static EditorFactory editorFactory;
+ delegate->setItemEditorFactory(&editorFactory);
m_view->setModel(m_model);
QVBoxLayout *layout = new QVBoxLayout(this);
- layout->setMargin(0);
+ layout->setContentsMargins(0, 0, 0, 0);
layout->addWidget(m_view);
}
diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro
index a9a6247f96..126c19589b 100644
--- a/util/locale_database/testlocales/testlocales.pro
+++ b/util/locale_database/testlocales/testlocales.pro
@@ -1,4 +1,5 @@
TARGET = testlocales
CONFIG += debug
+QT += widgets
SOURCES += localemodel.cpp localewidget.cpp main.cpp
-HEADERS += localemodel.h localewidget.h \ No newline at end of file
+HEADERS += localemodel.h localewidget.h
diff --git a/util/locale_database/zonedata.py b/util/locale_database/zonedata.py
new file mode 100644
index 0000000000..b73290f330
--- /dev/null
+++ b/util/locale_database/zonedata.py
@@ -0,0 +1,227 @@
+# Copyright (C) 2024 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+"""Data relating to timezones and CLDR.
+
+This data is not part of any public API, so may safely be changed.
+Contrast enumdata.py, whose data is part of the public API.
+
+Scripts digesting CLDR data shall report the updates you need to make,
+if any arise.
+
+The windowsIdList is a list of pairs (ID, offset), associating each
+Windows-specific ID for a zone with that zone's offset from UTC, in
+seconds. Entries are sorted in case-insensitive lexical order by
+ID. If a script reports that it has found a Windows ID not listed
+here, research the relevant zone's offset and add a new entry to the
+list of pairs, preserving the ordering. Internet search engines and
+timeanddate.com can help with researching the offset. Note that some
+UTC offset zones (giving only the hour) are present in windowsIdList.
+When adding an entry to windowsIdList, check whether its offset
+corresponds to that of some entry in utcIdList; if not, add such an
+entry.
+
+The utcIdList is a simple list of various UTC-offset names. Aside from
+'UTC' itself, shared with windowsIdList, these include minutes in
+their offsets even when they are whole hour offsets. The list contains
+the UTC-equivalents of all offsets seen in the windowsIdList, plus the
+whole hours out to ±14 hours, the two verbose forms of UTC±00:00 and
+any legacy entries from past Windows zone offsets. Entries should not
+be removed, even if the relevant Windows ID becomes obsolete or
+switches to a different offset, as they make up the available zones of
+the UTC back-end. (That recognizes other offset zones, and its
+is-available check will accept them, but it leaves them out of its
+list. There are, after all, thousands of possible offset zones, but
+relatively few are widely used.)
+
+Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a
+way to indicate that a date-time has been converted to UTC but its use
+should not be understood to say anything about the local time of the
+origin of the message using it. However, ISO 8601 has, since 2000,
+forbidden this as an offset suffix. The more recent compromise is to
+use Z to convey the meaning RFC3339 gave to -00:00. So the use of
+-00:00 as offset suffix should be avoided (and, by extension, likewise
+for UTC-00:00 as a zone ID), but this suffix (and ID) should be
+recognized when consuming data generated by other sources, for
+backwards compatibility.
+
+"""
+
+# Do not remove IDs: each entry is part of the API/behavior guarantee. Group IDs for
+# the same offset, preferred first, as separate strings (TODO confirm: not space-joined).
+utcIdList = (
+ 'UTC-14:00',
+ 'UTC-13:00',
+ 'UTC-12:00',
+ 'UTC-11:00',
+ 'UTC-10:00', 'UTC-09:30',
+ 'UTC-09:00',
+ 'UTC-08:00',
+ 'UTC-07:00',
+ 'UTC-06:00',
+ 'UTC-05:00', 'UTC-04:30',
+ 'UTC-04:00', 'UTC-03:30',
+ 'UTC-03:00',
+ 'UTC-02:00',
+ 'UTC-01:00',
+ # UTC goes first (among zero-offset IDs) so that it is the default:
+ 'UTC', 'UTC+00:00', 'UTC-00:00',
+ 'UTC+01:00',
+ 'UTC+02:00',
+ 'UTC+03:00', 'UTC+03:30',
+ 'UTC+04:00', 'UTC+04:30',
+ 'UTC+05:00', 'UTC+05:30', 'UTC+05:45',
+ 'UTC+06:00', 'UTC+06:30',
+ 'UTC+07:00',
+ 'UTC+08:00', 'UTC+08:30', 'UTC+08:45',
+ 'UTC+09:00', 'UTC+09:30',
+ 'UTC+10:00', 'UTC+10:30',
+ 'UTC+11:00',
+ 'UTC+12:00', 'UTC+12:45',
+ 'UTC+13:00',
+ 'UTC+14:00',
+)
+
+# (Windows ID, offset from UTC in seconds), sorted case-insensitively by ID.
+windowsIdList = (
+ ('Afghanistan Standard Time', 16200), # +04:30
+ ('Alaskan Standard Time', -32400),
+ ('Aleutian Standard Time', -36000),
+ ('Altai Standard Time', 25200),
+ ('Arab Standard Time', 10800),
+ ('Arabian Standard Time', 14400),
+ ('Arabic Standard Time', 10800),
+ ('Argentina Standard Time', -10800),
+ ('Astrakhan Standard Time', 14400),
+ ('Atlantic Standard Time', -14400),
+ ('AUS Central Standard Time', 34200), # +09:30
+ ('Aus Central W. Standard Time', 31500), # +08:45
+ ('AUS Eastern Standard Time', 36000),
+ ('Azerbaijan Standard Time', 14400),
+ ('Azores Standard Time', -3600),
+ ('Bahia Standard Time', -10800),
+ ('Bangladesh Standard Time', 21600),
+ ('Belarus Standard Time', 10800),
+ ('Bougainville Standard Time', 39600),
+ ('Canada Central Standard Time', -21600),
+ ('Cape Verde Standard Time', -3600),
+ ('Caucasus Standard Time', 14400),
+ ('Cen. Australia Standard Time', 34200), # +09:30
+ ('Central America Standard Time', -21600),
+ ('Central Asia Standard Time', 21600),
+ ('Central Brazilian Standard Time', -14400),
+ ('Central Europe Standard Time', 3600),
+ ('Central European Standard Time', 3600),
+ ('Central Pacific Standard Time', 39600),
+ ('Central Standard Time', -21600),
+ ('Central Standard Time (Mexico)', -21600),
+ ('Chatham Islands Standard Time', 45900), # +12:45
+ ('China Standard Time', 28800),
+ ('Cuba Standard Time', -18000),
+ ('Dateline Standard Time', -43200),
+ ('E. Africa Standard Time', 10800),
+ ('E. Australia Standard Time', 36000),
+ ('E. Europe Standard Time', 7200),
+ ('E. South America Standard Time', -10800),
+ ('Easter Island Standard Time', -21600),
+ ('Eastern Standard Time', -18000),
+ ('Eastern Standard Time (Mexico)', -18000),
+ ('Egypt Standard Time', 7200),
+ ('Ekaterinburg Standard Time', 18000),
+ ('Fiji Standard Time', 43200),
+ ('FLE Standard Time', 7200),
+ ('Georgian Standard Time', 14400),
+ ('GMT Standard Time', 0),
+ ('Greenland Standard Time', -10800),
+ ('Greenwich Standard Time', 0),
+ ('GTB Standard Time', 7200),
+ ('Haiti Standard Time', -18000),
+ ('Hawaiian Standard Time', -36000),
+ ('India Standard Time', 19800), # +05:30
+ ('Iran Standard Time', 12600), # +03:30
+ ('Israel Standard Time', 7200),
+ ('Jordan Standard Time', 7200),
+ ('Kaliningrad Standard Time', 7200),
+ ('Korea Standard Time', 32400),
+ ('Libya Standard Time', 7200),
+ ('Line Islands Standard Time', 50400),
+ ('Lord Howe Standard Time', 37800), # +10:30
+ ('Magadan Standard Time', 36000),
+ ('Magallanes Standard Time', -10800), # permanent DST
+ ('Marquesas Standard Time', -34200), # -09:30
+ ('Mauritius Standard Time', 14400),
+ ('Middle East Standard Time', 7200),
+ ('Montevideo Standard Time', -10800),
+ ('Morocco Standard Time', 0),
+ ('Mountain Standard Time', -25200),
+ ('Mountain Standard Time (Mexico)', -25200),
+ ('Myanmar Standard Time', 23400), # +06:30
+ ('N. Central Asia Standard Time', 21600),
+ ('Namibia Standard Time', 3600),
+ ('Nepal Standard Time', 20700), # +05:45
+ ('New Zealand Standard Time', 43200),
+ ('Newfoundland Standard Time', -12600), # -03:30
+ ('Norfolk Standard Time', 39600),
+ ('North Asia East Standard Time', 28800),
+ ('North Asia Standard Time', 25200),
+ ('North Korea Standard Time', 30600), # +08:30
+ ('Omsk Standard Time', 21600),
+ ('Pacific SA Standard Time', -10800),
+ ('Pacific Standard Time', -28800),
+ ('Pacific Standard Time (Mexico)', -28800),
+ ('Pakistan Standard Time', 18000),
+ ('Paraguay Standard Time', -14400),
+ ('Qyzylorda Standard Time', 18000), # a.k.a. Kyzylorda, in Kazakhstan
+ ('Romance Standard Time', 3600),
+ ('Russia Time Zone 10', 39600),
+ ('Russia Time Zone 11', 43200),
+ ('Russia Time Zone 3', 14400),
+ ('Russian Standard Time', 10800),
+ ('SA Eastern Standard Time', -10800),
+ ('SA Pacific Standard Time', -18000),
+ ('SA Western Standard Time', -14400),
+ ('Saint Pierre Standard Time', -10800), # New France
+ ('Sakhalin Standard Time', 39600),
+ ('Samoa Standard Time', 46800),
+ ('Sao Tome Standard Time', 0),
+ ('Saratov Standard Time', 14400),
+ ('SE Asia Standard Time', 25200),
+ ('Singapore Standard Time', 28800),
+ ('South Africa Standard Time', 7200),
+ ('South Sudan Standard Time', 7200),
+ ('Sri Lanka Standard Time', 19800), # +05:30
+ ('Sudan Standard Time', 7200), # unless they mean South Sudan, +03:00
+ ('Syria Standard Time', 7200),
+ ('Taipei Standard Time', 28800),
+ ('Tasmania Standard Time', 36000),
+ ('Tocantins Standard Time', -10800),
+ ('Tokyo Standard Time', 32400),
+ ('Tomsk Standard Time', 25200),
+ ('Tonga Standard Time', 46800),
+ ('Transbaikal Standard Time', 32400), # Yakutsk
+ ('Turkey Standard Time', 7200),
+ ('Turks And Caicos Standard Time', -14400),
+ ('Ulaanbaatar Standard Time', 28800),
+ ('US Eastern Standard Time', -18000),
+ ('US Mountain Standard Time', -25200),
+ ('UTC', 0),
+ # Lexical order: '+' < '-'
+ ('UTC+12', 43200),
+ ('UTC+13', 46800),
+ ('UTC-02', -7200),
+ ('UTC-08', -28800),
+ ('UTC-09', -32400),
+ ('UTC-11', -39600),
+ ('Venezuela Standard Time', -16200), # -04:30
+ ('Vladivostok Standard Time', 36000),
+ ('Volgograd Standard Time', 14400),
+ ('W. Australia Standard Time', 28800),
+ ('W. Central Africa Standard Time', 3600),
+ ('W. Europe Standard Time', 3600),
+ ('W. Mongolia Standard Time', 25200), # Hovd
+ ('West Asia Standard Time', 18000),
+ ('West Bank Standard Time', 7200),
+ ('West Pacific Standard Time', 36000),
+ ('Yakutsk Standard Time', 32400),
+ ('Yukon Standard Time', -25200), # Non-DST Mountain Standard Time since 2020-11-01
+)