17 files changed, 2308 insertions, 1695 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 9b08d8a652..9e0bae9667 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -1,31 +1,5 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Digesting the CLDR's data.
 
 Provides two classes:
@@ -36,15 +10,17 @@ The former should normally be all you need to access.
 See individual classes for further detail.
 """
 
+from typing import Iterable, TextIO
 from xml.dom import minidom
 from weakref import WeakValueDictionary as CacheDict
-import os
+from pathlib import Path
 
 from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
 from qlocalexml import Locale
 
 class CldrReader (object):
-    def __init__(self, root, grumble = lambda msg: None, whitter = lambda msg: None):
+    def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
         """Set up a reader object for reading CLDR data.
 
         Single parameter, root, is the file-system path to the root of
@@ -67,7 +43,7 @@ class CldrReader (object):
         Yields pairs (have, give) of 4-tuples; if what you have
         matches the left member, giving the right member is probably
         sensible. Each 4-tuple's entries are the full names of a
-        language, a script, a country (strictly territory) and a
+        language, a script, a territory (usually a country) and a
         variant (currently ignored)."""
         skips = []
         for got, use in self.root.likelySubTags():
@@ -79,7 +55,7 @@ class CldrReader (object):
                     and e.message.startswith('Unknown ') and ' code ' in e.message):
                     skips.append(use)
                 else:
-                    self.grumble('Skipping likelySubtag "{}" -> "{}" ({})\n'.format(got, use, e.message))
+                    self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
                 continue
             if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
                 continue
@@ -99,50 +75,49 @@ class CldrReader (object):
             pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
 
     def readLocales(self, calendars = ('gregorian',)):
-        locales = tuple(self.__allLocales(calendars))
-        return dict(((k.language_id, k.script_id, k.country_id, k.variant_code),
-                     k) for k in locales)
+        return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
+                for k in self.__allLocales(calendars)}
 
     def __allLocales(self, calendars):
         def skip(locale, reason):
-            return 'Skipping defaultContent locale "{}" ({})\n'.format(locale, reason)
+            return f'Skipping defaultContent locale "{locale}" ({reason})\n'
 
         for locale in self.root.defaultContentLocales:
             try:
-                language, script, country, variant = self.__splitLocale(locale)
+                language, script, territory, variant = self.__splitLocale(locale)
             except ValueError:
                 self.whitter(skip(locale, 'only language tag'))
                 continue
 
-            if not (script or country):
+            if not (script or territory):
                 self.grumble(skip(locale, 'second tag is neither script nor territory'))
                 continue
 
-            if not (language and country):
+            if not (language and territory):
                 continue
 
             try:
                 yield self.__getLocaleData(self.root.locale(locale), calendars,
-                                           language, script, country, variant)
+                                           language, script, territory, variant)
             except Error as e:
                 self.grumble(skip(locale, e.message))
 
         for locale in self.root.fileLocales:
             try:
                 chain = self.root.locale(locale)
-                language, script, country, variant = chain.tagCodes()
+                language, script, territory, variant = chain.tagCodes()
                 assert language
                 # TODO: this skip should probably be based on likely
-                # sub-tags, instead of empty country: if locale has a
+                # sub-tags, instead of empty territory: if locale has a
                 # likely-subtag expansion, that's what QLocale uses,
                 # and we'll be saving its data for the expanded locale
                 # anyway, so don't need to record it for itself.
                 # See also QLocaleXmlReader.loadLocaleMap's grumble.
-                if not country:
+                if not territory:
                     continue
-                yield self.__getLocaleData(chain, calendars, language, script, country, variant)
+                yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
             except Error as e:
-                self.grumble('Skipping file locale "{}" ({})\n'.format(locale, e.message))
+                self.grumble(f'Skipping file locale "{locale}" ({e})\n')
 
     import textwrap
     @staticmethod
@@ -153,13 +128,13 @@ class CldrReader (object):
 
     def __parseTags(self, locale):
         tags = self.__splitLocale(locale)
-        language = tags.next()
-        script = country = variant = ''
+        language = next(tags)
+        script = territory = variant = ''
         try:
-            script, country, variant = tags
+            script, territory, variant = tags
         except ValueError:
             pass
-        return tuple(p[1] for p in self.root.codesToIdName(language, script, country, variant))
+        return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant))
 
     def __splitLocale(self, name):
         """Generate (language, script, territory, variant) from a locale name
@@ -171,14 +146,18 @@ class CldrReader (object):
         single tag (i.e. contains no underscores).  Always yields 1 or
         4 values, never 2 or 3."""
         tags = iter(name.split('_'))
-        yield tags.next() # Language
-        tag = tags.next() # may raise StopIteration
+        yield next(tags) # Language
+
+        try:
+            tag = next(tags)
+        except StopIteration:
+            return
 
         # Script is always four letters, always capitalised:
         if len(tag) == 4 and tag[0].isupper() and tag[1:].islower():
             yield tag
             try:
-                tag = tags.next()
+                tag = next(tags)
             except StopIteration:
                 tag = ''
         else:
@@ -188,7 +167,7 @@ class CldrReader (object):
         if tag and tag.isupper() or tag.isdigit():
             yield tag
             try:
-                tag = tags.next()
+                tag = next(tags)
             except StopIteration:
                 tag = ''
         else:
@@ -201,21 +180,22 @@ class CldrReader (object):
         else:
             yield ''
 
-        # If nothing is left, StopIteration will avoid the warning:
-        if not tag:
-            tag = tags.next()
-        self.grumble('Ignoring unparsed cruft {} in {}\n'.format('_'.join(tag + tuple(tags)), name))
+        rest = [tag] if tag else []
+        rest.extend(tags)
 
-    def __getLocaleData(self, scan, calendars, language, script, country, variant):
-        ids, names = zip(*self.root.codesToIdName(language, script, country, variant))
-        assert ids[0] > 0 and ids[2] > 0, (language, script, country, variant)
+        if rest:
+            self.grumble(f'Ignoring unparsed cruft {"_".join(rest)} in {name}\n')
+
+    def __getLocaleData(self, scan, calendars, language, script, territory, variant):
+        ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
+        assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
         locale = Locale(
             language = names[0], language_code = language, language_id = ids[0],
             script = names[1], script_code = script, script_id = ids[1],
-            country = names[2], country_code = country, country_id = ids[2],
+            territory = names[2], territory_code = territory, territory_id = ids[2],
             variant_code = variant)
 
-        firstDay, weStart, weEnd = self.root.weekData(country)
+        firstDay, weStart, weEnd = self.root.weekData(territory)
         assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
                    for day in (firstDay, weStart, weEnd))
 
@@ -223,7 +203,7 @@ class CldrReader (object):
                       weekendStart = weStart,
                       weekendEnd = weEnd)
 
-        iso, digits, rounding = self.root.currencyData(country)
+        iso, digits, rounding = self.root.currencyData(territory)
         locale.update(currencyIsoCode = iso,
                       currencyDigits = int(digits),
                       currencyRounding = int(rounding))
@@ -231,7 +211,7 @@ class CldrReader (object):
         locale.update(scan.currencyData(iso))
         locale.update(scan.numericData(self.root.numberSystem, self.whitter))
         locale.update(scan.textPatternData())
-        locale.update(scan.endonyms(language, script, country, variant))
+        locale.update(scan.endonyms(language, script, territory, variant))
         locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
         locale.update(scan.calendarNames(calendars)) # Names of days and months
 
@@ -242,7 +222,7 @@ class CldrReader (object):
 # the cache. If a process were to instantiate this class with distinct
 # roots, each cache would be filled by the first to need it !
 class CldrAccess (object):
-    def __init__(self, root):
+    def __init__(self, root: Path):
         """Set up a master object for accessing CLDR data.
 
         Single parameter, root, is the file-system path to the root of
@@ -250,18 +230,18 @@ class CldrAccess (object):
         contain dtd/, main/ and supplemental/ sub-directories."""
         self.root = root
 
-    def xml(self, *path):
+    def xml(self, relative_path: str):
         """Load a single XML file and return its root element as an XmlScanner.
 
         The path is interpreted relative to self.root"""
-        return XmlScanner(Node(self.__xml(path)))
+        return XmlScanner(Node(self.__xml(relative_path)))
 
     def supplement(self, name):
         """Loads supplemental data as a Supplement object.
 
         The name should be that of a file in common/supplemental/, without path.
         """
-        return Supplement(Node(self.__xml(('common', 'supplemental', name))))
+        return Supplement(Node(self.__xml(f'common/supplemental/{name}')))
 
     def locale(self, name):
         """Loads all data for a locale as a LocaleScanner object.
@@ -273,17 +253,18 @@ class CldrAccess (object):
         inheritance, where relevant."""
         return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
 
+    def englishNaming(self, tag): # see QLocaleXmlWriter.enumData()
+        return self.__codeMap(tag).get
+
     @property
-    def fileLocales(self, joinPath = os.path.join, listDirectory = os.listdir,
-                    splitExtension = os.path.splitext):
+    def fileLocales(self) -> Iterable[str]:
         """Generator for locale IDs seen in file-names.
 
         All *.xml other than root.xml in common/main/ are assumed to
         identify locales."""
-        for name in listDirectory(joinPath(self.root, 'common', 'main')):
-            stem, ext = splitExtension(name)
-            if ext == '.xml' and stem != 'root':
-                yield stem
+        for path in self.root.joinpath('common/main').glob('*.xml'):
+            if path.stem != 'root':
+                yield path.stem
 
     @property
     def defaultContentLocales(self):
@@ -304,44 +285,44 @@ class CldrAccess (object):
     def numberSystem(self, system):
         """Get a description of a numbering system.
 
-        Returns a mapping, with keys u'digits', u'type' and u'id'; the
+        Returns a mapping, with keys 'digits', 'type' and 'id'; the
         value for this last is system. Raises KeyError for unknown
         number system, ldml.Error on failure to load data."""
         try:
             return self.__numberSystems[system]
         except KeyError:
-            raise Error('Unsupported number system: {}'.format(system))
+            raise Error(f'Unsupported number system: {system}')
 
-    def weekData(self, country):
+    def weekData(self, territory):
         """Data on the weekly cycle.
 
         Returns a triple (W, S, E) of en's short names for week-days;
         W is the first day of the week, S the start of the week-end
-        and E the end of the week-end.  Where data for a country is
+        and E the end of the week-end.  Where data for a territory is
         unavailable, the data for CLDR's territory 001 (The World) is
         used."""
         try:
-            return self.__weekData[country]
+            return self.__weekData[territory]
         except KeyError:
             return self.__weekData['001']
 
-    def currencyData(self, country):
-        """Returns currency data for the given country code.
+    def currencyData(self, territory):
+        """Returns currency data for the given territory code.
 
         Return value is a tuple (ISO4217 code, digit count, rounding
-        mode).  If CLDR provides no data for this country, ('', 2, 1)
+        mode).  If CLDR provides no data for this territory, ('', 2, 1)
         is the default result.
         """
         try:
-            return self.__currencyData[country]
+            return self.__currencyData[territory]
         except KeyError:
             return '', 2, 1
 
-    def codesToIdName(self, language, script, country, variant = ''):
+    def codesToIdName(self, language, script, territory, variant = ''):
         """Maps each code to the appropriate ID and name.
 
         Returns a 4-tuple of (ID, name) pairs corresponding to the
-        language, script, country and variant given.  Raises a
+        language, script, territory and variant given.  Raises a
         suitable error if any of them is unknown, indicating all that
         are unknown plus suitable names for any that could sensibly be
         added to enumdata.py to make them known.
@@ -353,33 +334,33 @@ class CldrAccess (object):
         try:
             return (enum('language')[language],
                     enum('script')[script],
-                    enum('country')[country],
+                    enum('territory')[territory],
                     enum('variant')[variant])
         except KeyError:
             pass
 
-        parts, values = [], [language, script, country, variant]
-        for index, key in enumerate(('language', 'script', 'country', 'variant')):
+        parts, values = [], [language, script, territory, variant]
+        for index, key in enumerate(('language', 'script', 'territory', 'variant')):
             naming, enums = self.__codeMap(key), enum(key)
             value = values[index]
             if value not in enums:
-                text = '{} code {}'.format(key, value)
+                text = f'{key} code {value}'
                 name = naming.get(value)
                 if name and value != 'POSIX':
-                    text += u' (could add {})'.format(name)
+                    text += f' (could add {name})'
                 parts.append(text)
         if len(parts) > 1:
             parts[-1] = 'and ' + parts[-1]
-        assert parts
+        else:
+            assert parts
+            if parts[0].startswith('variant'):
+                raise Error(f'No support for {parts[0]}',
+                            language, script, territory, variant)
         raise Error('Unknown ' + ', '.join(parts),
-                    language, script, country, variant)
+                    language, script, territory, variant)
 
     @staticmethod
-    def __checkEnum(given, proper, scraps,
-                    remap = { u'å': 'a', u'ã': 'a', u'ç': 'c', u'é': 'e', u'í': 'i', u'ü': 'u'},
-                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
-                    suffixes = ( 'Han', ),
-                    skip = u'\u02bc'):
+    def __checkEnum(given, proper, scraps):
         # Each is a { code: full name } mapping
         for code, name in given.items():
             try: right = proper[code]
@@ -387,35 +368,23 @@ class CldrAccess (object):
                 # No en.xml name for this code, but supplementalData's
                 # parentLocale may still believe in it:
                 if code not in scraps:
-                    yield name, '[Found no CLDR name for code {}]'.format(code)
-                continue
-            if name == right: continue
-            ok = right.replace('&', 'And')
-            for k, v in prefix.items():
-                if ok.startswith(k + ' '):
-                    ok = v + ok[len(k):]
-            while '(' in ok:
-                try: f, t = ok.index('('), ok.index(')')
-                except ValueError: break
-                ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
-            if any(name == ok + ' ' + s for s in suffixes):
-                continue
-            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
-                remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
+                    yield name, f'[Found no CLDR name for code {code}]'
                 continue
-            yield name, ok
+            cleaned = names_clash(right, name)
+            if cleaned:
+                yield name, cleaned
 
     def checkEnumData(self, grumble):
         scraps = set()
         for k in self.__parentLocale.keys():
             for f in k.split('_'):
                 scraps.add(f)
-        from enumdata import language_list, country_list, script_list
-        language = dict((v, k) for k, v in language_list.values() if not v.isspace())
-        country = dict((v, k) for k, v in country_list.values() if v != 'ZZ')
-        script = dict((v, k) for k, v in script_list.values() if v != 'Zzzz')
+        from enumdata import language_map, territory_map, script_map
+        language = {v: k for k, v in language_map.values() if not v.isspace()}
+        territory = {v: k for k, v in territory_map.values() if v != 'ZZ'}
+        script = {v: k for k, v in script_map.values() if v != 'Zzzz'}
         lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
-        land = dict(self.__checkEnum(country, self.__codeMap('country'), scraps))
+        land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
         text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
         if lang or land or text:
             grumble("""\
@@ -424,15 +393,15 @@ enumdata.py (keeping the old name as an alias):
 """)
             if lang:
                 grumble('Language:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in lang.items())
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
                         + '\n')
             if land:
-                grumble('Country:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in land.items())
+                grumble('Territory:\n\t'
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
                         + '\n')
             if text:
                 grumble('Script:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in text.items())
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
                         + '\n')
             grumble('\n')
 
@@ -442,7 +411,7 @@ enumdata.py (keeping the old name as an alias):
         MS-Win have their own eccentric names for time-zones.  CLDR
         helpfully provides a translation to more orthodox names.
 
-        Singe argument, lookup, is a mapping from known MS-Win names
+        Single argument, lookup, is a mapping from known MS-Win names
         for locales to a unique integer index (starting at 1).
 
         The XML structure we read has the form:
@@ -460,7 +429,7 @@ enumdata.py (keeping the old name as an alias):
  </supplementalData>
 """
         zones = self.supplement('windowsZones.xml')
-        enum = self.__enumMap('country')
+        enum = self.__enumMap('territory')
         badZones, unLands, defaults, windows = set(), set(), {}, {}
 
         for name, attrs in zones.find('windowsZones/mapTimezones'):
@@ -469,8 +438,8 @@ enumdata.py (keeping the old name as an alias):
 
             wid, code = attrs['other'], attrs['territory']
             data = dict(windowsId = wid,
-                        countryCode = code,
-                        ianaList = attrs['type'])
+                        territoryCode = code,
+                        ianaList = ' '.join(attrs['type'].split()))
 
             try:
                 key = lookup[wid]
@@ -479,7 +448,7 @@ enumdata.py (keeping the old name as an alias):
                 key = 0
             data['windowsKey'] = key
 
-            if code == u'001':
+            if code == '001':
                 defaults[key] = data['ianaList']
             else:
                 try:
@@ -487,11 +456,11 @@ enumdata.py (keeping the old name as an alias):
                 except KeyError:
                     unLands.append(code)
                     continue
-                data.update(countryId = cid, country = name)
+                data.update(territoryId = cid, territory = name)
                 windows[key, cid] = data
 
         if unLands:
-            raise Error('Unknown country codes, please add to enumdata.py: '
+            raise Error('Unknown territory codes, please add to enumdata.py: '
                         + ', '.join(sorted(unLands)))
 
         if badZones:
@@ -507,20 +476,20 @@ enumdata.py (keeping the old name as an alias):
         return self.__cldrVersion
 
     # Implementation details
-    def __xml(self, path, cache = CacheDict(), read = minidom.parse, joinPath = os.path.join):
+    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
         try:
-            doc = cache[path]
+            doc = cache[relative_path]
         except KeyError:
-            cache[path] = doc = read(joinPath(self.root, *path)).documentElement
+            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
         return doc
 
-    def __open(self, path, joinPath=os.path.join):
-        return open(joinPath(self.root, *path))
+    def __open(self, relative_path: str) -> TextIO:
+        return self.root.joinpath(relative_path).open()
 
     @property
     def __rootLocale(self, cache = []):
         if not cache:
-            cache.append(self.xml('common', 'main', 'root.xml'))
+            cache.append(self.xml('common/main/root.xml'))
         return cache[0]
 
     @property
@@ -530,7 +499,7 @@ enumdata.py (keeping the old name as an alias):
         return cache[0]
 
     @property
-    def __numberSystems(self, cache = {}, joinPath=os.path.join):
+    def __numberSystems(self, cache = {}):
         if not cache:
             for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
                 cache[attrs['id']] = attrs
@@ -563,7 +532,7 @@ enumdata.py (keeping the old name as an alias):
         source = self.__supplementalData
         for key in ('firstDay', 'weekendStart', 'weekendEnd'):
             result = {}
-            for ignore, attrs in source.find('weekData/' + key):
+            for ignore, attrs in source.find(f'weekData/{key}'):
                 assert ignore == key
                 day = attrs['day']
                 assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
@@ -580,7 +549,7 @@ enumdata.py (keeping the old name as an alias):
             for elt in source.findNodes('currencyData/region'):
                 iso, digits, rounding = '', 2, 1
                 try:
-                    country = elt.dom.attributes['iso3166'].nodeValue
+                    territory = elt.dom.attributes['iso3166'].nodeValue
                 except KeyError:
                     continue
                 for child in elt.findAllChildren('currency'):
@@ -596,16 +565,16 @@ enumdata.py (keeping the old name as an alias):
                         break
                 if iso:
                     for tag, data in source.find(
-                        'currencyData/fractions/info[iso4217={}]'.format(iso)):
+                        f'currencyData/fractions/info[iso4217={iso}]'):
                         digits = data['digits']
                         rounding = data['rounding']
-                cache[country] = iso, digits, rounding
+                cache[territory] = iso, digits, rounding
             assert cache
 
         return cache
 
     @property
-    def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join):
+    def __unDistinguishedAttributes(self, cache = {}):
         """Mapping from tag names to lists of attributes.
 
         LDML defines some attributes as 'distinguishing': if a node
@@ -625,7 +594,7 @@ enumdata.py (keeping the old name as an alias):
 
         return cache
 
-    def __scanLdmlDtd(self, joinPath = os.path.join):
+    def __scanLdmlDtd(self):
         """Scan the LDML DTD, record CLDR version
 
         Yields (tag, attrs) pairs: on elements with a given tag,
@@ -635,7 +604,7 @@ enumdata.py (keeping the old name as an alias):
 
         Sets self.__cldrVersion as a side-effect, since this
         information is found in the same file."""
-        with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd:
+        with self.__open('common/dtd/ldml.dtd') as dtd:
             tag, ignored, last = None, None, None
 
             for line in dtd:
@@ -670,15 +639,15 @@ enumdata.py (keeping the old name as an alias):
     def __enumMap(self, key, cache = {}):
         if not cache:
             cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
-            # They're not actually lists: mappings from numeric value
-            # to pairs of full name and short code. What we want, in
-            # each case, is a mapping from code to the other two.
-            from enumdata import language_list, script_list, country_list
-            for form, book, empty in (('language', language_list, 'AnyLanguage'),
-                                      ('script', script_list, 'AnyScript'),
-                                      ('country', country_list, 'AnyTerritory')):
-                cache[form] = dict((pair[1], (num, pair[0]))
-                                   for num, pair in book.items() if pair[0] != 'C')
+            # They're mappings from numeric value to pairs of full
+            # name and short code. What we want, in each case, is a
+            # mapping from code to the other two.
+            from enumdata import language_map, script_map, territory_map
+            for form, book, empty in (('language', language_map, 'AnyLanguage'),
+                                      ('script', script_map, 'AnyScript'),
+                                      ('territory', territory_map, 'AnyTerritory')):
+                cache[form] = {pair[1]: (num, pair[0])
+                               for num, pair in book.items() if pair[0] != 'C'}
                 # (Have to filter out the C locale, as we give it the
                 # same (all space) code as AnyLanguage, whose code
                 # should probably be 'und' instead.)
@@ -693,9 +662,9 @@ enumdata.py (keeping the old name as an alias):
     def __codeMap(self, key, cache = {},
                   # Maps our name for it to CLDR's name:
                   naming = {'language': 'languages', 'script': 'scripts',
-                            'country': 'territories', 'variant': 'variants'}):
+                            'territory': 'territories', 'variant': 'variants'}):
         if not cache:
-            root = self.xml('common', 'main', 'en.xml').root.findUniqueChild('localeDisplayNames')
+            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
             for dst, src in naming.items():
                 cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
             assert cache
@@ -721,7 +690,13 @@ enumdata.py (keeping the old name as an alias):
             except (KeyError, ValueError, TypeError):
                 pass
             else:
-                if key not in seen or not elt.attributes.has_key('alt'):
+                # Prefer stand-alone forms of names when present, ignore other
+                # alt="..." entries. For example, Traditional and Simplified
+                # Han omit "Han" in the plain form, but include it for
+                # stand-alone. As the stand-alone version appears later, it
+                # over-writes the plain one.
+                if (key not in seen or 'alt' not in elt.attributes
+                    or elt.attributes['alt'].nodeValue == 'stand-alone'):
                     yield key, value
                     seen.add(key)
 
@@ -730,7 +705,8 @@ enumdata.py (keeping the old name as an alias):
     def __parentLocale(self, cache = {}):
         # see http://www.unicode.org/reports/tr35/#Parent_Locales
         if not cache:
-            for tag, attrs in self.__supplementalData.find('parentLocales'):
+            for tag, attrs in self.__supplementalData.find('parentLocales',
+                                                           ('component',)):
                 parent = attrs.get('parent', '')
                 for child in attrs['locales'].split():
                     cache[child] = parent
@@ -738,10 +714,9 @@ enumdata.py (keeping the old name as an alias):
 
         return cache
 
-    def __localeAsDoc(self, name, aliasFor = None,
-                      joinPath = os.path.join, exists = os.path.isfile):
-        path = ('common', 'main', name + '.xml')
-        if exists(joinPath(self.root, *path)):
+    def __localeAsDoc(self, name: str, aliasFor = None):
+        path = f'common/main/{name}.xml'
+        if self.root.joinpath(path).exists():
             elt = self.__xml(path)
             for child in Node(elt).findAllChildren('alias'):
                 try:
@@ -754,8 +729,8 @@ enumdata.py (keeping the old name as an alias):
             return elt
 
         if aliasFor:
-            raise Error('Fatal error: found an alias "{}" -> "{}", but found no file for the alias'
-                        .format(aliasFor, name))
+            raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
+                        'but found no file for the alias')
 
     def __scanLocaleRoots(self, name):
         while name and name != 'root':
@@ -780,4 +755,4 @@ enumdata.py (keeping the old name as an alias):
         return chain
 
 # Unpolute the namespace: we don't need to export these.
-del minidom, CacheDict, os
+del minidom, CacheDict
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index 20dda77965..d3aa88ec38 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -1,48 +1,27 @@
-#!/usr/bin/env python2
-# coding=utf8
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-"""Convert CLDR data to qLocaleXML
-
-The CLDR data can be downloaded from CLDR_, which has a sub-directory
-for each version; you need the ``core.zip`` file for your version of
-choice (typically the latest). This script has had updates to cope up
-to v38.1; for later versions, we may need adaptations. Unpack the
-downloaded ``core.zip`` and check it has a common/main/ sub-directory:
-pass the path of that root of the download to this script as its first
-command-line argument. Pass the name of the file in which to write
-output as the second argument; either omit it or use '-' to select the
-standard output. This file is the input needed by
-``./qlocalexml2cpp.py``
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+"""Convert CLDR data to QLocaleXML
+
+The CLDR data can be downloaded as a zip-file from CLDR_, which has a
+sub-directory for each version; you need the ``core.zip`` file for
+your version of choice (typically the latest), which you should then
+unpack. Alternatively, you can clone the git repo from github_, which
+has a tag for each release and a maint/maint-$ver branch for each
+major version. Either way, the CLDR top-level directory should have a
+subdirectory called common/ which contains (among other things)
+subdirectories main/ and supplemental/.
+
+This script copes with CLDR versions up to v44.1; for later versions,
+we may need adaptations. Pass the path of the CLDR top-level directory
+to this script as its first command-line argument. Pass the name of
+the file in which to write output as the second argument; either omit
+it or use '-' to select the standard output. This file is the input
+needed by ``./qlocalexml2cpp.py``
 
 When you update the CLDR data, be sure to also update
 src/corelib/text/qt_attribution.json's entry for unicode-cldr. Check
-this script's output for unknown language, country or script messages;
+this script's output for unknown language, territory or script messages;
 if any can be resolved, use their entry in common/main/en.xml to
 append new entries to enumdata.py's lists and update documentation in
 src/corelib/text/qlocale.qdoc, adding the new entries in alphabetic
@@ -51,71 +30,73 @@ order.
 While updating the locale data, check also for updates to MS-Win's
 time zone names; see cldr2qtimezone.py for details.
 
-.. _CLDR: ftp://unicode.org/Public/cldr/
+All the scripts mentioned support --help to tell you how to use them.
+
+.. _CLDR: https://unicode.org/Public/cldr/
+.. _github: https://github.com/unicode-org/cldr
 """
 
-import os
-import sys
+from pathlib import Path
+import argparse
 
 from cldr import CldrReader
 from qlocalexml import QLocaleXmlWriter
-from enumdata import language_list, script_list, country_list
-
-def usage(name, err, message = ''):
-    err.write("""Usage: {} path/to/cldr/common/main [out-file.xml]
-""".format(name)) # TODO: expand command-line, improve help message
-    if message:
-        err.write('\n' + message + '\n')
-
-def main(args, out, err):
-    # TODO: make calendars a command-line option
-    calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
-
-    # TODO: make argument parsing more sophisticated
-    name = args.pop(0)
-    if not args:
-        usage(name, err, 'Where is your CLDR data tree ?')
-        return 1
-
-    root = args.pop(0)
-    if not os.path.exists(os.path.join(root, 'common', 'main', 'root.xml')):
-        usage(name, err,
-              'First argument is the root of the CLDR tree: found no common/main/root.xml under '
-              + root)
-        return 1
-
-    xml = args.pop(0) if args else None
+
+
+def main(argv, out, err):
+    """Generate a QLocaleXML file from CLDR data.
+
+    Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+    arguments. In argv[1:], it expects the root of the CLDR data
+    directory as first parameter and the name of the file in which to
+    save QLocaleXML data as second parameter. It accepts a --calendars
+    option to select which calendars to support (all available by
+    default)."""
+    all_calendars = ['gregorian', 'persian', 'islamic']
+
+    parser = argparse.ArgumentParser(
+        prog=Path(argv[0]).name,
+        description='Generate QLocaleXML from CLDR data.',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
+    parser.add_argument('out_file', help='output XML file name',
+                        nargs='?', metavar='out-file.xml')
+    parser.add_argument('--calendars', help='select calendars to emit data for',
+                        nargs='+', metavar='CALENDAR',
+                        choices=all_calendars, default=all_calendars)
+
+    args = parser.parse_args(argv[1:])
+
+    root = Path(args.cldr_path)
+    root_xml_path = 'common/main/root.xml'
+
+    if not root.joinpath(root_xml_path).exists():
+        parser.error('First argument is the root of the CLDR tree: '
+                     f'found no {root_xml_path} under {root}')
+
+    xml = args.out_file
     if not xml or xml == '-':
         emit = out
     elif not xml.endswith('.xml'):
-        usage(name, err, 'Please use a .xml extension on your output file name, not ' + xml)
-        return 1
+        parser.error(f'Please use a .xml extension on your output file name, not {xml}')
     else:
         try:
             emit = open(xml, 'w')
         except IOError as e:
-            usage(name, err, 'Failed to open "{}" to write output to it\n'.format(xml))
-            return 1
-
-    if args:
-        usage(name, err, 'Too many arguments - excess: ' + ' '.join(args))
-        return 1
-
-    if emit.encoding != 'UTF-8' or (emit.encoding is None and sys.getdefaultencoding() != 'UTF-8'):
-        reload(sys) # Weirdly, this gets a richer sys module than the plain import got us !
-        sys.setdefaultencoding('UTF-8')
+            parser.error(f'Failed to open "{xml}" to write output to it')
 
     # TODO - command line options to tune choice of grumble and whitter:
     reader = CldrReader(root, err.write, err.write)
     writer = QLocaleXmlWriter(emit.write)
 
     writer.version(reader.root.cldrVersion)
-    writer.enumData(language_list, script_list, country_list)
+    writer.enumData(reader.root.englishNaming)
     writer.likelySubTags(reader.likelySubTags())
-    writer.locales(reader.readLocales(calendars), calendars)
+    writer.locales(reader.readLocales(args.calendars), args.calendars)
 
-    writer.close()
+    writer.close(err.write)
     return 0
 
 if __name__ == '__main__':
+    import sys
     sys.exit(main(sys.argv, sys.stdout, sys.stderr))
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
index 7c06fe8561..27987d5a58 100755
--- a/util/locale_database/cldr2qtimezone.py
+++ b/util/locale_database/cldr2qtimezone.py
@@ -1,46 +1,23 @@
-#!/usr/bin/env python2
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Parse CLDR data for QTimeZone use with MS-Windows
 
 Script to parse the CLDR common/supplemental/windowsZones.xml file and
-encode for use in QTimeZone.  See ``./cldr2qlocalexml.py`` for where
-to get the CLDR data.  Pass its root directory as first parameter to
-this script and the qtbase root directory as second parameter.  It
-shall update qtbase's src/corelib/time/qtimezoneprivate_data_p.h ready
-for use.
+prepare its data for use in QTimeZone.  See ``./cldr2qlocalexml.py`` for
+where to get the CLDR data.  Pass its root directory as first parameter
+to this script.  You can optionally pass the qtbase root directory as
+second parameter; it defaults to the root of the checkout containing
+this script.  This script updates qtbase's
+src/corelib/time/qtimezoneprivate_data_p.h with the new data.
 """
 
-import os
 import datetime
+from pathlib import Path
 import textwrap
+import argparse
 
-from localetools import unicode2hex, wrap_list, Error, SourceFileEditor
+from localetools import unicode2hex, wrap_list, Error, SourceFileEditor, qtbase_root
 from cldr import CldrAccess
 
 ### Data that may need updates in response to new entries in the CLDR file ###
@@ -54,194 +31,208 @@ from cldr import CldrAccess
 # Not public so may safely be changed.  Please keep in alphabetic order by ID.
 # ( Windows Id, Offset Seconds )
 windowsIdList = (
-    (u'Afghanistan Standard Time',        16200),
-    (u'Alaskan Standard Time',           -32400),
-    (u'Aleutian Standard Time',          -36000),
-    (u'Altai Standard Time',              25200),
-    (u'Arab Standard Time',               10800),
-    (u'Arabian Standard Time',            14400),
-    (u'Arabic Standard Time',             10800),
-    (u'Argentina Standard Time',         -10800),
-    (u'Astrakhan Standard Time',          14400),
-    (u'Atlantic Standard Time',          -14400),
-    (u'AUS Central Standard Time',        34200),
-    (u'Aus Central W. Standard Time',     31500),
-    (u'AUS Eastern Standard Time',        36000),
-    (u'Azerbaijan Standard Time',         14400),
-    (u'Azores Standard Time',             -3600),
-    (u'Bahia Standard Time',             -10800),
-    (u'Bangladesh Standard Time',         21600),
-    (u'Belarus Standard Time',            10800),
-    (u'Bougainville Standard Time',       39600),
-    (u'Canada Central Standard Time',    -21600),
-    (u'Cape Verde Standard Time',         -3600),
-    (u'Caucasus Standard Time',           14400),
-    (u'Cen. Australia Standard Time',     34200),
-    (u'Central America Standard Time',   -21600),
-    (u'Central Asia Standard Time',       21600),
-    (u'Central Brazilian Standard Time', -14400),
-    (u'Central Europe Standard Time',      3600),
-    (u'Central European Standard Time',    3600),
-    (u'Central Pacific Standard Time',    39600),
-    (u'Central Standard Time (Mexico)',  -21600),
-    (u'Central Standard Time',           -21600),
-    (u'China Standard Time',              28800),
-    (u'Chatham Islands Standard Time',    45900),
-    (u'Cuba Standard Time',              -18000),
-    (u'Dateline Standard Time',          -43200),
-    (u'E. Africa Standard Time',          10800),
-    (u'E. Australia Standard Time',       36000),
-    (u'E. Europe Standard Time',           7200),
-    (u'E. South America Standard Time',  -10800),
-    (u'Easter Island Standard Time',     -21600),
-    (u'Eastern Standard Time',           -18000),
-    (u'Eastern Standard Time (Mexico)',  -18000),
-    (u'Egypt Standard Time',               7200),
-    (u'Ekaterinburg Standard Time',       18000),
-    (u'Fiji Standard Time',               43200),
-    (u'FLE Standard Time',                 7200),
-    (u'Georgian Standard Time',           14400),
-    (u'GMT Standard Time',                    0),
-    (u'Greenland Standard Time',         -10800),
-    (u'Greenwich Standard Time',              0),
-    (u'GTB Standard Time',                 7200),
-    (u'Haiti Standard Time',             -18000),
-    (u'Hawaiian Standard Time',          -36000),
-    (u'India Standard Time',              19800),
-    (u'Iran Standard Time',               12600),
-    (u'Israel Standard Time',              7200),
-    (u'Jordan Standard Time',              7200),
-    (u'Kaliningrad Standard Time',         7200),
-    (u'Korea Standard Time',              32400),
-    (u'Libya Standard Time',               7200),
-    (u'Line Islands Standard Time',       50400),
-    (u'Lord Howe Standard Time',          37800),
-    (u'Magadan Standard Time',            36000),
-    (u'Magallanes Standard Time',        -10800), # permanent DST
-    (u'Marquesas Standard Time',         -34200),
-    (u'Mauritius Standard Time',          14400),
-    (u'Middle East Standard Time',         7200),
-    (u'Montevideo Standard Time',        -10800),
-    (u'Morocco Standard Time',                0),
-    (u'Mountain Standard Time (Mexico)', -25200),
-    (u'Mountain Standard Time',          -25200),
-    (u'Myanmar Standard Time',            23400),
-    (u'N. Central Asia Standard Time',    21600),
-    (u'Namibia Standard Time',             3600),
-    (u'Nepal Standard Time',              20700),
-    (u'New Zealand Standard Time',        43200),
-    (u'Newfoundland Standard Time',      -12600),
-    (u'Norfolk Standard Time',            39600),
-    (u'North Asia East Standard Time',    28800),
-    (u'North Asia Standard Time',         25200),
-    (u'North Korea Standard Time',        30600),
-    (u'Omsk Standard Time',               21600),
-    (u'Pacific SA Standard Time',        -10800),
-    (u'Pacific Standard Time',           -28800),
-    (u'Pacific Standard Time (Mexico)',  -28800),
-    (u'Pakistan Standard Time',           18000),
-    (u'Paraguay Standard Time',          -14400),
-    (u'Qyzylorda Standard Time',          18000), # a.k.a. Kyzylorda, in Kazakhstan
-    (u'Romance Standard Time',             3600),
-    (u'Russia Time Zone 3',               14400),
-    (u'Russia Time Zone 10',              39600),
-    (u'Russia Time Zone 11',              43200),
-    (u'Russian Standard Time',            10800),
-    (u'SA Eastern Standard Time',        -10800),
-    (u'SA Pacific Standard Time',        -18000),
-    (u'SA Western Standard Time',        -14400),
-    (u'Saint Pierre Standard Time',      -10800), # New France
-    (u'Sakhalin Standard Time',           39600),
-    (u'Samoa Standard Time',              46800),
-    (u'Sao Tome Standard Time',               0),
-    (u'Saratov Standard Time',            14400),
-    (u'SE Asia Standard Time',            25200),
-    (u'Singapore Standard Time',          28800),
-    (u'South Africa Standard Time',        7200),
-    (u'Sri Lanka Standard Time',          19800),
-    (u'Sudan Standard Time',               7200), # unless they mean South Sudan, +03:00
-    (u'Syria Standard Time',               7200),
-    (u'Taipei Standard Time',             28800),
-    (u'Tasmania Standard Time',           36000),
-    (u'Tocantins Standard Time',         -10800),
-    (u'Tokyo Standard Time',              32400),
-    (u'Tomsk Standard Time',              25200),
-    (u'Tonga Standard Time',              46800),
-    (u'Transbaikal Standard Time',        32400), # Yakutsk
-    (u'Turkey Standard Time',              7200),
-    (u'Turks And Caicos Standard Time',  -14400),
-    (u'Ulaanbaatar Standard Time',        28800),
-    (u'US Eastern Standard Time',        -18000),
-    (u'US Mountain Standard Time',       -25200),
-    (u'UTC-11',                          -39600),
-    (u'UTC-09',                          -32400),
-    (u'UTC-08',                          -28800),
-    (u'UTC-02',                           -7200),
-    (u'UTC',                                  0),
-    (u'UTC+12',                           43200),
-    (u'UTC+13',                           46800),
-    (u'Venezuela Standard Time',         -16200),
-    (u'Vladivostok Standard Time',        36000),
-    (u'Volgograd Standard Time',          14400),
-    (u'W. Australia Standard Time',       28800),
-    (u'W. Central Africa Standard Time',   3600),
-    (u'W. Europe Standard Time',           3600),
-    (u'W. Mongolia Standard Time',        25200), # Hovd
-    (u'West Asia Standard Time',          18000),
-    (u'West Bank Standard Time',           7200),
-    (u'West Pacific Standard Time',       36000),
-    (u'Yakutsk Standard Time',            32400),
-    (u'Yukon Standard Time',             -25200), # Non-DST Mountain Standard Time since 2020-11-01
+    ('Afghanistan Standard Time',        16200),
+    ('Alaskan Standard Time',           -32400),
+    ('Aleutian Standard Time',          -36000),
+    ('Altai Standard Time',              25200),
+    ('Arab Standard Time',               10800),
+    ('Arabian Standard Time',            14400),
+    ('Arabic Standard Time',             10800),
+    ('Argentina Standard Time',         -10800),
+    ('Astrakhan Standard Time',          14400),
+    ('Atlantic Standard Time',          -14400),
+    ('AUS Central Standard Time',        34200),
+    ('Aus Central W. Standard Time',     31500),
+    ('AUS Eastern Standard Time',        36000),
+    ('Azerbaijan Standard Time',         14400),
+    ('Azores Standard Time',             -3600),
+    ('Bahia Standard Time',             -10800),
+    ('Bangladesh Standard Time',         21600),
+    ('Belarus Standard Time',            10800),
+    ('Bougainville Standard Time',       39600),
+    ('Canada Central Standard Time',    -21600),
+    ('Cape Verde Standard Time',         -3600),
+    ('Caucasus Standard Time',           14400),
+    ('Cen. Australia Standard Time',     34200),
+    ('Central America Standard Time',   -21600),
+    ('Central Asia Standard Time',       21600),
+    ('Central Brazilian Standard Time', -14400),
+    ('Central Europe Standard Time',      3600),
+    ('Central European Standard Time',    3600),
+    ('Central Pacific Standard Time',    39600),
+    ('Central Standard Time',           -21600),
+    ('Central Standard Time (Mexico)',  -21600),
+    ('Chatham Islands Standard Time',    45900),
+    ('China Standard Time',              28800),
+    ('Cuba Standard Time',              -18000),
+    ('Dateline Standard Time',          -43200),
+    ('E. Africa Standard Time',          10800),
+    ('E. Australia Standard Time',       36000),
+    ('E. Europe Standard Time',           7200),
+    ('E. South America Standard Time',  -10800),
+    ('Easter Island Standard Time',     -21600),
+    ('Eastern Standard Time',           -18000),
+    ('Eastern Standard Time (Mexico)',  -18000),
+    ('Egypt Standard Time',               7200),
+    ('Ekaterinburg Standard Time',       18000),
+    ('Fiji Standard Time',               43200),
+    ('FLE Standard Time',                 7200),
+    ('Georgian Standard Time',           14400),
+    ('GMT Standard Time',                    0),
+    ('Greenland Standard Time',         -10800),
+    ('Greenwich Standard Time',              0),
+    ('GTB Standard Time',                 7200),
+    ('Haiti Standard Time',             -18000),
+    ('Hawaiian Standard Time',          -36000),
+    ('India Standard Time',              19800),
+    ('Iran Standard Time',               12600),
+    ('Israel Standard Time',              7200),
+    ('Jordan Standard Time',              7200),
+    ('Kaliningrad Standard Time',         7200),
+    ('Korea Standard Time',              32400),
+    ('Libya Standard Time',               7200),
+    ('Line Islands Standard Time',       50400),
+    ('Lord Howe Standard Time',          37800),
+    ('Magadan Standard Time',            36000),
+    ('Magallanes Standard Time',        -10800), # permanent DST
+    ('Marquesas Standard Time',         -34200),
+    ('Mauritius Standard Time',          14400),
+    ('Middle East Standard Time',         7200),
+    ('Montevideo Standard Time',        -10800),
+    ('Morocco Standard Time',                0),
+    ('Mountain Standard Time',          -25200),
+    ('Mountain Standard Time (Mexico)', -25200),
+    ('Myanmar Standard Time',            23400),
+    ('N. Central Asia Standard Time',    21600),
+    ('Namibia Standard Time',             3600),
+    ('Nepal Standard Time',              20700),
+    ('New Zealand Standard Time',        43200),
+    ('Newfoundland Standard Time',      -12600),
+    ('Norfolk Standard Time',            39600),
+    ('North Asia East Standard Time',    28800),
+    ('North Asia Standard Time',         25200),
+    ('North Korea Standard Time',        30600),
+    ('Omsk Standard Time',               21600),
+    ('Pacific SA Standard Time',        -10800),
+    ('Pacific Standard Time',           -28800),
+    ('Pacific Standard Time (Mexico)',  -28800),
+    ('Pakistan Standard Time',           18000),
+    ('Paraguay Standard Time',          -14400),
+    ('Qyzylorda Standard Time',          18000), # a.k.a. Kyzylorda, in Kazakhstan
+    ('Romance Standard Time',             3600),
+    ('Russia Time Zone 10',              39600),
+    ('Russia Time Zone 11',              43200),
+    ('Russia Time Zone 3',               14400),
+    ('Russian Standard Time',            10800),
+    ('SA Eastern Standard Time',        -10800),
+    ('SA Pacific Standard Time',        -18000),
+    ('SA Western Standard Time',        -14400),
+    ('Saint Pierre Standard Time',      -10800), # New France
+    ('Sakhalin Standard Time',           39600),
+    ('Samoa Standard Time',              46800),
+    ('Sao Tome Standard Time',               0),
+    ('Saratov Standard Time',            14400),
+    ('SE Asia Standard Time',            25200),
+    ('Singapore Standard Time',          28800),
+    ('South Africa Standard Time',        7200),
+    ('South Sudan Standard Time',         7200),
+    ('Sri Lanka Standard Time',          19800),
+    ('Sudan Standard Time',               7200), # unless they mean South Sudan, +03:00
+    ('Syria Standard Time',               7200),
+    ('Taipei Standard Time',             28800),
+    ('Tasmania Standard Time',           36000),
+    ('Tocantins Standard Time',         -10800),
+    ('Tokyo Standard Time',              32400),
+    ('Tomsk Standard Time',              25200),
+    ('Tonga Standard Time',              46800),
+    ('Transbaikal Standard Time',        32400), # Yakutsk
+    ('Turkey Standard Time',              7200),
+    ('Turks And Caicos Standard Time',  -14400),
+    ('Ulaanbaatar Standard Time',        28800),
+    ('US Eastern Standard Time',        -18000),
+    ('US Mountain Standard Time',       -25200),
+    ('UTC',                                  0),
+    # Lexical order: '+' < '-'
+    ('UTC+12',                           43200),
+    ('UTC+13',                           46800),
+    ('UTC-02',                           -7200),
+    ('UTC-08',                          -28800),
+    ('UTC-09',                          -32400),
+    ('UTC-11',                          -39600),
+    ('Venezuela Standard Time',         -16200),
+    ('Vladivostok Standard Time',        36000),
+    ('Volgograd Standard Time',          14400),
+    ('W. Australia Standard Time',       28800),
+    ('W. Central Africa Standard Time',   3600),
+    ('W. Europe Standard Time',           3600),
+    ('W. Mongolia Standard Time',        25200), # Hovd
+    ('West Asia Standard Time',          18000),
+    ('West Bank Standard Time',           7200),
+    ('West Pacific Standard Time',       36000),
+    ('Yakutsk Standard Time',            32400),
+    ('Yukon Standard Time',             -25200), # Non-DST Mountain Standard Time since 2020-11-01
 )
 
 # List of standard UTC IDs to use.  Not public so may be safely changed.
 # Do not remove IDs, as each entry is part of the API/behavior guarantee.
+# IDs for the same offset shall be space-joined; list the preferred ID first.
 # ( UTC Id, Offset Seconds )
 utcIdList = (
-    (u'UTC',            0),  # Goes first so is default
-    (u'UTC-14:00', -50400),
-    (u'UTC-13:00', -46800),
-    (u'UTC-12:00', -43200),
-    (u'UTC-11:00', -39600),
-    (u'UTC-10:00', -36000),
-    (u'UTC-09:00', -32400),
-    (u'UTC-08:00', -28800),
-    (u'UTC-07:00', -25200),
-    (u'UTC-06:00', -21600),
-    (u'UTC-05:00', -18000),
-    (u'UTC-04:30', -16200),
-    (u'UTC-04:00', -14400),
-    (u'UTC-03:30', -12600),
-    (u'UTC-03:00', -10800),
-    (u'UTC-02:00',  -7200),
-    (u'UTC-01:00',  -3600),
-    (u'UTC-00:00',      0),
-    (u'UTC+00:00',      0),
-    (u'UTC+01:00',   3600),
-    (u'UTC+02:00',   7200),
-    (u'UTC+03:00',  10800),
-    (u'UTC+03:30',  12600),
-    (u'UTC+04:00',  14400),
-    (u'UTC+04:30',  16200),
-    (u'UTC+05:00',  18000),
-    (u'UTC+05:30',  19800),
-    (u'UTC+05:45',  20700),
-    (u'UTC+06:00',  21600),
-    (u'UTC+06:30',  23400),
-    (u'UTC+07:00',  25200),
-    (u'UTC+08:00',  28800),
-    (u'UTC+08:30',  30600),
-    (u'UTC+09:00',  32400),
-    (u'UTC+09:30',  34200),
-    (u'UTC+10:00',  36000),
-    (u'UTC+11:00',  39600),
-    (u'UTC+12:00',  43200),
-    (u'UTC+13:00',  46800),
-    (u'UTC+14:00',  50400),
+    ('UTC-14:00', -50400),
+    ('UTC-13:00', -46800),
+    ('UTC-12:00', -43200),
+    ('UTC-11:00', -39600),
+    ('UTC-10:00', -36000),
+    ('UTC-09:00', -32400),
+    ('UTC-08:00', -28800),
+    ('UTC-07:00', -25200),
+    ('UTC-06:00', -21600),
+    ('UTC-05:00', -18000),
+    ('UTC-04:30', -16200),
+    ('UTC-04:00', -14400),
+    ('UTC-03:30', -12600),
+    ('UTC-03:00', -10800),
+    ('UTC-02:00',  -7200),
+    ('UTC-01:00',  -3600),
+    ('UTC',            0), # Goes first (among zero-offset) to be default
+    ('UTC+00:00',      0),
+    ('UTC-00:00',      0), # Should recognize, but avoid using (see Note below).
+    ('UTC+01:00',   3600),
+    ('UTC+02:00',   7200),
+    ('UTC+03:00',  10800),
+    ('UTC+03:30',  12600),
+    ('UTC+04:00',  14400),
+    ('UTC+04:30',  16200),
+    ('UTC+05:00',  18000),
+    ('UTC+05:30',  19800),
+    ('UTC+05:45',  20700),
+    ('UTC+06:00',  21600),
+    ('UTC+06:30',  23400),
+    ('UTC+07:00',  25200),
+    ('UTC+08:00',  28800),
+    ('UTC+08:30',  30600),
+    ('UTC+09:00',  32400),
+    ('UTC+09:30',  34200),
+    ('UTC+10:00',  36000),
+    ('UTC+11:00',  39600),
+    ('UTC+12:00',  43200),
+    ('UTC+13:00',  46800),
+    ('UTC+14:00',  50400),
 )
 
 ### End of data that may need updates in response to CLDR ###
 
+# Note: -00:00 (without the UTC prefix) was introduced in RFC3339 as a
+# way to indicate that a date-time has been converted to UTC but its
+# use should not be understood to say anything about the local time of
+# the origin of the message using it. However, ISO 8601 has, since
+# 2000, forbidden this as an offset suffix. The more recent compromise
+# is to use Z to convey the meaning RFC3339 gave to -00:00. So the use
+# of -00:00 as offset suffix should be avoided (and, by extension,
+# likewise for UTC-00:00 as a zone ID), but this suffix (and ID)
+# should be recognized when consuming data generated by other sources,
+# for backwards compatibility.
+
 class ByteArrayData:
     def __init__(self):
         self.data = []
@@ -255,17 +246,19 @@ class ByteArrayData:
         lst = unicode2hex(s)
         index = len(self.data)
         if index > 0xffff:
-            raise Error('Index ({}) outside the uint16 range !'.format(index))
+            raise Error(f'Index ({index}) outside the uint16 range !')
         self.hash[s] = index
         self.data += lst
         return index
 
     def write(self, out, name):
-        out('\nstatic const char {}[] = {{\n'.format(name))
-        out(wrap_list(self.data))
+        out(f'\nstatic constexpr char {name}[] = {{\n')
+        out(wrap_list(self.data, 16)) # 16 == 100 // len('0xhh, ')
+        # Will over-spill 100-col if some 4-digit hex show up, but none do (yet).
         out('\n};\n')
 
 class ZoneIdWriter (SourceFileEditor):
+    # All the output goes into namespace QtTimeZoneCldr.
     def write(self, version, defaults, windowsIds):
         self.__writeWarning(version)
         windows, iana = self.__writeTables(self.writer.write, defaults, windowsIds)
@@ -273,10 +266,10 @@ class ZoneIdWriter (SourceFileEditor):
         iana.write(self.writer.write, 'ianaIdData')
 
     def __writeWarning(self, version):
-        self.writer.write("""
+        self.writer.write(f"""
 /*
-    This part of the file was generated on {} from the
-    Common Locale Data Repository v{} file supplemental/windowsZones.xml
+    This part of the file was generated on {datetime.date.today()} from the
+    Common Locale Data Repository v{version} file supplemental/windowsZones.xml
 
     http://www.unicode.org/cldr/
 
@@ -284,111 +277,110 @@ class ZoneIdWriter (SourceFileEditor):
     edited) CLDR data; see qtbase/util/locale_database/.
 */
 
-""".format(str(datetime.date.today()), version))
+""")
 
     @staticmethod
     def __writeTables(out, defaults, windowsIds):
         windowsIdData, ianaIdData = ByteArrayData(), ByteArrayData()
 
         # Write Windows/IANA table
-        out('// Windows ID Key, Country Enum, IANA ID Index\n')
-        out('static const QZoneData zoneDataTable[] = {\n')
+        out('// Windows ID Key, Territory Enum, IANA ID Index\n')
+        out('static constexpr ZoneData zoneDataTable[] = {\n')
+        # Sorted by (Windows ID Key, territory enum)
         for index, data in sorted(windowsIds.items()):
             out('    {{ {:6d},{:6d},{:6d} }}, // {} / {}\n'.format(
-                    data['windowsKey'], data['countryId'],
+                    data['windowsKey'], data['territoryId'],
                     ianaIdData.append(data['ianaList']),
-                    data['windowsId'], data['country']))
-        out('    {      0,     0,     0 } // Trailing zeroes\n')
+                    data['windowsId'], data['territory']))
         out('};\n\n')
 
         # Write Windows ID key table
         out('// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n')
-        out('static const QWindowsData windowsDataTable[] = {\n')
+        out('static constexpr WindowsData windowsDataTable[] = {\n')
+        # Sorted by Windows ID key; sorting case-insensitively by
+        # Windows ID must give the same order.
+        winIdNames = [x.lower() for x, y in windowsIdList]
+        assert all(x == y for x, y in zip(winIdNames, sorted(winIdNames))), \
+            [(x, y) for x, y in zip(winIdNames, sorted(winIdNames)) if x != y]
         for index, pair in enumerate(windowsIdList, 1):
             out('    {{ {:6d},{:6d},{:6d},{:6d} }}, // {}\n'.format(
                     index,
                     windowsIdData.append(pair[0]),
                     ianaIdData.append(defaults[index]),
                     pair[1], pair[0]))
-        out('    {      0,     0,     0,     0 } // Trailing zeroes\n')
         out('};\n\n')
 
+        offsetMap = {}
+        for pair in utcIdList:
+            offsetMap[pair[1]] = offsetMap.get(pair[1], ()) + (pair[0],)
         # Write UTC ID key table
         out('// IANA ID Index, UTC Offset\n')
-        out('static const QUtcData utcDataTable[] = {\n')
-        for pair in utcIdList:
+        out('static constexpr UtcData utcDataTable[] = {\n')
+        for offset in sorted(offsetMap.keys()): # Sort so C++ can binary-chop.
+            names = offsetMap[offset];
             out('    {{ {:6d},{:6d} }}, // {}\n'.format(
-                    ianaIdData.append(pair[0]), pair[1], pair[0]))
-        out('    {     0,      0 } // Trailing zeroes\n')
+                    ianaIdData.append(' '.join(names)), offset, names[0]))
         out('};\n')
 
         return windowsIdData, ianaIdData
 
-def usage(err, name, message=''):
-    err.write("""Usage: {} path/to/cldr/root path/to/qtbase
-""".format(name)) # TODO: more interesting message
-    if message:
-        err.write('\n' + message + '\n')
 
-def main(args, out, err):
+def main(out, err):
     """Parses CLDR's data and updates Qt's representation of it.
 
-    Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+    Takes sys.stdout, sys.stderr (or equivalents) as
     arguments. Expects two command-line options: the root of the
     unpacked CLDR data-file tree and the root of the qtbase module's
     checkout. Updates QTimeZone's private data about Windows time-zone
     IDs."""
-    name = args.pop(0)
-    if len(args) != 2:
-        usage(err, name, "Expected two arguments")
-        return 1
+    parser = argparse.ArgumentParser(
+        description="Update Qt's CLDR-derived timezone data.")
+    parser.add_argument('cldr_path', help='path to the root of the CLDR tree')
+    parser.add_argument('qtbase_path',
+                        help='path to the root of the qtbase source tree',
+                        nargs='?', default=qtbase_root)
 
-    cldrPath = args.pop(0)
-    qtPath = args.pop(0)
+    args = parser.parse_args()
 
-    if not os.path.isdir(qtPath):
-        usage(err, name, "No such Qt directory: " + qtPath)
-        return 1
-    if not os.path.isdir(cldrPath):
-        usage(err, name, "No such CLDR directory: " + cldrPath)
-        return 1
+    cldrPath = Path(args.cldr_path)
+    qtPath = Path(args.qtbase_path)
 
-    dataFilePath = os.path.join(qtPath, 'src', 'corelib', 'time', 'qtimezoneprivate_data_p.h')
-    if not os.path.isfile(dataFilePath):
-        usage(err, name, 'No such file: ' + dataFilePath)
-        return 1
+    if not qtPath.is_dir():
+        parser.error(f"No such Qt directory: {qtPath}")
+
+    if not cldrPath.is_dir():
+        parser.error(f"No such CLDR directory: {cldrPath}")
+
+    dataFilePath = qtPath.joinpath('src/corelib/time/qtimezoneprivate_data_p.h')
+
+    if not dataFilePath.is_file():
+        parser.error(f'No such file: {dataFilePath}')
 
     try:
         version, defaults, winIds = CldrAccess(cldrPath).readWindowsTimeZones(
             dict((name, ind) for ind, name in enumerate((x[0] for x in windowsIdList), 1)))
     except IOError as e:
-        usage(err, name,
-              'Failed to open common/supplemental/windowsZones.xml: ' + (e.message or e.args[1]))
+        parser.error(
+            f'Failed to open common/supplemental/windowsZones.xml: {e}')
         return 1
     except Error as e:
         err.write('\n'.join(textwrap.wrap(
-                    'Failed to read windowsZones.xml: ' + (e.message or e.args[1]),
+                    f'Failed to read windowsZones.xml: {e}',
                     subsequent_indent=' ', width=80)) + '\n')
         return 1
 
     out.write('Input file parsed, now writing data\n')
-    try:
-        writer = ZoneIdWriter(dataFilePath, qtPath)
-    except IOError as e:
-        err.write('Failed to open files to transcribe: {}'.format(e.message or e.args[1]))
-        return 1
 
     try:
-        writer.write(version, defaults, winIds)
-    except Error as e:
-        writer.cleanup()
-        err.write('\nError in Windows ID data: ' + e.message + '\n')
+        with ZoneIdWriter(dataFilePath, qtPath) as writer:
+            writer.write(version, defaults, winIds)
+    except Exception as e:
+        err.write(f'\nError while updating timezone data: {e}\n')
         return 1
 
-    writer.close()
-    out.write('Data generation completed, please check the new file at ' + dataFilePath + '\n')
+    out.write(f'Data generation completed, please check the new file at {dataFilePath}\n')
     return 0
 
 if __name__ == '__main__':
     import sys
-    sys.exit(main(sys.argv, sys.stdout, sys.stderr))
+    sys.exit(main(sys.stdout, sys.stderr))
diff --git a/util/locale_database/dateconverter.py b/util/locale_database/dateconverter.py
index 1990fe0c61..8ca15405f7 100755..100644
--- a/util/locale_database/dateconverter.py
+++ b/util/locale_database/dateconverter.py
@@ -1,107 +1,195 @@
-#!/usr/bin/env python
-#############################################################################
-##
-## Copyright (C) 2016 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-
-import re
-
-def _convert_pattern(pattern):
-    # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
-    qt_regexps = {
-        r"yyy{3,}" : "yyyy", # more that three digits hence convert to four-digit year
-        r"L" : "M",          # stand-alone month names. not supported.
-        r"g{1,}": "",        # modified julian day. not supported.
-        r"S{1,}" : "",       # fractional seconds. not supported.
-        r"A{1,}" : ""        # milliseconds in day. not supported.
-    }
-    qt_patterns = {
-        "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
-        "y" : "yyyy", # four-digit year without leading zeroes
-        "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
-        "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
-        "MMMMM" : "MMM", # narrow month name.
-        "LLLLL" : "MMM", # stand-alone narrow month name.
-        "l" : "", # special symbol for chinese leap month. not supported.
-        "w" : "", "W" : "", # week of year/month. not supported.
-        "D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
-        "F" : "", # day of week in month. not supported.
-        "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
-        "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
-        "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
-        "a" : "AP", # AM/PM
-        "K" : "h", # Hour 0-11
-        "k" : "H", # Hour 1-24
-        "j" : "", # special reserved symbol.
-        "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
-        "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
-        "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
-        "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t"  # timezone
-    }
-    if qt_patterns.has_key(pattern):
-        return qt_patterns[pattern]
-    for r,v in qt_regexps.items():
-        pattern = re.sub(r, v, pattern)
-    return pattern
-
-def convert_date(input):
-    result = ""
-    patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
-    last = ""
-    inquote = 0
-    chars_to_strip = " -"
-    for c in input:
-        if c == "'":
-            inquote = inquote + 1
-        if inquote % 2 == 0:
-            if c in patterns:
-                if not last:
-                    last = c
-                else:
-                    if c in last:
-                        last += c
-                    else:
-                        # pattern changed
-                        converted = _convert_pattern(last)
-                        result += converted
-                        if not converted:
-                            result = result.rstrip(chars_to_strip)
-                        last = c
-                continue
-        if last:
-            # pattern ended
-            converted = _convert_pattern(last)
-            result += converted
-            if not converted:
-                result = result.rstrip(chars_to_strip)
-            last = ""
-        result += c
-    if last:
-        converted = _convert_pattern(last)
-        result += converted
-        if not converted:
-            result = result.rstrip(chars_to_strip)
-    return result.lstrip(chars_to_strip)
+# Copyright (C) 2016 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+class Converter (object):
+    """Conversion between CLDR and Qt datetime formats.
+
+    Keep in sync with qlocale_mac.mm's macToQtFormat().
+    The definitive source of truth is:
+    https://www.unicode.org/reports/tr35/tr35-68/tr35-dates.html#Date_Field_Symbol_Table
+
+    See convert() for explanation of the approach taken. Each method
+    with a single-letter name is used to scan a prefix of a text,
+    presumed to begin with that letter (or one Qt treats as equivalent
+    to it) and returns a pair (Qt format, length), to use the given Qt
+    format in place of text[:length]. In all cases, length must be
+    positive."""
+
+    @staticmethod
+    def __is_reserved(ch):
+        """Every ASCII letter is a reserved symbol in CLDR datetime formats"""
+        assert len(ch) == 1, ch
+        return ch.isascii() and ch.isalpha();
+    @staticmethod
+    def __count_first(text):
+        """How many of text[0] appear at the start of text ?"""
+        assert text
+        return len(text) - len(text.lstrip(text[0]))
+    @classmethod
+    def __verbatim(cls, text):
+        # Used where our format coincides with LDML's, including on length.
+        n = cls.__count_first(text)
+        return text[:n], n
+    @classmethod
+    def __treat_as(cls, mimic, text):
+        # Helper for aliases
+        n = cls.__count_first(text)
+        return mimic * n, n
+
+    # Please follow alphabetic order, with two cases of the same
+    # letter adjacent, lower before upper.
+    @classmethod
+    def a(cls, text): # AM/PM indicator; use locale-appropriate case
+        return 'Ap', cls.__count_first(text)
+
+    # A: Milliseconds in day. Not supported.
+    b = a # AM/PM/noon/midnight
+    B = a # "Flexible day period" (e.g. "at night" / "in the day")
+    # (Only zh_Hant_TW affected; zh_Hant_{HK,MO} use 'ah', mapped to
+    # 'Aph', so do the same here.)
+
+    @classmethod
+    def c(cls, text): # Stand-alone local day of week
+        # Has length-variants for several cases Qt doesn't support, as
+        # do 'e' and 'E': just map all simply to weekday, abbreviated
+        # or full.
+        n = cls.__count_first(text)
+        return ('dddd' if n == 4 else 'ddd'), n
+
+    # C: Input skeleton symbol
+    d = __verbatim # day (of month or of week, depends on length)
+    # D: Day of year. Not supported.
+    e = c # Local day of week
+    E = c # Just plain day of week
+    # F: Day of week in month. Not supported.
+    # g: Modified julian day. Not supported.
+    # G: Era. Not supported.
+    h = __verbatim # Hour 1-12, treat as 0-11
+    H = __verbatim # Hour 0-23
+    # j: Input skeleton symbol
+    # J: Input skeleton symbol
+
+    @classmethod
+    def k(cls, text): # Hour 1-24, treat as 0-23
+        return cls.__treat_as('H', text)
+    @classmethod
+    def K(cls, text): # Hour 0-11
+        return cls.__treat_as('h', text)
+
+    # l: Deprecated Chinese leap month indicator.
+    @classmethod
+    def L(cls, text): # Stand-alone month names: treat as plain month names.
+        n = cls.__count_first(text)
+        # Length five is narrow; treat same as abbreviated; anything
+        # shorter matches Qt's month forms.
+        return ('MMM' if n > 4 else 'M' * n), n
+
+    m = __verbatim # Minute within the hour.
+    M = L # Plain month names, possibly abbreviated, and numbers.
+
+    @classmethod
+    def O(cls, text): # Localized GMT±offset formats. Map to Z-or-UTC±HH:mm
+        return 't', cls.__count_first(text)
+
+    # q: Quarter. Not supported.
+    # Q: Quarter. Not supported.
+
+    s = __verbatim # Seconds within the minute.
+    @classmethod
+    def S(cls, text): # Fractional seconds. Only milliseconds supported.
+        # FIXME: spec is unclear, do we need to include the leading
+        # dot or not ? For now, no known locale actually exercises
+        # this, so stick with what we've done on Darwin since long
+        # before adding support here.
+        n = cls.__count_first(text)
+        return ('z' if n < 3 else 'zzz'), n
+
+    @classmethod
+    def u(cls, text): # Extended year (numeric)
+        # Officially, 'u' is simply the full year number, zero-padded
+        # to the length of the field. Qt's closest to that is four-digit.
+        # It explicitly has no special case for two-digit year.
+        return 'yyyy', cls.__count_first(text)
+
+    # U: Cyclic Year Name. Not supported
+    @classmethod
+    def v(cls, text): # Generic non-location format. Map to name.
+        return 'tttt', cls.__count_first(text)
+
+    V = v # Zone ID in various forms; VV is IANA ID. Map to name.
+    # w: Week of year. Not supported.
+    # W: Week of month. Not supported.
+
+    @classmethod
+    def x(cls, text): # Variations on offset format.
+        n = cls.__count_first(text)
+        # Ignore: n == 1 may omit minutes, n > 3 may include seconds.
+        return ('ttt' if n > 1 and n & 1 else 'tt'), n
+    X = x # Should use Z for zero offset.
+
+    @classmethod
+    def y(cls, text): # Year number.
+        n = cls.__count_first(text)
+        return ('yy' if n == 2 else 'yyyy'), n
+    # Y: Year for Week-of-year calendars
+
+    z = v # Specific (i.e. distinguish standard from DST) non-location format.
+    @classmethod
+    def Z(cls, text): # Offset format, optionally with GMT (Qt uses UTC) prefix.
+        n = cls.__count_first(text)
+        return ('tt' if n < 4 else 'ttt' if n > 4 else 't'), n
+
+    @staticmethod
+    def scanQuote(text): # Can't have ' as a method name, so handle specially
+        assert text.startswith("'")
+        i = text.find("'", 1) # Find the next; -1 if not present.
+        i = len(text) if i < 0 else i + 1 # Include the close-quote.
+        return text[:i], i
+
+    # Now put all of those to use:
+    @classmethod
+    def convert(cls, text):
+        """Convert a CLDR datetime format string into a Qt one.
+
+        Presumes that the caller will ''.join() the fragments it
+        yields. Each sequence of CLDR field symbols that corresponds
+        to a Qt format token is converted to it; all other CLDR field
+        symbols are discarded; the literals in between fields are
+        preserved verbatim, except that space and hyphen separators
+        immediately before a discarded field are discarded with it.
+
+        The approach is to look at the first symbol of the remainder
+        of the text, at each iteration, and use that first symbol to
+        select a function that will identify how much of the text to
+        consume and what to replace it with."""
+        sep = ''
+        while text:
+            ch = text[0]
+            if ch == "'":
+                quoted, length = cls.scanQuote(text)
+                text = text[length:]
+                sep += quoted
+            elif hasattr(cls, ch):
+                qtform, length = getattr(cls, ch)(text)
+                assert qtform and length > 0, (ch, text, qtform, length)
+                text = text[length:]
+                if sep:
+                    yield sep
+                    sep = ''
+                yield qtform
+            elif cls.__is_reserved(ch):
+                text = text[cls.__count_first(text):]
+                # Discard space or dash separator that was only there
+                # for the sake of the unsupported field:
+                sep = sep.rstrip(' -')
+                # TODO: should we also strip [ -]* from text
+                # immediately following unsupported forms ?
+            else:
+                sep += ch
+                text = text[1:]
+        if sep:
+            yield sep
+
+def convert_date(text):
+    # See Converter.convert()
+    return ''.join(Converter.convert(text))
diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py
index 4181e290ac..66b8840cb1 100644
--- a/util/locale_database/enumdata.py
+++ b/util/locale_database/enumdata.py
@@ -1,58 +1,62 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 
-# A run of cldr2qlocalexml.py will produce output reporting any
-# language, script and country codes it sees, in data, for which it
-# can find a name (taken always from en.xml) that could potentially be
-# used. There is no point adding a mapping for such a code unless the
-# CLDR's common/main/ contains an XML file for at least one locale
-# that exerciss it.
+"""Assorted enumerations implicated in public API.
 
-# Each *_list reflects the current values of its enums in qlocale.h;
-# if new xml language files are available in CLDR, these languages and
-# countries need to be *appended* to this list (for compatibility
-# between versions).  Include any spaces present in names (scripts
-# shall squish them out for the enum entries) in *_list, but use the
-# squished forms of names in the *_aliases mappings.
+The numberings of these enumerations can only change at major
+versions. When new CLDR data implies adding entries, the new ones must
+go after all existing ones. See also zonedata.py for enumerations
+related to timezones and CLDR, which can more freely be changed
+between versions.
 
-# For a new major version (and only then), we can change the
-# numbering, so re-sort each list into alphabetic order (e.g. using
-# sort -k2); but keep the Any and C entries first. That's why those
-# are offset with a blank line, below. After doing that, regenerate
-# locale data as usual; this will cause a binary-incompatible change.
+A run of cldr2qlocalexml.py will produce output reporting any
+language, script and territory codes it sees, in data, for which it
+can find a name (taken always from en.xml) that could potentially be
+used. There is no point adding a mapping for such a code unless the
+CLDR's common/main/ contains an XML file for at least one locale that
+exercises it (and little point, even then, absent substantial data,
+ignoring draft='unconfirmed' entries).
 
-# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
-# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
-# languages so closely related to one another that they could also be
-# regarded as divergent dialects of the macrolanguage.
+Each *_map reflects the current values of its enums in qlocale.h; if
+new xml language files are available in CLDR, these languages and
+territories need to be *appended* to this list (for compatibility
+between versions). Include any spaces and dashes present in names
+(they'll be squished out for the enum entries) in *_map, but use the
+squished forms of names in the *_aliases mappings. The squishing also
+turns the first letter of each word into a capital so you can safely
+preserve the case of en.xml's name; but omit (or replace with space)
+any punctuation aside from dashes and map any accented letters to
+their un-accented plain ASCII. The two tables, for each enum, have
+the forms:
+* map { Numeric value: ("Proper name", "ISO code") }
+* alias { "OldName": "CurrentName" }
 
-language_list = {
+TODO: add support for marking entries as deprecated from a specified
+version. For aliases, that merely deprecates the name. Where we have a
+name for which CLDR offers no data, we may also want to deprecate
+entries in the map - although they may be worth keeping for the
+benefit of QLocaleSelector (see QTBUG-112765), if other
+locale-specific resources might have use of them.
+
+For a new major version (and only then), we can change the numbering,
+so re-sort each list into alphabetic order (e.g. using sort -k2); but
+keep the Any and C entries first. That's why those are offset with a
+blank line, below. After doing that, regenerate locale data as usual;
+this will cause a binary-incompatible change.
+
+Note on 'macrolanguage' comments: see QTBUG-107781 and 'ISO 639
+macrolanguage' on Wikipedia. A 'macrolanguage' is (loosely-speaking) a
+group of languages so closely related to one another that they could
+also be regarded as divergent dialects of the macrolanguage.  In some
+cases this may mean a resource (such as translation or text-to-speech
+data) may describe itself as pertaining to the macrolanguage, implying
+its suitability for use in any of the languages within the
+macrolanguage. For example, no_NO might be used for a generic
+Norwegian resource, embracing both nb_NO and nn_NO.
+
+"""
+
+language_map = {
       0: ("AnyLanguage",                 "  "),
       1: ("C",                           "  "),
 
@@ -177,7 +181,7 @@ language_list = {
     120: ("Japanese",                    "ja"),
     121: ("Javanese",                    "jv"),
     122: ("Jju",                         "kaj"),
-    123: ("Jola Fonyi",                  "dyo"),
+    123: ("Jola-Fonyi",                  "dyo"),
     124: ("Kabuverdianu",                "kea"),
     125: ("Kabyle",                      "kab"),
     126: ("Kako",                        "kkj"),
@@ -218,7 +222,7 @@ language_list = {
     161: ("Lojban",                      "jbo"),
     162: ("Lower Sorbian",               "dsb"),
     163: ("Low German",                  "nds"),
-    164: ("Luba Katanga",                "lu"),
+    164: ("Luba-Katanga",                "lu"),
     165: ("Lule Sami",                   "smj"),
     166: ("Luo",                         "luo"),
     167: ("Luxembourgish",               "lb"),
@@ -226,7 +230,7 @@ language_list = {
     169: ("Macedonian",                  "mk"),
     170: ("Machame",                     "jmc"),
     171: ("Maithili",                    "mai"),
-    172: ("Makhuwa Meetto",              "mgh"),
+    172: ("Makhuwa-Meetto",              "mgh"),
     173: ("Makonde",                     "kde"),
     174: ("Malagasy",                    "mg"), # macrolanguage
     175: ("Malayalam",                   "ml"),
@@ -382,7 +386,31 @@ language_list = {
     325: ("Zarma",                       "dje"),
     326: ("Zhuang",                      "za"), # macrolanguage
     327: ("Zulu",                        "zu"),
+    # added in CLDR v40
+    328: ("Kaingang",                    "kgp"),
+    329: ("Nheengatu",                   "yrl"),
+    # added in CLDR v42
+    330: ("Haryanvi",                    "bgc"),
+    331: ("Northern Frisian",            "frr"),
+    332: ("Rajasthani",                  "raj"),
+    333: ("Moksha",                      "mdf"),
+    334: ("Toki Pona",                   "tok"),
+    335: ("Pijin",                       "pis"),
+    336: ("Obolo",                       "ann"),
+    # added in CLDR v43
+    337: ("Baluchi",                     "bal"),
+    338: ("Ligurian",                    "lij"),
+    339: ("Rohingya",                    "rhg"),
+    340: ("Torwali",                     "trw"),
+    # added in CLDR v44
+    341: ("Anii",                        "blo"),
+    342: ("Kangri",                      "xnr"),
+    343: ("Venetian",                    "vec"),
 }
+# Don't add languages just because they exist; check CLDR does provide
+# substantial data for locales using it; and check, once added, they
+# don't show up in cldr2qlocalexml.py's unused listing. Do also check
+# the data's draft status; if it's (nearly) all unconfirmed, leave it.
 
 language_aliases = {
     # Renamings prior to Qt 6.0 (CLDR v37):
@@ -406,9 +434,9 @@ language_aliases = {
     'Navaho': 'Navajo',
     'Oriya': 'Odia',
     'Kirghiz': 'Kyrgyz'
-    }
+}
 
-country_list = {
+territory_map = {
       0: ("AnyTerritory",                                 "ZZ"),
 
       1: ("Afghanistan",                                  "AF"),
@@ -420,7 +448,7 @@ country_list = {
       7: ("Angola",                                       "AO"),
       8: ("Anguilla",                                     "AI"),
       9: ("Antarctica",                                   "AQ"),
-     10: ("Antigua And Barbuda",                          "AG"),
+     10: ("Antigua and Barbuda",                          "AG"),
      11: ("Argentina",                                    "AR"),
      12: ("Armenia",                                      "AM"),
      13: ("Aruba",                                        "AW"),
@@ -439,7 +467,7 @@ country_list = {
      26: ("Bermuda",                                      "BM"),
      27: ("Bhutan",                                       "BT"),
      28: ("Bolivia",                                      "BO"),
-     29: ("Bosnia And Herzegovina",                       "BA"),
+     29: ("Bosnia and Herzegovina",                       "BA"),
      30: ("Botswana",                                     "BW"),
      31: ("Bouvet Island",                                "BV"),
      32: ("Brazil",                                       "BR"),
@@ -457,7 +485,7 @@ country_list = {
      44: ("Caribbean Netherlands",                        "BQ"),
      45: ("Cayman Islands",                               "KY"),
      46: ("Central African Republic",                     "CF"),
-     47: ("Ceuta And Melilla",                            "EA"),
+     47: ("Ceuta and Melilla",                            "EA"),
      48: ("Chad",                                         "TD"),
      49: ("Chile",                                        "CL"),
      50: ("China",                                        "CN"),
@@ -466,8 +494,8 @@ country_list = {
      53: ("Cocos Islands",                                "CC"),
      54: ("Colombia",                                     "CO"),
      55: ("Comoros",                                      "KM"),
-     56: ("Congo Brazzaville",                            "CG"),
-     57: ("Congo Kinshasa",                               "CD"),
+     56: ("Congo - Brazzaville",                          "CG"),
+     57: ("Congo - Kinshasa",                             "CD"),
      58: ("Cook Islands",                                 "CK"),
      59: ("Costa Rica",                                   "CR"),
      60: ("Croatia",                                      "HR"),
@@ -511,11 +539,11 @@ country_list = {
      98: ("Guam",                                         "GU"),
      99: ("Guatemala",                                    "GT"),
     100: ("Guernsey",                                     "GG"),
-    101: ("Guinea Bissau",                                "GW"),
+    101: ("Guinea-Bissau",                                "GW"),
     102: ("Guinea",                                       "GN"),
     103: ("Guyana",                                       "GY"),
     104: ("Haiti",                                        "HT"),
-    105: ("Heard And McDonald Islands",                   "HM"),
+    105: ("Heard and McDonald Islands",                   "HM"),
     106: ("Honduras",                                     "HN"),
     107: ("Hong Kong",                                    "HK"),
     108: ("Hungary",                                      "HU"),
@@ -525,12 +553,12 @@ country_list = {
     112: ("Iran",                                         "IR"),
     113: ("Iraq",                                         "IQ"),
     114: ("Ireland",                                      "IE"),
-    115: ("Isle Of Man",                                  "IM"),
+    115: ("Isle of Man",                                  "IM"),
     116: ("Israel",                                       "IL"),
     117: ("Italy",                                        "IT"),
-      # Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
-      # or CoteDIvoire, either failing to make the d' separate from
-      # Cote or messing with its case. So stick with Ivory Coast:
+    # Officially Côte d’Ivoire, which we'd need to map to CotedIvoire
+    # or CoteDIvoire, either failing to make the d' separate from Cote
+    # or messing with its case. So stick with Ivory Coast:
     118: ("Ivory Coast",                                  "CI"),
     119: ("Jamaica",                                      "JM"),
     120: ("Japan",                                        "JP"),
@@ -610,14 +638,14 @@ country_list = {
     194: ("Rwanda",                                       "RW"),
     195: ("Saint Barthelemy",                             "BL"),
     196: ("Saint Helena",                                 "SH"),
-    197: ("Saint Kitts And Nevis",                        "KN"),
+    197: ("Saint Kitts and Nevis",                        "KN"),
     198: ("Saint Lucia",                                  "LC"),
     199: ("Saint Martin",                                 "MF"),
-    200: ("Saint Pierre And Miquelon",                    "PM"),
-    201: ("Saint Vincent And Grenadines",                 "VC"),
+    200: ("Saint Pierre and Miquelon",                    "PM"),
+    201: ("Saint Vincent and Grenadines",                 "VC"),
     202: ("Samoa",                                        "WS"),
     203: ("San Marino",                                   "SM"),
-    204: ("Sao Tome And Principe",                        "ST"),
+    204: ("Sao Tome and Principe",                        "ST"),
     205: ("Saudi Arabia",                                 "SA"),
     206: ("Senegal",                                      "SN"),
     207: ("Serbia",                                       "RS"),
@@ -630,14 +658,14 @@ country_list = {
     214: ("Solomon Islands",                              "SB"),
     215: ("Somalia",                                      "SO"),
     216: ("South Africa",                                 "ZA"),
-    217: ("South Georgia And South Sandwich Islands",     "GS"),
+    217: ("South Georgia and South Sandwich Islands",     "GS"),
     218: ("South Korea",                                  "KR"),
     219: ("South Sudan",                                  "SS"),
     220: ("Spain",                                        "ES"),
     221: ("Sri Lanka",                                    "LK"),
     222: ("Sudan",                                        "SD"),
     223: ("Suriname",                                     "SR"),
-    224: ("Svalbard And Jan Mayen",                       "SJ"),
+    224: ("Svalbard and Jan Mayen",                       "SJ"),
     225: ("Sweden",                                       "SE"),
     226: ("Switzerland",                                  "CH"),
     227: ("Syria",                                        "SY"),
@@ -649,12 +677,12 @@ country_list = {
     233: ("Togo",                                         "TG"),
     234: ("Tokelau",                                      "TK"),
     235: ("Tonga",                                        "TO"),
-    236: ("Trinidad And Tobago",                          "TT"),
-    237: ("Tristan Da Cunha",                             "TA"),
+    236: ("Trinidad and Tobago",                          "TT"),
+    237: ("Tristan da Cunha",                             "TA"),
     238: ("Tunisia",                                      "TN"),
     239: ("Turkey",                                       "TR"),
     240: ("Turkmenistan",                                 "TM"),
-    241: ("Turks And Caicos Islands",                     "TC"),
+    241: ("Turks and Caicos Islands",                     "TC"),
     242: ("Tuvalu",                                       "TV"),
     243: ("Uganda",                                       "UG"),
     244: ("Ukraine",                                      "UA"),
@@ -669,15 +697,15 @@ country_list = {
     253: ("Vatican City",                                 "VA"),
     254: ("Venezuela",                                    "VE"),
     255: ("Vietnam",                                      "VN"),
-    256: ("Wallis And Futuna",                            "WF"),
+    256: ("Wallis and Futuna",                            "WF"),
     257: ("Western Sahara",                               "EH"),
-    258: ("World",                                        "001"),
+    258: ("world",                                        "001"),
     259: ("Yemen",                                        "YE"),
     260: ("Zambia",                                       "ZM"),
     261: ("Zimbabwe",                                     "ZW"),
 }
 
-country_aliases = {
+territory_aliases = {
     # Renamings prior to Qt 6.0 (CLDR v37):
     'DemocraticRepublicOfCongo': 'CongoKinshasa',
     'PeoplesRepublicOfCongo': 'CongoBrazzaville',
@@ -708,7 +736,7 @@ country_aliases = {
     'TuvaluCountry': 'TuvaluTerritory',
 }
 
-script_list = {
+script_map = {
       0: ("AnyScript",              "Zzzz"),
 
       1: ("Adlam",                  "Adlm"),
@@ -741,7 +769,7 @@ script_list = {
      28: ("Deseret",                "Dsrt"),
      29: ("Devanagari",             "Deva"),
      30: ("Duployan",               "Dupl"),
-     31: ("Egyptian Hieroglyphs",   "Egyp"),
+     31: ("Egyptian hieroglyphs",   "Egyp"),
      32: ("Elbasan",                "Elba"),
      33: ("Ethiopic",               "Ethi"),
      34: ("Fraser",                 "Lisu"),
@@ -816,7 +844,7 @@ script_list = {
     103: ("Pahawh Hmong",           "Hmng"),
     104: ("Palmyrene",              "Palm"),
     105: ("Pau Cin Hau",            "Pauc"),
-    106: ("Phags Pa",               "Phag"),
+    106: ("Phags-pa",               "Phag"),
     107: ("Phoenician",             "Phnx"),
     108: ("Pollard Phonetic",       "Plrd"),
     109: ("Psalter Pahlavi",        "Phlp"),
@@ -827,7 +855,7 @@ script_list = {
     114: ("Sharada",                "Shrd"),
     115: ("Shavian",                "Shaw"),
     116: ("Siddham",                "Sidd"),
-    117: ("Sign Writing",           "Sgnw"),
+    117: ("SignWriting",            "Sgnw"), # Oddly, en.xml leaves no space in it.
     118: ("Simplified Han",         "Hans"),
     119: ("Sinhala",                "Sinh"),
     120: ("Sora Sompeng",           "Sora"),
@@ -852,6 +880,8 @@ script_list = {
     139: ("Vai",                    "Vaii"),
     140: ("Varang Kshiti",          "Wara"),
     141: ("Yi",                     "Yiii"),
+    # Added at CLDR v43
+    142: ("Hanifi",                 "Rohg"), # Used for Rohingya
 }
 
 script_aliases = {
@@ -863,27 +893,3 @@ script_aliases = {
     'MendeKikakuiScript': 'MendeScript',
     'BengaliScript': 'BanglaScript',
 }
-
-def countryCodeToId(code):
-    if not code:
-        return 0
-    for country_id in country_list:
-        if country_list[country_id][1] == code:
-            return country_id
-    return -1
-
-def languageCodeToId(code):
-    if not code:
-        return 0
-    for language_id in language_list:
-        if language_list[language_id][1] == code:
-            return language_id
-    return -1
-
-def scriptCodeToId(code):
-    if not code:
-        return 0
-    for script_id in script_list:
-        if script_list[script_id][1] == code:
-            return script_id
-    return -1
diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py
new file mode 100644
index 0000000000..0d23065cf9
--- /dev/null
+++ b/util/locale_database/iso639_3.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+
+@dataclass
+class LanguageCodeEntry:
+    part3Code: str
+    part2BCode: Optional[str]
+    part2TCode: Optional[str]
+    part1Code: Optional[str]
+
+    def id(self) -> str:
+        if self.part1Code:
+            return self.part1Code
+        if self.part2BCode:
+            return self.part2BCode
+        return self.part3Code
+
+    def __repr__(self) -> str:
+        parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}']
+        if self.part2BCode is not None and self.part2BCode != self.part3Code:
+            parts.append(f', part2BCode={self.part2BCode!r}')
+            if self.part2TCode != self.part2BCode:
+                parts.append(f', part2TCode={self.part2TCode!r}')
+        if self.part1Code is not None:
+            parts.append(f', part1Code={self.part1Code!r}')
+        parts.append(')')
+        return ''.join(parts)
+
+
+class LanguageCodeData:
+    """
+    Representation of ISO 639-3 language code data.
+    """
+    def __init__(self, fileName: str):
+        """
+        Construct the object populating the data from the given file.
+        """
+        self.__codeMap: Dict[str, LanguageCodeEntry] = {}
+
+        with open(fileName, 'r', encoding='utf-8') as stream:
+            stream.readline() # skip the header
+            for line in stream.readlines():
+                part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
+
+                # sanity checks
+                assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
+                    f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
+                        f'{part2TCode!r} {part1Code!r}'
+
+                assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
+                assert not part1Code or len(part1Code) == 2, \
+                    f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
+                assert not part2BCode or len(part2BCode) == 3, \
+                    f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
+                assert not part2TCode or len(part2TCode) == 3, \
+                    f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'
+
+                assert (part2BCode == '') == (part2TCode == ''), \
+                    f'Only one Part 2 code is specified for {part3Code!r}: ' \
+                    f'{part2BCode!r} vs {part2TCode!r}'
+                assert not part2TCode or part2TCode == part3Code, \
+                    f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'
+
+                entry = LanguageCodeEntry(part3Code, part2BCode or None,
+                    part2TCode or None, part1Code or None)
+
+                self.__codeMap[entry.id()] = entry
+
+    def query(self, code: str) -> Optional[LanguageCodeEntry]:
+        """
+        Lookup the entry with the given code and return it.
+
+        Each entry is indexed only by its id(): the Part 1 (Alpha2) code if it has one,
+        else the Part 2B (bibliographical Alpha3) code, else the Part 3 code.
+        """
+        return self.__codeMap.get(code)
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index 110e5b7573..b94c242172 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -1,30 +1,5 @@
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2020 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Parsing the Locale Data Markup Language
 
 It's an XML format, so the raw parsing of XML is, of course, delegated
@@ -46,6 +21,13 @@ See individual classes for further detail.
 from localetools import Error
 from dateconverter import convert_date
 
+# The github version of CLDR uses '↑↑↑' to indicate "inherit"
+INHERIT = '↑↑↑'
+
+def _attrsFromDom(dom):
+    return { k: (v if isinstance(v, str) else v.nodeValue)
+             for k, v in dom.attributes.items() }
+
 class Node (object):
     """Wrapper for an arbitrary DOM node.
 
@@ -75,6 +57,9 @@ class Node (object):
         else:
             self.draft = max(draft, self.draftScore(attr))
 
+    def attributes(self):
+        return _attrsFromDom(self.dom)
+
     def findAllChildren(self, tag, wanted = None, allDull = False):
         """All children that do have the given tag and attributes.
 
@@ -124,7 +109,7 @@ class Node (object):
         one."""
         seq = self.findAllChildren(tag)
         try:
-            node = seq.next()
+            node = next(seq)
         except StopIteration:
             raise Error('No child found where one was expected', tag)
         for it in seq:
@@ -191,17 +176,35 @@ class XmlScanner (object):
         return elts
 
 class Supplement (XmlScanner):
-    def find(self, xpath):
+    def find(self, xpath, exclude=()):
+        """Finds nodes by matching a specified xpath.
+
+        If exclude is passed, it should be a sequence of attribute names (its
+        default is empty). Any matches to the given xpath that also have any
+        attribute in this sequence will be excluded.
+
+        For each childless node matching the xpath, or child of a node matching
+        the xpath, this yields a pair (name, attrs) where name is the
+        nodeName and attrs is a dict mapping the node's attributes' names to
+        their values. For attribute values that are not simple strings, the
+        nodeValue of the attribute node is used."""
         elts = self.findNodes(xpath)
-        for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,)
-                                for e in elts):
+        for elt in _iterateEach(e.dom.childNodes or (e.dom,)
+                                for e in elts
+                                if not any(a in e.dom.attributes
+                                           for a in exclude)):
             if elt.attributes:
-                yield (elt.nodeName,
-                       dict((k, v if isinstance(v, basestring) else v.nodeValue)
-                            for k, v in elt.attributes.items()))
+                yield elt.nodeName, _attrsFromDom(elt)
 
 class LocaleScanner (object):
     def __init__(self, name, nodes, root):
+        """Set up to scan data for a specified locale.
+
+        First parameter is the name of the locale; it will be used in
+        error messages. Second is a tuple of DOM root-nodes of files
+        with locale data, later ones serving as fall-backs for data
+        missing in earlier ones. Third parameter is the root locale's
+        DOM node."""
         self.name, self.nodes, self.base = name, nodes, root
 
     def find(self, xpath, default = None, draft = None):
@@ -227,7 +230,7 @@ class LocaleScanner (object):
     def tagCodes(self):
         """Yields four tag codes
 
-        The tag codes are language, script, country and variant; an
+        The tag codes are language, script, territory and variant; an
         empty value for any of them indicates that no value was
         provided.  The values are obtained from the primary file's
         top-level <identity> element.  An Error is raised if any
@@ -241,7 +244,7 @@ class LocaleScanner (object):
             except (KeyError, AttributeError):
                 pass
             else:
-                raise Error('Alias to {}'.format(source))
+                raise Error(f'Alias to {source}')
 
         ids = root.findUniqueChild('identity')
         for code in ('language', 'script', 'territory', 'variant'):
@@ -259,12 +262,12 @@ class LocaleScanner (object):
         """Fetches currency data for this locale.
 
         Single argument, isoCode, is the ISO currency code for the
-        currency in use in the country. See also numericData, which
+        currency in use in the territory. See also numericData, which
         includes some currency formats.
         """
         if isoCode:
-            stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
-            symbol = self.find(stem + 'symbol', '')
+            stem = f'numbers/currencies/currency[{isoCode}]/'
+            symbol = self.find(f'{stem}symbol', '')
             name = self.__currencyDisplayName(stem)
         else:
             symbol = name = ''
@@ -276,31 +279,38 @@ class LocaleScanner (object):
 
         First argument, lookup, is a callable that maps a numbering
         system's name to certain data about the system, as a mapping;
-        we expect this to have u'digits' as a key.
+        we expect this to have 'digits' as a key.
         """
         system = self.find('numbers/defaultNumberingSystem')
-        stem = 'numbers/symbols[numberSystem={}]/'.format(system)
-        decimal = self.find(stem + 'decimal')
-        group = self.find(stem + 'group')
-        assert decimal != group, (self.name, system, decimal)
+        stem = f'numbers/symbols[numberSystem={system}]/'
+        decimal = self.find(f'{stem}decimal')
+        group = self.find(f'{stem}group')
+        if decimal == group:
+            # Seen for mn_Mong_MN at v43 :-( Retry, asking for the 'approved' draft score:
+            clean = Node.draftScore('approved')
+            decimal = self.find(f'{stem}decimal', draft=clean)
+            group = self.find(f'{stem}group', draft=clean)
+            assert decimal != group, (self.name, system, decimal)
+
         yield 'decimal', decimal
         yield 'group', group
-        yield 'percent', self.find(stem + 'percentSign')
-        yield 'list', self.find(stem + 'list')
-        yield 'exp', self.find(stem + 'exponential')
+        yield 'percent', self.find(f'{stem}percentSign')
+        yield 'list', self.find(f'{stem}list')
+        yield 'exp', self.find(f'{stem}exponential')
         yield 'groupSizes', self.__numberGrouping(system)
 
         digits = lookup(system)['digits']
         assert len(digits) == 10
         zero = digits[0]
         # Qt's number-formatting code assumes digits are consecutive
-        # (except Suzhou, CLDR's hanidec - see QTBUG-85409):
+        # (except Suzhou - see QTBUG-85409 - which shares its zero
+        # with CLDR's very-non-contiguous hanidec):
         assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero))
                    for i, c in enumerate(digits[1:], 1))
         yield 'zero', zero
 
-        plus = self.find(stem + 'plusSign')
-        minus = self.find(stem + 'minusSign')
+        plus = self.find(f'{stem}plusSign')
+        minus = self.find(f'{stem}minusSign')
         yield 'plus', plus
         yield 'minus', minus
 
@@ -308,11 +318,11 @@ class LocaleScanner (object):
         xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[accounting]/pattern'
         try:
             money = self.find(xpath.replace('Formats/',
-                                            'Formats[numberSystem={}]/'.format(system)))
+                                            f'Formats[numberSystem={system}]/'))
         except Error:
             money = self.find(xpath)
         money = self.__currencyFormats(money, plus, minus)
-        yield 'currencyFormat', money.next()
+        yield 'currencyFormat', next(money)
         neg = ''
         for it in money:
             assert not neg, 'There should be at most one more pattern'
@@ -322,12 +332,12 @@ class LocaleScanner (object):
     def textPatternData(self):
         for key in ('quotationStart', 'alternateQuotationEnd',
                     'quotationEnd', 'alternateQuotationStart'):
-            yield key, self.find('delimiters/' + key)
+            yield key, self.find(f'delimiters/{key}')
 
         for key in ('start', 'middle', 'end'):
-            yield ('listPatternPart' + key.capitalize(),
+            yield (f'listPatternPart{key.capitalize()}',
                    self.__fromLdmlListPattern(self.find(
-                        'listPatterns/listPattern/listPatternPart[{}]'.format(key))))
+                        f'listPatterns/listPattern/listPatternPart[{key}]')))
         yield ('listPatternPartTwo',
                self.__fromLdmlListPattern(self.find(
                     'listPatterns/listPattern/listPatternPart[2]')))
@@ -335,28 +345,26 @@ class LocaleScanner (object):
         stem = 'dates/calendars/calendar[gregorian]/'
         # TODO: is wide really the right width to use here ?
         # abbreviated might be an option ... or try both ?
-        meridiem = stem + 'dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
+        meridiem = f'{stem}dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/'
         for key in ('am', 'pm'):
-            yield key, self.find(meridiem + 'dayPeriod[{}]'.format(key),
+            yield key, self.find(f'{meridiem}dayPeriod[{key}]',
                                  draft = Node.draftScore('contributed'))
 
         for pair in (('long', 'full'), ('short', 'short')):
             for key in ('time', 'date'):
-                yield (pair[0] + key.capitalize() + 'Format',
+                yield (f'{pair[0]}{key.capitalize()}Format',
                        convert_date(self.find(
-                            stem + '{}Formats/{}FormatLength[{}]/{}Format/pattern'.format(
-                                key, key, pair[1], key))))
+                            f'{stem}{key}Formats/{key}FormatLength[{pair[1]}]/{key}Format/pattern')))
 
-    def endonyms(self, language, script, country, variant):
+    def endonyms(self, language, script, territory, variant):
         # TODO: take variant into account ?
-        for seq in ((language, script, country),
-                    (language, script), (language, country), (language,)):
+        for seq in ((language, script, territory),
+                    (language, script), (language, territory), (language,)):
             if not all(seq):
                 continue
             try:
                 yield ('languageEndonym',
-                       self.find('localeDisplayNames/languages/language[{}]'
-                                 .format('_'.join(seq))))
+                       self.find(f'localeDisplayNames/languages/language[{"_".join(seq)}]'))
             except Error:
                 pass
             else:
@@ -365,9 +373,8 @@ class LocaleScanner (object):
             # grumble(failed to find endonym for language)
             yield 'languageEndonym', ''
 
-        yield ('countryEndonym',
-               self.find('localeDisplayNames/territories/territory[{}]'
-                         .format(country), ''))
+        yield ('territoryEndonym',
+               self.find(f'localeDisplayNames/territories/territory[{territory}]', ''))
 
     def unitData(self):
         yield ('byte_unit',
@@ -386,20 +393,20 @@ class LocaleScanner (object):
     def calendarNames(self, calendars):
         namings = self.__nameForms
         for cal in calendars:
-            stem = 'dates/calendars/calendar[' + cal + ']/months/'
+            stem = f'dates/calendars/calendar[{cal}]/months/'
             for key, mode, size in namings:
-                prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/'
-                yield (key + 'Months_' + cal,
-                       ';'.join(self.find(stem + prop + 'month[{}]'.format(i))
+                prop = f'monthContext[{mode}]/monthWidth[{size}]/'
+                yield (f'{key}Months_{cal}',
+                       ';'.join(self.find(f'{stem}{prop}month[{i}]')
                                 for i in range(1, 13)))
 
         # Day data (for Gregorian, at least):
         stem = 'dates/calendars/calendar[gregorian]/days/'
         days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
         for (key, mode, size) in namings:
-            prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day'
-            yield (key + 'Days',
-                   ';'.join(self.find(stem + prop + '[' + day + ']')
+            prop = f'dayContext[{mode}]/dayWidth[{size}]/day'
+            yield (f'{key}Days',
+                   ';'.join(self.find(f'{stem}{prop}[{day}]')
                             for day in days))
 
     # Implementation details
@@ -410,10 +417,10 @@ class LocaleScanner (object):
         ('long', 'format', 'wide'),
         ('short', 'format', 'abbreviated'),
         ('narrow', 'format', 'narrow'),
-        ) # Used for month and day names
+    ) # Used for month and day names
 
     def __find(self, xpath):
-        retries = [ xpath.split('/') ]
+        retries, foundNone = [ xpath.split('/') ], True
         while retries:
             tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
             for selector in tags:
@@ -423,6 +430,9 @@ class LocaleScanner (object):
                     break
 
             else: # Found matching elements
+                elts = tuple(self.__skipInheritors(elts))
+                if elts:
+                    foundNone = False
                 # Possibly filter elts to prefer the least drafty ?
                 for elt in elts:
                     yield elt
@@ -442,29 +452,42 @@ class LocaleScanner (object):
                 if not roots:
                     if retries: # Let outer loop fall back on an alias path:
                         break
-                    sought = '/'.join(tags)
-                    if sought != xpath:
-                        sought += ' (for {})'.format(xpath)
-                    raise Error('All lack child {} for {} in {}'.format(
-                            selector, sought, self.name))
+                    if foundNone:
+                        sought = '/'.join(tags)
+                        if sought != xpath:
+                            sought += f' (for {xpath})'
+                        raise Error(f'All lack child {selector} for {sought} in {self.name}')
 
             else: # Found matching elements
+                roots = tuple(self.__skipInheritors(roots))
+                if roots:
+                    foundNone = False
                 for elt in roots:
                     yield elt
 
-        sought = '/'.join(tags)
-        if sought != xpath:
-            sought += ' (for {})'.format(xpath)
-        raise Error('No {} in {}'.format(sought, self.name))
+        if foundNone:
+            sought = '/'.join(tags)
+            if sought != xpath:
+                sought += f' (for {xpath})'
+            raise Error(f'No {sought} in {self.name}')
+
+    @staticmethod
+    def __skipInheritors(elts):
+        for elt in elts:
+            try:
+                if elt.dom.firstChild.nodeValue != INHERIT:
+                    yield elt
+            except (AttributeError, KeyError):
+                yield elt
 
     def __currencyDisplayName(self, stem):
         try:
             return self.find(stem + 'displayName')
         except Error:
             pass
-        for x in  ('zero', 'one', 'two', 'few', 'many', 'other'):
+        for x in ('zero', 'one', 'two', 'few', 'many', 'other'):
             try:
-                return self.find(stem + 'displayName[count={}]'.format(x))
+                return self.find(f'{stem}displayName[count={x}]')
             except Error:
                 pass
         return ''
@@ -474,10 +497,10 @@ class LocaleScanner (object):
         # (even for unitLength[narrow]) instead of kB (etc.), so
         # prefer any unitPattern provided, but prune its placeholder:
         for size in ('short', 'narrow'): # TODO: reverse order ?
-            stem = 'units/unitLength[{}]/unit[digital-{}byte]/'.format(size + keySuffix, quantify)
+            stem = f'units/unitLength[{size}{keySuffix}]/unit[digital-{quantify}byte]/'
             for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
                 try:
-                    ans = self.find(stem + 'unitPattern[count={}]'.format(count))
+                    ans = self.find(f'{stem}unitPattern[count={count}]')
                 except Error:
                     continue
 
@@ -490,7 +513,7 @@ class LocaleScanner (object):
                     return ans
 
             try:
-                return self.find(stem + 'displayName')
+                return self.find(f'{stem}displayName')
             except Error:
                 pass
 
@@ -518,10 +541,10 @@ class LocaleScanner (object):
             if cache:
                 byte = cache.pop()
                 if all(byte == k for k in cache):
-                    suffix = 'i' + byte
+                    suffix = f'i{byte}'
             for q in siQuantifiers:
                 # Those don't (yet, v36) exist in CLDR, so we always get the fall-back:
-                yield self.__findUnit(keySuffix, q[:2], q[0].upper() + suffix)
+                yield self.__findUnit(keySuffix, q[:2], f'{q[0].upper()}{suffix}')
         else: # first call
             tail = suffix = suffix or 'B'
             for q in siQuantifiers:
@@ -556,8 +579,8 @@ class LocaleScanner (object):
         elsewhere)."""
         top = int(self.find('numbers/minimumGroupingDigits'))
         assert top < 4, top # We store it in a 2-bit field
-        grouping = self.find('numbers/decimalFormats[numberSystem='
-                             + system + ']/decimalFormatLength/decimalFormat/pattern')
+        grouping = self.find(f'numbers/decimalFormats[numberSystem={system}]/'
+                             'decimalFormatLength/decimalFormat/pattern')
         groups = grouping.split('.')[0].split(',')[-3:]
         assert all(len(x) < 8 for x in groups[-2:]), grouping # we store them in 3-bit fields
         if len(groups) > 2:
@@ -580,7 +603,7 @@ class LocaleScanner (object):
             # According to http://www.unicode.org/reports/tr35/#Number_Format_Patterns
             # there can be doubled or trippled currency sign, however none of the
             # locales use that.
-            p = p.replace(u'\xa4', "%2")
+            p = p.replace('\xa4', "%2")
             # Single quote goes away, but double goes to single:
             p = p.replace("''", '###').replace("'", '').replace('###', "'")
             # Use number system's signs:
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index 29153366b3..02ec7cafc7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -1,30 +1,5 @@
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2020 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Utilities shared among the CLDR extraction tools.
 
 Functions:
@@ -37,13 +12,16 @@ Classes:
   SourceFileEditor -- adds standard prelude and tail handling to Transcriber.
 """
 
-import os
-import tempfile
+from contextlib import ExitStack, contextmanager
+from pathlib import Path
+from tempfile import NamedTemporaryFile
 
-class Error (StandardError):
-    __upinit = StandardError.__init__
+qtbase_root = Path(__file__).parents[2]
+assert qtbase_root.name == 'qtbase'
+
+class Error (Exception):
     def __init__(self, msg, *args):
-        self.__upinit(msg, *args)
+        super().__init__(msg, *args)
         self.message = msg
     def __str__(self):
         return self.message
@@ -63,48 +41,141 @@ def unicode2hex(s):
             lst.append(hex(v))
     return lst
 
-def wrap_list(lst):
+def wrap_list(lst, perline=20):
     def split(lst, size):
         while lst:
             head, lst = lst[:size], lst[size:]
             yield head
-    return ",\n".join(", ".join(x) for x in split(lst, 20))
-
-class Transcriber (object):
-    """Helper class to facilitate rewriting source files.
-
-    This class takes care of the temporary file manipulation. Derived
-    classes need to implement transcribing of the content, with
+    return ",\n".join(", ".join(x) for x in split(lst, perline))
+
+def names_clash(cldr, enum):
+    """True if the reader might not recognize cldr as the name of enum
+
+    First argument, cldr, is the name CLDR gives for some language,
+    script or territory; second, enum, is the name enumdata.py gives
+    for it. If these are enough alike, returns None; otherwise, a
+    non-empty string that results from adapting cldr to be more like
+    how enumdata.py would express it."""
+    if cldr == enum:
+        return None
+
+    # Some common substitutions:
+    cldr = cldr.replace('&', 'And')
+    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+    for k, v in prefix.items():
+        if cldr.startswith(k + ' '):
+            cldr = v + cldr[len(k):]
+
+    # Chop out any parenthesised part, e.g. (Burma):
+    while '(' in cldr:
+        f, t = cldr.index('('), cldr.rfind(')')
+        if t < f:
+            # No ')' after the first '(': stop, or we'd loop for ever.
+            break
+        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+    # Various accented letters:
+    remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+    skip = '\u02bc' # Punctuation for which .isalpha() is true.
+    # Let cldr match (ignoring non-letters and case) any substring as enum:
+    if ''.join(enum.lower().split()) in ''.join(
+            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+        return None
+    return cldr
+
+
+@contextmanager
+def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):
+    """Context manager for safe file update via a temporary file.
+
+    Accepts path to the file to be updated. Yields a temporary file (created
+    in dir, its name starting with prefix) to the user code, open for writing.
+
+    On success closes the temporary file and moves it over the original
+    location. On any error or interrupt, removes it and leaves the original.
+    """
+    tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False)
+    try:
+        yield tempFile
+        tempFile.close()
+        # replace(), unlike rename(), overwrites an existing target on Windows too
+        Path(tempFile.name).replace(originalLocation)
+    except BaseException:
+        # Clean up even on KeyboardInterrupt or SystemExit, then re-raise
+        tempFile.close()
+        Path(tempFile.name).unlink()
+        raise
+
+
+class Transcriber:
+    """Context manager base-class to manage source file rewrites.
+
+    Derived classes need to implement transcribing of the content, with
     whatever modifications they may want.  Members reader and writer
     are exposed; use writer.write() to output to the new file; use
     reader.readline() or iterate reader to read the original.
 
-    Callers should call close() on success or cleanup() on failure (to
-    clear away the temporary file).
+    This class is intended to be used as context manager only (inside a
+    `with` statement).
+
+    Reimplement onEnter() to write any preamble the file may have,
+    onExit() to write any tail. The body of the with statement takes
+    care of anything in between, using methods provided by derived classes.
+
+    The data is written to a temporary file first. The temporary file data
+    is then moved to the original location if there were no errors. Otherwise
+    the temporary file is removed and the original is left unchanged.
     """
-    def __init__(self, path, temp):
-        # Open the old file
-        self.reader = open(path)
-        # Create a temp file to write the new data into
-        temp, tempPath = tempfile.mkstemp(os.path.split(path)[1], dir = temp)
-        self.__names = path, tempPath
-        self.writer = os.fdopen(temp, "w")
-
-    def close(self):
-        self.reader.close()
-        self.writer.close()
-        self.reader = self.writer = None
-        source, temp = self.__names
-        os.remove(source)
-        os.rename(temp, source)
-
-    def cleanup(self):
-        if self.__names:
-            self.reader.close()
-            self.writer.close()
-            # Remove temp-file:
-            os.remove(self.__names[1])
-            self.__names = ()
+    def __init__(self, path: Path, temp_dir: Path):
+        self.path = path
+        self.tempDir = temp_dir
+
+    def onEnter(self) -> None:
+        """
+        Called before transferring control to user code.
+
+        This function can be overridden in derived classes to perform actions
+        before transferring control to the user code.
+
+        The default implementation does nothing.
+        """
+        pass
+
+    def onExit(self) -> None:
+        """
+        Called after return from user code.
+
+        This function can be overridden in derived classes to perform actions
+        after successful return from user code.
+
+        The default implementation does nothing.
+        """
+        pass
+
+    def __enter__(self):
+        with ExitStack() as resources:
+            # Create a temp file to write the new data into
+            self.writer = resources.enter_context(
+                AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir))
+            # Open the old file
+            self.reader = resources.enter_context(open(self.path))
+
+            self.onEnter()
+
+            # Prevent resources from being closed on normal return from this
+            # method and make them available inside __exit__():
+            self.__resources = resources.pop_all()
+            return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is None: # Body succeeded: finish the file, then release
+            with self.__resources: # ... seeing any error onExit() raises
+                self.onExit()
+        else: # Body failed: hand its exception to the held resources
+            self.__resources.__exit__(exc_type, exc_value, traceback)
+
+        return False # Never suppress the with-body's exception
+
 
 class SourceFileEditor (Transcriber):
     """Transcriber with transcription of code around a gnerated block.
@@ -117,43 +188,27 @@ class SourceFileEditor (Transcriber):
     the new version to replace it.
 
     This class takes care of transcribing the parts before and after
-    the generated content; on creation, an instance will copy the
-    preamble up to the start marker; its close() will skip over the
-    original's generated content and resume transcribing with the end
-    marker. Derived classes need only implement the generation of the
-    content in between.
-
-    Callers should call close() on success or cleanup() on failure (to
-    clear away the temporary file); see Transcriber.
+    the generated content; on entering the context, an instance will
+    copy the preamble up to the start marker; on exit from the context
+    it will skip over the original's generated content and resume
+    transcribing with the end marker.
+
+    This class is only intended to be used as a context manager:
+    see Transcriber. Derived classes implement suitable methods for use in
+    the body of the with statement, using self.writer to rewrite the part
+    of the file between the start and end markers.
     """
-    __upinit = Transcriber.__init__
-    def __init__(self, path, temp):
-        """Set up the source file editor.
-
-        Requires two arguments: the path to the source file to be read
-        and, on success, replaced with a new version; and the
-        directory in which to store the temporary file during the
-        rewrite."""
-        self.__upinit(path, temp)
-        self.__copyPrelude()
-
-    __upclose = Transcriber.close
-    def close(self):
-        self.__copyTail()
-        self.__upclose()
-
-    # Implementation details:
     GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE'
     GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE'
 
-    def __copyPrelude(self):
+    def onEnter(self) -> None:
         # Copy over the first non-generated section to the new file
         for line in self.reader:
             self.writer.write(line)
             if line.strip() == self.GENERATED_BLOCK_START:
                 break
 
-    def __copyTail(self):
+    def onExit(self) -> None:
         # Skip through the old generated data in the old file
         for line in self.reader:
             if line.strip() == self.GENERATED_BLOCK_END:
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 4fcfe32a43..5cb56c2165 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -1,31 +1,5 @@
-# coding=utf8
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Shared serialization-scanning code for QLocaleXML format.
 
 Provides classes:
@@ -35,15 +9,23 @@ Provides classes:
 
 Support:
   Spacer -- provides control over indentation of the output.
+
+RelaxNG schema for the used file format can be found in qlocalexml.rnc.
+QLocaleXML files can be validated using:
+
+    jing -c qlocalexml.rnc <file.xml>
+
+You can download jing from https://relaxng.org/jclark/jing.html if your
+package manager lacks the jing package.
 """
-from __future__ import print_function
+
 from xml.sax.saxutils import escape
 
 from localetools import Error
 
 # Tools used by Locale:
 def camel(seq):
-    yield seq.next()
+    yield next(seq)
     for word in seq:
         yield word.capitalize()
 
@@ -51,88 +33,39 @@ def camelCase(words):
     return ''.join(camel(iter(words)))
 
 def addEscapes(s):
-    return ''.join(c if n < 128 else '\\x{:02x}'.format(n)
+    return ''.join(c if n < 128 else f'\\x{n:02x}'
                    for n, c in ((ord(c), c) for c in s))
 
 def startCount(c, text): # strspn
     """First index in text where it doesn't have a character in c"""
     assert text and text[0] in c
     try:
-        return (j for j, d in enumerate(text) if d not in c).next()
+        return next((j for j, d in enumerate(text) if d not in c))
     except StopIteration:
         return len(text)
 
-def convertFormat(format):
-    """Convert date/time format-specier from CLDR to Qt
-
-    Match up (as best we can) the differences between:
-    * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
-    * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
-    """
-    # Compare and contrast dateconverter.py's convert_date().
-    # Need to (check consistency and) reduce redundancy !
-    result = ""
-    i = 0
-    while i < len(format):
-        if format[i] == "'":
-            result += "'"
-            i += 1
-            while i < len(format) and format[i] != "'":
-                result += format[i]
-                i += 1
-            if i < len(format):
-                result += "'"
-                i += 1
-        else:
-            s = format[i:]
-            if s.startswith('E'): # week-day
-                n = startCount('E', s)
-                if n < 3:
-                    result += 'ddd'
-                elif n == 4:
-                    result += 'dddd'
-                else: # 5: narrow, 6 short; but should be name, not number :-(
-                    result += 'd' if n < 6 else 'dd'
-                i += n
-            elif s[0] in 'ab': # am/pm
-                # 'b' should distinguish noon/midnight, too :-(
-                result += "AP"
-                i += startCount('ab', s)
-            elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
-                result += 'z'
-                i += startCount('S', s)
-            elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
-                result += 't'
-                i += startCount('V', s)
-            elif s[0] in 'zv': # zone
-                # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
-                # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
-                result += "t"
-                i += startCount('zv', s)
-            else:
-                result += format[i]
-                i += 1
-
-    return result
-
 class QLocaleXmlReader (object):
     def __init__(self, filename):
         self.root = self.__parse(filename)
-        # Lists of (id, name, code) triples:
-        languages = tuple(self.__loadMap('language'))
-        scripts = tuple(self.__loadMap('script'))
-        countries = tuple(self.__loadMap('country'))
+
+        from enumdata import language_map, script_map, territory_map
+        # Lists of (id, enum name, code, en.xml name) tuples:
+        languages = tuple(self.__loadMap('language', language_map))
+        scripts = tuple(self.__loadMap('script', script_map))
+        territories = tuple(self.__loadMap('territory', territory_map))
         self.__likely = tuple(self.__likelySubtagsMap())
-        # Mappings {ID: (name, code)}
-        self.languages = dict((v[0], v[1:]) for v in languages)
-        self.scripts = dict((v[0], v[1:]) for v in scripts)
-        self.countries = dict((v[0], v[1:]) for v in countries)
-        # Private mappings {name: (ID, code)}
-        self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
-        self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
-        self.__landByName = dict((v[1], (v[0], v[2])) for v in countries)
+
+        # Mappings {ID: (enum name, code, en.xml name)}
+        self.languages = {v[0]: v[1:] for v in languages}
+        self.scripts = {v[0]: v[1:] for v in scripts}
+        self.territories = {v[0]: v[1:] for v in territories}
+
+        # Private mappings {enum name: (ID, code)}
+        self.__langByName = {v[1]: (v[0], v[2]) for v in languages}
+        self.__textByName = {v[1]: (v[0], v[2]) for v in scripts}
+        self.__landByName = {v[1]: (v[0], v[2]) for v in territories}
         # Other properties:
-        self.dupes = set(v[1] for v in languages) & set(v[1] for v in countries)
+        self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
         self.cldrVersion = self.__firstChildText(self.root, "version")
 
     def loadLocaleMap(self, calendars, grumble = lambda text: None):
@@ -142,18 +75,18 @@ class QLocaleXmlReader (object):
             locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
             language = self.__langByName[locale.language][0]
             script = self.__textByName[locale.script][0]
-            country = self.__landByName[locale.country][0]
+            territory = self.__landByName[locale.territory][0]
 
             if language != 1: # C
-                if country == 0:
-                    grumble('loadLocaleMap: No country id for "{}"\n'.format(locale.language))
+                if territory == 0:
+                    grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')
 
                 if script == 0:
-                    # Find default script for the given language and country - see:
+                    # Find default script for the given language and territory - see:
                     # http://www.unicode.org/reports/tr35/#Likely_Subtags
                     try:
                         try:
-                            to = likely[(locale.language, 'AnyScript', locale.country)]
+                            to = likely[(locale.language, 'AnyScript', locale.territory)]
                         except KeyError:
                             to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
                     except KeyError:
@@ -162,11 +95,11 @@ class QLocaleXmlReader (object):
                         locale.script = to[1]
                         script = self.__textByName[locale.script][0]
 
-            yield (language, script, country), locale
+            yield (language, script, territory), locale
 
     def languageIndices(self, locales):
         index = 0
-        for key, value in self.languages.iteritems():
+        for key, value in self.languages.items():
             i, count = 0, locales.count(key)
             if count > 0:
                 i = index
@@ -190,11 +123,11 @@ class QLocaleXmlReader (object):
                    '_'.join(tag(give)), ids(give))
 
     def defaultMap(self):
-        """Map language and script to their default country by ID.
+        """Map language and script to their default territory by ID.
 
-        Yields ((language, script), country) wherever the likely
+        Yields ((language, script), territory) wherever the likely
         sub-tags mapping says language's default locale uses the given
-        script and country."""
+        script and territory."""
         for have, give in self.__likely:
             if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
                 assert have[0] == give[0], (have, give)
@@ -202,14 +135,41 @@ class QLocaleXmlReader (object):
                         self.__textByName[give[1]][0]),
                        self.__landByName[give[2]][0])
 
+    def enumify(self, name, suffix):
+        """Stick together the parts of an enumdata.py name.
+
+        Names given in enumdata.py include spaces and hyphens that we
+        can't include in an identifier, such as the name of a member
+        of an enum type. Removing those would lose the word
+        boundaries, so make sure each word starts with a capital (but
+        don't simply capitalize() as some names contain words,
+        e.g. McDonald, that have later capitals in them).
+
+        We also need to resolve duplication between languages and
+        territories (by adding a suffix to each) and add Script to the
+        ends of script-names that don't already end in it."""
+        name = name.replace('-', ' ')
+        # Don't .capitalize() as McDonald is already camel-case (see enumdata.py):
+        name = ''.join(word[0].upper() + word[1:] for word in name.split())
+        if suffix != 'Script':
+            assert not(name in self.__dupes and name.endswith(suffix))
+            return name + suffix if name in self.__dupes else name
+
+        if not name.endswith(suffix):
+            name += suffix
+        if name in self.__dupes:
+            raise Error(f'The script name "{name}" is messy')
+        return name
+
     # Implementation details:
-    def __loadMap(self, category):
+    def __loadMap(self, category, enum):
         kid = self.__firstChildText
-        for element in self.__eachEltInGroup(self.root, category + 'List', category):
-            yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
+        for element in self.__eachEltInGroup(self.root, f'{category}List', category):
+            key = int(kid(element, 'id'))
+            yield key, enum[key][0], kid(element, 'code'), kid(element, 'name')
 
     def __likelySubtagsMap(self):
-        def triplet(element, keys=('language', 'script', 'country'), kid = self.__firstChildText):
+        def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
             return tuple(kid(element, key) for key in keys)
 
         kid = self.__firstChildElt
@@ -246,7 +206,7 @@ class QLocaleXmlReader (object):
                 return child
             child = child.nextSibling
 
-        raise Error('No {} child found'.format(name))
+        raise Error(f'No {name} child found')
 
     @classmethod
     def __firstChildText(cls, elt, key):
@@ -302,7 +262,7 @@ class Spacer (object):
         elif line.startswith('<') and not line.startswith('<!'):
             cut = line.find('>')
             tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0]
-            if '</{}>'.format(tag) not in line:
+            if f'</{tag}>' not in line:
                 self.current += self.__each
         return indent + line + '\n'
 
@@ -333,10 +293,28 @@ class QLocaleXmlWriter (object):
         self.__write('<localeDatabase>')
 
     # Output of various sections, in their usual order:
-    def enumData(self, languages, scripts, countries):
-        self.__enumTable('language', languages)
-        self.__enumTable('script', scripts)
-        self.__enumTable('country', countries)
+    def enumData(self, code2name):
+        """Output name/id/code tables for language, script and territory.
+
+        Parameter, code2name, is a function taking 'language',
+        'script' or 'territory' and returning a lookup function that
+        maps codes, of the relevant type, to their English names. This
+        lookup function is passed a code and the name, both taken from
+        enumdata.py, that QLocale uses, so the .get() of a dict will
+        work. The English name from this lookup will be used by
+        QLocale::*ToString() for the enum member whose name is based
+        on the enumdata.py name passed as fallback to the lookup."""
+        from enumdata import language_map, script_map, territory_map
+        self.__enumTable('language', language_map, code2name)
+        self.__enumTable('script', script_map, code2name)
+        self.__enumTable('territory', territory_map, code2name)
+        # Prepare to detect any unused codes (see __writeLocale(), close()):
+        self.__languages = set(p[1] for p in language_map.values()
+                               if not p[1].isspace())
+        self.__scripts = set(p[1] for p in script_map.values()
+                             if p[1] != 'Zzzz')
+        self.__territories = set(p[1] for p in territory_map.values()
+                                 if p[1] != 'ZZ')
 
     def likelySubTags(self, entries):
         self.__openTag('likelySubtags')
@@ -350,13 +328,11 @@ class QLocaleXmlWriter (object):
     def locales(self, locales, calendars):
         self.__openTag('localeList')
         self.__openTag('locale')
-        Locale.C(calendars).toXml(self.inTag, calendars)
+        self.__writeLocale(Locale.C(calendars), calendars)
         self.__closeTag('locale')
-        keys = locales.keys()
-        keys.sort()
-        for key in keys:
+        for key in sorted(locales.keys()):
             self.__openTag('locale')
-            locales[key].toXml(self.inTag, calendars)
+            self.__writeLocale(locales[key], calendars)
             self.__closeTag('locale')
         self.__closeTag('localeList')
 
@@ -364,13 +340,27 @@ class QLocaleXmlWriter (object):
         self.inTag('version', cldrVersion)
 
     def inTag(self, tag, text):
-        self.__write('<{0}>{1}</{0}>'.format(tag, text))
+        self.__write(f'<{tag}>{text}</{tag}>')
 
-    def close(self):
+    def close(self, grumble):
+        """Finish writing and grumble about any issues discovered."""
         if self.__rawOutput != self.__complain:
             self.__write('</localeDatabase>')
         self.__rawOutput = self.__complain
 
+        if self.__languages or self.__scripts or self.__territories:
+            grumble('Some enum members are unused, corresponding to these tags:\n')
+            import textwrap
+            def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
+                g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
+            if self.__languages:
+                kvetch('Languages', self.__languages)
+            if self.__scripts:
+                kvetch('Scripts', self.__scripts)
+            if self.__territories:
+                kvetch('Territories', self.__territories)
+            grumble('It may make sense to deprecate them.\n')
+
     # Implementation details
     @staticmethod
     def __printit(text):
@@ -379,28 +369,39 @@ class QLocaleXmlWriter (object):
     def __complain(text):
         raise Error('Attempted to write data after closing :-(')
 
-    def __enumTable(self, tag, table):
-        self.__openTag(tag + 'List')
-        for key, value in table.iteritems():
+    @staticmethod
+    def __xmlSafe(text):
+        return escape(text) # Replaces &, < and > by their XML entities
+
+    def __enumTable(self, tag, table, code2name):
+        self.__openTag(f'{tag}List')
+        enname, safe = code2name(tag), self.__xmlSafe
+        for key, (name, code) in table.items():
             self.__openTag(tag)
-            self.inTag('name', value[0])
+            self.inTag('name', safe(enname(code, name)))
             self.inTag('id', key)
-            self.inTag('code', value[1])
+            self.inTag('code', code)
             self.__closeTag(tag)
-        self.__closeTag(tag + 'List')
+        self.__closeTag(f'{tag}List')
 
     def __likelySubTag(self, tag, likely):
         self.__openTag(tag)
         self.inTag('language', likely[0])
         self.inTag('script', likely[1])
-        self.inTag('country', likely[2])
+        self.inTag('territory', likely[2])
         # self.inTag('variant', likely[3])
         self.__closeTag(tag)
 
+    def __writeLocale(self, locale, calendars):
+        locale.toXml(self.inTag, calendars)
+        self.__languages.discard(locale.language_code)
+        self.__scripts.discard(locale.script_code)
+        self.__territories.discard(locale.territory_code)
+
     def __openTag(self, tag):
-        self.__write('<{}>'.format(tag))
+        self.__write(f'<{tag}>')
     def __closeTag(self, tag):
-        self.__write('</{}>'.format(tag))
+        self.__write(f'</{tag}>')
 
     def __write(self, line):
         self.__rawOutput(self.__wrap(line))
@@ -432,16 +433,16 @@ class Locale (object):
     __asint = ("currencyDigits", "currencyRounding")
     # Convert day-name to Qt day-of-week number:
     __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
-    # Convert from CLDR format-strings to QDateTimeParser ones:
-    __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
     # Just use the raw text:
-    __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
+    __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
                "decimal", "group", "zero",
                "list", "percent", "minus", "plus", "exp",
                "quotationStart", "quotationEnd",
                "alternateQuotationStart", "alternateQuotationEnd",
                "listPatternPartStart", "listPatternPartMiddle",
                "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
+               "longDateFormat", "shortDateFormat",
+               "longTimeFormat", "shortTimeFormat",
                'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
                "currencyIsoCode", "currencySymbol", "currencyDisplayName",
                "currencyFormat", "currencyNegativeFormat")
@@ -466,14 +467,11 @@ class Locale (object):
         for k in cls.__asdow:
             data[k] = cls.__qDoW[lookup(k)]
 
-        for k in cls.__asfmt:
-            data[k] = convertFormat(lookup(k))
-
         for k in cls.__astxt + tuple(cls.propsMonthDay('days')):
             data['listDelim' if k == 'list' else k] = lookup(k)
 
         for k in cls.propsMonthDay('months'):
-            data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
+            data[k] = {cal: lookup('_'.join((k, cal))) for cal in calendars}
 
         grouping = lookup('groupSizes').split(';')
         data.update(groupLeast = int(grouping[0]),
@@ -493,15 +491,15 @@ class Locale (object):
         form used by CLDR; its default is ('gregorian',).
         """
         get = lambda k: getattr(self, k)
-        for key in ('language', 'script', 'country'):
+        for key in ('language', 'script', 'territory'):
             write(key, get(key))
-            write('{}code'.format(key), get('{}_code'.format(key)))
+            write(f'{key}code', get(f'{key}_code'))
 
         for key in ('decimal', 'group', 'zero', 'list',
                     'percent', 'minus', 'plus', 'exp'):
             write(key, get(key))
 
-        for key in ('languageEndonym', 'countryEndonym',
+        for key in ('languageEndonym', 'territoryEndonym',
                     'quotationStart', 'quotationEnd',
                     'alternateQuotationStart', 'alternateQuotationEnd',
                     'listPatternPartStart', 'listPatternPartMiddle',
@@ -517,7 +515,7 @@ class Locale (object):
                 '_'.join((k, cal))
                 for k in self.propsMonthDay('months')
                 for cal in calendars):
-            write(key, escape(get(key)).encode('utf-8'))
+            write(key, escape(get(key)))
 
         write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
         for key in ('currencyDigits', 'currencyRounding'):
@@ -554,9 +552,9 @@ class Locale (object):
                         (fullName, fullName),
                         (firstThree, firstThree),
                         (number, initial)),
-            'islamic': ((u'Muharram', u'Safar', u'Rabiʻ I', u'Rabiʻ II', u'Jumada I',
-                         u'Jumada II', u'Rajab', u'Shaʻban', u'Ramadan', u'Shawwal',
-                         u'Dhuʻl-Qiʻdah', u'Dhuʻl-Hijjah'),
+            'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
+                         'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
+                         'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
                         (fullName, fullName),
                         (islamicShort, islamicShort),
                         (number, number)),
@@ -565,7 +563,7 @@ class Locale (object):
                        (fullName, fullName),
                        (fullName, fullName),
                        (number, number)),
-            },
+                     },
                      sizes=('long', 'short', 'narrow')):
         for cal in calendars:
             try:
@@ -590,7 +588,7 @@ class Locale (object):
         return cls(cls.__monthNames(calendars),
                    language='C', language_code='0', languageEndonym='',
                    script='AnyScript', script_code='0',
-                   country='AnyTerritory', country_code='0', countryEndonym='',
+                   territory='AnyTerritory', territory_code='0', territoryEndonym='',
                    groupSizes=(3, 3, 1),
                    decimal='.', group=',', list=';', percent='%',
                    zero='0', minus='-', plus='+', exp='e',
@@ -605,8 +603,8 @@ class Locale (object):
                    byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
                    am='AM', pm='PM', firstDayOfWeek='mon',
                    weekendStart='sat', weekendEnd='sun',
-                   longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
-                   longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
+                   longDateFormat='dddd, d MMMM yyyy', shortDateFormat='d MMM yyyy',
+                   longTimeFormat='HH:mm:ss t', shortTimeFormat='HH:mm:ss',
                    longDays=';'.join(days),
                    shortDays=';'.join(d[:3] for d in days),
                    narrowDays='7;1;2;3;4;5;6',
diff --git a/util/locale_database/qlocalexml.rnc b/util/locale_database/qlocalexml.rnc
new file mode 100644
index 0000000000..818aa8f9c3
--- /dev/null
+++ b/util/locale_database/qlocalexml.rnc
@@ -0,0 +1,119 @@
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+# This is a RelaxNG compact schema for the qLocaleXML intermediate locale data
+# representation format produced and consumed by the qlocalexml module.
+#
+# To validate an xml file run:
+#
+#      jing -c qlocalexml.rnc <your-file.xml>
+#
+# You can download jing from https://relaxng.org/jclark/jing.html if your
+# package manager lacks the jing package.
+
+start = element localeDatabase {
+  element version { text },
+  element languageList { Language+ },
+  element scriptList { Script+ },
+  element territoryList { Territory+ },
+  element likelySubtags { LikelySubtag+ },
+  element localeList { Locale+ }
+}
+
+Language = element language { TagDescriptor }
+Script = element script { TagDescriptor }
+Territory = element territory { TagDescriptor }
+TagDescriptor = (
+  element name { text },
+  element id { xsd:nonNegativeInteger },
+  element code { text }
+)
+
+LikelySubtag = element likelySubtag {
+  element from { LocaleTriplet },
+  element to { LocaleTriplet }
+}
+
+LocaleTriplet = (
+  element language { text },
+  element script { text },
+  element territory { text }
+)
+
+WeekDay = ("sun" | "mon" | "tue" | "wed" | "thu" | "fri" | "sat")
+Digit = xsd:string { pattern = "\d" }
+Punctuation = xsd:string { pattern = "\p{P}" }
+GroupSizes = xsd:string { pattern = "\d;\d;\d" }
+
+Locale = element locale {
+  element language { text },
+  element languagecode { text },
+  element script { text },
+  element scriptcode { text },
+  element territory { text },
+  element territorycode { text },
+  element decimal { Punctuation },
+  element group { text },
+  element zero { Digit },
+  element list { Punctuation },
+  element percent { text },
+  element minus { text },
+  element plus { text },
+  element exp { text },
+  element languageEndonym { text },
+  element territoryEndonym { text },
+  element quotationStart { Punctuation },
+  element quotationEnd { Punctuation },
+  element alternateQuotationStart { Punctuation },
+  element alternateQuotationEnd { Punctuation },
+  element listPatternPartStart { text },
+  element listPatternPartMiddle { text },
+  element listPatternPartEnd { text },
+  element listPatternPartTwo { text },
+  element byte_unit { text },
+  element byte_si_quantified { text },
+  element byte_iec_quantified { text },
+  element am { text },
+  element pm { text },
+  element firstDayOfWeek { WeekDay },
+  element weekendStart { WeekDay },
+  element weekendEnd { WeekDay },
+  element longDateFormat { text },
+  element shortDateFormat { text },
+  element longTimeFormat { text },
+  element shortTimeFormat { text },
+  element currencyIsoCode { text },
+  element currencySymbol { text },
+  element currencyDisplayName { text },
+  element currencyFormat { text },
+  element currencyNegativeFormat { text },
+  element longDays { text },
+  element standaloneLongDays { text },
+  element shortDays { text },
+  element standaloneShortDays { text },
+  element narrowDays { text },
+  element standaloneNarrowDays { text },
+
+  # Some of these entries may be absent depending on command line arguments
+  element longMonths_gregorian { text }?,
+  element longMonths_persian { text }?,
+  element longMonths_islamic { text }?,
+  element standaloneLongMonths_gregorian { text }?,
+  element standaloneLongMonths_persian { text }?,
+  element standaloneLongMonths_islamic { text }?,
+  element shortMonths_gregorian { text }?,
+  element shortMonths_persian { text }?,
+  element shortMonths_islamic { text }?,
+  element standaloneShortMonths_gregorian { text }?,
+  element standaloneShortMonths_persian { text }?,
+  element standaloneShortMonths_islamic { text }?,
+  element narrowMonths_gregorian { text }?,
+  element narrowMonths_persian { text }?,
+  element narrowMonths_islamic { text }?,
+  element standaloneNarrowMonths_gregorian { text }?,
+  element standaloneNarrowMonths_persian { text }?,
+  element standaloneNarrowMonths_islamic { text }?,
+
+  element groupSizes { GroupSizes },
+  element currencyDigits { xsd:nonNegativeInteger },
+  element currencyRounding { xsd:nonNegativeInteger }
+}
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index c15d6d2f55..b20e4fd155 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -1,85 +1,58 @@
-#!/usr/bin/env python2
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-"""Script to generate C++ code from CLDR data in qLocaleXML form
-
-See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
-Pass the output file from that as first parameter to this script; pass
-the root of the qtbase check-out as second parameter.
-"""
-
-import os
-import datetime
-
-from qlocalexml import QLocaleXmlReader
-from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
-
-def compareLocaleKeys(key1, key2):
-    if key1 == key2:
-        return 0
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+"""Script to generate C++ code from CLDR data in QLocaleXML form
 
-    if key1[0] != key2[0]: # First sort by language:
-        return key1[0] - key2[0]
+See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
+Pass the output file from that as first parameter to this script; pass the ISO
+639-3 data file as second parameter. You can optionally pass the root of the
+qtbase check-out as third parameter; it defaults to the root of the qtbase
+check-out containing this script.
 
-    defaults = compareLocaleKeys.default_map
-    # maps {(language, script): country} by ID
-    try:
-        country = defaults[key1[:2]]
-    except KeyError:
-        pass
-    else:
-        if key1[2] == country:
-            return -1
-        if key2[2] == country:
-            return 1
-
-    if key1[1] == key2[1]:
-        return key1[2] - key2[2]
+The ISO 639-3 data file can be downloaded from the SIL website:
 
-    try:
-        country = defaults[key2[:2]]
-    except KeyError:
-        pass
-    else:
-        if key2[2] == country:
-            return 1
-        if key1[2] == country:
-            return -1
+    https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
+"""
 
-    return key1[1] - key2[1]
+import datetime
+import argparse
+from pathlib import Path
+from typing import Optional
 
+from qlocalexml import QLocaleXmlReader
+from localetools import *
+from iso639_3 import LanguageCodeData
+
+class LocaleKeySorter:
+    """Sort-ordering representation of a locale key.
+
+    This is for passing to a sorting algorithm as key-function, that
+    it applies to each entry in the list to decide which belong
+    earlier. It adds an entry to the (language, script, territory)
+    triple, just before script, that sorts earlier if the territory is
+    the default for the given language and script, later otherwise.
+    """
+
+    # TODO: study the relationship between this and CLDR's likely
+    # sub-tags algorithm. Work out how locale sort-order impacts
+    # QLocale's likely sub-tag matching algorithms. Make sure this is
+    # sorting in an order compatible with those algorithms.
+
+    def __init__(self, defaults):
+        self.map = dict(defaults)
+    def foreign(self, key):
+        default = self.map.get(key[:2])
+        return default is None or default != key[2]
+    def __call__(self, key):
+        # TODO: should we compare territory before or after script ?
+        return (key[0], self.foreign(key)) + key[1:]
 
 class StringDataToken:
     def __init__(self, index, length, bits):
         if index > 0xffff:
-            raise ValueError('Start-index ({}) exceeds the uint16 range!'.format(index))
+            raise ValueError(f'Start-index ({index}) exceeds the uint16 range!')
         if length >= (1 << bits):
-            raise ValueError('Data size ({}) exceeds the {}-bit range!'.format(length, bits))
+            raise ValueError(f'Data size ({length}) exceeds the {bits}-bit range!')
 
         self.index = index
         self.length = length
@@ -131,11 +104,10 @@ class StringData:
 
     def write(self, fd):
         if len(self.data) > 0xffff:
-            raise ValueError('Data is too big ({}) for quint16 index to its end!'
-                             .format(len(self.data)),
+            raise ValueError(f'Data is too big ({len(self.data)}) for quint16 index to its end!',
                              self.name)
-        fd.write("\nstatic const char16_t {}[] = {{\n".format(self.name))
-        fd.write(wrap_list(self.data))
+        fd.write(f"\nstatic constexpr char16_t {self.name}[] = {{\n")
+        fd.write(wrap_list(self.data, 12)) # 12 == 100 // len('0xhhhh, ')
         fd.write("\n};\n")
 
 def currencyIsoCodeData(s):
@@ -144,13 +116,16 @@ def currencyIsoCodeData(s):
     return "{0,0,0}"
 
 class LocaleSourceEditor (SourceFileEditor):
-    __upinit = SourceFileEditor.__init__
-    def __init__(self, path, temp, version):
-        self.__upinit(path, temp)
-        self.writer.write("""
+    def __init__(self, path: Path, temp: Path, version: str):
+        super().__init__(path, temp)
+        self.version = version
+
+    def onEnter(self) -> None:
+        super().onEnter()
+        self.writer.write(f"""
 /*
-    This part of the file was generated on {} from the
-    Common Locale Data Repository v{}
+    This part of the file was generated on {datetime.date.today()} from the
+    Common Locale Data Repository v{self.version}
 
     http://www.unicode.org/cldr/
 
@@ -159,7 +134,7 @@ class LocaleSourceEditor (SourceFileEditor):
     edited) CLDR data; see qtbase/util/locale_database/.
 */
 
-""".format(datetime.date.today(), version))
+""")
 
 class LocaleDataWriter (LocaleSourceEditor):
     def likelySubtags(self, likely):
@@ -173,23 +148,22 @@ class LocaleDataWriter (LocaleSourceEditor):
         def keyLikely(entry):
             have = entry[1] # Numeric id triple
             return have[0] or huge, have[2] or huge, have[1] or huge # language, region, script
-        likely = list(likely) # Turn generator into list so we can sort it
-        likely.sort(key=keyLikely)
+        likely = sorted(likely, key=keyLikely)
 
         i = 0
-        self.writer.write('static const QLocaleId likely_subtags[] = {\n')
+        self.writer.write('static constexpr QLocaleId likely_subtags[] = {\n')
         for had, have, got, give in likely:
             i += 1
             self.writer.write('    {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
             self.writer.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
             self.writer.write(' ' if i == len(likely) else ',')
-            self.writer.write(' // {} -> {}\n'.format(had, got))
+            self.writer.write(f' // {had} -> {got}\n')
         self.writer.write('};\n\n')
 
     def localeIndex(self, indices):
-        self.writer.write('static const quint16 locale_index[] = {\n')
-        for pair in indices:
-            self.writer.write('{:6d}, // {}\n'.format(*pair))
+        self.writer.write('static constexpr quint16 locale_index[] = {\n')
+        for index, name in indices:
+            self.writer.write(f'{index:6d}, // {name}\n')
         self.writer.write('     0 // trailing 0\n')
         self.writer.write('};\n\n')
 
@@ -208,7 +182,7 @@ class LocaleDataWriter (LocaleSourceEditor):
         endonyms_data = StringData('endonyms_data')
 
         # Locale data
-        self.writer.write('static const QLocaleData locale_data[] = {\n')
+        self.writer.write('static constexpr QLocaleData locale_data[] = {\n')
         # Table headings: keep each label centred in its field, matching line_format:
         self.writer.write('   // '
                           # Width 6 + comma
@@ -309,7 +283,7 @@ class LocaleDataWriter (LocaleSourceEditor):
                              locale.minus, locale.plus, locale.exp,
                              locale.quotationStart, locale.quotationEnd,
                              locale.alternateQuotationStart, locale.alternateQuotationEnd)) +
-                      tuple (date_format_data.append(f) for f in # 2 entries:
+                      tuple(date_format_data.append(f) for f in # 2 entries:
                              (locale.longDateFormat, locale.shortDateFormat)) +
                       tuple(time_format_data.append(f) for f in # 2 entries:
                             (locale.longTimeFormat, locale.shortTimeFormat)) +
@@ -327,7 +301,7 @@ class LocaleDataWriter (LocaleSourceEditor):
                        currency_format_data.append(locale.currencyFormat),
                        currency_format_data.append(locale.currencyNegativeFormat),
                        endonyms_data.append(locale.languageEndonym),
-                       endonyms_data.append(locale.countryEndonym)) # 6 entries
+                       endonyms_data.append(locale.territoryEndonym)) # 6 entries
                       ) # Total: 37 entries
             assert len(ranges) == 37
 
@@ -340,8 +314,7 @@ class LocaleDataWriter (LocaleSourceEditor):
                          locale.currencyRounding, # unused (QTBUG-81343)
                          locale.firstDayOfWeek, locale.weekendStart, locale.weekendEnd,
                          locale.groupTop, locale.groupHigher, locale.groupLeast) ))
-                              + ', // {}/{}/{}\n'.format(
-                    locale.language, locale.script, locale.country))
+                              + f', // {locale.language}/{locale.script}/{locale.territory}\n')
         self.writer.write(formatLine(*( # All zeros, matching the format:
                     (0,) * 3 + (0,) * 37 * 2
                     + (currencyIsoCodeData(0),)
@@ -359,32 +332,35 @@ class LocaleDataWriter (LocaleSourceEditor):
 
     @staticmethod
     def __writeNameData(out, book, form):
-        out('static const char {}_name_list[] =\n'.format(form))
+        out(f'static constexpr char {form}_name_list[] =\n')
         out('"Default\\0"\n')
         for key, value in book.items():
             if key == 0:
                 continue
-            out('"' + value[0] + '\\0"\n')
+            enum, name = value[0], value[-1]
+            if names_clash(name, enum):
+                out(f'"{name}\\0" // {enum}\n')
+            else:
+                out(f'"{name}\\0"\n') # Automagically utf-8 encoded
         out(';\n\n')
 
-        out('static const quint16 {}_name_index[] = {{\n'.format(form))
-        out('     0, // Any{}\n'.format(form.capitalize()))
+        out(f'static constexpr quint16 {form}_name_index[] = {{\n')
+        out(f'     0, // Any{form.capitalize()}\n')
         index = 8
         for key, value in book.items():
             if key == 0:
                 continue
-            name = value[0]
-            out('{:6d}, // {}\n'.format(index, name))
-            index += len(name) + 1
+            out(f'{index:6d}, // {value[0]}\n')
+            index += len(value[-1].encode('utf-8')) + 1
         out('};\n\n')
 
     @staticmethod
     def __writeCodeList(out, book, form, width):
-        out('static const unsigned char {}_code_list[] =\n'.format(form))
+        out(f'static constexpr unsigned char {form}_code_list[] =\n')
         for key, value in book.items():
             code = value[1]
             code += r'\0' * max(width - len(code), 0)
-            out('"{}" // {}\n'.format(code, value[0]))
+            out(f'"{code}" // {value[0]}\n')
         out(';\n\n')
 
     def languageNames(self, languages):
@@ -393,20 +369,44 @@ class LocaleDataWriter (LocaleSourceEditor):
     def scriptNames(self, scripts):
         self.__writeNameData(self.writer.write, scripts, 'script')
 
-    def countryNames(self, countries):
-        self.__writeNameData(self.writer.write, countries, 'territory')
+    def territoryNames(self, territories):
+        self.__writeNameData(self.writer.write, territories, 'territory')
 
     # TODO: unify these next three into the previous three; kept
     # separate for now to verify we're not changing data.
 
-    def languageCodes(self, languages):
-        self.__writeCodeList(self.writer.write, languages, 'language', 3)
+    def languageCodes(self, languages, code_data: LanguageCodeData):
+        """Write the languageCodeList table, one entry per language.
+
+        Each entry holds the codes code_data.query() reports for the
+        language's code; keys below 2 use the codes for 'und' instead."""
+        out = self.writer.write
+        out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
+
+        def q(val: Optional[str], size: int) -> str:
+            """Quote the value and adjust the result for tabular view."""
+            s = '' if val is None else ', '.join(f"'{c}'" for c in val)
+            return f'{{{s}}}' if size == 0 else f'{{{s}}},'.ljust(size * 5 + 2)
+
+        for key, value in languages.items():
+            code = value[1]
+            wanted = 'und' if key < 2 else code
+            result = code_data.query(wanted)
+            assert result is not None, f'No ISO 639-3 data for code {wanted}'
+            assert key < 2 or code == result.id()
+
+            codeString = q(result.part1Code, 2)
+            codeString += q(result.part2BCode, 3)
+            codeString += q(result.part2TCode, 3)
+            codeString += q(result.part3Code, 0)
+            out(f'    LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
+        out('};\n\n')
 
     def scriptCodes(self, scripts):
         self.__writeCodeList(self.writer.write, scripts, 'script', 4)
 
-    def countryCodes(self, countries): # TODO: unify with countryNames()
-        self.__writeCodeList(self.writer.write, countries, 'territory', 3)
+    def territoryCodes(self, territories): # TODO: unify with territoryNames()
+        self.__writeCodeList(self.writer.write, territories, 'territory', 3)
 
 class CalendarDataWriter (LocaleSourceEditor):
     formatCalendar = (
@@ -416,7 +416,7 @@ class CalendarDataWriter (LocaleSourceEditor):
     def write(self, calendar, locales, names):
         months_data = StringData('months_data')
 
-        self.writer.write('static const QCalendarLocale locale_data[] = {\n')
+        self.writer.write('static constexpr QCalendarLocale locale_data[] = {\n')
         self.writer.write(
             '     //'
             # IDs, width 7 (6 + comma)
@@ -444,7 +444,7 @@ class CalendarDataWriter (LocaleSourceEditor):
                                 (locale.standaloneShortMonths, locale.shortMonths,
                                  locale.standaloneNarrowMonths, locale.narrowMonths)))
             except ValueError as e:
-                e.args += (locale.language, locale.script, locale.country, stem)
+                e.args += (locale.language, locale.script, locale.territory)
                 raise
 
             self.writer.write(
@@ -452,25 +452,41 @@ class CalendarDataWriter (LocaleSourceEditor):
                         key +
                         tuple(r.index for r in ranges) +
                         tuple(r.length for r in ranges) ))
-                + '// {}/{}/{}\n'.format(locale.language, locale.script, locale.country))
+                + f'// {locale.language}/{locale.script}/{locale.territory}\n')
         self.writer.write(self.formatCalendar(*( (0,) * (3 + 6 * 2) ))
                           + '// trailing zeros\n')
         self.writer.write('};\n')
         months_data.write(self.writer)
 
+
+class TestLocaleWriter (LocaleSourceEditor):
+    def localeList(self, locales):
+        """Write g_locale_list, mentioning each (language, territory) ID pair once."""
+        self.writer.write('const LocaleListItem g_locale_list[] = {\n')
+        from enumdata import language_map, territory_map
+        # TODO: update testlocales/ to include script.
+        # For now, skip repeats of a (lang, land) pair; one pass, so any iterable works:
+        seen = set()
+        for lang, script, land in locales:
+            if (lang, land) not in seen:
+                seen.add((lang, land))
+                names = language_map[lang][0], territory_map[land][0]
+                self.writer.write(f'    {{ {lang:6d},{land:6d} }}, // {names[0]}/{names[1]}\n')
+        self.writer.write('};\n\n')
+
+
 class LocaleHeaderWriter (SourceFileEditor):
-    __upinit = SourceFileEditor.__init__
-    def __init__(self, path, temp, dupes):
-        self.__upinit(path, temp)
-        self.__dupes = dupes
+    def __init__(self, path, temp, enumify):
+        super().__init__(path, temp)
+        self.__enumify = enumify
 
     def languages(self, languages):
         self.__enum('Language', languages, self.__language)
         self.writer.write('\n')
 
-    def countries(self, countries):
+    def territories(self, territories):
         self.writer.write("    // ### Qt 7: Rename to Territory\n")
-        self.__enum('Country', countries, self.__country, 'Territory')
+        self.__enum('Country', territories, self.__territory, 'Territory')
 
     def scripts(self, scripts):
         self.__enum('Script', scripts, self.__script)
@@ -478,7 +494,7 @@ class LocaleHeaderWriter (SourceFileEditor):
 
     # Implementation details
     from enumdata import (language_aliases as __language,
-                          country_aliases as __country,
+                          territory_aliases as __territory,
                           script_aliases as __script)
 
     def __enum(self, name, book, alias, suffix = None):
@@ -487,153 +503,135 @@ class LocaleHeaderWriter (SourceFileEditor):
         if suffix is None:
             suffix = name
 
-        out, dupes = self.writer.write, self.__dupes
-        out('    enum {} : ushort {{\n'.format(name))
+        out, enumify = self.writer.write, self.__enumify
+        out(f'    enum {name} : ushort {{\n')
         for key, value in book.items():
-            member = value[0].replace('-', ' ')
-            if name == 'Script':
-                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
-                member = ''.join(word[0].upper() + word[1:] for word in member.split())
-                if not member.endswith('Script'):
-                    member += 'Script'
-                if member in dupes:
-                    raise Error('The script name "{}" is messy'.format(member))
-            else:
-                member = ''.join(member.split())
-                member = member + suffix if member in dupes else member
-            out('        {} = {},\n'.format(member, key))
+            member = enumify(value[0], suffix)
+            out(f'        {member} = {key},\n')
 
         out('\n        '
-            + ',\n        '.join('{} = {}'.format(*pair)
-                                 for pair in sorted(alias.items()))
-            + ',\n\n        Last{} = {}'.format(suffix, member))
+            + ',\n        '.join(f'{k} = {v}' for k, v in sorted(alias.items()))
+            + f',\n\n        Last{suffix} = {member}')
 
         # for "LastCountry = LastTerritory"
         # ### Qt 7: Remove
         if suffix != name:
-            out(',\n        Last{} = Last{}'.format(name, suffix))
+            out(f',\n        Last{name} = Last{suffix}')
 
         out('\n    };\n')
 
-def usage(name, err, message = ''):
-    err.write("""Usage: {} path/to/qlocale.xml root/of/qtbase
-""".format(name)) # TODO: elaborate
-    if message:
-        err.write('\n' + message + '\n')
-
-def main(args, out, err):
-    # TODO: Make calendars a command-line parameter
-    # map { CLDR name: Qt file name }
-    calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',
-
-    name = args.pop(0)
-    if len(args) != 2:
-        usage(name, err, 'I expect two arguments')
-        return 1
 
-    qlocalexml = args.pop(0)
-    qtsrcdir = args.pop(0)
-
-    if not (os.path.isdir(qtsrcdir)
-            and all(os.path.isfile(os.path.join(qtsrcdir, 'src', 'corelib', 'text', leaf))
+def main(argv, out, err):
+    """Updates QLocale's CLDR data from a QLocaleXML file.
+
+    Takes sys.argv, sys.stdout, sys.stderr (or equivalents) as
+    arguments. In argv[1:] it expects the QLocaleXML file as first
+    parameter and the ISO 639-3 data table as second
+    parameter. Accepts the root of the qtbase checkout as third
+    parameter (default is inferred from this script's path) and a
+    --calendars option to select which calendars to support (all
+    available by default).
+
+    Updates various src/corelib/t*/q*_data_p.h files within the qtbase
+    checkout to contain data extracted from the QLocaleXML file."""
+    calendars_map = {
+        # CLDR name: Qt file name fragment
+        'gregorian': 'roman',
+        'persian': 'jalali',
+        'islamic': 'hijri',
+    }
+    all_calendars = list(calendars_map.keys())
+
+    parser = argparse.ArgumentParser(
+        prog=Path(argv[0]).name,
+        description='Generate C++ code from CLDR data in QLocaleXML form.',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('input_file', help='input XML file name',
+                        metavar='input-file.xml')
+    parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
+                        metavar='iso-639-3.tab')
+    parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree',
+                        nargs='?', default=qtbase_root)
+    parser.add_argument('--calendars', help='select calendars to emit data for',
+                        nargs='+', metavar='CALENDAR',
+                        choices=all_calendars, default=all_calendars)
+    args = parser.parse_args(argv[1:])
+
+    qlocalexml = args.input_file
+    qtsrcdir = Path(args.qtbase_path)
+    calendars = {cal: calendars_map[cal] for cal in args.calendars}
+
+    if not (qtsrcdir.is_dir()
+            and all(qtsrcdir.joinpath('src/corelib/text', leaf).is_file()
                     for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))):
-        usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
-        return 1
+        parser.error(f'Missing expected files under qtbase source root {qtsrcdir}')
 
     reader = QLocaleXmlReader(qlocalexml)
     locale_map = dict(reader.loadLocaleMap(calendars, err.write))
+    locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))
 
-    locale_keys = locale_map.keys()
-    compareLocaleKeys.default_map = dict(reader.defaultMap())
-    locale_keys.sort(compareLocaleKeys)
-
-    try:
-        writer = LocaleDataWriter(os.path.join(qtsrcdir,  'src', 'corelib', 'text',
-                                               'qlocale_data_p.h'),
-                                  qtsrcdir, reader.cldrVersion)
-    except IOError as e:
-        err.write('Failed to open files to transcribe locale data: ' + (e.message or e.args[1]))
-        return 1
+    code_data = LanguageCodeData(args.iso_path)
 
     try:
-        writer.likelySubtags(reader.likelyMap())
-        writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
-        writer.localeData(locale_map, locale_keys)
-        writer.writer.write('\n')
-        writer.languageNames(reader.languages)
-        writer.scriptNames(reader.scripts)
-        writer.countryNames(reader.countries)
-        # TODO: merge the next three into the previous three
-        writer.languageCodes(reader.languages)
-        writer.scriptCodes(reader.scripts)
-        writer.countryCodes(reader.countries)
-    except Error as e:
-        writer.cleanup()
-        err.write('\nError updating locale data: ' + e.message + '\n')
+        with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
+                              qtsrcdir, reader.cldrVersion) as writer:
+            writer.likelySubtags(reader.likelyMap())
+            writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
+            writer.localeData(locale_map, locale_keys)
+            writer.writer.write('\n')
+            writer.languageNames(reader.languages)
+            writer.scriptNames(reader.scripts)
+            writer.territoryNames(reader.territories)
+            # TODO: merge the next three into the previous three
+            writer.languageCodes(reader.languages, code_data)
+            writer.scriptCodes(reader.scripts)
+            writer.territoryCodes(reader.territories)
+    except Exception as e:
+        err.write(f'\nError updating locale data: {e}\n')
         return 1
 
-    writer.close()
-
     # Generate calendar data
     for calendar, stem in calendars.items():
         try:
-            writer = CalendarDataWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'time',
-                                                     'q{}calendar_data_p.h'.format(stem)),
-                                        qtsrcdir, reader.cldrVersion)
-        except IOError as e:
-            err.write('Failed to open files to transcribe ' + calendar
-                             + ' data ' + (e.message or e.args[1]))
-            return 1
-
-        try:
-            writer.write(calendar, locale_map, locale_keys)
-        except Error as e:
-            writer.cleanup()
-            err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
-            return 1
-
-        writer.close()
+            with CalendarDataWriter(
+                    qtsrcdir.joinpath(f'src/corelib/time/q{stem}calendar_data_p.h'),
+                    qtsrcdir, reader.cldrVersion) as writer:
+                writer.write(calendar, locale_map, locale_keys)
+        except Exception as e:
+            err.write(f'\nError updating {calendar} locale data: {e}\n')
 
     # qlocale.h
     try:
-        writer = LocaleHeaderWriter(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.h'),
-                                    qtsrcdir, reader.dupes)
-    except IOError as e:
-        err.write('Failed to open files to transcribe qlocale.h: ' + (e.message or e.args[1]))
-        return 1
-
-    try:
-        writer.languages(reader.languages)
-        writer.scripts(reader.scripts)
-        writer.countries(reader.countries)
-    except Error as e:
-        writer.cleanup()
-        err.write('\nError updating qlocale.h: ' + e.message + '\n')
-        return 1
-
-    writer.close()
+        with LocaleHeaderWriter(qtsrcdir.joinpath('src/corelib/text/qlocale.h'),
+                                qtsrcdir, reader.enumify) as writer:
+            writer.languages(reader.languages)
+            writer.scripts(reader.scripts)
+            writer.territories(reader.territories)
+    except Exception as e:
+        err.write(f'\nError updating qlocale.h: {e}\n')
 
     # qlocale.qdoc
     try:
-        writer = Transcriber(os.path.join(qtsrcdir, 'src', 'corelib', 'text', 'qlocale.qdoc'),
-                             qtsrcdir)
-    except IOError as e:
-        err.write('Failed to open files to transcribe qlocale.qdoc: ' + (e.message or e.args[1]))
+        with Transcriber(qtsrcdir.joinpath('src/corelib/text/qlocale.qdoc'), qtsrcdir) as qdoc:
+            DOCSTRING = "    QLocale's data is based on Common Locale Data Repository "
+            for line in qdoc.reader:
+                if DOCSTRING in line:
+                    qdoc.writer.write(f'{DOCSTRING}v{reader.cldrVersion}.\n')
+                else:
+                    qdoc.writer.write(line)
+    except Exception as e:
+        err.write(f'\nError updating qlocale.qdoc: {e}\n')
         return 1
 
-    DOCSTRING = "    QLocale's data is based on Common Locale Data Repository "
+    # ./testlocales/localemodel.cpp
     try:
-        for line in writer.reader:
-            if DOCSTRING in line:
-                writer.writer.write(DOCSTRING + 'v' + reader.cldrVersion + '.\n')
-            else:
-                writer.writer.write(line)
-    except Error as e:
-        writer.cleanup()
-        err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
-        return 1
+        path = 'util/locale_database/testlocales/localemodel.cpp'
+        with TestLocaleWriter(qtsrcdir.joinpath(path), qtsrcdir,
+                              reader.cldrVersion) as test:
+            test.localeList(locale_keys)
+    except Exception as e:
+        err.write(f'\nError updating localemodel.cpp: {e}\n')
 
-    writer.close()
     return 0
 
 if __name__ == "__main__":
diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp
index d171bc9855..7f0150c7e0 100644
--- a/util/locale_database/testlocales/localemodel.cpp
+++ b/util/locale_database/testlocales/localemodel.cpp
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 #include "localemodel.h"
 
 #include <QLocale>
@@ -39,236 +14,658 @@ struct LocaleListItem
     int territory;
 };
 
+// GENERATED PART STARTS HERE
+
+/*
+    This part of the file was generated on 2024-04-22 from the
+    Common Locale Data Repository v44.1
+
+    http://www.unicode.org/cldr/
+
+    Do not edit this section: instead regenerate it using
+    cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
+    edited) CLDR data; see qtbase/util/locale_database/.
+*/
+
 const LocaleListItem g_locale_list[] = {
     {      1,     0 }, // C/AnyTerritory
-    {      3,    69 }, // Afan/Ethiopia
-    {      3,   111 }, // Afan/Kenya
-    {      4,    59 }, // Afar/Djibouti
-    {      4,    67 }, // Afar/Eritrea
-    {      4,    69 }, // Afar/Ethiopia
-    {      5,   195 }, // Afrikaans/SouthAfrica
-    {      5,   148 }, // Afrikaans/Namibia
-    {      6,     2 }, // Albanian/Albania
-    {      7,    69 }, // Amharic/Ethiopia
-    {      8,   186 }, // Arabic/SaudiArabia
-    {      8,     3 }, // Arabic/Algeria
-    {      8,    17 }, // Arabic/Bahrain
-    {      8,    64 }, // Arabic/Egypt
-    {      8,   103 }, // Arabic/Iraq
-    {      8,   109 }, // Arabic/Jordan
-    {      8,   115 }, // Arabic/Kuwait
-    {      8,   119 }, // Arabic/Lebanon
-    {      8,   122 }, // Arabic/LibyanArabJamahiriya
-    {      8,   145 }, // Arabic/Morocco
-    {      8,   162 }, // Arabic/Oman
-    {      8,   175 }, // Arabic/Qatar
-    {      8,   201 }, // Arabic/Sudan
-    {      8,   207 }, // Arabic/SyrianArabRepublic
-    {      8,   216 }, // Arabic/Tunisia
-    {      8,   223 }, // Arabic/UnitedArabEmirates
-    {      8,   237 }, // Arabic/Yemen
-    {      9,    11 }, // Armenian/Armenia
-    {     10,   100 }, // Assamese/India
-    {     12,    15 }, // Azerbaijani/Azerbaijan
-    {     14,   197 }, // Basque/Spain
-    {     15,    18 }, // Bengali/Bangladesh
-    {     15,   100 }, // Bengali/India
-    {     16,    25 }, // Bhutani/Bhutan
-    {     20,    33 }, // Bulgarian/Bulgaria
-    {     22,    20 }, // Byelorussian/Belarus
-    {     23,    36 }, // Cambodian/Cambodia
-    {     24,   197 }, // Catalan/Spain
-    {     25,    44 }, // Chinese/China
-    {     25,    97 }, // Chinese/HongKong
-    {     25,   126 }, // Chinese/Macau
-    {     25,   190 }, // Chinese/Singapore
-    {     25,   208 }, // Chinese/Taiwan
-    {     27,    54 }, // Croatian/Croatia
-    {     28,    57 }, // Czech/CzechRepublic
-    {     29,    58 }, // Danish/Denmark
-    {     30,   151 }, // Dutch/Netherlands
-    {     30,    21 }, // Dutch/Belgium
-    {     31,   225 }, // English/UnitedStates
-    {     31,     4 }, // English/AmericanSamoa
-    {     31,    13 }, // English/Australia
-    {     31,    21 }, // English/Belgium
-    {     31,    22 }, // English/Belize
-    {     31,    28 }, // English/Botswana
-    {     31,    38 }, // English/Canada
-    {     31,    89 }, // English/Guam
-    {     31,    97 }, // English/HongKong
-    {     31,   100 }, // English/India
-    {     31,   104 }, // English/Ireland
-    {     31,   107 }, // English/Jamaica
-    {     31,   133 }, // English/Malta
-    {     31,   134 }, // English/MarshallIslands
-    {     31,   148 }, // English/Namibia
-    {     31,   154 }, // English/NewZealand
-    {     31,   160 }, // English/NorthernMarianaIslands
-    {     31,   163 }, // English/Pakistan
-    {     31,   170 }, // English/Philippines
-    {     31,   190 }, // English/Singapore
-    {     31,   195 }, // English/SouthAfrica
-    {     31,   215 }, // English/TrinidadAndTobago
-    {     31,   224 }, // English/UnitedKingdom
-    {     31,   226 }, // English/UnitedStatesMinorOutlyingIslands
-    {     31,   234 }, // English/USVirginIslands
-    {     31,   240 }, // English/Zimbabwe
-    {     33,    68 }, // Estonian/Estonia
-    {     34,    71 }, // Faroese/FaroeIslands
-    {     36,    73 }, // Finnish/Finland
-    {     37,    74 }, // French/France
-    {     37,    21 }, // French/Belgium
-    {     37,    38 }, // French/Canada
-    {     37,   125 }, // French/Luxembourg
-    {     37,   142 }, // French/Monaco
-    {     37,   206 }, // French/Switzerland
-    {     40,   197 }, // Galician/Spain
-    {     41,    81 }, // Georgian/Georgia
-    {     42,    82 }, // German/Germany
-    {     42,    14 }, // German/Austria
-    {     42,    21 }, // German/Belgium
-    {     42,   123 }, // German/Liechtenstein
-    {     42,   125 }, // German/Luxembourg
-    {     42,   206 }, // German/Switzerland
-    {     43,    85 }, // Greek/Greece
-    {     43,    56 }, // Greek/Cyprus
-    {     44,    86 }, // Greenlandic/Greenland
-    {     46,   100 }, // Gujarati/India
-    {     47,    83 }, // Hausa/Ghana
-    {     47,   156 }, // Hausa/Niger
-    {     47,   157 }, // Hausa/Nigeria
-    {     48,   105 }, // Hebrew/Israel
-    {     49,   100 }, // Hindi/India
-    {     50,    98 }, // Hungarian/Hungary
-    {     51,    99 }, // Icelandic/Iceland
-    {     52,   101 }, // Indonesian/Indonesia
-    {     57,   104 }, // Irish/Ireland
-    {     58,   106 }, // Italian/Italy
-    {     58,   206 }, // Italian/Switzerland
-    {     59,   108 }, // Japanese/Japan
-    {     61,   100 }, // Kannada/India
-    {     63,   110 }, // Kazakh/Kazakhstan
-    {     64,   179 }, // Kinyarwanda/Rwanda
-    {     65,   116 }, // Kirghiz/Kyrgyzstan
-    {     66,   114 }, // Korean/RepublicOfKorea
-    {     67,   102 }, // Kurdish/Iran
-    {     67,   103 }, // Kurdish/Iraq
-    {     67,   207 }, // Kurdish/SyrianArabRepublic
-    {     67,   217 }, // Kurdish/Turkey
-    {     69,   117 }, // Laothian/Lao
-    {     71,   118 }, // Latvian/Latvia
-    {     72,    49 }, // Lingala/DemocraticRepublicOfCongo
-    {     72,    50 }, // Lingala/PeoplesRepublicOfCongo
-    {     73,   124 }, // Lithuanian/Lithuania
-    {     74,   127 }, // Macedonian/Macedonia
-    {     76,   130 }, // Malay/Malaysia
-    {     76,    32 }, // Malay/BruneiDarussalam
-    {     77,   100 }, // Malayalam/India
-    {     78,   133 }, // Maltese/Malta
-    {     80,   100 }, // Marathi/India
-    {     82,   143 }, // Mongolian/Mongolia
-    {     84,   150 }, // Nepali/Nepal
-    {     85,   161 }, // Norwegian/Norway
-    {     87,   100 }, // Oriya/India
-    {     88,     1 }, // Pashto/Afghanistan
-    {     89,   102 }, // Persian/Iran
-    {     89,     1 }, // Persian/Afghanistan
-    {     90,   172 }, // Polish/Poland
-    {     91,   173 }, // Portuguese/Portugal
-    {     91,    30 }, // Portuguese/Brazil
-    {     92,   100 }, // Punjabi/India
-    {     92,   163 }, // Punjabi/Pakistan
-    {     95,   177 }, // Romanian/Romania
-    {     96,   178 }, // Russian/RussianFederation
-    {     96,   222 }, // Russian/Ukraine
-    {     99,   100 }, // Sanskrit/India
-    {    100,   241 }, // Serbian/SerbiaAndMontenegro
-    {    100,    27 }, // Serbian/BosniaAndHerzegowina
-    {    100,   238 }, // Serbian/Yugoslavia
-    {    101,   241 }, // SerboCroatian/SerbiaAndMontenegro
-    {    101,    27 }, // SerboCroatian/BosniaAndHerzegowina
-    {    101,   238 }, // SerboCroatian/Yugoslavia
-    {    102,   195 }, // Sesotho/SouthAfrica
-    {    103,   195 }, // Setswana/SouthAfrica
-    {    107,   195 }, // Siswati/SouthAfrica
-    {    108,   191 }, // Slovak/Slovakia
-    {    109,   192 }, // Slovenian/Slovenia
-    {    110,   194 }, // Somali/Somalia
-    {    110,    59 }, // Somali/Djibouti
-    {    110,    69 }, // Somali/Ethiopia
-    {    110,   111 }, // Somali/Kenya
-    {    111,   197 }, // Spanish/Spain
-    {    111,    10 }, // Spanish/Argentina
-    {    111,    26 }, // Spanish/Bolivia
-    {    111,    43 }, // Spanish/Chile
-    {    111,    47 }, // Spanish/Colombia
-    {    111,    52 }, // Spanish/CostaRica
-    {    111,    61 }, // Spanish/DominicanRepublic
-    {    111,    63 }, // Spanish/Ecuador
-    {    111,    65 }, // Spanish/ElSalvador
-    {    111,    90 }, // Spanish/Guatemala
-    {    111,    96 }, // Spanish/Honduras
-    {    111,   139 }, // Spanish/Mexico
-    {    111,   155 }, // Spanish/Nicaragua
-    {    111,   166 }, // Spanish/Panama
-    {    111,   168 }, // Spanish/Paraguay
-    {    111,   169 }, // Spanish/Peru
-    {    111,   174 }, // Spanish/PuertoRico
-    {    111,   225 }, // Spanish/UnitedStates
-    {    111,   227 }, // Spanish/Uruguay
-    {    111,   231 }, // Spanish/Venezuela
-    {    113,   111 }, // Swahili/Kenya
-    {    113,   210 }, // Swahili/Tanzania
-    {    114,   205 }, // Swedish/Sweden
-    {    114,    73 }, // Swedish/Finland
-    {    116,   209 }, // Tajik/Tajikistan
-    {    117,   100 }, // Tamil/India
-    {    118,   178 }, // Tatar/RussianFederation
-    {    119,   100 }, // Telugu/India
-    {    120,   211 }, // Thai/Thailand
-    {    122,    67 }, // Tigrinya/Eritrea
-    {    122,    69 }, // Tigrinya/Ethiopia
-    {    124,   195 }, // Tsonga/SouthAfrica
-    {    125,   217 }, // Turkish/Turkey
-    {    129,   222 }, // Ukrainian/Ukraine
-    {    130,   100 }, // Urdu/India
-    {    130,   163 }, // Urdu/Pakistan
-    {    131,   228 }, // Uzbek/Uzbekistan
-    {    131,     1 }, // Uzbek/Afghanistan
-    {    132,   232 }, // Vietnamese/VietNam
-    {    134,   224 }, // Welsh/UnitedKingdom
-    {    136,   195 }, // Xhosa/SouthAfrica
-    {    138,   157 }, // Yoruba/Nigeria
-    {    140,   195 }, // Zulu/SouthAfrica
-    {    141,   161 }, // Nynorsk/Norway
-    {    142,    27 }, // Bosnian/BosniaAndHerzegowina
-    {    143,   131 }, // Divehi/Maldives
-    {    144,   224 }, // Manx/UnitedKingdom
-    {    145,   224 }, // Cornish/UnitedKingdom
-    {    146,    83 }, // Akan/Ghana
-    {    147,   100 }, // Konkani/India
-    {    148,    83 }, // Ga/Ghana
-    {    149,   157 }, // Igbo/Nigeria
-    {    150,   111 }, // Kamba/Kenya
-    {    151,   207 }, // Syriac/SyrianArabRepublic
-    {    152,    67 }, // Blin/Eritrea
-    {    153,    67 }, // Geez/Eritrea
-    {    153,    69 }, // Geez/Ethiopia
-    {    154,   157 }, // Koro/Nigeria
-    {    155,    69 }, // Sidamo/Ethiopia
-    {    156,   157 }, // Atsam/Nigeria
-    {    157,    67 }, // Tigre/Eritrea
-    {    158,   157 }, // Jju/Nigeria
-    {    159,   106 }, // Friulian/Italy
-    {    160,   195 }, // Venda/SouthAfrica
-    {    161,    83 }, // Ewe/Ghana
-    {    161,   212 }, // Ewe/Togo
-    {    163,   225 }, // Hawaiian/UnitedStates
-    {    164,   157 }, // Tyap/Nigeria
-    {    165,   129 }, // Chewa/Malawi
+    {      2,    90 }, // Abkhazian/Georgia
+    {      3,    77 }, // Afar/Ethiopia
+    {      3,    67 }, // Afar/Djibouti
+    {      3,    74 }, // Afar/Eritrea
+    {      4,   216 }, // Afrikaans/South Africa
+    {      4,   162 }, // Afrikaans/Namibia
+    {      5,    40 }, // Aghem/Cameroon
+    {      6,    92 }, // Akan/Ghana
+    {      8,    40 }, // Akoose/Cameroon
+    {      9,     3 }, // Albanian/Albania
+    {      9,   126 }, // Albanian/Kosovo
+    {      9,   140 }, // Albanian/Macedonia
+    {     11,    77 }, // Amharic/Ethiopia
+    {     14,    71 }, // Arabic/Egypt
+    {     14,     4 }, // Arabic/Algeria
+    {     14,    19 }, // Arabic/Bahrain
+    {     14,    48 }, // Arabic/Chad
+    {     14,    55 }, // Arabic/Comoros
+    {     14,    67 }, // Arabic/Djibouti
+    {     14,    74 }, // Arabic/Eritrea
+    {     14,   113 }, // Arabic/Iraq
+    {     14,   116 }, // Arabic/Israel
+    {     14,   122 }, // Arabic/Jordan
+    {     14,   127 }, // Arabic/Kuwait
+    {     14,   132 }, // Arabic/Lebanon
+    {     14,   135 }, // Arabic/Libya
+    {     14,   149 }, // Arabic/Mauritania
+    {     14,   159 }, // Arabic/Morocco
+    {     14,   176 }, // Arabic/Oman
+    {     14,   180 }, // Arabic/Palestinian Territories
+    {     14,   190 }, // Arabic/Qatar
+    {     14,   205 }, // Arabic/Saudi Arabia
+    {     14,   215 }, // Arabic/Somalia
+    {     14,   219 }, // Arabic/South Sudan
+    {     14,   222 }, // Arabic/Sudan
+    {     14,   227 }, // Arabic/Syria
+    {     14,   238 }, // Arabic/Tunisia
+    {     14,   245 }, // Arabic/United Arab Emirates
+    {     14,   257 }, // Arabic/Western Sahara
+    {     14,   258 }, // Arabic/world
+    {     14,   259 }, // Arabic/Yemen
+    {     15,   220 }, // Aragonese/Spain
+    {     17,    12 }, // Armenian/Armenia
+    {     18,   110 }, // Assamese/India
+    {     19,   220 }, // Asturian/Spain
+    {     20,   230 }, // Asu/Tanzania
+    {     21,   169 }, // Atsam/Nigeria
+    {     25,    17 }, // Azerbaijani/Azerbaijan
+    {     25,   112 }, // Azerbaijani/Iran
+    {     25,   113 }, // Azerbaijani/Iraq
+    {     25,   239 }, // Azerbaijani/Turkey
+    {     26,    40 }, // Bafia/Cameroon
+    {     28,   145 }, // Bambara/Mali
+    {     30,    20 }, // Bangla/Bangladesh
+    {     30,   110 }, // Bangla/India
+    {     31,    40 }, // Basaa/Cameroon
+    {     32,   193 }, // Bashkir/Russia
+    {     33,   220 }, // Basque/Spain
+    {     35,    22 }, // Belarusian/Belarus
+    {     36,   260 }, // Bemba/Zambia
+    {     37,   230 }, // Bena/Tanzania
+    {     38,   110 }, // Bhojpuri/India
+    {     40,    74 }, // Blin/Eritrea
+    {     41,   110 }, // Bodo/India
+    {     42,    29 }, // Bosnian/Bosnia and Herzegovina
+    {     43,    84 }, // Breton/France
+    {     45,    36 }, // Bulgarian/Bulgaria
+    {     46,   161 }, // Burmese/Myanmar
+    {     47,   107 }, // Cantonese/Hong Kong
+    {     47,    50 }, // Cantonese/China
+    {     48,   220 }, // Catalan/Spain
+    {     48,     6 }, // Catalan/Andorra
+    {     48,    84 }, // Catalan/France
+    {     48,   117 }, // Catalan/Italy
+    {     49,   185 }, // Cebuano/Philippines
+    {     50,   159 }, // Central Atlas Tamazight/Morocco
+    {     51,   113 }, // Central Kurdish/Iraq
+    {     51,   112 }, // Central Kurdish/Iran
+    {     52,    20 }, // Chakma/Bangladesh
+    {     52,   110 }, // Chakma/India
+    {     54,   193 }, // Chechen/Russia
+    {     55,   248 }, // Cherokee/United States
+    {     56,   248 }, // Chickasaw/United States
+    {     57,   243 }, // Chiga/Uganda
+    {     58,    50 }, // Chinese/China
+    {     58,   107 }, // Chinese/Hong Kong
+    {     58,   139 }, // Chinese/Macao
+    {     58,   210 }, // Chinese/Singapore
+    {     58,   228 }, // Chinese/Taiwan
+    {     59,   193 }, // Church/Russia
+    {     60,   193 }, // Chuvash/Russia
+    {     61,    91 }, // Colognian/Germany
+    {     63,   246 }, // Cornish/United Kingdom
+    {     64,    84 }, // Corsican/France
+    {     66,    60 }, // Croatian/Croatia
+    {     66,    29 }, // Croatian/Bosnia and Herzegovina
+    {     67,    64 }, // Czech/Czechia
+    {     68,    65 }, // Danish/Denmark
+    {     68,    95 }, // Danish/Greenland
+    {     69,   144 }, // Divehi/Maldives
+    {     70,   110 }, // Dogri/India
+    {     71,    40 }, // Duala/Cameroon
+    {     72,   165 }, // Dutch/Netherlands
+    {     72,    13 }, // Dutch/Aruba
+    {     72,    23 }, // Dutch/Belgium
+    {     72,    44 }, // Dutch/Caribbean Netherlands
+    {     72,    62 }, // Dutch/Curacao
+    {     72,   211 }, // Dutch/Sint Maarten
+    {     72,   223 }, // Dutch/Suriname
+    {     73,    27 }, // Dzongkha/Bhutan
+    {     74,   124 }, // Embu/Kenya
+    {     75,   248 }, // English/United States
+    {     75,     5 }, // English/American Samoa
+    {     75,     8 }, // English/Anguilla
+    {     75,    10 }, // English/Antigua and Barbuda
+    {     75,    15 }, // English/Australia
+    {     75,    16 }, // English/Austria
+    {     75,    18 }, // English/Bahamas
+    {     75,    21 }, // English/Barbados
+    {     75,    23 }, // English/Belgium
+    {     75,    24 }, // English/Belize
+    {     75,    26 }, // English/Bermuda
+    {     75,    30 }, // English/Botswana
+    {     75,    33 }, // English/British Indian Ocean Territory
+    {     75,    34 }, // English/British Virgin Islands
+    {     75,    38 }, // English/Burundi
+    {     75,    40 }, // English/Cameroon
+    {     75,    41 }, // English/Canada
+    {     75,    45 }, // English/Cayman Islands
+    {     75,    51 }, // English/Christmas Island
+    {     75,    53 }, // English/Cocos Islands
+    {     75,    58 }, // English/Cook Islands
+    {     75,    63 }, // English/Cyprus
+    {     75,    65 }, // English/Denmark
+    {     75,    66 }, // English/Diego Garcia
+    {     75,    68 }, // English/Dominica
+    {     75,    74 }, // English/Eritrea
+    {     75,    76 }, // English/Eswatini
+    {     75,    78 }, // English/Europe
+    {     75,    80 }, // English/Falkland Islands
+    {     75,    82 }, // English/Fiji
+    {     75,    83 }, // English/Finland
+    {     75,    89 }, // English/Gambia
+    {     75,    91 }, // English/Germany
+    {     75,    92 }, // English/Ghana
+    {     75,    93 }, // English/Gibraltar
+    {     75,    96 }, // English/Grenada
+    {     75,    98 }, // English/Guam
+    {     75,   100 }, // English/Guernsey
+    {     75,   103 }, // English/Guyana
+    {     75,   107 }, // English/Hong Kong
+    {     75,   110 }, // English/India
+    {     75,   111 }, // English/Indonesia
+    {     75,   114 }, // English/Ireland
+    {     75,   115 }, // English/Isle of Man
+    {     75,   116 }, // English/Israel
+    {     75,   119 }, // English/Jamaica
+    {     75,   121 }, // English/Jersey
+    {     75,   124 }, // English/Kenya
+    {     75,   125 }, // English/Kiribati
+    {     75,   133 }, // English/Lesotho
+    {     75,   134 }, // English/Liberia
+    {     75,   139 }, // English/Macao
+    {     75,   141 }, // English/Madagascar
+    {     75,   142 }, // English/Malawi
+    {     75,   143 }, // English/Malaysia
+    {     75,   144 }, // English/Maldives
+    {     75,   146 }, // English/Malta
+    {     75,   147 }, // English/Marshall Islands
+    {     75,   150 }, // English/Mauritius
+    {     75,   153 }, // English/Micronesia
+    {     75,   158 }, // English/Montserrat
+    {     75,   162 }, // English/Namibia
+    {     75,   163 }, // English/Nauru
+    {     75,   165 }, // English/Netherlands
+    {     75,   167 }, // English/New Zealand
+    {     75,   169 }, // English/Nigeria
+    {     75,   171 }, // English/Niue
+    {     75,   172 }, // English/Norfolk Island
+    {     75,   173 }, // English/Northern Mariana Islands
+    {     75,   178 }, // English/Pakistan
+    {     75,   179 }, // English/Palau
+    {     75,   182 }, // English/Papua New Guinea
+    {     75,   185 }, // English/Philippines
+    {     75,   186 }, // English/Pitcairn
+    {     75,   189 }, // English/Puerto Rico
+    {     75,   194 }, // English/Rwanda
+    {     75,   196 }, // English/Saint Helena
+    {     75,   197 }, // English/Saint Kitts and Nevis
+    {     75,   198 }, // English/Saint Lucia
+    {     75,   201 }, // English/Saint Vincent and Grenadines
+    {     75,   202 }, // English/Samoa
+    {     75,   208 }, // English/Seychelles
+    {     75,   209 }, // English/Sierra Leone
+    {     75,   210 }, // English/Singapore
+    {     75,   211 }, // English/Sint Maarten
+    {     75,   213 }, // English/Slovenia
+    {     75,   214 }, // English/Solomon Islands
+    {     75,   216 }, // English/South Africa
+    {     75,   219 }, // English/South Sudan
+    {     75,   222 }, // English/Sudan
+    {     75,   225 }, // English/Sweden
+    {     75,   226 }, // English/Switzerland
+    {     75,   230 }, // English/Tanzania
+    {     75,   234 }, // English/Tokelau
+    {     75,   235 }, // English/Tonga
+    {     75,   236 }, // English/Trinidad and Tobago
+    {     75,   241 }, // English/Turks and Caicos Islands
+    {     75,   242 }, // English/Tuvalu
+    {     75,   243 }, // English/Uganda
+    {     75,   245 }, // English/United Arab Emirates
+    {     75,   246 }, // English/United Kingdom
+    {     75,   247 }, // English/United States Outlying Islands
+    {     75,   249 }, // English/United States Virgin Islands
+    {     75,   252 }, // English/Vanuatu
+    {     75,   258 }, // English/world
+    {     75,   260 }, // English/Zambia
+    {     75,   261 }, // English/Zimbabwe
+    {     76,   193 }, // Erzya/Russia
+    {     77,   258 }, // Esperanto/world
+    {     78,    75 }, // Estonian/Estonia
+    {     79,    92 }, // Ewe/Ghana
+    {     79,   233 }, // Ewe/Togo
+    {     80,    40 }, // Ewondo/Cameroon
+    {     81,    81 }, // Faroese/Faroe Islands
+    {     81,    65 }, // Faroese/Denmark
+    {     83,   185 }, // Filipino/Philippines
+    {     84,    83 }, // Finnish/Finland
+    {     85,    84 }, // French/France
+    {     85,     4 }, // French/Algeria
+    {     85,    23 }, // French/Belgium
+    {     85,    25 }, // French/Benin
+    {     85,    37 }, // French/Burkina Faso
+    {     85,    38 }, // French/Burundi
+    {     85,    40 }, // French/Cameroon
+    {     85,    41 }, // French/Canada
+    {     85,    46 }, // French/Central African Republic
+    {     85,    48 }, // French/Chad
+    {     85,    55 }, // French/Comoros
+    {     85,    56 }, // French/Congo - Brazzaville
+    {     85,    57 }, // French/Congo - Kinshasa
+    {     85,    67 }, // French/Djibouti
+    {     85,    73 }, // French/Equatorial Guinea
+    {     85,    85 }, // French/French Guiana
+    {     85,    86 }, // French/French Polynesia
+    {     85,    88 }, // French/Gabon
+    {     85,    97 }, // French/Guadeloupe
+    {     85,   102 }, // French/Guinea
+    {     85,   104 }, // French/Haiti
+    {     85,   118 }, // French/Ivory Coast
+    {     85,   138 }, // French/Luxembourg
+    {     85,   141 }, // French/Madagascar
+    {     85,   145 }, // French/Mali
+    {     85,   148 }, // French/Martinique
+    {     85,   149 }, // French/Mauritania
+    {     85,   150 }, // French/Mauritius
+    {     85,   151 }, // French/Mayotte
+    {     85,   155 }, // French/Monaco
+    {     85,   159 }, // French/Morocco
+    {     85,   166 }, // French/New Caledonia
+    {     85,   170 }, // French/Niger
+    {     85,   191 }, // French/Reunion
+    {     85,   194 }, // French/Rwanda
+    {     85,   195 }, // French/Saint Barthelemy
+    {     85,   199 }, // French/Saint Martin
+    {     85,   200 }, // French/Saint Pierre and Miquelon
+    {     85,   206 }, // French/Senegal
+    {     85,   208 }, // French/Seychelles
+    {     85,   226 }, // French/Switzerland
+    {     85,   227 }, // French/Syria
+    {     85,   233 }, // French/Togo
+    {     85,   238 }, // French/Tunisia
+    {     85,   252 }, // French/Vanuatu
+    {     85,   256 }, // French/Wallis and Futuna
+    {     86,   117 }, // Friulian/Italy
+    {     87,   206 }, // Fulah/Senegal
+    {     87,    37 }, // Fulah/Burkina Faso
+    {     87,    40 }, // Fulah/Cameroon
+    {     87,    89 }, // Fulah/Gambia
+    {     87,    92 }, // Fulah/Ghana
+    {     87,   101 }, // Fulah/Guinea-Bissau
+    {     87,   102 }, // Fulah/Guinea
+    {     87,   134 }, // Fulah/Liberia
+    {     87,   149 }, // Fulah/Mauritania
+    {     87,   169 }, // Fulah/Nigeria
+    {     87,   170 }, // Fulah/Niger
+    {     87,   209 }, // Fulah/Sierra Leone
+    {     88,   246 }, // Gaelic/United Kingdom
+    {     89,    92 }, // Ga/Ghana
+    {     90,   220 }, // Galician/Spain
+    {     91,   243 }, // Ganda/Uganda
+    {     92,    77 }, // Geez/Ethiopia
+    {     92,    74 }, // Geez/Eritrea
+    {     93,    90 }, // Georgian/Georgia
+    {     94,    91 }, // German/Germany
+    {     94,    16 }, // German/Austria
+    {     94,    23 }, // German/Belgium
+    {     94,   117 }, // German/Italy
+    {     94,   136 }, // German/Liechtenstein
+    {     94,   138 }, // German/Luxembourg
+    {     94,   226 }, // German/Switzerland
+    {     96,    94 }, // Greek/Greece
+    {     96,    63 }, // Greek/Cyprus
+    {     97,   183 }, // Guarani/Paraguay
+    {     98,   110 }, // Gujarati/India
+    {     99,   124 }, // Gusii/Kenya
+    {    101,   169 }, // Hausa/Nigeria
+    {    101,   222 }, // Hausa/Sudan
+    {    101,    92 }, // Hausa/Ghana
+    {    101,   170 }, // Hausa/Niger
+    {    102,   248 }, // Hawaiian/United States
+    {    103,   116 }, // Hebrew/Israel
+    {    105,   110 }, // Hindi/India
+    {    107,   108 }, // Hungarian/Hungary
+    {    108,   109 }, // Icelandic/Iceland
+    {    109,   258 }, // Ido/world
+    {    110,   169 }, // Igbo/Nigeria
+    {    111,    83 }, // Inari Sami/Finland
+    {    112,   111 }, // Indonesian/Indonesia
+    {    114,   258 }, // Interlingua/world
+    {    115,    75 }, // Interlingue/Estonia
+    {    116,    41 }, // Inuktitut/Canada
+    {    118,   114 }, // Irish/Ireland
+    {    118,   246 }, // Irish/United Kingdom
+    {    119,   117 }, // Italian/Italy
+    {    119,   203 }, // Italian/San Marino
+    {    119,   226 }, // Italian/Switzerland
+    {    119,   253 }, // Italian/Vatican City
+    {    120,   120 }, // Japanese/Japan
+    {    121,   111 }, // Javanese/Indonesia
+    {    122,   169 }, // Jju/Nigeria
+    {    123,   206 }, // Jola-Fonyi/Senegal
+    {    124,    43 }, // Kabuverdianu/Cape Verde
+    {    125,     4 }, // Kabyle/Algeria
+    {    126,    40 }, // Kako/Cameroon
+    {    127,    95 }, // Kalaallisut/Greenland
+    {    128,   124 }, // Kalenjin/Kenya
+    {    129,   124 }, // Kamba/Kenya
+    {    130,   110 }, // Kannada/India
+    {    132,   110 }, // Kashmiri/India
+    {    133,   123 }, // Kazakh/Kazakhstan
+    {    134,    40 }, // Kenyang/Cameroon
+    {    135,    39 }, // Khmer/Cambodia
+    {    136,    99 }, // Kiche/Guatemala
+    {    137,   124 }, // Kikuyu/Kenya
+    {    138,   194 }, // Kinyarwanda/Rwanda
+    {    141,   110 }, // Konkani/India
+    {    142,   218 }, // Korean/South Korea
+    {    142,    50 }, // Korean/China
+    {    142,   174 }, // Korean/North Korea
+    {    144,   145 }, // Koyraboro Senni/Mali
+    {    145,   145 }, // Koyra Chiini/Mali
+    {    146,   134 }, // Kpelle/Liberia
+    {    146,   102 }, // Kpelle/Guinea
+    {    148,   239 }, // Kurdish/Turkey
+    {    149,    40 }, // Kwasio/Cameroon
+    {    150,   128 }, // Kyrgyz/Kyrgyzstan
+    {    151,   248 }, // Lakota/United States
+    {    152,   230 }, // Langi/Tanzania
+    {    153,   129 }, // Lao/Laos
+    {    154,   253 }, // Latin/Vatican City
+    {    155,   131 }, // Latvian/Latvia
+    {    158,    57 }, // Lingala/Congo - Kinshasa
+    {    158,     7 }, // Lingala/Angola
+    {    158,    46 }, // Lingala/Central African Republic
+    {    158,    56 }, // Lingala/Congo - Brazzaville
+    {    160,   137 }, // Lithuanian/Lithuania
+    {    161,   258 }, // Lojban/world
+    {    162,    91 }, // Lower Sorbian/Germany
+    {    163,    91 }, // Low German/Germany
+    {    163,   165 }, // Low German/Netherlands
+    {    164,    57 }, // Luba-Katanga/Congo - Kinshasa
+    {    165,   225 }, // Lule Sami/Sweden
+    {    165,   175 }, // Lule Sami/Norway
+    {    166,   124 }, // Luo/Kenya
+    {    167,   138 }, // Luxembourgish/Luxembourg
+    {    168,   124 }, // Luyia/Kenya
+    {    169,   140 }, // Macedonian/Macedonia
+    {    170,   230 }, // Machame/Tanzania
+    {    171,   110 }, // Maithili/India
+    {    172,   160 }, // Makhuwa-Meetto/Mozambique
+    {    173,   230 }, // Makonde/Tanzania
+    {    174,   141 }, // Malagasy/Madagascar
+    {    175,   110 }, // Malayalam/India
+    {    176,   143 }, // Malay/Malaysia
+    {    176,    35 }, // Malay/Brunei
+    {    176,   111 }, // Malay/Indonesia
+    {    176,   210 }, // Malay/Singapore
+    {    177,   146 }, // Maltese/Malta
+    {    179,   110 }, // Manipuri/India
+    {    180,   115 }, // Manx/Isle of Man
+    {    181,   167 }, // Maori/New Zealand
+    {    182,    49 }, // Mapuche/Chile
+    {    183,   110 }, // Marathi/India
+    {    185,   124 }, // Masai/Kenya
+    {    185,   230 }, // Masai/Tanzania
+    {    186,   112 }, // Mazanderani/Iran
+    {    188,   124 }, // Meru/Kenya
+    {    189,    40 }, // Meta/Cameroon
+    {    190,    41 }, // Mohawk/Canada
+    {    191,   156 }, // Mongolian/Mongolia
+    {    191,    50 }, // Mongolian/China
+    {    192,   150 }, // Morisyen/Mauritius
+    {    193,    40 }, // Mundang/Cameroon
+    {    194,   248 }, // Muscogee/United States
+    {    195,   162 }, // Nama/Namibia
+    {    197,   248 }, // Navajo/United States
+    {    199,   164 }, // Nepali/Nepal
+    {    199,   110 }, // Nepali/India
+    {    201,    40 }, // Ngiemboon/Cameroon
+    {    202,    40 }, // Ngomba/Cameroon
+    {    203,   169 }, // Nigerian Pidgin/Nigeria
+    {    204,   102 }, // Nko/Guinea
+    {    205,   112 }, // Northern Luri/Iran
+    {    205,   113 }, // Northern Luri/Iraq
+    {    206,   175 }, // Northern Sami/Norway
+    {    206,    83 }, // Northern Sami/Finland
+    {    206,   225 }, // Northern Sami/Sweden
+    {    207,   216 }, // Northern Sotho/South Africa
+    {    208,   261 }, // North Ndebele/Zimbabwe
+    {    209,   175 }, // Norwegian Bokmal/Norway
+    {    209,   224 }, // Norwegian Bokmal/Svalbard and Jan Mayen
+    {    210,   175 }, // Norwegian Nynorsk/Norway
+    {    211,   219 }, // Nuer/South Sudan
+    {    212,   142 }, // Nyanja/Malawi
+    {    213,   243 }, // Nyankole/Uganda
+    {    214,    84 }, // Occitan/France
+    {    214,   220 }, // Occitan/Spain
+    {    215,   110 }, // Odia/India
+    {    220,    77 }, // Oromo/Ethiopia
+    {    220,   124 }, // Oromo/Kenya
+    {    221,   248 }, // Osage/United States
+    {    222,    90 }, // Ossetic/Georgia
+    {    222,   193 }, // Ossetic/Russia
+    {    226,    62 }, // Papiamento/Curacao
+    {    226,    13 }, // Papiamento/Aruba
+    {    227,     1 }, // Pashto/Afghanistan
+    {    227,   178 }, // Pashto/Pakistan
+    {    228,   112 }, // Persian/Iran
+    {    228,     1 }, // Persian/Afghanistan
+    {    230,   187 }, // Polish/Poland
+    {    231,    32 }, // Portuguese/Brazil
+    {    231,     7 }, // Portuguese/Angola
+    {    231,    43 }, // Portuguese/Cape Verde
+    {    231,    73 }, // Portuguese/Equatorial Guinea
+    {    231,   101 }, // Portuguese/Guinea-Bissau
+    {    231,   138 }, // Portuguese/Luxembourg
+    {    231,   139 }, // Portuguese/Macao
+    {    231,   160 }, // Portuguese/Mozambique
+    {    231,   188 }, // Portuguese/Portugal
+    {    231,   204 }, // Portuguese/Sao Tome and Principe
+    {    231,   226 }, // Portuguese/Switzerland
+    {    231,   232 }, // Portuguese/Timor-Leste
+    {    232,   187 }, // Prussian/Poland
+    {    233,   110 }, // Punjabi/India
+    {    233,   178 }, // Punjabi/Pakistan
+    {    234,   184 }, // Quechua/Peru
+    {    234,    28 }, // Quechua/Bolivia
+    {    234,    70 }, // Quechua/Ecuador
+    {    235,   192 }, // Romanian/Romania
+    {    235,   154 }, // Romanian/Moldova
+    {    236,   226 }, // Romansh/Switzerland
+    {    237,   230 }, // Rombo/Tanzania
+    {    238,    38 }, // Rundi/Burundi
+    {    239,   193 }, // Russian/Russia
+    {    239,    22 }, // Russian/Belarus
+    {    239,   123 }, // Russian/Kazakhstan
+    {    239,   128 }, // Russian/Kyrgyzstan
+    {    239,   154 }, // Russian/Moldova
+    {    239,   244 }, // Russian/Ukraine
+    {    240,   230 }, // Rwa/Tanzania
+    {    241,    74 }, // Saho/Eritrea
+    {    242,   193 }, // Sakha/Russia
+    {    243,   124 }, // Samburu/Kenya
+    {    245,    46 }, // Sango/Central African Republic
+    {    246,   230 }, // Sangu/Tanzania
+    {    247,   110 }, // Sanskrit/India
+    {    248,   110 }, // Santali/India
+    {    249,   117 }, // Sardinian/Italy
+    {    251,   160 }, // Sena/Mozambique
+    {    252,   207 }, // Serbian/Serbia
+    {    252,    29 }, // Serbian/Bosnia and Herzegovina
+    {    252,   126 }, // Serbian/Kosovo
+    {    252,   157 }, // Serbian/Montenegro
+    {    253,   230 }, // Shambala/Tanzania
+    {    254,   261 }, // Shona/Zimbabwe
+    {    255,    50 }, // Sichuan Yi/China
+    {    256,   117 }, // Sicilian/Italy
+    {    257,    77 }, // Sidamo/Ethiopia
+    {    258,   187 }, // Silesian/Poland
+    {    259,   178 }, // Sindhi/Pakistan
+    {    259,   110 }, // Sindhi/India
+    {    260,   221 }, // Sinhala/Sri Lanka
+    {    261,    83 }, // Skolt Sami/Finland
+    {    262,   212 }, // Slovak/Slovakia
+    {    263,   213 }, // Slovenian/Slovenia
+    {    264,   243 }, // Soga/Uganda
+    {    265,   215 }, // Somali/Somalia
+    {    265,    67 }, // Somali/Djibouti
+    {    265,    77 }, // Somali/Ethiopia
+    {    265,   124 }, // Somali/Kenya
+    {    266,   112 }, // Southern Kurdish/Iran
+    {    266,   113 }, // Southern Kurdish/Iraq
+    {    267,   225 }, // Southern Sami/Sweden
+    {    267,   175 }, // Southern Sami/Norway
+    {    268,   216 }, // Southern Sotho/South Africa
+    {    268,   133 }, // Southern Sotho/Lesotho
+    {    269,   216 }, // South Ndebele/South Africa
+    {    270,   220 }, // Spanish/Spain
+    {    270,    11 }, // Spanish/Argentina
+    {    270,    24 }, // Spanish/Belize
+    {    270,    28 }, // Spanish/Bolivia
+    {    270,    32 }, // Spanish/Brazil
+    {    270,    42 }, // Spanish/Canary Islands
+    {    270,    47 }, // Spanish/Ceuta and Melilla
+    {    270,    49 }, // Spanish/Chile
+    {    270,    54 }, // Spanish/Colombia
+    {    270,    59 }, // Spanish/Costa Rica
+    {    270,    61 }, // Spanish/Cuba
+    {    270,    69 }, // Spanish/Dominican Republic
+    {    270,    70 }, // Spanish/Ecuador
+    {    270,    72 }, // Spanish/El Salvador
+    {    270,    73 }, // Spanish/Equatorial Guinea
+    {    270,    99 }, // Spanish/Guatemala
+    {    270,   106 }, // Spanish/Honduras
+    {    270,   130 }, // Spanish/Latin America
+    {    270,   152 }, // Spanish/Mexico
+    {    270,   168 }, // Spanish/Nicaragua
+    {    270,   181 }, // Spanish/Panama
+    {    270,   183 }, // Spanish/Paraguay
+    {    270,   184 }, // Spanish/Peru
+    {    270,   185 }, // Spanish/Philippines
+    {    270,   189 }, // Spanish/Puerto Rico
+    {    270,   248 }, // Spanish/United States
+    {    270,   250 }, // Spanish/Uruguay
+    {    270,   254 }, // Spanish/Venezuela
+    {    271,   159 }, // Standard Moroccan Tamazight/Morocco
+    {    272,   111 }, // Sundanese/Indonesia
+    {    273,   230 }, // Swahili/Tanzania
+    {    273,    57 }, // Swahili/Congo - Kinshasa
+    {    273,   124 }, // Swahili/Kenya
+    {    273,   243 }, // Swahili/Uganda
+    {    274,   216 }, // Swati/South Africa
+    {    274,    76 }, // Swati/Eswatini
+    {    275,   225 }, // Swedish/Sweden
+    {    275,     2 }, // Swedish/Aland Islands
+    {    275,    83 }, // Swedish/Finland
+    {    276,   226 }, // Swiss German/Switzerland
+    {    276,    84 }, // Swiss German/France
+    {    276,   136 }, // Swiss German/Liechtenstein
+    {    277,   113 }, // Syriac/Iraq
+    {    277,   227 }, // Syriac/Syria
+    {    278,   159 }, // Tachelhit/Morocco
+    {    280,   255 }, // Tai Dam/Vietnam
+    {    281,   124 }, // Taita/Kenya
+    {    282,   229 }, // Tajik/Tajikistan
+    {    283,   110 }, // Tamil/India
+    {    283,   143 }, // Tamil/Malaysia
+    {    283,   210 }, // Tamil/Singapore
+    {    283,   221 }, // Tamil/Sri Lanka
+    {    284,   228 }, // Taroko/Taiwan
+    {    285,   170 }, // Tasawaq/Niger
+    {    286,   193 }, // Tatar/Russia
+    {    287,   110 }, // Telugu/India
+    {    288,   243 }, // Teso/Uganda
+    {    288,   124 }, // Teso/Kenya
+    {    289,   231 }, // Thai/Thailand
+    {    290,    50 }, // Tibetan/China
+    {    290,   110 }, // Tibetan/India
+    {    291,    74 }, // Tigre/Eritrea
+    {    292,    77 }, // Tigrinya/Ethiopia
+    {    292,    74 }, // Tigrinya/Eritrea
+    {    294,   182 }, // Tok Pisin/Papua New Guinea
+    {    295,   235 }, // Tongan/Tonga
+    {    296,   216 }, // Tsonga/South Africa
+    {    297,   216 }, // Tswana/South Africa
+    {    297,    30 }, // Tswana/Botswana
+    {    298,   239 }, // Turkish/Turkey
+    {    298,    63 }, // Turkish/Cyprus
+    {    299,   240 }, // Turkmen/Turkmenistan
+    {    301,   169 }, // Tyap/Nigeria
+    {    303,   244 }, // Ukrainian/Ukraine
+    {    304,    91 }, // Upper Sorbian/Germany
+    {    305,   178 }, // Urdu/Pakistan
+    {    305,   110 }, // Urdu/India
+    {    306,    50 }, // Uyghur/China
+    {    307,   251 }, // Uzbek/Uzbekistan
+    {    307,     1 }, // Uzbek/Afghanistan
+    {    308,   134 }, // Vai/Liberia
+    {    309,   216 }, // Venda/South Africa
+    {    310,   255 }, // Vietnamese/Vietnam
+    {    311,   258 }, // Volapuk/world
+    {    312,   230 }, // Vunjo/Tanzania
+    {    313,    23 }, // Walloon/Belgium
+    {    314,   226 }, // Walser/Switzerland
+    {    315,    15 }, // Warlpiri/Australia
+    {    316,   246 }, // Welsh/United Kingdom
+    {    317,   178 }, // Western Balochi/Pakistan
+    {    317,     1 }, // Western Balochi/Afghanistan
+    {    317,   112 }, // Western Balochi/Iran
+    {    317,   176 }, // Western Balochi/Oman
+    {    317,   245 }, // Western Balochi/United Arab Emirates
+    {    318,   165 }, // Western Frisian/Netherlands
+    {    319,    77 }, // Wolaytta/Ethiopia
+    {    320,   206 }, // Wolof/Senegal
+    {    321,   216 }, // Xhosa/South Africa
+    {    322,    40 }, // Yangben/Cameroon
+    {    323,   244 }, // Yiddish/Ukraine
+    {    324,   169 }, // Yoruba/Nigeria
+    {    324,    25 }, // Yoruba/Benin
+    {    325,   170 }, // Zarma/Niger
+    {    326,    50 }, // Zhuang/China
+    {    327,   216 }, // Zulu/South Africa
+    {    328,    32 }, // Kaingang/Brazil
+    {    329,    32 }, // Nheengatu/Brazil
+    {    329,    54 }, // Nheengatu/Colombia
+    {    329,   254 }, // Nheengatu/Venezuela
+    {    330,   110 }, // Haryanvi/India
+    {    331,    91 }, // Northern Frisian/Germany
+    {    332,   110 }, // Rajasthani/India
+    {    333,   193 }, // Moksha/Russia
+    {    334,   258 }, // Toki Pona/world
+    {    335,   214 }, // Pijin/Solomon Islands
+    {    336,   169 }, // Obolo/Nigeria
+    {    337,   178 }, // Baluchi/Pakistan
+    {    338,   117 }, // Ligurian/Italy
+    {    339,   161 }, // Rohingya/Myanmar
+    {    339,    20 }, // Rohingya/Bangladesh
+    {    340,   178 }, // Torwali/Pakistan
+    {    341,    25 }, // Anii/Benin
+    {    342,   110 }, // Kangri/India
+    {    343,   117 }, // Venetian/Italy
 };
-static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]);
+
+// GENERATED PART ENDS HERE
+
+static const int g_locale_list_count = std::size(g_locale_list);
 
 LocaleModel::LocaleModel(QObject *parent)
     : QAbstractItemModel(parent)
@@ -283,7 +680,7 @@ LocaleModel::LocaleModel(QObject *parent)
 QVariant LocaleModel::data(const QModelIndex &index, int role) const
 {
     if (!index.isValid()
-        || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole
+        || (role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole)
         || index.column() >= g_model_cols
         || index.row() >= g_locale_list_count + 2)
         return QVariant();
@@ -424,9 +821,9 @@ int LocaleModel::rowCount(const QModelIndex &parent) const
 Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const
 {
     if (!index.isValid())
-        return 0;
+        return {};
     if (index.row() == 0 && index.column() == g_model_cols - 1)
-        return 0;
+        return {};
     if (index.row() == 0)
         return QAbstractItemModel::flags(index) | Qt::ItemIsEditable;
     return QAbstractItemModel::flags(index);
@@ -438,7 +835,7 @@ bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int r
         || index.row() != 0
         || index.column() >= g_model_cols - 1
         || role != Qt::EditRole
-        || m_data_list.at(index.column()).type() != value.type())
+        || m_data_list.at(index.column()).typeId() != value.typeId())
         return false;
 
     m_data_list[index.column()] = value;
diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h
index f35b984b4c..a0ba45bb15 100644
--- a/util/locale_database/testlocales/localemodel.h
+++ b/util/locale_database/testlocales/localemodel.h
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 #ifndef LOCALEMODEL_H
 #define LOCALEMODEL_H
 
@@ -38,17 +13,17 @@ class LocaleModel : public QAbstractItemModel
 public:
     LocaleModel(QObject *parent = nullptr);
 
-    virtual int columnCount(const QModelIndex &parent = QModelIndex()) const;
-    virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const;
-    virtual QModelIndex index(int row, int column,
-                                const QModelIndex &parent = QModelIndex()) const;
-    virtual QModelIndex parent(const QModelIndex &index) const;
-    virtual int rowCount(const QModelIndex &parent = QModelIndex()) const;
-    virtual QVariant headerData(int section, Qt::Orientation orientation,
-                                int role = Qt::DisplayRole ) const;
-    virtual Qt::ItemFlags flags(const QModelIndex &index) const;
-    virtual bool setData(const QModelIndex &index, const QVariant &value,
-                            int role = Qt::EditRole);
+    int columnCount(const QModelIndex &parent = QModelIndex()) const override;
+    QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const override;
+    QModelIndex index(int row, int column,
+                      const QModelIndex &parent = QModelIndex()) const override;
+    QModelIndex parent(const QModelIndex &index) const override;
+    int rowCount(const QModelIndex &parent = QModelIndex()) const override;
+    QVariant headerData(int section, Qt::Orientation orientation,
+                        int role = Qt::DisplayRole ) const override;
+    Qt::ItemFlags flags(const QModelIndex &index) const override;
+    bool setData(const QModelIndex &index, const QVariant &value,
+                 int role = Qt::EditRole) override;
 private:
     QList<QVariant> m_data_list;
 };
diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp
index 3ff7f73a98..df8a3c28ab 100644
--- a/util/locale_database/testlocales/localewidget.cpp
+++ b/util/locale_database/testlocales/localewidget.cpp
@@ -1,33 +1,8 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 #include <QTableView>
 #include <QVBoxLayout>
-#include <QItemDelegate>
+#include <QStyledItemDelegate>
 #include <QItemEditorFactory>
 #include <QDoubleSpinBox>
 
@@ -51,26 +26,26 @@ public:
 class EditorFactory : public QItemEditorFactory
 {
 public:
-    EditorFactory() {
-        static DoubleEditorCreator double_editor_creator;
-        registerEditor(QVariant::Double, &double_editor_creator);
+    EditorFactory()
+    {
+        // registerEditor() takes ownership of the creator, so heap-allocate it.
+        registerEditor(QMetaType::Double, new DoubleEditorCreator);
     }
 };
 
 LocaleWidget::LocaleWidget(QWidget *parent)
-    : QWidget(parent)
+    : QWidget(parent),
+      m_model(new LocaleModel(this)),
+      m_view(new QTableView(this))
 {
-    m_model = new LocaleModel(this);
-    m_view = new QTableView(this);
-
-    QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate());
+    QStyledItemDelegate *delegate = qobject_cast<QStyledItemDelegate*>(m_view->itemDelegate());
     Q_ASSERT(delegate != 0);
-    static EditorFactory editor_factory;
-    delegate->setItemEditorFactory(&editor_factory);
+    static EditorFactory editorFactory;
+    delegate->setItemEditorFactory(&editorFactory);
 
     m_view->setModel(m_model);
 
     QVBoxLayout *layout = new QVBoxLayout(this);
-    layout->setMargin(0);
+    layout->setContentsMargins(0, 0, 0, 0);
     layout->addWidget(m_view);
 }
diff --git a/util/locale_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h
index c562da119b..36613135ea 100644
--- a/util/locale_database/testlocales/localewidget.h
+++ b/util/locale_database/testlocales/localewidget.h
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 #ifndef LOCALEWIDGET_H
 #define LOCALEWIDGET_H
 
diff --git a/util/locale_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp
index 0c3c45f989..d94726a2a9 100644
--- a/util/locale_database/testlocales/main.cpp
+++ b/util/locale_database/testlocales/main.cpp
@@ -1,30 +1,5 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the utils of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2016 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 #include <QApplication>
 
 #include "localewidget.h"
diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro
index a9a6247f96..126c19589b 100644
--- a/util/locale_database/testlocales/testlocales.pro
+++ b/util/locale_database/testlocales/testlocales.pro
@@ -1,4 +1,5 @@
 TARGET = testlocales
 CONFIG += debug
+QT += widgets
 SOURCES += localemodel.cpp  localewidget.cpp  main.cpp
-HEADERS += localemodel.h  localewidget.h
-\ No newline at end of file
+HEADERS += localemodel.h  localewidget.h