1 files changed, 139 insertions, 164 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index f2b6616fce..9e0bae9667 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -1,31 +1,5 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
 """Digesting the CLDR's data.
 
 Provides two classes:
@@ -36,15 +10,17 @@ The former should normally be all you need to access.
 See individual classes for further detail.
 """
 
+from typing import Iterable, TextIO
 from xml.dom import minidom
 from weakref import WeakValueDictionary as CacheDict
-import os
+from pathlib import Path
 
 from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
 from qlocalexml import Locale
 
 class CldrReader (object):
-    def __init__(self, root, grumble = lambda msg: None, whitter = lambda msg: None):
+    def __init__(self, root: Path, grumble = lambda msg: None, whitter = lambda msg: None):
         """Set up a reader object for reading CLDR data.
 
         Single parameter, root, is the file-system path to the root of
@@ -67,7 +43,7 @@ class CldrReader (object):
         Yields pairs (have, give) of 4-tuples; if what you have
         matches the left member, giving the right member is probably
         sensible. Each 4-tuple's entries are the full names of a
-        language, a script, a country (strictly territory) and a
+        language, a script, a territory (usually a country) and a
         variant (currently ignored)."""
         skips = []
         for got, use in self.root.likelySubTags():
@@ -79,7 +55,7 @@ class CldrReader (object):
                     and e.message.startswith('Unknown ') and ' code ' in e.message):
                     skips.append(use)
                 else:
-                    self.grumble('Skipping likelySubtag "{}" -> "{}" ({})\n'.format(got, use, e.message))
+                    self.grumble(f'Skipping likelySubtag "{got}" -> "{use}" ({e})\n')
                 continue
             if all(code.startswith('Any') and code[3].isupper() for code in have[:-1]):
                 continue
@@ -87,7 +63,7 @@ class CldrReader (object):
             give = (give[0],
                     # Substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
                     have[1] if give[1] == 'AnyScript' else give[1],
-                    have[2] if give[2] == 'AnyCountry' else give[2],
+                    have[2] if give[2] == 'AnyTerritory' else give[2],
                     give[3]) # AnyVariant similarly ?
 
             yield have, give
@@ -99,50 +75,49 @@ class CldrReader (object):
             pass # self.__wrapped(self.whitter, 'Skipping likelySubtags (for unknown codes): ', skips)
 
     def readLocales(self, calendars = ('gregorian',)):
-        locales = tuple(self.__allLocales(calendars))
-        return dict(((k.language_id, k.script_id, k.country_id, k.variant_code),
-                     k) for k in locales)
+        return {(k.language_id, k.script_id, k.territory_id, k.variant_code): k
+                for k in self.__allLocales(calendars)}
 
     def __allLocales(self, calendars):
         def skip(locale, reason):
-            return 'Skipping defaultContent locale "{}" ({})\n'.format(locale, reason)
+            return f'Skipping defaultContent locale "{locale}" ({reason})\n'
 
         for locale in self.root.defaultContentLocales:
             try:
-                language, script, country, variant = self.__splitLocale(locale)
+                language, script, territory, variant = self.__splitLocale(locale)
             except ValueError:
                 self.whitter(skip(locale, 'only language tag'))
                 continue
 
-            if not (script or country):
+            if not (script or territory):
                 self.grumble(skip(locale, 'second tag is neither script nor territory'))
                 continue
 
-            if not (language and country):
+            if not (language and territory):
                 continue
 
             try:
                 yield self.__getLocaleData(self.root.locale(locale), calendars,
-                                           language, script, country, variant)
+                                           language, script, territory, variant)
             except Error as e:
                 self.grumble(skip(locale, e.message))
 
         for locale in self.root.fileLocales:
             try:
                 chain = self.root.locale(locale)
-                language, script, country, variant = chain.tagCodes()
+                language, script, territory, variant = chain.tagCodes()
                 assert language
                 # TODO: this skip should probably be based on likely
-                # sub-tags, instead of empty country: if locale has a
+                # sub-tags, instead of empty territory: if locale has a
                 # likely-subtag expansion, that's what QLocale uses,
                 # and we'll be saving its data for the expanded locale
                 # anyway, so don't need to record it for itself.
                 # See also QLocaleXmlReader.loadLocaleMap's grumble.
-                if not country:
+                if not territory:
                     continue
-                yield self.__getLocaleData(chain, calendars, language, script, country, variant)
+                yield self.__getLocaleData(chain, calendars, language, script, territory, variant)
             except Error as e:
-                self.grumble('Skipping file locale "{}" ({})\n'.format(locale, e.message))
+                self.grumble(f'Skipping file locale "{locale}" ({e})\n')
 
     import textwrap
     @staticmethod
@@ -153,13 +128,13 @@ class CldrReader (object):
 
     def __parseTags(self, locale):
         tags = self.__splitLocale(locale)
-        language = tags.next()
-        script = country = variant = ''
+        language = next(tags)
+        script = territory = variant = ''
         try:
-            script, country, variant = tags
+            script, territory, variant = tags
         except ValueError:
             pass
-        return tuple(p[1] for p in self.root.codesToIdName(language, script, country, variant))
+        return tuple(p[1] for p in self.root.codesToIdName(language, script, territory, variant))
 
     def __splitLocale(self, name):
         """Generate (language, script, territory, variant) from a locale name
@@ -171,14 +146,18 @@ class CldrReader (object):
         single tag (i.e. contains no underscores).  Always yields 1 or
         4 values, never 2 or 3."""
         tags = iter(name.split('_'))
-        yield tags.next() # Language
-        tag = tags.next() # may raise StopIteration
+        yield next(tags) # Language
+
+        try:
+            tag = next(tags)
+        except StopIteration:
+            return
 
         # Script is always four letters, always capitalised:
         if len(tag) == 4 and tag[0].isupper() and tag[1:].islower():
             yield tag
             try:
-                tag = tags.next()
+                tag = next(tags)
             except StopIteration:
                 tag = ''
         else:
@@ -188,7 +167,7 @@ class CldrReader (object):
         if tag and tag.isupper() or tag.isdigit():
             yield tag
             try:
-                tag = tags.next()
+                tag = next(tags)
             except StopIteration:
                 tag = ''
         else:
@@ -201,21 +180,22 @@ class CldrReader (object):
         else:
             yield ''
 
-        # If nothing is left, StopIteration will avoid the warning:
-        if not tag:
-            tag = tags.next()
-        self.grumble('Ignoring unparsed cruft {} in {}\n'.format('_'.join(tag + tuple(tags)), name))
+        rest = [tag] if tag else []
+        rest.extend(tags)
 
-    def __getLocaleData(self, scan, calendars, language, script, country, variant):
-        ids, names = zip(*self.root.codesToIdName(language, script, country, variant))
-        assert ids[0] > 0 and ids[2] > 0, (language, script, country, variant)
+        if rest:
+            self.grumble(f'Ignoring unparsed cruft {"_".join(rest)} in {name}\n')
+
+    def __getLocaleData(self, scan, calendars, language, script, territory, variant):
+        ids, names = zip(*self.root.codesToIdName(language, script, territory, variant))
+        assert ids[0] > 0 and ids[2] > 0, (language, script, territory, variant)
         locale = Locale(
             language = names[0], language_code = language, language_id = ids[0],
             script = names[1], script_code = script, script_id = ids[1],
-            country = names[2], country_code = country, country_id = ids[2],
+            territory = names[2], territory_code = territory, territory_id = ids[2],
             variant_code = variant)
 
-        firstDay, weStart, weEnd = self.root.weekData(country)
+        firstDay, weStart, weEnd = self.root.weekData(territory)
         assert all(day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
                    for day in (firstDay, weStart, weEnd))
 
@@ -223,7 +203,7 @@ class CldrReader (object):
                       weekendStart = weStart,
                       weekendEnd = weEnd)
 
-        iso, digits, rounding = self.root.currencyData(country)
+        iso, digits, rounding = self.root.currencyData(territory)
         locale.update(currencyIsoCode = iso,
                       currencyDigits = int(digits),
                       currencyRounding = int(rounding))
@@ -231,7 +211,7 @@ class CldrReader (object):
         locale.update(scan.currencyData(iso))
         locale.update(scan.numericData(self.root.numberSystem, self.whitter))
         locale.update(scan.textPatternData())
-        locale.update(scan.endonyms(language, script, country, variant))
+        locale.update(scan.endonyms(language, script, territory, variant))
         locale.update(scan.unitData()) # byte, kB, MB, GB, ..., KiB, MiB, GiB, ...
         locale.update(scan.calendarNames(calendars)) # Names of days and months
 
@@ -242,7 +222,7 @@ class CldrReader (object):
 # the cache. If a process were to instantiate this class with distinct
 # roots, each cache would be filled by the first to need it !
 class CldrAccess (object):
-    def __init__(self, root):
+    def __init__(self, root: Path):
         """Set up a master object for accessing CLDR data.
 
         Single parameter, root, is the file-system path to the root of
@@ -250,18 +230,18 @@ class CldrAccess (object):
         contain dtd/, main/ and supplemental/ sub-directories."""
         self.root = root
 
-    def xml(self, *path):
+    def xml(self, relative_path: str):
         """Load a single XML file and return its root element as an XmlScanner.
 
         The path is interpreted relative to self.root"""
-        return XmlScanner(Node(self.__xml(path)))
+        return XmlScanner(Node(self.__xml(relative_path)))
 
     def supplement(self, name):
         """Loads supplemental data as a Supplement object.
 
         The name should be that of a file in common/supplemental/, without path.
         """
-        return Supplement(Node(self.__xml(('common', 'supplemental', name))))
+        return Supplement(Node(self.__xml(f'common/supplemental/{name}')))
 
     def locale(self, name):
         """Loads all data for a locale as a LocaleScanner object.
@@ -273,17 +253,18 @@ class CldrAccess (object):
         inheritance, where relevant."""
         return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
 
+    def englishNaming(self, tag): # see QLocaleXmlWriter.enumData()
+        return self.__codeMap(tag).get
+
     @property
-    def fileLocales(self, joinPath = os.path.join, listDirectory = os.listdir,
-                    splitExtension = os.path.splitext):
+    def fileLocales(self) -> Iterable[str]:
         """Generator for locale IDs seen in file-names.
 
         All *.xml other than root.xml in common/main/ are assumed to
         identify locales."""
-        for name in listDirectory(joinPath(self.root, 'common', 'main')):
-            stem, ext = splitExtension(name)
-            if ext == '.xml' and stem != 'root':
-                yield stem
+        for path in self.root.joinpath('common/main').glob('*.xml'):
+            if path.stem != 'root':
+                yield path.stem
 
     @property
     def defaultContentLocales(self):
@@ -304,44 +285,44 @@ class CldrAccess (object):
     def numberSystem(self, system):
         """Get a description of a numbering system.
 
-        Returns a mapping, with keys u'digits', u'type' and u'id'; the
+        Returns a mapping, with keys 'digits', 'type' and 'id'; the
         value for this last is system. Raises KeyError for unknown
         number system, ldml.Error on failure to load data."""
         try:
             return self.__numberSystems[system]
         except KeyError:
-            raise Error('Unsupported number system: {}'.format(system))
+            raise Error(f'Unsupported number system: {system}')
 
-    def weekData(self, country):
+    def weekData(self, territory):
         """Data on the weekly cycle.
 
         Returns a triple (W, S, E) of en's short names for week-days;
         W is the first day of the week, S the start of the week-end
-        and E the end of the week-end.  Where data for a country is
+        and E the end of the week-end.  Where data for a territory is
         unavailable, the data for CLDR's territory 001 (The World) is
         used."""
         try:
-            return self.__weekData[country]
+            return self.__weekData[territory]
         except KeyError:
             return self.__weekData['001']
 
-    def currencyData(self, country):
-        """Returns currency data for the given country code.
+    def currencyData(self, territory):
+        """Returns currency data for the given territory code.
 
         Return value is a tuple (ISO4217 code, digit count, rounding
-        mode).  If CLDR provides no data for this country, ('', 2, 1)
+        mode).  If CLDR provides no data for this territory, ('', 2, 1)
         is the default result.
         """
         try:
-            return self.__currencyData[country]
+            return self.__currencyData[territory]
         except KeyError:
             return '', 2, 1
 
-    def codesToIdName(self, language, script, country, variant = ''):
+    def codesToIdName(self, language, script, territory, variant = ''):
         """Maps each code to the appropriate ID and name.
 
         Returns a 4-tuple of (ID, name) pairs corresponding to the
-        language, script, country and variant given.  Raises a
+        language, script, territory and variant given.  Raises a
         suitable error if any of them is unknown, indicating all that
         are unknown plus suitable names for any that could sensibly be
         added to enumdata.py to make them known.
@@ -353,33 +334,33 @@ class CldrAccess (object):
         try:
             return (enum('language')[language],
                     enum('script')[script],
-                    enum('country')[country],
+                    enum('territory')[territory],
                     enum('variant')[variant])
         except KeyError:
             pass
 
-        parts, values = [], [language, script, country, variant]
-        for index, key in enumerate(('language', 'script', 'country', 'variant')):
+        parts, values = [], [language, script, territory, variant]
+        for index, key in enumerate(('language', 'script', 'territory', 'variant')):
             naming, enums = self.__codeMap(key), enum(key)
             value = values[index]
             if value not in enums:
-                text = '{} code {}'.format(key, value)
+                text = f'{key} code {value}'
                 name = naming.get(value)
                 if name and value != 'POSIX':
-                    text += u' (could add {})'.format(name)
+                    text += f' (could add {name})'
                 parts.append(text)
         if len(parts) > 1:
             parts[-1] = 'and ' + parts[-1]
-        assert parts
+        else:
+            assert parts
+            if parts[0].startswith('variant'):
+                raise Error(f'No support for {parts[0]}',
+                            language, script, territory, variant)
         raise Error('Unknown ' + ', '.join(parts),
-                    language, script, country, variant)
+                    language, script, territory, variant)
 
     @staticmethod
-    def __checkEnum(given, proper, scraps,
-                    remap = { u'å': 'a', u'ã': 'a', u'ç': 'c', u'é': 'e', u'í': 'i', u'ü': 'u'},
-                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
-                    suffixes = ( 'Han', ),
-                    skip = u'\u02bc'):
+    def __checkEnum(given, proper, scraps):
         # Each is a { code: full name } mapping
         for code, name in given.items():
             try: right = proper[code]
@@ -387,35 +368,23 @@ class CldrAccess (object):
                 # No en.xml name for this code, but supplementalData's
                 # parentLocale may still believe in it:
                 if code not in scraps:
-                    yield name, '[Found no CLDR name for code {}]'.format(code)
-                continue
-            if name == right: continue
-            ok = right.replace('&', 'And')
-            for k, v in prefix.items():
-                if ok.startswith(k + ' '):
-                    ok = v + ok[len(k):]
-            while '(' in ok:
-                try: f, t = ok.index('('), ok.index(')')
-                except ValueError: break
-                ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
-            if any(name == ok + ' ' + s for s in suffixes):
-                continue
-            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
-                remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
+                    yield name, f'[Found no CLDR name for code {code}]'
                 continue
-            yield name, ok
+            cleaned = names_clash(right, name)
+            if cleaned:
+                yield name, cleaned
 
     def checkEnumData(self, grumble):
         scraps = set()
         for k in self.__parentLocale.keys():
             for f in k.split('_'):
                 scraps.add(f)
-        from enumdata import language_list, country_list, script_list
-        language = dict((v, k) for k, v in language_list.values() if not v.isspace())
-        country = dict((v, k) for k, v in country_list.values() if v != 'ZZ')
-        script = dict((v, k) for k, v in script_list.values() if v != 'Zzzz')
+        from enumdata import language_map, territory_map, script_map
+        language = {v: k for k, v in language_map.values() if not v.isspace()}
+        territory = {v: k for k, v in territory_map.values() if v != 'ZZ'}
+        script = {v: k for k, v in script_map.values() if v != 'Zzzz'}
         lang = dict(self.__checkEnum(language, self.__codeMap('language'), scraps))
-        land = dict(self.__checkEnum(country, self.__codeMap('country'), scraps))
+        land = dict(self.__checkEnum(territory, self.__codeMap('territory'), scraps))
         text = dict(self.__checkEnum(script, self.__codeMap('script'), scraps))
         if lang or land or text:
             grumble("""\
@@ -424,15 +393,15 @@ enumdata.py (keeping the old name as an alias):
 """)
             if lang:
                 grumble('Language:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in lang.items())
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in lang.items())
                         + '\n')
             if land:
-                grumble('Country:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in land.items())
+                grumble('Territory:\n\t'
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in land.items())
                         + '\n')
             if text:
                 grumble('Script:\n\t'
-                        + '\n\t'.join('{} -> {}'.format(k, v) for k, v in text.items())
+                        + '\n\t'.join(f'{k} -> {v}' for k, v in text.items())
                         + '\n')
             grumble('\n')
 
@@ -442,7 +411,7 @@ enumdata.py (keeping the old name as an alias):
         MS-Win have their own eccentric names for time-zones.  CLDR
         helpfully provides a translation to more orthodox names.
 
-        Singe argument, lookup, is a mapping from known MS-Win names
+        Single argument, lookup, is a mapping from known MS-Win names
         for locales to a unique integer index (starting at 1).
 
         The XML structure we read has the form:
@@ -460,7 +429,7 @@ enumdata.py (keeping the old name as an alias):
  </supplementalData>
 """
         zones = self.supplement('windowsZones.xml')
-        enum = self.__enumMap('country')
+        enum = self.__enumMap('territory')
         badZones, unLands, defaults, windows = set(), set(), {}, {}
 
         for name, attrs in zones.find('windowsZones/mapTimezones'):
@@ -469,8 +438,8 @@ enumdata.py (keeping the old name as an alias):
 
             wid, code = attrs['other'], attrs['territory']
             data = dict(windowsId = wid,
-                        countryCode = code,
-                        ianaList = attrs['type'])
+                        territoryCode = code,
+                        ianaList = ' '.join(attrs['type'].split()))
 
             try:
                 key = lookup[wid]
@@ -479,7 +448,7 @@ enumdata.py (keeping the old name as an alias):
                 key = 0
             data['windowsKey'] = key
 
-            if code == u'001':
+            if code == '001':
                 defaults[key] = data['ianaList']
             else:
                 try:
@@ -487,11 +456,11 @@ enumdata.py (keeping the old name as an alias):
                 except KeyError:
                     unLands.append(code)
                     continue
-                data.update(countryId = cid, country = name)
+                data.update(territoryId = cid, territory = name)
                 windows[key, cid] = data
 
         if unLands:
-            raise Error('Unknown country codes, please add to enumdata.py: '
+            raise Error('Unknown territory codes, please add to enumdata.py: '
                         + ', '.join(sorted(unLands)))
 
         if badZones:
@@ -507,20 +476,20 @@ enumdata.py (keeping the old name as an alias):
         return self.__cldrVersion
 
     # Implementation details
-    def __xml(self, path, cache = CacheDict(), read = minidom.parse, joinPath = os.path.join):
+    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
         try:
-            doc = cache[path]
+            doc = cache[relative_path]
         except KeyError:
-            cache[path] = doc = read(joinPath(self.root, *path)).documentElement
+            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
         return doc
 
-    def __open(self, path, joinPath=os.path.join):
-        return open(joinPath(self.root, *path))
+    def __open(self, relative_path: str) -> TextIO:
+        return self.root.joinpath(relative_path).open()
 
     @property
     def __rootLocale(self, cache = []):
         if not cache:
-            cache.append(self.xml('common', 'main', 'root.xml'))
+            cache.append(self.xml('common/main/root.xml'))
         return cache[0]
 
     @property
@@ -530,7 +499,7 @@ enumdata.py (keeping the old name as an alias):
         return cache[0]
 
     @property
-    def __numberSystems(self, cache = {}, joinPath=os.path.join):
+    def __numberSystems(self, cache = {}):
         if not cache:
             for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
                 cache[attrs['id']] = attrs
@@ -563,7 +532,7 @@ enumdata.py (keeping the old name as an alias):
         source = self.__supplementalData
         for key in ('firstDay', 'weekendStart', 'weekendEnd'):
             result = {}
-            for ignore, attrs in source.find('weekData/' + key):
+            for ignore, attrs in source.find(f'weekData/{key}'):
                 assert ignore == key
                 day = attrs['day']
                 assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
@@ -580,7 +549,7 @@ enumdata.py (keeping the old name as an alias):
             for elt in source.findNodes('currencyData/region'):
                 iso, digits, rounding = '', 2, 1
                 try:
-                    country = elt.dom.attributes['iso3166'].nodeValue
+                    territory = elt.dom.attributes['iso3166'].nodeValue
                 except KeyError:
                     continue
                 for child in elt.findAllChildren('currency'):
@@ -596,16 +565,16 @@ enumdata.py (keeping the old name as an alias):
                         break
                 if iso:
                     for tag, data in source.find(
-                        'currencyData/fractions/info[iso4217={}]'.format(iso)):
+                        f'currencyData/fractions/info[iso4217={iso}]'):
                         digits = data['digits']
                         rounding = data['rounding']
-                cache[country] = iso, digits, rounding
+                cache[territory] = iso, digits, rounding
             assert cache
 
         return cache
 
     @property
-    def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join):
+    def __unDistinguishedAttributes(self, cache = {}):
         """Mapping from tag names to lists of attributes.
 
         LDML defines some attributes as 'distinguishing': if a node
@@ -625,7 +594,7 @@ enumdata.py (keeping the old name as an alias):
 
         return cache
 
-    def __scanLdmlDtd(self, joinPath = os.path.join):
+    def __scanLdmlDtd(self):
         """Scan the LDML DTD, record CLDR version
 
         Yields (tag, attrs) pairs: on elements with a given tag,
@@ -635,7 +604,7 @@ enumdata.py (keeping the old name as an alias):
 
         Sets self.__cldrVersion as a side-effect, since this
         information is found in the same file."""
-        with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd:
+        with self.__open('common/dtd/ldml.dtd') as dtd:
             tag, ignored, last = None, None, None
 
             for line in dtd:
@@ -670,15 +639,15 @@ enumdata.py (keeping the old name as an alias):
     def __enumMap(self, key, cache = {}):
         if not cache:
             cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
-            # They're not actually lists: mappings from numeric value
-            # to pairs of full name and short code. What we want, in
-            # each case, is a mapping from code to the other two.
-            from enumdata import language_list, script_list, country_list
-            for form, book, empty in (('language', language_list, 'AnyLanguage'),
-                                      ('script', script_list, 'AnyScript'),
-                                      ('country', country_list, 'AnyCountry')):
-                cache[form] = dict((pair[1], (num, pair[0]))
-                                   for num, pair in book.items() if pair[0] != 'C')
+            # They're mappings from numeric value to pairs of full
+            # name and short code. What we want, in each case, is a
+            # mapping from code to the other two.
+            from enumdata import language_map, script_map, territory_map
+            for form, book, empty in (('language', language_map, 'AnyLanguage'),
+                                      ('script', script_map, 'AnyScript'),
+                                      ('territory', territory_map, 'AnyTerritory')):
+                cache[form] = {pair[1]: (num, pair[0])
+                               for num, pair in book.items() if pair[0] != 'C'}
                 # (Have to filter out the C locale, as we give it the
                 # same (all space) code as AnyLanguage, whose code
                 # should probably be 'und' instead.)
@@ -693,9 +662,9 @@ enumdata.py (keeping the old name as an alias):
     def __codeMap(self, key, cache = {},
                   # Maps our name for it to CLDR's name:
                   naming = {'language': 'languages', 'script': 'scripts',
-                            'country': 'territories', 'variant': 'variants'}):
+                            'territory': 'territories', 'variant': 'variants'}):
         if not cache:
-            root = self.xml('common', 'main', 'en.xml').root.findUniqueChild('localeDisplayNames')
+            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
             for dst, src in naming.items():
                 cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
             assert cache
@@ -721,7 +690,13 @@ enumdata.py (keeping the old name as an alias):
             except (KeyError, ValueError, TypeError):
                 pass
             else:
-                if key not in seen or not elt.attributes.has_key('alt'):
+                # Prefer stand-alone forms of names when present, ignore other
+                # alt="..." entries. For example, Traditional and Simplified
+                # Han omit "Han" in the plain form, but include it for
+                # stand-alone. As the stand-alone version appears later, it
+                # over-writes the plain one.
+                if (key not in seen or 'alt' not in elt.attributes
+                    or elt.attributes['alt'].nodeValue == 'stand-alone'):
                     yield key, value
                     seen.add(key)
 
@@ -730,7 +705,8 @@ enumdata.py (keeping the old name as an alias):
     def __parentLocale(self, cache = {}):
         # see http://www.unicode.org/reports/tr35/#Parent_Locales
         if not cache:
-            for tag, attrs in self.__supplementalData.find('parentLocales'):
+            for tag, attrs in self.__supplementalData.find('parentLocales',
+                                                           ('component',)):
                 parent = attrs.get('parent', '')
                 for child in attrs['locales'].split():
                     cache[child] = parent
@@ -738,10 +714,9 @@ enumdata.py (keeping the old name as an alias):
 
         return cache
 
-    def __localeAsDoc(self, name, aliasFor = None,
-                      joinPath = os.path.join, exists = os.path.isfile):
-        path = ('common', 'main', name + '.xml')
-        if exists(joinPath(self.root, *path)):
+    def __localeAsDoc(self, name: str, aliasFor = None):
+        path = f'common/main/{name}.xml'
+        if self.root.joinpath(path).exists():
             elt = self.__xml(path)
             for child in Node(elt).findAllChildren('alias'):
                 try:
@@ -754,8 +729,8 @@ enumdata.py (keeping the old name as an alias):
             return elt
 
         if aliasFor:
-            raise Error('Fatal error: found an alias "{}" -> "{}", but found no file for the alias'
-                        .format(aliasFor, name))
+            raise Error(f'Fatal error: found an alias "{aliasFor}" -> "{name}", '
+                        'but found no file for the alias')
 
     def __scanLocaleRoots(self, name):
         while name and name != 'root':
@@ -780,4 +755,4 @@ enumdata.py (keeping the old name as an alias):
         return chain
 
 # Unpolute the namespace: we don't need to export these.
-del minidom, CacheDict, os
+del minidom, CacheDict