1 files changed, 53 insertions, 2 deletions
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index a33ace4eb1..a7fcd08727 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -26,6 +26,22 @@ class Error (Exception):
     def __str__(self):
         return self.message
 
+def qtVersion(root = qtbase_root, pfx = 'set(QT_REPO_MODULE_VERSION '):
+    """Return the version quoted after pfx in root's .cmake.conf, else raise Error."""
+    with open(root.joinpath('.cmake.conf')) as fd:
+        for line in fd:
+            if line.startswith(pfx):
+                tail = line[len(pfx):].strip()
+                cut = tail.find(tail[0], 1) if tail[:1] in ('"', "'") else -1
+                if cut < 0: # Empty, no opening quote or no matching closing quote.
+                    raise Error(f'Missing quotes on Qt version: {tail}')
+                version = tail[1:cut].strip()
+                if cut > 5 and all(x.isdigit() for x in version.split('.')):
+                    return version # At least five characters, e.g. 6.5.0
+                raise Error(f'Malformed Qt version: {tail}')
+    raise Error(f'Failed to find {pfx}...) line in {root.joinpath(".cmake.conf")}')
+qtVersion = qtVersion()
+
 def unicode2hex(s):
     lst = []
     for x in s:
@@ -41,12 +57,47 @@ def unicode2hex(s):
             lst.append(hex(v))
     return lst
 
-def wrap_list(lst):
-    def split(lst, size):
-        while lst:
-            head, lst = lst[:size], lst[size:]
-            yield head
-    return ",\n".join(", ".join(x) for x in split(lst, 20))
+def wrap_list(lst, perline=20):
+    """Join lst's strings with ', ', starting a new line after each perline."""
+    if perline < 1: # Slicing by a non-positive size would never consume lst.
+        raise ValueError(f'perline must be positive, got {perline}')
+    rows = (lst[i:i + perline] for i in range(0, len(lst), perline))
+    return ",\n".join(", ".join(x) for x in rows)
+
+def names_clash(cldr, enum):
+    """True if the reader might not recognize cldr as the name of enum
+
+    First argument, cldr, is the name CLDR gives for some language,
+    script or territory; second, enum, is the name enumdata.py gives
+    for it. If these are enough alike, returns None; otherwise, a
+    non-empty string that results from adapting cldr to be more like
+    how enumdata.py would express it."""
+    if cldr == enum:
+        return None
+
+    # Some common substitutions:
+    cldr = cldr.replace('&', 'And')
+    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+    for k, v in prefix.items():
+        if cldr.startswith(k + ' '):
+            cldr = v + cldr[len(k):]
+
+    # Chop out any parenthesised part, e.g. (Burma), from the first '(' to
+    # the last ')'. One pass suffices: what is left has no '(' before a ')'.
+    # Nothing is chopped if either is missing or the last ')' comes first.
+    f, t = cldr.find('('), cldr.rfind(')')
+    if 0 <= f < t:
+        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+    # Various accented letters:
+    remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+    skip = '\u02bc' # Punctuation for which .isalpha() is true.
+    # No clash if enum (lower-cased, spaces removed) appears anywhere in
+    # cldr's letters (lower-cased, de-accented, ignoring non-letters):
+    if ''.join(enum.lower().split()) in ''.join(
+            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+        return None
+    return cldr
 
 
 @contextmanager