1 files changed, 37 insertions, 2 deletions
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index a33ace4eb1..02ec7cafc7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -41,12 +41,47 @@ def unicode2hex(s):
             lst.append(hex(v))
     return lst
 
-def wrap_list(lst):
+def wrap_list(lst, perline=20): # Joins lst's strings, perline to each output line.
     def split(lst, size):
         while lst:
             head, lst = lst[:size], lst[size:]
             yield head
-    return ",\n".join(", ".join(x) for x in split(lst, 20))
+    return ",\n".join(", ".join(x) for x in split(lst, perline))
+
+def names_clash(cldr, enum):
+    """Return None if cldr is recognizably the name of enum, else a hint.
+
+    First argument, cldr, is the name CLDR gives for some language,
+    script or territory; second, enum, is the name enumdata.py gives
+    for it. If these are enough alike, returns None; otherwise, a
+    non-empty string that results from adapting cldr to be more like
+    how enumdata.py would express it."""
+    if cldr == enum:
+        return None
+
+    # Some common substitutions:
+    cldr = cldr.replace('&', 'And')
+    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+    for k, v in prefix.items():
+        if cldr.startswith(k + ' '):
+            cldr = v + cldr[len(k):]
+
+    # Chop out any parenthesised part, e.g. (Burma); stop if no ')' follows '(':
+    while '(' in cldr:
+        f = cldr.index('(')
+        t = cldr.rfind(')', f)
+        if t < 0:
+            break
+        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+    # Various accented letters:
+    remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+    skip = '\u02bc' # Punctuation for which .isalpha() is true.
+    # Let enum match (ignoring spaces and case) any substring of cldr's letters:
+    if ''.join(enum.lower().split()) in ''.join(
+            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+        return None
+    return cldr.strip() or cldr # Tidy chopping's stray spaces, but stay non-empty.
 
 
 @contextmanager