diff options
Diffstat (limited to 'util/locale_database/localetools.py')
-rw-r--r-- | util/locale_database/localetools.py | 39 |
1 files changed, 37 insertions, 2 deletions
def wrap_list(lst, perline=20):
    """Join the strings in lst with commas, wrapped onto several lines.

    Entries on the same line are separated by ', '; lines are separated
    by ',\\n', with at most perline entries on each line. An empty lst
    produces an empty string.

    Raises ValueError if perline is less than one: no chunk of that size
    could ever consume lst.
    """
    if perline < 1:
        raise ValueError('perline must be at least 1, got {!r}'.format(perline))
    # Slice by stride rather than repeatedly chopping the head off lst, so
    # that each entry is copied only once.
    rows = (lst[i:i + perline] for i in range(0, len(lst), perline))
    return ",\n".join(", ".join(row) for row in rows)


def names_clash(cldr, enum):
    """Report whether a reader might not recognize cldr as the name of enum.

    First argument, cldr, is the name CLDR gives for some language,
    script or territory; second, enum, is the name enumdata.py gives
    for it. If these are enough alike, returns None; otherwise, a
    non-empty string that results from adapting cldr to be more like
    how enumdata.py would express it.
    """
    if cldr == enum:
        return None

    # Some common substitutions:
    cldr = cldr.replace('&', 'And')
    prefix = {'St.': 'Saint', 'U.S.': 'United States'}
    for k, v in prefix.items():
        if cldr.startswith(k + ' '):
            cldr = v + cldr[len(k):]

    # Chop out any parenthesised part, e.g. (Burma):
    while '(' in cldr:
        f, t = cldr.index('('), cldr.rfind(')')
        # Stop when no ')' follows the first '(' (rfind gives -1 when there
        # is none at all). Splicing with t < f would make cldr longer, so
        # the loop would never terminate.
        if t < f:
            break
        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()

    # Various accented letters:
    remap = {'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c',
             'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
    skip = '\u02bc'  # Punctuation for which .isalpha() is true.
    # Reduce both names to lower-case letters only, then accept enum's form
    # appearing anywhere within cldr's form as a match:
    letters = ''.join(remap.get(ch, ch) for ch in cldr.lower()
                      if ch.isalpha() and ch not in skip)
    if ''.join(enum.lower().split()) in letters:
        return None
    return cldr