diff options
Diffstat (limited to 'util/locale_database/localetools.py')
-rw-r--r-- | util/locale_database/localetools.py | 239 |
1 files changed, 147 insertions, 92 deletions
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py index 29153366b3..02ec7cafc7 100644 --- a/util/locale_database/localetools.py +++ b/util/locale_database/localetools.py @@ -1,30 +1,5 @@ -############################################################################# -## -## Copyright (C) 2020 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. -## -## $QT_END_LICENSE$ -## -############################################################################# +# Copyright (C) 2020 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 """Utilities shared among the CLDR extraction tools. Functions: @@ -37,13 +12,16 @@ Classes: SourceFileEditor -- adds standard prelude and tail handling to Transcriber. """ -import os -import tempfile +from contextlib import ExitStack, contextmanager +from pathlib import Path +from tempfile import NamedTemporaryFile -class Error (StandardError): - __upinit = StandardError.__init__ +qtbase_root = Path(__file__).parents[2] +assert qtbase_root.name == 'qtbase' + +class Error (Exception): def __init__(self, msg, *args): - self.__upinit(msg, *args) + super().__init__(msg, *args) self.message = msg def __str__(self): return self.message @@ -63,48 +41,141 @@ def unicode2hex(s): lst.append(hex(v)) return lst -def wrap_list(lst): +def wrap_list(lst, perline=20): def split(lst, size): while lst: head, lst = lst[:size], lst[size:] yield head - return ",\n".join(", ".join(x) for x in split(lst, 20)) - -class Transcriber (object): - """Helper class to facilitate rewriting source files. - - This class takes care of the temporary file manipulation. Derived - classes need to implement transcribing of the content, with + return ",\n".join(", ".join(x) for x in split(lst, perline)) + +def names_clash(cldr, enum): + """True if the reader might not recognize cldr as the name of enum + + First argument, cldr, is the name CLDR gives for some language, + script or territory; second, enum, is the name enumdata.py gives + for it. If these are enough alike, returns None; otherwise, a + non-empty string that results from adapting cldr to be more like + how enumdata.py would express it.""" + if cldr == enum: + return None + + # Some common substitutions: + cldr = cldr.replace('&', 'And') + prefix = { 'St.': 'Saint', 'U.S.': 'United States' } + for k, v in prefix.items(): + if cldr.startswith(k + ' '): + cldr = v + cldr[len(k):] + + # Chop out any parenthesised part, e.g. (Burma): + while '(' in cldr: + try: + f, t = cldr.index('('), cldr.rindex(')') + except ValueError: + break + cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip() + + # Various accented letters: + remap = { 'ã': 'a', 'å': 'a', 'ā': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'} + skip = '\u02bc' # Punctuation for which .isalpha() is true. + # Let cldr match (ignoring non-letters and case) any substring as enum: + if ''.join(enum.lower().split()) in ''.join( + remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip): + return None + return cldr + + +@contextmanager +def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path): + """Context manager for safe file update via a temporary file. + + Accepts path to the file to be updated. Yields a temporary file to the user + code, open for writing. + + On success closes the temporary file and moves its content to the original + location. On error, removes temporary file, without disturbing the original. + """ + tempFile = NamedTemporaryFile('w', prefix=prefix, dir=dir, delete=False) + try: + yield tempFile + tempFile.close() + # Move the modified file to the original location + Path(tempFile.name).rename(originalLocation) + except Exception: + # delete the temporary file in case of error + tempFile.close() + Path(tempFile.name).unlink() + raise + + +class Transcriber: + """Context manager base-class to manage source file rewrites. + + Derived classes need to implement transcribing of the content, with whatever modifications they may want. Members reader and writer are exposed; use writer.write() to output to the new file; use reader.readline() or iterate reader to read the original. - Callers should call close() on success or cleanup() on failure (to - clear away the temporary file). + This class is intended to be used as context manager only (inside a + `with` statement). + + Reimplement onEnter() to write any preamble the file may have, + onExit() to write any tail. The body of the with statement takes + care of anything in between, using methods provided by derived classes. + + The data is written to a temporary file first. The temporary file data + is then moved to the original location if there were no errors. Otherwise + the temporary file is removed and the original is left unchanged. """ - def __init__(self, path, temp): - # Open the old file - self.reader = open(path) - # Create a temp file to write the new data into - temp, tempPath = tempfile.mkstemp(os.path.split(path)[1], dir = temp) - self.__names = path, tempPath - self.writer = os.fdopen(temp, "w") - - def close(self): - self.reader.close() - self.writer.close() - self.reader = self.writer = None - source, temp = self.__names - os.remove(source) - os.rename(temp, source) - - def cleanup(self): - if self.__names: - self.reader.close() - self.writer.close() - # Remove temp-file: - os.remove(self.__names[1]) - self.__names = () + def __init__(self, path: Path, temp_dir: Path): + self.path = path + self.tempDir = temp_dir + + def onEnter(self) -> None: + """ + Called before transferring control to user code. + + This function can be overridden in derived classes to perform actions + before transferring control to the user code. + + The default implementation does nothing. + """ + pass + + def onExit(self) -> None: + """ + Called after return from user code. + + This function can be overridden in derived classes to perform actions + after successful return from user code. + + The default implementation does nothing. + """ + pass + + def __enter__(self): + with ExitStack() as resources: + # Create a temp file to write the new data into + self.writer = resources.enter_context( + AtomicRenameTemporaryFile(self.path, prefix=self.path.name, dir=self.tempDir)) + # Open the old file + self.reader = resources.enter_context(open(self.path)) + + self.onEnter() + + # Prevent resources from being closed on normal return from this + # method and make them available inside __exit__(): + self.__resources = resources.pop_all() + return self + + def __exit__(self, exc_type, exc_value, traceback): + if exc_type is None: + with self.__resources: + self.onExit() + else: + self.__resources.__exit__(exc_type, exc_value, traceback) + + return False + class SourceFileEditor (Transcriber): """Transcriber with transcription of code around a gnerated block. @@ -117,43 +188,27 @@ class SourceFileEditor (Transcriber): the new version to replace it. This class takes care of transcribing the parts before and after - the generated content; on creation, an instance will copy the - preamble up to the start marker; its close() will skip over the - original's generated content and resume transcribing with the end - marker. Derived classes need only implement the generation of the - content in between. - - Callers should call close() on success or cleanup() on failure (to - clear away the temporary file); see Transcriber. + the generated content; on entering the context, an instance will + copy the preamble up to the start marker; on exit from the context + it will skip over the original's generated content and resume + transcribing with the end marker. + + This class is only intended to be used as a context manager: + see Transcriber. Derived classes implement suitable methods for use in + the body of the with statement, using self.writer to rewrite the part + of the file between the start and end markers. """ - __upinit = Transcriber.__init__ - def __init__(self, path, temp): - """Set up the source file editor. - - Requires two arguments: the path to the source file to be read - and, on success, replaced with a new version; and the - directory in which to store the temporary file during the - rewrite.""" - self.__upinit(path, temp) - self.__copyPrelude() - - __upclose = Transcriber.close - def close(self): - self.__copyTail() - self.__upclose() - - # Implementation details: GENERATED_BLOCK_START = '// GENERATED PART STARTS HERE' GENERATED_BLOCK_END = '// GENERATED PART ENDS HERE' - def __copyPrelude(self): + def onEnter(self) -> None: # Copy over the first non-generated section to the new file for line in self.reader: self.writer.write(line) if line.strip() == self.GENERATED_BLOCK_START: break - def __copyTail(self): + def onExit(self) -> None: # Skip through the old generated data in the old file for line in self.reader: if line.strip() == self.GENERATED_BLOCK_END: |