diff options
Diffstat (limited to 'util/locale_database/iso639_3.py')
-rw-r--r-- | util/locale_database/iso639_3.py | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py new file mode 100644 index 0000000000..0d23065cf9 --- /dev/null +++ b/util/locale_database/iso639_3.py @@ -0,0 +1,80 @@ +# Copyright (C) 2021 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass +class LanguageCodeEntry: + part3Code: str + part2BCode: Optional[str] + part2TCode: Optional[str] + part1Code: Optional[str] + + def id(self) -> str: + if self.part1Code: + return self.part1Code + if self.part2BCode: + return self.part2BCode + return self.part3Code + + def __repr__(self) -> str: + parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}'] + if self.part2BCode is not None and self.part2BCode != self.part3Code: + parts.append(f', part2BCode={self.part2BCode!r}') + if self.part2TCode != self.part2BCode: + parts.append(f', part2TCode={self.part2TCode!r}') + if self.part1Code is not None: + parts.append(f', part1Code={self.part1Code!r}') + parts.append(')') + return ''.join(parts) + + +class LanguageCodeData: + """ + Representation of ISO639-2 language code data. + """ + def __init__(self, fileName: str): + """ + Construct the object populating the data from the given file. + """ + self.__codeMap: Dict[str, LanguageCodeEntry] = {} + + with open(fileName, 'r', encoding='utf-8') as stream: + stream.readline() # skip the header + for line in stream.readlines(): + part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4) + + # sanity checks + assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \ + f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\ + f'{part2TCode!r} {part1Code!r}' + + assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}' + assert not part1Code or len(part1Code) == 2, \ + f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}' + assert not part2BCode or len(part2BCode) == 3, \ + f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}' + assert not part2TCode or len(part2TCode) == 3, \ + f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}' + + assert (part2BCode == '') == (part2TCode == ''), \ + f'Only one Part 2 code is specified for {part3Code!r}: ' \ + f'{part2BCode!r} vs {part2TCode!r}' + assert not part2TCode or part2TCode == part3Code, \ + f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}' + + entry = LanguageCodeEntry(part3Code, part2BCode or None, + part2TCode or None, part1Code or None) + + self.__codeMap[entry.id()] = entry + + def query(self, code: str) -> Optional[LanguageCodeEntry]: + """ + Lookup the entry with the given code and return it. + + The entries can be looked up by using either the Alpha2 code or the bibliographical + Alpha3 code. + """ + return self.__codeMap.get(code) |