diff options
author | Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> | 2021-11-22 15:56:53 +0100 |
---|---|---|
committer | Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> | 2021-12-09 03:45:08 +0100 |
commit | 4f53c703e40bea3203259c212c54dc4816c08b09 (patch) | |
tree | 178ce98f0707d0e6b509cc603f0f92580ecd5775 /util | |
parent | 0fbeac01156c57dc6e48087b7a8dea4644294f6a (diff) |
QLocale: Extend support for language codes
This commit extends functionality for QLocale::codeToLanguage()
and QLocale::languageToCode() by adding an additional argument
that allows selection of the ISO 639 code-set to consider for
those operations.
The following ISO 639 code-sets are supported:
* Part 1
* Part 2 bibliographic
* Part 2 terminological
* Part 3
As a result of this change the codeToLanguage() overload without
the additional argument now returns a Language value if it matches
any known code. Previously a valid language was returned only if
the function argument matched the first code defined for that
language from the above list.
[ChangeLog][QtCore][QLocale] Added overloads for codeToLanguage()
and languageToCode() that support specifying which ISO 639 codes
to consider.
Fixes: QTBUG-98129
Change-Id: I4da8a89e2e68a673cf63a621359cded609873fa2
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'util')
-rw-r--r-- | util/locale_database/iso639_3.py | 105 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 53 |
2 files changed, 154 insertions, 4 deletions
diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py new file mode 100644 index 0000000000..b150855ba9 --- /dev/null +++ b/util/locale_database/iso639_3.py @@ -0,0 +1,105 @@ +############################################################################# +## +## Copyright (C) 2021 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the locale database tools of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. 
+## +## $QT_END_LICENSE$ +## +############################################################################# + +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass +class LanguageCodeEntry: + part3Code: str + part2BCode: Optional[str] + part2TCode: Optional[str] + part1Code: Optional[str] + + def id(self) -> str: + if self.part1Code: + return self.part1Code + if self.part2BCode: + return self.part2BCode + return self.part3Code + + def __repr__(self) -> str: + parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}'] + if self.part2BCode is not None and self.part2BCode != self.part3Code: + parts.append(f', part2BCode={self.part2BCode!r}') + if self.part2TCode != self.part2BCode: + parts.append(f', part2TCode={self.part2TCode!r}') + if self.part1Code is not None: + parts.append(f', part1Code={self.part1Code!r}') + parts.append(')') + return ''.join(parts) + + +class LanguageCodeData: + """ + Representation of ISO639-2 language code data. + """ + def __init__(self, fileName: str): + """ + Construct the object populating the data from the given file. 
+ """ + self.__codeMap: Dict[str, LanguageCodeEntry] = {} + + with open(fileName, 'r', encoding='utf-8') as stream: + stream.readline() # skip the header + for line in stream.readlines(): + part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4) + + # sanity checks + assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \ + f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\ + f'{part2TCode!r} {part1Code!r}' + + assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}' + assert not part1Code or len(part1Code) == 2, \ + f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}' + assert not part2BCode or len(part2BCode) == 3, \ + f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}' + assert not part2TCode or len(part2TCode) == 3, \ + f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}' + + assert (part2BCode == '') == (part2TCode == ''), \ + f'Only one Part 2 code is specified for {part3Code!r}: ' \ + f'{part2BCode!r} vs {part2TCode!r}' + assert not part2TCode or part2TCode == part3Code, \ + f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}' + + entry = LanguageCodeEntry(part3Code, part2BCode or None, + part2TCode or None, part1Code or None) + + self.__codeMap[entry.id()] = entry + + def query(self, code: str) -> Optional[LanguageCodeEntry]: + """ + Lookup the entry with the given code and return it. + + The entries can be looked up by using either the Alpha2 code or the bibliographical + Alpha3 code. + """ + return self.__codeMap.get(code) diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index 7ac7945cf8..a7592f7a0c 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -30,15 +30,22 @@ See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself. 
Pass the output file from that as first parameter to this script; pass -the root of the qtbase check-out as second parameter. +the ISO 639-3 data file as second parameter; pass the root of the qtbase +check-out as third parameter. + +The ISO 639-3 data file can be downloaded from the SIL website: + + https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab """ import datetime import argparse from pathlib import Path +from typing import Optional from qlocalexml import QLocaleXmlReader from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor +from iso639_3 import LanguageCodeData class LocaleKeySorter: """Sort-ordering representation of a locale key. @@ -389,8 +396,42 @@ class LocaleDataWriter (LocaleSourceEditor): # TODO: unify these next three into the previous three; kept # separate for now to verify we're not changing data. - def languageCodes(self, languages): - self.__writeCodeList(self.writer.write, languages, 'language', 3) + def languageCodes(self, languages, code_data: LanguageCodeData): + out = self.writer.write + + out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n') + + def q(val: Optional[str], size: int) -> str: + """Quote the value and adjust the result for tabular view.""" + chars = [] + if val is not None: + for c in val: + chars.append(f"'{c}'") + s = ', '.join(chars) + s = f'{{{s}}}' + else: + s = '' + if size == 0: + return f'{{{s}}}' + else: + return f'{{{s}}},'.ljust(size * 5 + 4) + + for key, value in languages.items(): + code = value[1] + if key < 2: + result = code_data.query('und') + else: + result = code_data.query(code) + assert code == result.id() + assert result is not None + + codeString = q(result.part1Code, 2) + codeString += q(result.part2BCode, 3) + codeString += q(result.part2TCode, 3) + codeString += q(result.part3Code, 0) + out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n') + + out('};\n\n') def scriptCodes(self, scripts): 
self.__writeCodeList(self.writer.write, scripts, 'script', 4) @@ -519,6 +560,8 @@ def main(out, err): formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('input_file', help='input XML file name', metavar='input-file.xml') + parser.add_argument('iso_path', help='path to the ISO 639-3 data file', + metavar='iso-639-3.tab') parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree') parser.add_argument('--calendars', help='select calendars to emit data for', nargs='+', metavar='CALENDAR', @@ -538,6 +581,8 @@ def main(out, err): locale_map = dict(reader.loadLocaleMap(calendars, err.write)) locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap())) + code_data = LanguageCodeData(args.iso_path) + try: with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'), qtsrcdir, reader.cldrVersion) as writer: @@ -549,7 +594,7 @@ def main(out, err): writer.scriptNames(reader.scripts) writer.territoryNames(reader.territories) # TODO: merge the next three into the previous three - writer.languageCodes(reader.languages) + writer.languageCodes(reader.languages, code_data) writer.scriptCodes(reader.scripts) writer.territoryCodes(reader.territories) except Exception as e: |