summaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
authorIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2021-11-22 15:56:53 +0100
committerIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2021-12-09 03:45:08 +0100
commit4f53c703e40bea3203259c212c54dc4816c08b09 (patch)
tree178ce98f0707d0e6b509cc603f0f92580ecd5775 /util
parent0fbeac01156c57dc6e48087b7a8dea4644294f6a (diff)
QLocale: Extend support for language codes
This commit extends functionality for QLocale::codeToLanguage() and QLocale::languageToCode() by adding an additional argument that allows selection of the ISO 639 code-set to consider for those operations. The following ISO 639 codes are supported: * Part 1 * Part 2 bibliographic * Part 2 terminological * Part 3 As a result of this change the codeToLanguage() overload without the additional argument now returns a Language value if it matches any known code. Previously a valid language was returned only if the function argument matched the first code defined for that language from the above list. [ChangeLog][QtCore][QLocale] Added overloads for codeToLanguage() and languageToCode() that support specifying which ISO 639 codes to consider. Fixes: QTBUG-98129 Change-Id: I4da8a89e2e68a673cf63a621359cded609873fa2 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'util')
-rw-r--r--util/locale_database/iso639_3.py105
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py53
2 files changed, 154 insertions, 4 deletions
diff --git a/util/locale_database/iso639_3.py b/util/locale_database/iso639_3.py
new file mode 100644
index 0000000000..b150855ba9
--- /dev/null
+++ b/util/locale_database/iso639_3.py
@@ -0,0 +1,105 @@
+#############################################################################
+##
+## Copyright (C) 2021 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the locale database tools of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+
+@dataclass
+class LanguageCodeEntry:
+ """The ISO 639 codes recorded for a single language.
+
+ The Part 3 code is always a string; each of the Part 2B (bibliographic),
+ Part 2T (terminological) and Part 1 codes may be None.
+ """
+ part3Code: str
+ part2BCode: Optional[str]
+ part2TCode: Optional[str]
+ part1Code: Optional[str]
+
+ def id(self) -> str:
+ """Return the first non-empty code among Part 1, Part 2B and Part 3."""
+ if self.part1Code:
+ return self.part1Code
+ if self.part2BCode:
+ return self.part2BCode
+ return self.part3Code
+
+ def __repr__(self) -> str:
+ """Return a compact representation that leaves out redundant codes.
+
+ The id() and the Part 3 code are always shown. The Part 2B code is
+ shown only when it is set and differs from the Part 3 code; the
+ Part 1 code is shown only when it is set.
+ """
+ parts = [f'{self.__class__.__name__}({self.id()!r}, part3Code={self.part3Code!r}']
+ if self.part2BCode is not None and self.part2BCode != self.part3Code:
+ parts.append(f', part2BCode={self.part2BCode!r}')
+ # The Part 2T code is only reported when it differs from the Part 2B code.
+ # NOTE(review): this view has lost the original indentation, so whether
+ # this check is nested under the Part 2B check above needs confirming.
+ if self.part2TCode != self.part2BCode:
+ parts.append(f', part2TCode={self.part2TCode!r}')
+ if self.part1Code is not None:
+ parts.append(f', part1Code={self.part1Code!r}')
+ parts.append(')')
+ return ''.join(parts)
+
+
+class LanguageCodeData:
+    """
+    Lookup table of ISO 639 language codes, read from an ISO 639-3 table.
+
+    Each row of the table supplies the Part 3, Part 2B, Part 2T and
+    Part 1 codes of one language; see query() for how rows are keyed.
+    """
+    def __init__(self, fileName: str):
+        """
+        Construct the object populating the data from the given file.
+
+        The file is read as UTF-8 text. Its first line is skipped as a
+        header; every following line must hold at least five
+        tab-separated fields, the first four being the Part 3, Part 2B,
+        Part 2T and Part 1 codes. A code that is absent is an empty field.
+
+        Raises AssertionError if the codes of a row fail the sanity checks.
+        """
+        self.__codeMap: Dict[str, LanguageCodeEntry] = {}
+
+        with open(fileName, 'r', encoding='utf-8') as stream:
+            stream.readline() # skip the header
+            # Iterate the stream itself: no need to load every line up front.
+            for line in stream:
+                part3Code, part2BCode, part2TCode, part1Code, _ = line.split('\t', 4)
+
+                # sanity checks
+                assert all(p.isascii() for p in (part3Code, part2BCode, part2TCode, part1Code)), \
+                    f'Non-ascii characters in code names: {part3Code!r} {part2BCode!r} '\
+                    f'{part2TCode!r} {part1Code!r}'
+
+                assert len(part3Code) == 3, f'Invalid Part 3 code length for {part3Code!r}'
+                assert not part1Code or len(part1Code) == 2, \
+                    f'Invalid Part 1 code length for {part3Code!r}: {part1Code!r}'
+                assert not part2BCode or len(part2BCode) == 3, \
+                    f'Invalid Part 2B code length for {part3Code!r}: {part2BCode!r}'
+                assert not part2TCode or len(part2TCode) == 3, \
+                    f'Invalid Part 2T code length for {part3Code!r}: {part2TCode!r}'
+
+                # The two Part 2 codes must be given together or not at all,
+                # and the terminological one must repeat the Part 3 code.
+                assert (part2BCode == '') == (part2TCode == ''), \
+                    f'Only one Part 2 code is specified for {part3Code!r}: ' \
+                    f'{part2BCode!r} vs {part2TCode!r}'
+                assert not part2TCode or part2TCode == part3Code, \
+                    f'Part 3 code {part3Code!r} does not match Part 2T code {part2TCode!r}'
+
+                # Empty fields are stored as None.
+                entry = LanguageCodeEntry(part3Code, part2BCode or None,
+                                          part2TCode or None, part1Code or None)
+
+                self.__codeMap[entry.id()] = entry
+
+    def query(self, code: str) -> Optional[LanguageCodeEntry]:
+        """
+        Lookup the entry with the given code and return it.
+
+        Each entry is stored under its id(): its Part 1 (Alpha2) code if
+        it has one, otherwise its Part 2 bibliographic (Alpha3) code if it
+        has one, otherwise its Part 3 code. Returns None when no entry is
+        stored under the given code.
+        """
+        return self.__codeMap.get(code)
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 7ac7945cf8..a7592f7a0c 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -30,15 +30,22 @@
See ``cldr2qlocalexml.py`` for how to generate the QLocaleXML data itself.
Pass the output file from that as first parameter to this script; pass
-the root of the qtbase check-out as second parameter.
+the ISO 639-3 data file as second parameter; pass the root of the qtbase
+check-out as third parameter.
+
+The ISO 639-3 data file can be downloaded from the SIL website:
+
+ https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
"""
import datetime
import argparse
from pathlib import Path
+from typing import Optional
from qlocalexml import QLocaleXmlReader
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
+from iso639_3 import LanguageCodeData
class LocaleKeySorter:
"""Sort-ordering representation of a locale key.
@@ -389,8 +396,42 @@ class LocaleDataWriter (LocaleSourceEditor):
# TODO: unify these next three into the previous three; kept
# separate for now to verify we're not changing data.
- def languageCodes(self, languages):
- self.__writeCodeList(self.writer.write, languages, 'language', 3)
+    def languageCodes(self, languages, code_data: LanguageCodeData):
+        """Write the languageCodeList table of ISO 639 language codes.
+
+        Emits one LanguageCodeEntry row for each item of languages,
+        listing the Part 1, Part 2B, Part 2T and Part 3 codes found for
+        it in code_data. For each value in languages, value[1] is the
+        code to look up and value[0] is written as a trailing comment on
+        the row. Items whose key is below 2 are looked up as 'und'
+        rather than by their own code.
+
+        Raises AssertionError if code_data has no entry for a code, or
+        if the entry found is not identified by the code looked up.
+        """
+        out = self.writer.write
+
+        out(f'constexpr std::array<LanguageCodeEntry, {len(languages)}> languageCodeList {{\n')
+
+        def q(val: Optional[str], size: int) -> str:
+            """Quote the value and adjust the result for tabular view."""
+            if val is not None:
+                s = ', '.join(f"'{c}'" for c in val)
+                s = f'{{{s}}}'
+            else:
+                s = ''
+            # A size of 0 marks the last column: no comma, no padding.
+            if size == 0:
+                return f'{{{s}}}'
+            return f'{{{s}}},'.ljust(size * 5 + 4)
+
+        for key, value in languages.items():
+            code = value[1]
+            if key < 2:
+                lookup = 'und'
+            else:
+                lookup = code
+            result = code_data.query(lookup)
+            # Check for a failed lookup before using the result, so that it
+            # is reported as such and not as an AttributeError on None.
+            assert result is not None, \
+                f'No ISO 639 data found for language code {lookup!r}'
+            assert key < 2 or code == result.id(), \
+                f'Language code {code!r} does not match the entry found: {result.id()!r}'
+
+            codeString = ''.join((q(result.part1Code, 2),
+                                  q(result.part2BCode, 3),
+                                  q(result.part2TCode, 3),
+                                  q(result.part3Code, 0)))
+            out(f' LanguageCodeEntry {{{codeString}}}, // {value[0]}\n')
+
+        out('};\n\n')
def scriptCodes(self, scripts):
self.__writeCodeList(self.writer.write, scripts, 'script', 4)
@@ -519,6 +560,8 @@ def main(out, err):
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('input_file', help='input XML file name',
metavar='input-file.xml')
+ parser.add_argument('iso_path', help='path to the ISO 639-3 data file',
+ metavar='iso-639-3.tab')
parser.add_argument('qtbase_path', help='path to the root of the qtbase source tree')
parser.add_argument('--calendars', help='select calendars to emit data for',
nargs='+', metavar='CALENDAR',
@@ -538,6 +581,8 @@ def main(out, err):
locale_map = dict(reader.loadLocaleMap(calendars, err.write))
locale_keys = sorted(locale_map.keys(), key=LocaleKeySorter(reader.defaultMap()))
+ code_data = LanguageCodeData(args.iso_path)
+
try:
with LocaleDataWriter(qtsrcdir.joinpath('src/corelib/text/qlocale_data_p.h'),
qtsrcdir, reader.cldrVersion) as writer:
@@ -549,7 +594,7 @@ def main(out, err):
writer.scriptNames(reader.scripts)
writer.territoryNames(reader.territories)
# TODO: merge the next three into the previous three
- writer.languageCodes(reader.languages)
+ writer.languageCodes(reader.languages, code_data)
writer.scriptCodes(reader.scripts)
writer.territoryCodes(reader.territories)
except Exception as e: