summary | refs | log | tree | commit | diff | stats
path: root/util/locale_database/qlocalexml.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/locale_database/qlocalexml.py')
-rw-r--r-- util/locale_database/qlocalexml.py 238
1 files changed, 146 insertions, 92 deletions
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 4fcfe32a43..a104402e23 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -1,31 +1,5 @@
-# coding=utf8
-#############################################################################
-##
-## Copyright (C) 2020 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
"""Shared serialization-scanning code for QLocaleXML format.
Provides classes:
@@ -35,15 +9,23 @@ Provides classes:
Support:
Spacer -- provides control over indentation of the output.
+
+RelaxNG schema for the used file format can be found in qlocalexml.rnc.
+QLocaleXML files can be validated using:
+
+ jing -c qlocalexml.rnc <file.xml>
+
+You can download jing from https://relaxng.org/jclark/jing.html if your
+package manager lacks the jing package.
"""
-from __future__ import print_function
+
from xml.sax.saxutils import escape
from localetools import Error
# Tools used by Locale:
def camel(seq):
- yield seq.next()
+ yield next(seq)
for word in seq:
yield word.capitalize()
@@ -51,14 +33,14 @@ def camelCase(words):
return ''.join(camel(iter(words)))
def addEscapes(s):
- return ''.join(c if n < 128 else '\\x{:02x}'.format(n)
+ return ''.join(c if n < 128 else f'\\x{n:02x}'
for n, c in ((ord(c), c) for c in s))
def startCount(c, text): # strspn
"""First index in text where it doesn't have a character in c"""
assert text and text[0] in c
try:
- return (j for j, d in enumerate(text) if d not in c).next()
+ return next((j for j, d in enumerate(text) if d not in c))
except StopIteration:
return len(text)
@@ -118,21 +100,25 @@ def convertFormat(format):
class QLocaleXmlReader (object):
def __init__(self, filename):
self.root = self.__parse(filename)
- # Lists of (id, name, code) triples:
- languages = tuple(self.__loadMap('language'))
- scripts = tuple(self.__loadMap('script'))
- countries = tuple(self.__loadMap('country'))
+
+ from enumdata import language_map, script_map, territory_map
+ # Lists of (id, enum name, code, en.xml name) tuples:
+ languages = tuple(self.__loadMap('language', language_map))
+ scripts = tuple(self.__loadMap('script', script_map))
+ territories = tuple(self.__loadMap('territory', territory_map))
self.__likely = tuple(self.__likelySubtagsMap())
- # Mappings {ID: (name, code)}
- self.languages = dict((v[0], v[1:]) for v in languages)
- self.scripts = dict((v[0], v[1:]) for v in scripts)
- self.countries = dict((v[0], v[1:]) for v in countries)
- # Private mappings {name: (ID, code)}
- self.__langByName = dict((v[1], (v[0], v[2])) for v in languages)
- self.__textByName = dict((v[1], (v[0], v[2])) for v in scripts)
- self.__landByName = dict((v[1], (v[0], v[2])) for v in countries)
+
+ # Mappings {ID: (enum name, code, en.xml name)}
+ self.languages = {v[0]: v[1:] for v in languages}
+ self.scripts = {v[0]: v[1:] for v in scripts}
+ self.territories = {v[0]: v[1:] for v in territories}
+
+ # Private mappings {enum name: (ID, code)}
+ self.__langByName = {v[1]: (v[0], v[2]) for v in languages}
+ self.__textByName = {v[1]: (v[0], v[2]) for v in scripts}
+ self.__landByName = {v[1]: (v[0], v[2]) for v in territories}
# Other properties:
- self.dupes = set(v[1] for v in languages) & set(v[1] for v in countries)
+ self.__dupes = set(v[1] for v in languages) & set(v[1] for v in territories)
self.cldrVersion = self.__firstChildText(self.root, "version")
def loadLocaleMap(self, calendars, grumble = lambda text: None):
@@ -142,18 +128,18 @@ class QLocaleXmlReader (object):
locale = Locale.fromXmlData(lambda k: kid(elt, k), calendars)
language = self.__langByName[locale.language][0]
script = self.__textByName[locale.script][0]
- country = self.__landByName[locale.country][0]
+ territory = self.__landByName[locale.territory][0]
if language != 1: # C
- if country == 0:
- grumble('loadLocaleMap: No country id for "{}"\n'.format(locale.language))
+ if territory == 0:
+ grumble(f'loadLocaleMap: No territory id for "{locale.language}"\n')
if script == 0:
- # Find default script for the given language and country - see:
+ # Find default script for the given language and territory - see:
# http://www.unicode.org/reports/tr35/#Likely_Subtags
try:
try:
- to = likely[(locale.language, 'AnyScript', locale.country)]
+ to = likely[(locale.language, 'AnyScript', locale.territory)]
except KeyError:
to = likely[(locale.language, 'AnyScript', 'AnyTerritory')]
except KeyError:
@@ -162,11 +148,11 @@ class QLocaleXmlReader (object):
locale.script = to[1]
script = self.__textByName[locale.script][0]
- yield (language, script, country), locale
+ yield (language, script, territory), locale
def languageIndices(self, locales):
index = 0
- for key, value in self.languages.iteritems():
+ for key, value in self.languages.items():
i, count = 0, locales.count(key)
if count > 0:
i = index
@@ -190,11 +176,11 @@ class QLocaleXmlReader (object):
'_'.join(tag(give)), ids(give))
def defaultMap(self):
- """Map language and script to their default country by ID.
+ """Map language and script to their default territory by ID.
- Yields ((language, script), country) wherever the likely
+ Yields ((language, script), territory) wherever the likely
sub-tags mapping says language's default locale uses the given
- script and country."""
+ script and territory."""
for have, give in self.__likely:
if have[1:] == ('AnyScript', 'AnyTerritory') and give[2] != 'AnyTerritory':
assert have[0] == give[0], (have, give)
@@ -202,14 +188,41 @@ class QLocaleXmlReader (object):
self.__textByName[give[1]][0]),
self.__landByName[give[2]][0])
+ def enumify(self, name, suffix):
+ """Stick together the parts of an enumdata.py name.
+
+ Names given in enumdata.py include spaces and hyphens that we
+ can't include in an identifier, such as the name of a member
+ of an enum type. Removing those would lose the word
+ boundaries, so make sure each word starts with a capital (but
+ don't simply capitalize() as some names contain words,
+ e.g. McDonald, that have later capitals in them).
+
+ We also need to resolve duplication between languages and
+ territories (by adding a suffix to each) and add Script to the
+ ends of script-names that don't already end in it."""
+ name = name.replace('-', ' ')
+ # Don't .capitalize() as McDonald is already camel-case (see enumdata.py):
+ name = ''.join(word[0].upper() + word[1:] for word in name.split())
+ if suffix != 'Script':
+ assert not(name in self.__dupes and name.endswith(suffix))
+ return name + suffix if name in self.__dupes else name
+
+ if not name.endswith(suffix):
+ name += suffix
+ if name in self.__dupes:
+ raise Error(f'The script name "{name}" is messy')
+ return name
+
# Implementation details:
- def __loadMap(self, category):
+ def __loadMap(self, category, enum):
kid = self.__firstChildText
- for element in self.__eachEltInGroup(self.root, category + 'List', category):
- yield int(kid(element, 'id')), kid(element, 'name'), kid(element, 'code')
+ for element in self.__eachEltInGroup(self.root, f'{category}List', category):
+ key = int(kid(element, 'id'))
+ yield key, enum[key][0], kid(element, 'code'), kid(element, 'name')
def __likelySubtagsMap(self):
- def triplet(element, keys=('language', 'script', 'country'), kid = self.__firstChildText):
+ def triplet(element, keys=('language', 'script', 'territory'), kid = self.__firstChildText):
return tuple(kid(element, key) for key in keys)
kid = self.__firstChildElt
@@ -246,7 +259,7 @@ class QLocaleXmlReader (object):
return child
child = child.nextSibling
- raise Error('No {} child found'.format(name))
+ raise Error(f'No {name} child found')
@classmethod
def __firstChildText(cls, elt, key):
@@ -302,7 +315,7 @@ class Spacer (object):
elif line.startswith('<') and not line.startswith('<!'):
cut = line.find('>')
tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0]
- if '</{}>'.format(tag) not in line:
+ if f'</{tag}>' not in line:
self.current += self.__each
return indent + line + '\n'
@@ -333,10 +346,28 @@ class QLocaleXmlWriter (object):
self.__write('<localeDatabase>')
# Output of various sections, in their usual order:
- def enumData(self, languages, scripts, countries):
- self.__enumTable('language', languages)
- self.__enumTable('script', scripts)
- self.__enumTable('country', countries)
+ def enumData(self, code2name):
+ """Output name/id/code tables for language, script and territory.
+
+ Parameter, code2name, is a function taking 'language',
+ 'script' or 'territory' and returning a lookup function that
+ maps codes, of the relevant type, to their English names. This
+ lookup function is passed a code and the name, both taken from
+ enumdata.py, that QLocale uses, so the .get() of a dict will
+ work. The English name from this lookup will be used by
+ QLocale::*ToString() for the enum member whose name is based
+ on the enumdata.py name passed as fallback to the lookup."""
+ from enumdata import language_map, script_map, territory_map
+ self.__enumTable('language', language_map, code2name)
+ self.__enumTable('script', script_map, code2name)
+ self.__enumTable('territory', territory_map, code2name)
+ # Prepare to detect any unused codes (see __writeLocale(), close()):
+ self.__languages = set(p[1] for p in language_map.values()
+ if not p[1].isspace())
+ self.__scripts = set(p[1] for p in script_map.values()
+ if p[1] != 'Zzzz')
+ self.__territories = set(p[1] for p in territory_map.values()
+ if p[1] != 'ZZ')
def likelySubTags(self, entries):
self.__openTag('likelySubtags')
@@ -350,13 +381,11 @@ class QLocaleXmlWriter (object):
def locales(self, locales, calendars):
self.__openTag('localeList')
self.__openTag('locale')
- Locale.C(calendars).toXml(self.inTag, calendars)
+ self.__writeLocale(Locale.C(calendars), calendars)
self.__closeTag('locale')
- keys = locales.keys()
- keys.sort()
- for key in keys:
+ for key in sorted(locales.keys()):
self.__openTag('locale')
- locales[key].toXml(self.inTag, calendars)
+ self.__writeLocale(locales[key], calendars)
self.__closeTag('locale')
self.__closeTag('localeList')
@@ -364,13 +393,27 @@ class QLocaleXmlWriter (object):
self.inTag('version', cldrVersion)
def inTag(self, tag, text):
- self.__write('<{0}>{1}</{0}>'.format(tag, text))
+ self.__write(f'<{tag}>{text}</{tag}>')
- def close(self):
+ def close(self, grumble):
+ """Finish writing and grumble about any issues discovered."""
if self.__rawOutput != self.__complain:
self.__write('</localeDatabase>')
self.__rawOutput = self.__complain
+ if self.__languages or self.__scripts or self.__territories:
+ grumble('Some enum members are unused, corresponding to these tags:\n')
+ import textwrap
+ def kvetch(kind, seq, g = grumble, w = textwrap.wrap):
+ g('\n\t'.join(w(f' {kind}: {", ".join(sorted(seq))}', width=80)) + '\n')
+ if self.__languages:
+ kvetch('Languages', self.__languages)
+ if self.__scripts:
+ kvetch('Scripts', self.__scripts)
+ if self.__territories:
+ kvetch('Territories', self.__territories)
+ grumble('It may make sense to deprecate them.\n')
+
# Implementation details
@staticmethod
def __printit(text):
@@ -379,28 +422,39 @@ class QLocaleXmlWriter (object):
def __complain(text):
raise Error('Attempted to write data after closing :-(')
- def __enumTable(self, tag, table):
- self.__openTag(tag + 'List')
- for key, value in table.iteritems():
+ @staticmethod
+ def __xmlSafe(text):
+ return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+
+ def __enumTable(self, tag, table, code2name):
+ self.__openTag(f'{tag}List')
+ enname, safe = code2name(tag), self.__xmlSafe
+ for key, (name, code) in table.items():
self.__openTag(tag)
- self.inTag('name', value[0])
+ self.inTag('name', safe(enname(code, name)))
self.inTag('id', key)
- self.inTag('code', value[1])
+ self.inTag('code', code)
self.__closeTag(tag)
- self.__closeTag(tag + 'List')
+ self.__closeTag(f'{tag}List')
def __likelySubTag(self, tag, likely):
self.__openTag(tag)
self.inTag('language', likely[0])
self.inTag('script', likely[1])
- self.inTag('country', likely[2])
+ self.inTag('territory', likely[2])
# self.inTag('variant', likely[3])
self.__closeTag(tag)
+ def __writeLocale(self, locale, calendars):
+ locale.toXml(self.inTag, calendars)
+ self.__languages.discard(locale.language_code)
+ self.__scripts.discard(locale.script_code)
+ self.__territories.discard(locale.territory_code)
+
def __openTag(self, tag):
- self.__write('<{}>'.format(tag))
+ self.__write(f'<{tag}>')
def __closeTag(self, tag):
- self.__write('</{}>'.format(tag))
+ self.__write(f'</{tag}>')
def __write(self, line):
self.__rawOutput(self.__wrap(line))
@@ -435,7 +489,7 @@ class Locale (object):
# Convert from CLDR format-strings to QDateTimeParser ones:
__asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
# Just use the raw text:
- __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
+ __astxt = ("language", "languageEndonym", "script", "territory", "territoryEndonym",
"decimal", "group", "zero",
"list", "percent", "minus", "plus", "exp",
"quotationStart", "quotationEnd",
@@ -473,7 +527,7 @@ class Locale (object):
data['listDelim' if k == 'list' else k] = lookup(k)
for k in cls.propsMonthDay('months'):
- data[k] = dict((cal, lookup('_'.join((k, cal)))) for cal in calendars)
+ data[k] = {cal: lookup('_'.join((k, cal))) for cal in calendars}
grouping = lookup('groupSizes').split(';')
data.update(groupLeast = int(grouping[0]),
@@ -493,15 +547,15 @@ class Locale (object):
form used by CLDR; its default is ('gregorian',).
"""
get = lambda k: getattr(self, k)
- for key in ('language', 'script', 'country'):
+ for key in ('language', 'script', 'territory'):
write(key, get(key))
- write('{}code'.format(key), get('{}_code'.format(key)))
+ write(f'{key}code', get(f'{key}_code'))
for key in ('decimal', 'group', 'zero', 'list',
'percent', 'minus', 'plus', 'exp'):
write(key, get(key))
- for key in ('languageEndonym', 'countryEndonym',
+ for key in ('languageEndonym', 'territoryEndonym',
'quotationStart', 'quotationEnd',
'alternateQuotationStart', 'alternateQuotationEnd',
'listPatternPartStart', 'listPatternPartMiddle',
@@ -517,7 +571,7 @@ class Locale (object):
'_'.join((k, cal))
for k in self.propsMonthDay('months')
for cal in calendars):
- write(key, escape(get(key)).encode('utf-8'))
+ write(key, escape(get(key)))
write('groupSizes', ';'.join(str(x) for x in get('groupSizes')))
for key in ('currencyDigits', 'currencyRounding'):
@@ -554,9 +608,9 @@ class Locale (object):
(fullName, fullName),
(firstThree, firstThree),
(number, initial)),
- 'islamic': ((u'Muharram', u'Safar', u'Rabiʻ I', u'Rabiʻ II', u'Jumada I',
- u'Jumada II', u'Rajab', u'Shaʻban', u'Ramadan', u'Shawwal',
- u'Dhuʻl-Qiʻdah', u'Dhuʻl-Hijjah'),
+ 'islamic': (('Muharram', 'Safar', 'Rabiʻ I', 'Rabiʻ II', 'Jumada I',
+ 'Jumada II', 'Rajab', 'Shaʻban', 'Ramadan', 'Shawwal',
+ 'Dhuʻl-Qiʻdah', 'Dhuʻl-Hijjah'),
(fullName, fullName),
(islamicShort, islamicShort),
(number, number)),
@@ -565,7 +619,7 @@ class Locale (object):
(fullName, fullName),
(fullName, fullName),
(number, number)),
- },
+ },
sizes=('long', 'short', 'narrow')):
for cal in calendars:
try:
@@ -590,7 +644,7 @@ class Locale (object):
return cls(cls.__monthNames(calendars),
language='C', language_code='0', languageEndonym='',
script='AnyScript', script_code='0',
- country='AnyTerritory', country_code='0', countryEndonym='',
+ territory='AnyTerritory', territory_code='0', territoryEndonym='',
groupSizes=(3, 3, 1),
decimal='.', group=',', list=';', percent='%',
zero='0', minus='-', plus='+', exp='e',