diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2020-02-27 13:58:58 +0100 |
---|---|---|
committer | Edward Welbourne <eddy@chaos.org.uk> | 2020-04-02 19:43:18 +0100 |
commit | be3dfd7a71a276b10bac50075b26c6af58b9d02b (patch) | |
tree | 58e3059b0d1e3f746ac593addcec3e7ef61a6c48 /util/locale_database/xpathlite.py | |
parent | c834dbc6fb8881f543e2a599afbc23ee1277483d (diff) |
Rework cldr2qlocalexml.py's reading of CLDR data
Move the code out to a CldrReader class in cldr.py, expand CldrAccess
with the facilities that class needs, expand ldml.py to include support for more
features, finally making xpathlite.py redundant. This initial commit
aims, though, to be bug-for-bug compatible with xpathlite in its
reading of the CLDR data.
It turns out we've been using draftier data than we were aware of
(which might not be a bad thing). The xpathlite code appeared to check
for draft attributes, but these only appear on leaf nodes and most
data were fetched by finding a parent and then scanning its children
without the draft check; only am/pm data were actually being excluded
based on draft values. (We allowed contributed, for am/pm, in
addition to approved, which is all the xpathlite code allows
otherwise.) There are also some less equivocal bugs; I'll deal with
these in later commits.
Simplified number-system data look-ups; the old get_number_in_system()
was taking care of old LDML versions' placement of the number system
attribute; this is no longer needed. (It was also being used for a
currency value to which it was not appropriate, which is now handled
separately; this is one of the bugs mentioned above.) Ditched a
fall-back to nativeZeroDigit, which no longer exists in CLDR.
Change the command-line to take the root of the CLDR data tree, rather
than its common/main/ sub-directory. Support naming the file to which
to write output, as a second command-line argument, instead of always
writing to stdout (which remains the default) and leaving whoever runs
the script to redirect stdout.
Support (internally for now, while adding TODOs to give main() more
command-line options) separating the stderr output into its more and
less interesting parts; for now, continue producing both, but suppress
the least interesting entirely.
Task-number: QTBUG-81344
Change-Id: Ie611b47403a9452b51feaeeaaa0fbc8f7e84dc71
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util/locale_database/xpathlite.py')
-rw-r--r-- | util/locale_database/xpathlite.py | 284 |
1 files changed, 0 insertions, 284 deletions
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py deleted file mode 100644 index 3da8b24656..0000000000 --- a/util/locale_database/xpathlite.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python -############################################################################# -## -## Copyright (C) 2016 The Qt Company Ltd. -## Contact: https://www.qt.io/licensing/ -## -## This file is part of the test suite of the Qt Toolkit. -## -## $QT_BEGIN_LICENSE:GPL-EXCEPT$ -## Commercial License Usage -## Licensees holding valid commercial Qt licenses may use this file in -## accordance with the commercial license agreement provided with the -## Software or, alternatively, in accordance with the terms contained in -## a written agreement between you and The Qt Company. For licensing terms -## and conditions see https://www.qt.io/terms-conditions. For further -## information use the contact form at https://www.qt.io/contact-us. -## -## GNU General Public License Usage -## Alternatively, this file may be used under the terms of the GNU -## General Public License version 3 as published by the Free Software -## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT -## included in the packaging of this file. Please review the following -## information to ensure the GNU General Public License requirements will -## be met: https://www.gnu.org/licenses/gpl-3.0.html. 
-## -## $QT_END_LICENSE$ -## -############################################################################# - -import sys -import os -import xml.dom.minidom - -from localetools import Error - -class DraftResolution: - # See http://www.unicode.org/cldr/process.html for description - unconfirmed = 'unconfirmed' - provisional = 'provisional' - contributed = 'contributed' - approved = 'approved' - _values = { unconfirmed : 1, provisional : 2, contributed : 3, approved : 4 } - def __init__(self, resolution): - self.resolution = resolution - def toInt(self): - return DraftResolution._values[self.resolution] - -doc_cache = {} -def parseDoc(file): - if not doc_cache.has_key(file): - doc_cache[file] = xml.dom.minidom.parse(file) - return doc_cache[file] - -def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None): - for node in parent.childNodes: - if node.nodeType != node.ELEMENT_NODE: - continue - if node.nodeName != tag_name: - continue - if arg_value: - if not node.attributes.has_key(arg_name): - continue - if node.attributes[arg_name].nodeValue != arg_value: - continue - if draft: - if not node.attributes.has_key('draft'): - # if draft is not specified then it's approved - return node - value = node.attributes['draft'].nodeValue - value = DraftResolution(value).toInt() - exemplar = DraftResolution(draft).toInt() - if exemplar > value: - continue - return node - return False - -def codeMapsFromFile(file): - """Extract mappings of language, script and country codes to names. - - The file shall typically be common/main/en.xml, which contains a - localeDisplayNames element with children languages, scripts and - territories; each element in each of these has a code as its type - attribute and its name as element content. This returns a mapping - withe keys 'language', 'script' and 'country', each of which - has, as value, a mapping of the relevant codes to names. 
- """ - parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames') - keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {} - for src, dst in keys.items(): - child = findChild(parent, src) - data = result[dst] = {} - for elt in child.childNodes: - if elt.attributes and elt.attributes.has_key('type'): - key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText - # Don't over-write previously-read data for an alt form: - if elt.attributes.has_key('alt') and data.has_key(key): - continue - data[key] = value - - return result - -def findTagsInFile(file, path): - doc = parseDoc(file) - - elt = doc.documentElement - tag_spec_list = path.split("/") - last_entry = None - for tag_spec in tag_spec_list: - tag_name = tag_spec - arg_name = 'type' - arg_value = '' - left_bracket = tag_spec.find('[') - if left_bracket != -1: - tag_name = tag_spec[:left_bracket] - arg_value = tag_spec[left_bracket+1:-1].split("=") - if len(arg_value) == 2: - arg_name = arg_value[0] - arg_value = arg_value[1] - else: - arg_value = arg_value[0] - elt = findChild(elt, tag_name, arg_name, arg_value) - if not elt: - return None - ret = [] - if elt.childNodes: - for node in elt.childNodes: - if node.attributes: - element = [node.nodeName, None] - element[1] = node.attributes.items() - ret.append(element) - else: - if elt.attributes: - element = [elt.nodeName, None] - element[1] = elt.attributes.items() - ret.append(element) - return ret - -def _findEntryInFile(file, path, draft=None, attribute=None): - doc = parseDoc(file) - - elt = doc.documentElement - tag_spec_list = path.split("/") - last_entry = None - for i in range(len(tag_spec_list)): - tag_spec = tag_spec_list[i] - tag_name = tag_spec - arg_name = 'type' - arg_value = '' - left_bracket = tag_spec.find('[') - if left_bracket != -1: - tag_name = tag_spec[:left_bracket] - arg_value = tag_spec[left_bracket+1:-1].split("=") - if len(arg_value) == 2: - arg_name = 
arg_value[0].replace("@", "").replace("'", "") - arg_value = arg_value[1] - else: - arg_value = arg_value[0] - alias = findChild(elt, 'alias') - if alias and alias.attributes['source'].nodeValue == 'locale': - path = alias.attributes['path'].nodeValue - aliaspath = tag_spec_list[:i] + path.split("/") - def resolve(x, y): - if y == '..': - return x[:-1] - return x + [y] - # resolve all dot-dot parts of the path - aliaspath = reduce(resolve, aliaspath, []) - # remove attribute specification that our xpathlite doesnt support - aliaspath = map(lambda x: x.replace("@type=", "").replace("'", ""), aliaspath) - # append the remaining path - aliaspath = aliaspath + tag_spec_list[i:] - aliaspath = "/".join(aliaspath) - # "locale" aliases are special - we need to start lookup from scratch - return (None, aliaspath) - elt = findChild(elt, tag_name, arg_name, arg_value, draft) - if not elt: - return ("", None) - if attribute is not None: - if elt.attributes.has_key(attribute): - return (elt.attributes[attribute].nodeValue, None) - return (None, None) - try: - return (elt.firstChild.nodeValue, None) - except: - pass - return (None, None) - -def findAlias(file): - doc = parseDoc(file) - - alias_elt = findChild(doc.documentElement, "alias") - if not alias_elt: - return False - if not alias_elt.attributes.has_key('source'): - return False - return alias_elt.attributes['source'].nodeValue - -lookup_chain_cache = {} -parent_locales = {} -def _fixedLookupChain(dirname, name): - if lookup_chain_cache.has_key(name): - return lookup_chain_cache[name] - - # see http://www.unicode.org/reports/tr35/#Parent_Locales - if not parent_locales: - for ns in findTagsInFile(dirname + "/../supplemental/supplementalData.xml", "parentLocales"): - tmp = {} - parent_locale = "" - for data in ns[1:][0]: # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]] - tmp[data[0]] = data[1] - if data[0] == u"parent": - parent_locale = data[1] - 
parent_locales[parent_locale] = tmp[u"locales"].split(" ") - - items = name.split("_") - # split locale name into items and iterate through them from back to front - # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az] - items = list(reversed(map(lambda x: "_".join(items[:x+1]), range(len(items))))) - - for i in range(len(items)): - item = items[i] - for parent_locale in parent_locales.keys(): - for locale in parent_locales[parent_locale]: - if item == locale: - if parent_locale == u"root": - items = items[:i+1] - else: - items = items[:i+1] + _fixedLookupChain(dirname, parent_locale) - lookup_chain_cache[name] = items - return items - - lookup_chain_cache[name] = items - return items - -def _findEntry(base, path, draft=None, attribute=None): - if base.endswith(".xml"): - base = base[:-4] - (dirname, filename) = os.path.split(base) - - items = _fixedLookupChain(dirname, filename) - for item in items: - file = dirname + "/" + item + ".xml" - if os.path.isfile(file): - alias = findAlias(file) - if alias: - # if alias is found we should follow it and stop processing current file - # see http://www.unicode.org/reports/tr35/#Common_Elements - aliasfile = os.path.dirname(file) + "/" + alias + ".xml" - if not os.path.isfile(aliasfile): - raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias)) - # found an alias, recurse into parsing it - result = _findEntry(aliasfile, path, draft, attribute) - return result - (result, aliaspath) = _findEntryInFile(file, path, draft, attribute) - if aliaspath: - # start lookup again because of the alias source="locale" - return _findEntry(base, aliaspath, draft, attribute) - if result: - return result - return None - -def findEntry(base, path, draft=None, attribute=None): - file = base - if base.endswith(".xml"): - file = base - base = base[:-4] - else: - file = base + ".xml" - (dirname, filename) = os.path.split(base) - - result = None - while path: - result = 
_findEntry(base, path, draft, attribute) - if result: - return result - (result, aliaspath) = _findEntryInFile(dirname + "/root.xml", path, draft, attribute) - if result: - return result - if not aliaspath: - raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path)) - path = aliaspath - - return result - |