summary | refs | log | tree | commit | diff | stats
path: root/util/locale_database/xpathlite.py
diff options
context:
space:
mode:
author: Edward Welbourne <edward.welbourne@qt.io> 2020-02-27 13:58:58 +0100
committer: Edward Welbourne <eddy@chaos.org.uk> 2020-04-02 19:43:18 +0100
commit: be3dfd7a71a276b10bac50075b26c6af58b9d02b (patch)
tree: 58e3059b0d1e3f746ac593addcec3e7ef61a6c48 /util/locale_database/xpathlite.py
parent: c834dbc6fb8881f543e2a599afbc23ee1277483d (diff)
Rework cldr2qlocalexml.py's reading of CLDR data
Move the code out to a CldrReader class in cldr.py, expand CldrAccess with the facilities that class needs, expand ldml.py to include support for more features, finally making xpathlite.py redundant.

This initial commit aims, though, to be bug-for-bug compatible with xpathlite in its reading of the CLDR data. It turns out we've been using draftier data than we were aware of (which might not be a bad thing). The xpathlite code appeared to check for draft attributes, but these only appear on leaf nodes and most data were fetched by finding a parent and then scanning its children without the draft check; only am/pm data was actually being excluded based on draft values. (We allowed contributed, for am/pm, in addition to approved, which is all the xpathlite code allows otherwise.) There are also some less equivocal bugs; I'll deal with these in later commits.

Simplified number-system data look-ups; the old get_number_in_system() was taking care of old LDML versions' placement of the number system attribute; this is no longer needed. (It was also being used for a currency value to which it was not appropriate, which is now handled separately; this is one of the bugs mentioned above.) Ditched a fall-back to nativeZeroDigit, which no longer exists in CLDR.

Change the command-line to take the root of the CLDR data tree, rather than its common/main/ sub-directory. Support naming the file to which to write output, as a second command-line argument, instead of always writing to stdout (which remains the default) and leaving whoever runs the script to redirect stdout. Support (internally for now, while adding TODOs to give main() more command-line options) separating the stderr output into its more and less interesting parts; for now, continue producing both, but suppress the least interesting entirely.

Task-number: QTBUG-81344
Change-Id: Ie611b47403a9452b51feaeeaaa0fbc8f7e84dc71
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util/locale_database/xpathlite.py')
-rw-r--r-- util/locale_database/xpathlite.py 284
1 files changed, 0 insertions, 284 deletions
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py
deleted file mode 100644
index 3da8b24656..0000000000
--- a/util/locale_database/xpathlite.py
+++ /dev/null
@@ -1,284 +0,0 @@
-#!/usr/bin/env python
-#############################################################################
-##
-## Copyright (C) 2016 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
-
import os
import sys
import xml.dom.minidom

from localetools import Error
-
class DraftResolution(object):
    """Rank of a CLDR draft level, so that levels can be compared.

    See http://www.unicode.org/cldr/process.html for a description of
    the draft levels.
    """
    unconfirmed = 'unconfirmed'
    provisional = 'provisional'
    contributed = 'contributed'
    approved = 'approved'
    # Numeric rank of each level name; approved has the highest rank.
    _values = {unconfirmed: 1, provisional: 2, contributed: 3, approved: 4}

    def __init__(self, resolution):
        """Remember resolution, the name of a draft level."""
        self.resolution = resolution

    def toInt(self):
        """Return the numeric rank of this draft level.

        Raises KeyError if the resolution is not one of the level names
        known to this class.
        """
        return DraftResolution._values[self.resolution]
-
# Maps each file name already parsed to its DOM document.
doc_cache = {}
def parseDoc(file):
    """Return the parsed DOM document for the XML file named file.

    Each file is parsed at most once: the document is stored in
    doc_cache and later calls with the same name return that same
    object.
    """
    # "in" works on both Python 2 and 3; dict.has_key() is Python 2 only.
    if file not in doc_cache:
        doc_cache[file] = xml.dom.minidom.parse(file)
    return doc_cache[file]
-
def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
    """Return the first matching child element of parent, or False.

    A child matches when it is an element named tag_name and:
      * if arg_value is non-empty, its attribute arg_name equals
        arg_value;
      * if draft is given, it either has no draft attribute (which is
        treated as approved) or its draft level ranks at least as high
        as draft, as ranked by DraftResolution.

    Returns False, rather than None, when no child matches.
    """
    for node in parent.childNodes:
        if node.nodeType != node.ELEMENT_NODE or node.nodeName != tag_name:
            continue
        # hasAttribute()/getAttribute() exist on both Python 2 and 3,
        # unlike has_key() on the attribute map.
        if arg_value and (not node.hasAttribute(arg_name)
                          or node.getAttribute(arg_name) != arg_value):
            continue
        if draft and node.hasAttribute('draft'):
            # if draft is not specified then it's approved, so only a
            # node that declares a draft level needs to be ranked.
            value = DraftResolution(node.getAttribute('draft')).toInt()
            exemplar = DraftResolution(draft).toInt()
            if exemplar > value:
                continue
        return node
    return False
-
def codeMapsFromFile(file):
    """Extract mappings of language, script and country codes to names.

    The file shall typically be common/main/en.xml, which contains a
    localeDisplayNames element with children languages, scripts and
    territories; each element in each of these has a code as its type
    attribute and its name as element content. This returns a mapping
    with the keys 'language', 'script' and 'country', each of which
    has, as value, a mapping of the relevant codes to names.
    """
    parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
    keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
    for src, dst in keys.items():
        data = result[dst] = {}
        for elt in findChild(parent, src).childNodes:
            # Only elements can carry a type; skip text and comment nodes.
            if elt.nodeType != elt.ELEMENT_NODE or not elt.hasAttribute('type'):
                continue
            key = elt.getAttribute('type')
            # Don't over-write previously-read data for an alt form:
            if elt.hasAttribute('alt') and key in data:
                continue
            data[key] = elt.childNodes[0].wholeText

    return result
def findTagsInFile(file, path):
    """List the attributes of the elements found at path in file.

    The path is a "/"-separated sequence of steps, each a tag name
    optionally followed by [value] or [name=value]; a step matches the
    first child element with that tag whose attribute (type, unless a
    name is given) has that value. The first step is looked up among
    the children of the document's root element.

    Returns None if some step of the path matches nothing. Otherwise
    returns a list of [nodeName, attribute-items] entries: one for each
    child of the final element that has attributes or, if the final
    element has no child nodes at all, one for the element itself
    (provided it has attributes).
    """
    elt = parseDoc(file).documentElement
    for tag_spec in path.split("/"):
        tag_name, arg_name, arg_value = tag_spec, 'type', ''
        left_bracket = tag_spec.find('[')
        if left_bracket != -1:
            tag_name = tag_spec[:left_bracket]
            # Drop the enclosing brackets, then separate name from value.
            parts = tag_spec[left_bracket + 1:-1].split("=")
            if len(parts) == 2:
                arg_name, arg_value = parts
            else:
                arg_value = parts[0]
        elt = findChild(elt, tag_name, arg_name, arg_value)
        if not elt:
            return None

    # Report the children when there are any, else the element itself.
    nodes = elt.childNodes if elt.childNodes else [elt]
    # Nodes without attributes (e.g. text nodes) are left out.
    return [[node.nodeName, node.attributes.items()]
            for node in nodes if node.attributes]
-
def _findEntryInFile(file, path, draft=None, attribute=None):
    """Look up path in one file, without following locale inheritance.

    The path is a "/"-separated sequence of steps, each a tag name
    optionally followed by [value] or [@name='value'-style] attribute
    selection (the attribute defaults to type). If draft is given, it
    is passed to findChild() for every step.

    Returns a pair (value, aliaspath):
      * (None, aliaspath) if an <alias source="locale"> element is met
        on the way; aliaspath is the rewritten path the caller should
        look up from scratch;
      * ("", None) if some step of the path matches nothing;
      * if attribute is given, (its value on the final element, None),
        with None as the value when the element lacks that attribute;
      * otherwise (nodeValue of the final element's first child, None),
        with None as the value when there is no first child.
    """
    elt = parseDoc(file).documentElement
    tag_spec_list = path.split("/")
    for i, tag_spec in enumerate(tag_spec_list):
        tag_name, arg_name, arg_value = tag_spec, 'type', ''
        left_bracket = tag_spec.find('[')
        if left_bracket != -1:
            tag_name = tag_spec[:left_bracket]
            parts = tag_spec[left_bracket + 1:-1].split("=")
            if len(parts) == 2:
                arg_name = parts[0].replace("@", "").replace("'", "")
                arg_value = parts[1]
            else:
                arg_value = parts[0]

        alias = findChild(elt, 'alias')
        # getAttribute() gives "" for a missing attribute, so an alias
        # without a source is simply not treated as a locale alias.
        if alias and alias.getAttribute('source') == 'locale':
            # resolve all dot-dot parts of the path
            aliaspath = []
            for step in tag_spec_list[:i] + alias.getAttribute('path').split("/"):
                if step == '..':
                    del aliaspath[-1:] # no-op when already empty
                else:
                    aliaspath.append(step)
            # remove attribute specification that our xpathlite doesn't support
            aliaspath = [step.replace("@type=", "").replace("'", "")
                         for step in aliaspath]
            # append the remaining path
            aliaspath += tag_spec_list[i:]
            # "locale" aliases are special - we need to start lookup from scratch
            return (None, "/".join(aliaspath))

        elt = findChild(elt, tag_name, arg_name, arg_value, draft)
        if not elt:
            return ("", None)

    if attribute is not None:
        if elt.hasAttribute(attribute):
            return (elt.getAttribute(attribute), None)
        return (None, None)

    child = elt.firstChild
    if child is None:
        return (None, None)
    return (child.nodeValue, None)
-
def findAlias(file):
    """Return the source of the file's top-level alias, or False.

    Looks for an alias element directly under the document's root
    element; returns the value of its source attribute, or False if
    there is no such element or it has no source attribute.
    """
    alias_elt = findChild(parseDoc(file).documentElement, "alias")
    if not alias_elt or not alias_elt.hasAttribute('source'):
        return False
    return alias_elt.getAttribute('source')
-
# Maps each locale name already resolved to its lookup chain.
lookup_chain_cache = {}
# Maps each explicit parent locale to the list of locales it parents;
# filled from supplementalData.xml on first use.
parent_locales = {}
def _fixedLookupChain(dirname, name):
    """Return the list of locale names to consult for locale name.

    The chain starts with name itself and continues with each shorter
    prefix of its "_"-joined parts, e.g. az_Latn_AZ, az_Latn, az. If an
    entry of the chain is listed under an explicit parent locale, the
    chain is cut after that entry and, unless the parent is root, the
    parent's own chain is appended.

    The explicit parents are read, once, from the parentLocales element
    of ../supplemental/supplementalData.xml relative to dirname.
    Results are cached by name in lookup_chain_cache; the returned list
    is the cached object, so callers should not modify it.
    """
    if name in lookup_chain_cache:
        return lookup_chain_cache[name]

    # see http://www.unicode.org/reports/tr35/#Parent_Locales
    if not parent_locales:
        supplemental = os.path.join(dirname, "..", "supplemental", "supplementalData.xml")
        # findTagsInFile() returns None when there is no parentLocales
        # element; treat that as no explicit parents.
        for ns in findTagsInFile(supplemental, "parentLocales") or ():
            # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]]
            attrs = dict(ns[1])
            parent_locales[attrs.get(u"parent", "")] = attrs[u"locales"].split(" ")

    # split locale name into parts and join ever shorter prefixes of them
    # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az]
    parts = name.split("_")
    items = ["_".join(parts[:count]) for count in range(len(parts), 0, -1)]

    for i, item in enumerate(items):
        for parent_locale, children in parent_locales.items():
            if item not in children:
                continue
            if parent_locale == u"root":
                items = items[:i + 1]
            else:
                items = items[:i + 1] + _fixedLookupChain(dirname, parent_locale)
            lookup_chain_cache[name] = items
            return items

    lookup_chain_cache[name] = items
    return items
-
def _findEntry(base, path, draft=None, attribute=None):
    """Look up path for a locale, following its lookup chain and aliases.

    The base names the locale's file, with or without its .xml suffix.
    Each locale in the chain given by _fixedLookupChain() whose file
    exists is tried in turn:
      * if the file has a top-level alias, the lookup is redone in the
        aliased file and its result is returned;
      * if the file redirects the path by a source="locale" alias, the
        lookup starts again, from base, with the rewritten path;
      * otherwise the first non-empty value found is returned.

    Returns None if no file in the chain supplies a value. Raises Error
    if a top-level alias names a file that does not exist.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    (dirname, filename) = os.path.split(base)

    for item in _fixedLookupChain(dirname, filename):
        # os.path.join() copes with an empty dirname, for which plain
        # concatenation with "/" would name a file in the root directory.
        file = os.path.join(dirname, item + ".xml")
        if not os.path.isfile(file):
            continue

        alias = findAlias(file)
        if alias:
            # if alias is found we should follow it and stop processing current file
            # see http://www.unicode.org/reports/tr35/#Common_Elements
            aliasfile = os.path.join(dirname, alias + ".xml")
            if not os.path.isfile(aliasfile):
                raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias))
            # found an alias, recurse into parsing it
            return _findEntry(aliasfile, path, draft, attribute)

        (result, aliaspath) = _findEntryInFile(file, path, draft, attribute)
        if aliaspath:
            # start lookup again because of the alias source="locale"
            return _findEntry(base, aliaspath, draft, attribute)
        if result:
            return result

    return None
-
def findEntry(base, path, draft=None, attribute=None):
    """Look up path for a locale, falling back to root.xml.

    The base names the locale's file, with or without its .xml suffix.
    The path is first looked up via _findEntry(), then in root.xml in
    the same directory. If root.xml redirects the path by a
    source="locale" alias, the whole lookup is repeated with the
    rewritten path.

    Returns the first non-empty value found (the named attribute's
    value if attribute is given, else the entry's text), or None if
    path is empty. Raises Error if the entry cannot be found.
    """
    if base.endswith(".xml"):
        base = base[:-4]
    (dirname, filename) = os.path.split(base)
    # os.path.join() copes with an empty dirname, for which plain
    # concatenation with "/" would name a file in the root directory.
    root = os.path.join(dirname, "root.xml")

    result = None
    while path:
        result = _findEntry(base, path, draft, attribute)
        if result:
            return result
        (result, aliaspath) = _findEntryInFile(root, path, draft, attribute)
        if result:
            return result
        if not aliaspath:
            raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path))
        path = aliaspath

    return result
-