diff options
Diffstat (limited to 'util/locale_database/ldml.py')
-rw-r--r-- | util/locale_database/ldml.py | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py new file mode 100644 index 0000000000..4aaa728a86 --- /dev/null +++ b/util/locale_database/ldml.py @@ -0,0 +1,140 @@ +############################################################################# +## +## Copyright (C) 2020 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. +## +## $QT_END_LICENSE$ +## +############################################################################# +"""Parsing the Locale Data Markup Language + +It's an XML format, so the raw parsing of XML is, of course, delegated +to xml.dom.minidom; but it has its own specific schemata and some +funky rules for combining data from various files (inheritance between +locales). The use of it we're interested in is extraction of CLDR's +data, so some of the material here is specific to CLDR; see cldr.py +for how it is mainly used. + +Provides various classes to wrap xml.dom's objects, specifically those +returned by minidom.parse() and their child-nodes: + Node -- wraps any node in the DOM tree + XmlScanner -- wraps the root element of a stand-alone XML file + Supplement -- specializes XmlScanner for supplemental data files + +See individual classes for further detail. +""" +from localetools import Error + +class Node (object): + """Wrapper for an arbitrary DOM node. + + Provides various ways to select chldren of a node. Selected child + nodes are returned wrapped as Node objects. A Node exposes the + raw DOM node it wraps via its .dom attribute.""" + + def __init__(self, elt): + """Wraps a DOM node for ease of access. + + Single argument, elt, is the DOM node to wrap.""" + self.dom = elt + + def findAllChildren(self, tag, wanted = None): + """All children that do have the given tag and attributes. + + First argument is the tag: children with any other tag are + ignored. + + Optional second argument, wanted, should either be None or map + attribute names to the values they must have. Only child nodes + with these attributes set to the given values are yielded.""" + + cutoff = 4 # Only accept approved, for now + for child in self.dom.childNodes: + if child.nodeType != child.ELEMENT_NODE: + continue + if child.nodeName != tag: + continue + + try: + draft = child.attributes['draft'] + except KeyError: + pass + else: + if self.__draftScores.get(draft, 0) < cutoff: + continue + + if wanted is not None: + try: + if wanted and any(child.attributes[k].nodeValue != v for k, v in wanted.items()): + continue + except KeyError: # Some wanted attribute is missing + continue + + yield Node(child) + + __draftScores = dict(true = 0, unconfirmed = 1, provisional = 2, + contributed = 3, approved = 4, false = 4) + +def _parseXPath(selector): + # Split "tag[attr=val][...]" into tag-name and attribute mapping + attrs = selector.split('[') + name = attrs.pop(0) + if attrs: + attrs = [x.strip() for x in attrs] + assert all(x.endswith(']') for x in attrs) + attrs = [x[:-1].split('=') for x in attrs] + assert all(len(x) in (1, 2) for x in attrs) + attrs = (('type', x[0]) if len(x) == 1 else x for x in attrs) + return name, dict(attrs) + +def _iterateEach(iters): + # Flatten a two-layer iterator. + for it in iters: + for item in it: + yield item + +class XmlScanner (object): + """Wrap an XML file to enable XPath access to its nodes. + """ + def __init__(self, node): + self.root = node + + def findNodes(self, xpath): + """Return all nodes under self.root matching this xpath""" + elts = (self.root,) + for selector in xpath.split('/'): + tag, attrs = _parseXPath(selector) + elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts)) + if not elts: + break + return elts + +class Supplement (XmlScanner): + # Replaces xpathlite.findTagsInFile() + def find(self, xpath): + elts = self.findNodes(xpath) + for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,) + for e in elts): + if elt.attributes: + yield (elt.nodeName, + dict((k, v if isinstance(v, basestring) else v.nodeValue) + for k, v in elt.attributes.items())) |