summaryrefslogtreecommitdiffstats
path: root/util/locale_database/ldml.py
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2020-02-27 10:56:36 +0100
committerEdward Welbourne <eddy@chaos.org.uk>2020-04-02 19:43:13 +0100
commitc834dbc6fb8881f543e2a599afbc23ee1277483d (patch)
treed860c83f0ae1fc5751062f6b94ffa67ab014f261 /util/locale_database/ldml.py
parent9fab53a51317a1692ceb0069f212339bb0dd8780 (diff)
Move cldr2qtimezone.py's CLDR-reading to a CldrAccess class
This begins the process of replacing xpathlite.py, adding low-level DOM-access classes to ldml.py and the CldrAccess class to cldr.py. Moved a format comment from cldr2qtimezone.py's doc-string to the method of CldrAccess that does the actual reading. Task-number: QTBUG-81344 Change-Id: I46ae3f402f8207ced6d30a1de5cedaeef47b2bcf Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util/locale_database/ldml.py')
-rw-r--r--util/locale_database/ldml.py140
1 files changed, 140 insertions, 0 deletions
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
new file mode 100644
index 0000000000..4aaa728a86
--- /dev/null
+++ b/util/locale_database/ldml.py
@@ -0,0 +1,140 @@
+#############################################################################
+##
+## Copyright (C) 2020 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Parsing the Locale Data Markup Language
+
+It's an XML format, so the raw parsing of XML is, of course, delegated
+to xml.dom.minidom; but it has its own specific schemata and some
+funky rules for combining data from various files (inheritance between
+locales). The use of it we're interested in is extraction of CLDR's
+data, so some of the material here is specific to CLDR; see cldr.py
+for how it is mainly used.
+
+Provides various classes to wrap xml.dom's objects, specifically those
+returned by minidom.parse() and their child-nodes:
+ Node -- wraps any node in the DOM tree
+ XmlScanner -- wraps the root element of a stand-alone XML file
+ Supplement -- specializes XmlScanner for supplemental data files
+
+See individual classes for further detail.
+"""
+from localetools import Error
+
class Node (object):
    """Wrapper for an arbitrary DOM node.

    Provides various ways to select children of a node. Selected child
    nodes are returned wrapped as Node objects. A Node exposes the
    raw DOM node it wraps via its .dom attribute."""

    def __init__(self, elt):
        """Wraps a DOM node for ease of access.

        Single argument, elt, is the DOM node to wrap."""
        self.dom = elt

    def findAllChildren(self, tag, wanted = None):
        """All children that do have the given tag and attributes.

        First argument is the tag: children with any other tag are
        ignored.

        Optional second argument, wanted, should either be None or map
        attribute names to the values they must have. Only child nodes
        with these attributes set to the given values are yielded.

        A child that has a draft attribute is only yielded if that
        attribute's value scores at least the cut-off (currently:
        approved or false); a child with no draft attribute is always
        eligible. Each match is yielded wrapped as a Node."""

        cutoff = 4 # Only accept approved, for now
        for child in self.dom.childNodes:
            # Skip text, comments and other non-element children:
            if child.nodeType != child.ELEMENT_NODE:
                continue
            if child.nodeName != tag:
                continue

            try:
                # The mapping holds Attr nodes; the score table is
                # keyed by the attribute's text, so look that up
                # (the Attr object itself never matches any key).
                draft = child.attributes['draft'].nodeValue
            except KeyError:
                pass # No draft attribute: treat as acceptable.
            else:
                # Unrecognised draft values score 0, so are rejected.
                if self.__draftScores.get(draft, 0) < cutoff:
                    continue

            if wanted:
                try:
                    if any(child.attributes[k].nodeValue != v
                           for k, v in wanted.items()):
                        continue
                except KeyError: # Some wanted attribute is missing
                    continue

            yield Node(child)

    # Scores for the values of the draft attribute; higher is better.
    __draftScores = dict(true = 0, unconfirmed = 1, provisional = 2,
                         contributed = 3, approved = 4, false = 4)
+
def _parseXPath(selector):
    """Split "tag[attr=val][...]" into tag-name and attribute mapping.

    Single argument, selector, is one step of a path: a tag name
    optionally followed by any number of bracketed conditions. Each
    condition is either attr=val or a bare val, which is taken as
    shorthand for type=val. Only the first '=' in a condition
    separates name from value, so a value may itself contain '='.

    Returns a pair (tag-name, dict mapping attribute names to values);
    the dict is empty when there are no conditions. Raises ValueError
    if a condition lacks its closing ']'."""
    conditions = selector.split('[')
    name = conditions.pop(0)
    attrs = {}
    for condition in conditions:
        condition = condition.strip()
        # Checked explicitly, rather than by assert, so that malformed
        # selectors are still caught when running with -O:
        if not condition.endswith(']'):
            raise ValueError('Malformed selector: missing "]"', selector)
        key, equals, value = condition[:-1].partition('=')
        if equals:
            attrs[key] = value
        else: # Bare value: shorthand for type=value
            attrs['type'] = key
    return name, attrs
+
def _iterateEach(iters):
    """Flatten a two-layer iterator.

    Single argument, iters, is an iterable whose entries are
    themselves iterable. Yields every entry of each of those, in
    order, lazily."""
    for inner in iters:
        for entry in inner:
            yield entry
+
class XmlScanner (object):
    """Wrap an XML file to enable XPath access to its nodes.

    The wrapped root is stored as .root; findNodes() calls its
    findAllChildren() method (as provided by Node)."""

    def __init__(self, node):
        """Single argument, node, is the root to search under."""
        self.root = node

    def findNodes(self, xpath):
        """Return all nodes under self.root matching this xpath

        The xpath is split on '/' into steps, each parsed by
        _parseXPath(); each step selects, from the children of the
        nodes matched so far, those matching that step. Returns a
        tuple of the nodes left after the last step, which is empty
        as soon as any step matches nothing."""
        matches = (self.root,)
        for step in xpath.split('/'):
            if not matches:
                # Nothing left to descend from: later steps can't match.
                break
            tag, attrs = _parseXPath(step)
            matches = tuple(child
                            for parent in matches
                            for child in parent.findAllChildren(tag, attrs))
        return matches
+
class Supplement (XmlScanner):
    """XmlScanner that reports the attributes of matched nodes.

    Replaces xpathlite.findTagsInFile()"""

    def find(self, xpath):
        """Yield (tag-name, attributes) for nodes found via xpath.

        Single argument, xpath, is passed to findNodes(). For each
        node that finds, the DOM nodes examined are its children, if
        it has any, else the node itself. Each examined DOM node that
        has attributes yields a pair of its nodeName and a dict
        mapping its attribute names to their values; DOM nodes without
        attributes are skipped."""
        elts = self.findNodes(xpath)
        for elt in _iterateEach(e.dom.childNodes or (e.dom,) for e in elts):
            if elt.attributes:
                # Values may arrive as plain text or as attribute
                # nodes; take .nodeValue where there is one. This
                # avoids naming basestring, which Python 3 lacks.
                yield (elt.nodeName,
                       dict((k, getattr(v, 'nodeValue', v))
                            for k, v in elt.attributes.items()))