util/locale_database/ldml.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

#############################################################################
##
## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
##
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
## Commercial License Usage
## Licensees holding valid commercial Qt licenses may use this file in
## accordance with the commercial license agreement provided with the
## Software or, alternatively, in accordance with the terms contained in
## a written agreement between you and The Qt Company. For licensing terms
## and conditions see https://www.qt.io/terms-conditions. For further
## information use the contact form at https://www.qt.io/contact-us.
##
## GNU General Public License Usage
## Alternatively, this file may be used under the terms of the GNU
## General Public License version 3 as published by the Free Software
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
## included in the packaging of this file. Please review the following
## information to ensure the GNU General Public License requirements will
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
##
## $QT_END_LICENSE$
##
#############################################################################
"""Parsing the Locale Data Markup Language

It's an XML format, so the raw parsing of XML is, of course, delegated
to xml.dom.minidom; but it has its own specific schemata and some
funky rules for combining data from various files (inheritance between
locales). The use of it we're interested in is extraction of CLDR's
data, so some of the material here is specific to CLDR; see cldr.py
for how it is mainly used.

Provides various classes to wrap xml.dom's objects, specifically those
returned by minidom.parse() and their child-nodes:
  Node -- wraps any node in the DOM tree
  XmlScanner -- wraps the root element of a stand-alone XML file
  Supplement -- specializes XmlScanner for supplemental data files

See individual classes for further detail.
"""
from localetools import Error

class Node (object):
    """Wrapper for an arbitrary DOM node.

    Provides various ways to select chldren of a node. Selected child
    nodes are returned wrapped as Node objects.  A Node exposes the
    raw DOM node it wraps via its .dom attribute."""

    def __init__(self, elt):
        """Wraps a DOM node for ease of access.

        Single argument, elt, is the DOM node to wrap."""
        self.dom = elt

    def findAllChildren(self, tag, wanted = None):
        """All children that do have the given tag and attributes.

        First argument is the tag: children with any other tag are
        ignored.

        Optional second argument, wanted, should either be None or map
        attribute names to the values they must have. Only child nodes
        with these attributes set to the given values are yielded."""

        cutoff = 4 # Only accept approved, for now
        for child in self.dom.childNodes:
            if child.nodeType != child.ELEMENT_NODE:
                continue
            if child.nodeName != tag:
                continue

            try:
                draft = child.attributes['draft']
            except KeyError:
                pass
            else:
                if self.__draftScores.get(draft, 0) < cutoff:
                    continue

            if wanted is not None:
                try:
                    if wanted and any(child.attributes[k].nodeValue != v for k, v in wanted.items()):
                        continue
                except KeyError: # Some wanted attribute is missing
                    continue

            yield Node(child)

    __draftScores = dict(true = 0, unconfirmed = 1, provisional = 2,
                         contributed = 3, approved = 4, false = 4)

def _parseXPath(selector):
    # Split "tag[attr=val][...]" into tag-name and attribute mapping
    attrs = selector.split('[')
    name = attrs.pop(0)
    if attrs:
        attrs = [x.strip() for x in attrs]
        assert all(x.endswith(']') for x in attrs)
        attrs = [x[:-1].split('=') for x in attrs]
        assert all(len(x) in (1, 2) for x in attrs)
        attrs = (('type', x[0]) if len(x) == 1 else x for x in attrs)
    return name, dict(attrs)

def _iterateEach(iters):
    # Flatten a two-layer iterator.
    for it in iters:
        for item in it:
            yield item

class XmlScanner (object):
    """Wrap an XML file to enable XPath access to its nodes.
    """
    def __init__(self, node):
        self.root = node

    def findNodes(self, xpath):
        """Return all nodes under self.root matching this xpath"""
        elts = (self.root,)
        for selector in xpath.split('/'):
            tag, attrs = _parseXPath(selector)
            elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
            if not elts:
                break
        return elts

class Supplement (XmlScanner):
    # Replaces xpathlite.findTagsInFile()
    def find(self, xpath):
        elts = self.findNodes(xpath)
        for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,)
                                for e in elts):
            if elt.attributes:
                yield (elt.nodeName,
                       dict((k, v if isinstance(v, basestring) else v.nodeValue)
                            for k, v in elt.attributes.items()))