2 files changed, 97 insertions, 21 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 94459b9e3f..0cc2560632 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -439,7 +439,7 @@ class CldrAccess (object):
     @property
     def cldrVersion(self):
         # Evaluate so as to ensure __cldrVersion is set:
-        self.__scanLdmlDtd()
+        self.__unDistinguishedAttributes
         return self.__cldrVersion
 
     # Implementation details
@@ -545,17 +545,68 @@ class CldrAccess (object):
 
         return cache
 
+    @property
+    def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join):
+        """Mapping from tag names to tuples of attribute names.
+
+        LDML defines some attributes as 'distinguishing': if a node
+        has distinguishing attributes that weren't specified in an
+        XPath, a search on that XPath should exclude the node's
+        children.
+
+        This property is a mapping from tag names to tuples of
+        attribute names that *aren't* distinguishing for that tag.
+        Its value is cached (so its costly computation is only done
+        once) and there's a side-effect of populating its cache: it
+        sets self.__cldrVersion to the value found in ldml.dtd, during
+        parsing."""
+        if not cache:
+            cache.update(self.__scanLdmlDtd())
+            assert cache
+
+        return cache
+
     def __scanLdmlDtd(self, joinPath = os.path.join):
-        """Scan the LDML DTD, record CLDR version."""
+        """Scan the LDML DTD, record CLDR version.
+
+        Yields (tag, attrs) pairs: on elements with a given tag,
+        attributes named in its attrs (a tuple) may be ignored in an
+        XPath search; other attributes are distinguished attributes,
+        in the terminology of LDML's locale-inheritance rules.
+
+        Sets self.__cldrVersion as a side-effect, since this
+        information is found in the same file."""
         with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd:
+            tag, ignored, last = None, None, None
+
             for line in dtd:
+                if line.startswith('<!ELEMENT '):
+                    if ignored:
+                        assert tag
+                        yield tag, tuple(ignored)
+                    tag, ignored, last = line.split()[1], [], None
+                    continue
+
                 if line.startswith('<!ATTLIST '):
+                    assert tag is not None
                     parts = line.split()
+                    assert parts[1] == tag
+                    last = parts[2]
                     if parts[1:5] == ['version', 'cldrVersion', 'CDATA', '#FIXED']:
-                        # parts[5] is the version, in quotes, maybe
-                        # with a final > attached to its end:
+                        # parts[5] is the version, in quotes, although the final > might be stuck on its end:
                         self.__cldrVersion = parts[5].split('"')[1]
-                        break
+                    continue
+
+                # <!ELEMENT...>s can also be @METADATA, but not @VALUE:
+                if '<!--@VALUE-->' in line or (last and '<!--@METADATA-->' in line):
+                    assert last is not None
+                    assert ignored is not None
+                    assert tag is not None
+                    ignored.append(last)
+                    last = None # No attribute is both value and metadata
+
+            if tag and ignored:
+                yield tag, tuple(ignored)
 
     def __enumMap(self, key, cache = {}):
         if not cache:
@@ -650,7 +701,7 @@ class CldrAccess (object):
         while name and name != 'root':
             doc = self.__localeAsDoc(name)
             if doc is not None:
-                yield Node(doc)
+                yield Node(doc, self.__unDistinguishedAttributes)
 
             try:
                 name = self.__parentLocale(name)
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index a4a8448a43..940264674b 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -53,14 +53,21 @@ class Node (object):
     nodes are returned wrapped as Node objects.  A Node exposes the
     raw DOM node it wraps via its .dom attribute."""
 
-    def __init__(self, elt, draft = 0):
+    def __init__(self, elt, dullAttrs = None, draft = 0):
         """Wraps a DOM node for ease of access.
 
-        First argument, elt, is the DOM node to wrap. (Optional second
-        argument, draft, should only be supplied by this class's
-        creation of child nodes; it is the maximum draft score of any
-        ancestor of the new node.)"""
-        self.dom = elt
+        First argument, elt, is the DOM node to wrap.
+
+        Optional second argument, dullAttrs, should either be None or
+        map each LDML tag name to a list of the names of
+        non-distinguishing attributes for nodes with the given tag
+        name. If None is given, no distinguishing attribute checks are
+        performed.
+
+        (Optional third argument, draft, should only be supplied by
+        this class's creation of child nodes; it is the maximum draft
+        score of any ancestor of the new node.)"""
+        self.dom, self.__dull = elt, dullAttrs
         try:
             attr = elt.attributes['draft'].nodeValue
         except KeyError:
@@ -68,7 +75,7 @@ class Node (object):
         else:
             self.draft = max(draft, self.draftScore(attr))
 
-    def findAllChildren(self, tag, wanted = None):
+    def findAllChildren(self, tag, wanted = None, allDull = False):
         """All children that do have the given tag and attributes.
 
         First argument is the tag: children with any other tag are
@@ -76,7 +83,15 @@ class Node (object):
 
         Optional second argument, wanted, should either be None or map
         attribute names to the values they must have. Only child nodes
-        with thes attributes set to the given values are yielded."""
+        with these attributes set to the given values are yielded.
+
+        By default, nodes that have distinguishing attributes, other
+        than those specified in wanted, are ignored.  Pass the allDull
+        parameter a true value to suppress this check."""
+
+        if self.__dull is None:
+            allDull = True
+        dull = () if allDull else self.__dull[tag]
 
         for child in self.dom.childNodes:
             if child.nodeType != child.ELEMENT_NODE:
@@ -92,7 +107,15 @@ class Node (object):
                 except KeyError: # Some wanted attribute is missing
                     continue
 
-            yield Node(child, self.draft)
+                if not (allDull or all(k in dull or k in wanted
+                                       for k in child.attributes.keys())):
+                    continue
+
+            elif not (allDull or all(k in dull
+                                     for k in child.attributes.keys())):
+                continue
+
+            yield Node(child, self.__dull, self.draft)
 
     def findUniqueChild(self, tag):
         """Returns the single child with the given nodeName.
@@ -156,7 +179,9 @@ class XmlScanner (object):
         self.root = node
 
     def findNodes(self, xpath):
-        """Return all nodes under self.root matching this xpath"""
+        """Return all nodes under self.root matching this xpath.
+
+        Ignores any excess attributes."""
         elts = (self.root,)
         for selector in xpath.split('/'):
             tag, attrs = _parseXPath(selector)
@@ -202,7 +227,7 @@ class LocaleScanner (object):
             elt = self.base.root
             for i, selector in enumerate(tags):
                 tag, attrs = _parseXPath(selector)
-                for alias in elt.findAllChildren('alias'):
+                for alias in elt.findAllChildren('alias', allDull = True):
                     if alias.dom.attributes['source'].nodeValue == 'locale':
                         replace = alias.dom.attributes['path'].nodeValue.split('/')
                         tags = self.__xpathJoin(tags[:i], replace, tags[i:])
@@ -251,7 +276,7 @@ class LocaleScanner (object):
         attribute; that attribute value is mentioned in the error's
         message."""
         root = self.nodes[0]
-        for alias in root.findAllChildren('alias'):
+        for alias in root.findAllChildren('alias', allDull=True):
             try:
                 source = alias.dom.attributes['source'].nodeValue
             except (KeyError, AttributeError):
@@ -261,7 +286,7 @@ class LocaleScanner (object):
 
         ids = root.findUniqueChild('identity')
         for code in ('language', 'script', 'territory', 'variant'):
-            for node in ids.findAllChildren(code):
+            for node in ids.findAllChildren(code, allDull=True):
                 try:
                     yield node.dom.attributes['type'].nodeValue
                 except (KeyError, AttributeError):
@@ -322,8 +347,8 @@ class LocaleScanner (object):
         yield 'plus', plus
         yield 'minus', minus
 
-        # Currency formatting (currencyFormat may have a type field):
-        xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern'
+        # Currency formatting:
+        xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[standard]/pattern'
         try:
             money = self.find(xpath.replace('Formats/',
                                             'Formats[numberSystem={}]/'.format(system)))