diff options
Diffstat (limited to 'util')
-rw-r--r-- | util/locale_database/cldr.py | 63 | ||||
-rw-r--r-- | util/locale_database/ldml.py | 55 |
2 files changed, 97 insertions, 21 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 94459b9e3f..0cc2560632 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -439,7 +439,7 @@ class CldrAccess (object): @property def cldrVersion(self): # Evaluate so as to ensure __cldrVersion is set: - self.__scanLdmlDtd() + self.__unDistinguishedAttributes return self.__cldrVersion # Implementation details @@ -545,17 +545,68 @@ class CldrAccess (object): return cache + @property + def __unDistinguishedAttributes(self, cache = {}, joinPath = os.path.join): + """Mapping from tag names to lists of attributes. + + LDML defines some attributes as 'distinguishing': if a node + has distinguishing attributes that weren't specified in an + XPath, a search on that XPath should exclude the node's + children. + + This property is a mapping from tag names to tuples of + attribute names that *aren't* distinguishing for that tag. + Its value is cached (so its costly computation isonly done + once) and there's a side-effect of populating its cache: it + sets self.__cldrVersion to the value found in ldml.dtd, during + parsing.""" + if not cache: + cache.update(self.__scanLdmlDtd()) + assert cache + + return cache + def __scanLdmlDtd(self, joinPath = os.path.join): - """Scan the LDML DTD, record CLDR version.""" + """Scan the LDML DTD, record CLDR version + + Yields (tag, attrs) pairs: on elements with a given tag, + attributes named in its attrs (a tuple) may be ignored in an + XPath search; other attributes are distinguished attributes, + in the terminology of LDML's locale-inheritance rules. + + Sets self.__cldrVersion as a side-effect, since this + information is found in the same file.""" with self.__open(('common', 'dtd', 'ldml.dtd')) as dtd: + tag, ignored, last = None, None, None + for line in dtd: + if line.startswith('<!ELEMENT '): + if ignored: + assert tag + yield tag, tuple(ignored) + tag, ignored, last = line.split()[1], [], None + continue + if line.startswith('<!ATTLIST '): + assert tag is not None parts = line.split() + assert parts[1] == tag + last = parts[2] if parts[1:5] == ['version', 'cldrVersion', 'CDATA', '#FIXED']: - # parts[5] is the version, in quotes, maybe - # with a final > attached to its end: + # parts[5] is the version, in quotes, although the final > might be stuck on its end: self.__cldrVersion = parts[5].split('"')[1] - break + continue + + # <!ELEMENT...>s can also be @METADATA, but not @VALUE: + if '<!--@VALUE-->' in line or (last and '<!--@METADATA-->' in line): + assert last is not None + assert ignored is not None + assert tag is not None + ignored.append(last) + last = None # No attribute is both value and metadata + + if tag and ignored: + yield tag, tuple(ignored) def __enumMap(self, key, cache = {}): if not cache: @@ -650,7 +701,7 @@ class CldrAccess (object): while name and name != 'root': doc = self.__localeAsDoc(name) if doc is not None: - yield Node(doc) + yield Node(doc, self.__unDistinguishedAttributes) try: name = self.__parentLocale(name) diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index a4a8448a43..940264674b 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -53,14 +53,21 @@ class Node (object): nodes are returned wrapped as Node objects. A Node exposes the raw DOM node it wraps via its .dom attribute.""" - def __init__(self, elt, draft = 0): + def __init__(self, elt, dullAttrs = None, draft = 0): """Wraps a DOM node for ease of access. - First argument, elt, is the DOM node to wrap. (Optional second - argument, draft, should only be supplied by this class's - creation of child nodes; it is the maximum draft score of any - ancestor of the new node.)""" - self.dom = elt + First argument, elt, is the DOM node to wrap. + + Optional second argument, dullAttrs, should either be None or + map each LDML tag name to a list of the names of + non-distinguishing attributes for nodes with the given tag + name. If None is given, no distinguishing attribute checks are + performed. + + (Optional third argument, draft, should only be supplied by + this class's creation of child nodes; it is the maximum draft + score of any ancestor of the new node.)""" + self.dom, self.__dull = elt, dullAttrs try: attr = elt.attributes['draft'].nodeValue except KeyError: @@ -68,7 +75,7 @@ class Node (object): else: self.draft = max(draft, self.draftScore(attr)) - def findAllChildren(self, tag, wanted = None): + def findAllChildren(self, tag, wanted = None, allDull = False): """All children that do have the given tag and attributes. First argument is the tag: children with any other tag are @@ -76,7 +83,15 @@ class Node (object): Optional second argument, wanted, should either be None or map attribute names to the values they must have. Only child nodes - with thes attributes set to the given values are yielded.""" + with thes attributes set to the given values are yielded. + + By default, nodes that have distinguishing attributes, other + than those specified in wanted, are ignored. Pass the allDull + parameter a true value to suppress this check.""" + + if self.__dull is None: + allDull = True + dull = () if allDull else self.__dull[tag] for child in self.dom.childNodes: if child.nodeType != child.ELEMENT_NODE: @@ -92,7 +107,15 @@ class Node (object): except KeyError: # Some wanted attribute is missing continue - yield Node(child, self.draft) + if not (allDull or all(k in dull or k in wanted + for k in child.attributes.keys())): + continue + + elif not (allDull or all(k in dull + for k in child.attributes.keys())): + continue + + yield Node(child, self.__dull, self.draft) def findUniqueChild(self, tag): """Returns the single child with the given nodeName. @@ -156,7 +179,9 @@ class XmlScanner (object): self.root = node def findNodes(self, xpath): - """Return all nodes under self.root matching this xpath""" + """Return all nodes under self.root matching this xpath. + + Ignores any excess attributes.""" elts = (self.root,) for selector in xpath.split('/'): tag, attrs = _parseXPath(selector) @@ -202,7 +227,7 @@ class LocaleScanner (object): elt = self.base.root for i, selector in enumerate(tags): tag, attrs = _parseXPath(selector) - for alias in elt.findAllChildren('alias'): + for alias in elt.findAllChildren('alias', allDull = True): if alias.dom.attributes['source'].nodeValue == 'locale': replace = alias.dom.attributes['path'].nodeValue.split('/') tags = self.__xpathJoin(tags[:i], replace, tags[i:]) @@ -251,7 +276,7 @@ class LocaleScanner (object): attribute; that attribute value is mentioned in the error's message.""" root = self.nodes[0] - for alias in root.findAllChildren('alias'): + for alias in root.findAllChildren('alias', allDull=True): try: source = alias.dom.attributes['source'].nodeValue except (KeyError, AttributeError): @@ -261,7 +286,7 @@ class LocaleScanner (object): ids = root.findUniqueChild('identity') for code in ('language', 'script', 'territory', 'variant'): - for node in ids.findAllChildren(code): + for node in ids.findAllChildren(code, allDull=True): try: yield node.dom.attributes['type'].nodeValue except (KeyError, AttributeError): @@ -322,8 +347,8 @@ class LocaleScanner (object): yield 'plus', plus yield 'minus', minus - # Currency formatting (currencyFormat may have a type field): - xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern' + # Currency formatting: + xpath = 'numbers/currencyFormats/currencyFormatLength/currencyFormat[standard]/pattern' try: money = self.find(xpath.replace('Formats/', 'Formats[numberSystem={}]/'.format(system))) |