summaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
authorEdward Welbourne <eddy@chaos.org.uk>2020-03-16 12:31:38 +0100
committerEdward Welbourne <eddy@chaos.org.uk>2020-04-02 19:43:43 +0100
commit963931550d42d08a4cb734b8c0a82becf7b0a91c (patch)
treeb1acdf5824d73d5db2d3abe0d83761b08e91a361 /util
parent89bd12b9ad3e4c265ddf537e845330f76ca2d399 (diff)
Check all matches for each XPath when searching
Previously, if we found one element with required attributes, we would search into it and ignore any later elements also with those required attributes. This meant that, if the first didn't contain the child elements we were looking for, we'd fail to find what we sought, if it was in a later matching element (e.g. with some ignored attributes). We would then go on to look for a match in a later file, where there might have been a match we should have found in the earlier file. Check all matches, rather than only the first match in each file. Do the search in each file "in parallel" to save reparsing the XPath. This clears the search code of rather hard-to-follow break/else handling in loops; and currently makes no change to the generated data.

Change-Id: I86b010e65b9a1fc1b79e5fdd45a5aeff1ed5d5d5
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util')
-rw-r--r--util/locale_database/ldml.py130
1 files changed, 67 insertions, 63 deletions
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py
index 8ed756fd8e..787509ee85 100644
--- a/util/locale_database/ldml.py
+++ b/util/locale_database/ldml.py
@@ -204,66 +204,25 @@ class LocaleScanner (object):
def __init__(self, name, nodes, root):
self.name, self.nodes, self.base = name, nodes, root
- def find(self, xpath, draft = None):
- tags = xpath.split('/')
- while True:
- replace = None
- for elt in self.nodes:
- for selector in tags:
- tag, attrs = _parseXPath(selector)
- for elt in elt.findAllChildren(tag, attrs):
- if draft is None or elt.draft <= draft:
- break # and process the next selector
- else:
- break # no child, try next elt in self.nodes
- else:
- # processed all selectors
- try:
- return elt.dom.firstChild.nodeValue
- except (AttributeError, KeyError):
- pass # move on to next elt in self.nodes
-
- # No match in self.nodes; check root
- elt = self.base.root
- for i, selector in enumerate(tags):
- tag, attrs = _parseXPath(selector)
- for alias in elt.findAllChildren('alias', allDull = True):
- if alias.dom.attributes['source'].nodeValue == 'locale':
- replace = alias.dom.attributes['path'].nodeValue.split('/')
- tags = self.__xpathJoin(tags[:i], replace, tags[i:])
- break
- else:
- for elt in elt.findAllChildren(tag, attrs):
- if draft is None or elt.draft <= draft:
- break # and process the next selector
- else:
- break
- if replace:
- break
- else:
- # processed all selectors
+ def find(self, xpath, default = None, draft = None):
+ """XPath search for the content of an element.
+
+ Required argument, xpath, is the XPath to search for. Optional
+ second argument is a default value to use, if no such node is
+ found. Optional third argument is a draft score (see
+ Node.draftScore() for details); if given, leaf elements with
+ higher draft scores are ignored."""
+ try:
+ for elt in self.__find(xpath):
try:
- return elt.dom.firstChild.nodeValue
+ if draft is None or elt.draft <= draft:
+ return elt.dom.firstChild.nodeValue
except (AttributeError, KeyError):
- # No match
pass
- if not replace:
- break
-
- sought = '/'.join(tags)
- if sought != xpath:
- sought += ' (for {})'.format(xpath)
- raise Error('No {} in {}'.format(sought, self.name))
-
- def findOr(self, xpath, fallback = ''):
- """Use a fall-back value if we don't find data.
-
- Like find, but takes a fall-back value to return instead of
- raising Error on failure."""
- try:
- return self.find(xpath)
- except Error:
- return fallback
+ except Error as e:
+ if default is None:
+ raise
+ return default
def tagCodes(self):
"""Yields four tag codes
@@ -305,9 +264,9 @@ class LocaleScanner (object):
"""
if isoCode:
stem = 'numbers/currencies/currency[{}]/'.format(isoCode)
- symbol = self.findOr(stem + 'symbol')
+ symbol = self.find(stem + 'symbol', '')
name = ';'.join(
- self.findOr(stem + 'displayName' + tail)
+ self.find(stem + 'displayName' + tail, '')
for tail in ('',) + tuple(
'[count={}]'.format(x) for x in ('zero', 'one', 'two', 'few', 'many', 'other')
)) + ';'
@@ -409,13 +368,13 @@ class LocaleScanner (object):
yield 'languageEndonym', ''
yield ('countryEndonym',
- self.findOr('localeDisplayNames/territories/territory[{}]'
- .format(country)))
+ self.find('localeDisplayNames/territories/territory[{}]'
+ .format(country), ''))
def unitData(self):
yield ('byte_unit',
- self.findOr('units/unitLength[long]/unit[digital-byte]/displayName',
- 'bytes'))
+ self.find('units/unitLength[long]/unit[digital-byte]/displayName',
+ 'bytes'))
unit = self.__findUnit('', 'B')
cache = [] # Populated by the SI call, to give hints to the IEC call
@@ -455,6 +414,51 @@ class LocaleScanner (object):
('narrow', 'format', 'narrow'),
) # Used for month and day names
+ def __find(self, xpath):
+ retries = [ xpath.split('/') ]
+ while retries:
+ tags, elts, roots = retries.pop(), self.nodes, (self.base.root,)
+ for selector in tags:
+ tag, attrs = _parseXPath(selector)
+ elts = tuple(_iterateEach(e.findAllChildren(tag, attrs) for e in elts))
+ if not elts:
+ break
+
+ else: # Found matching elements
+ # Possibly filter elts to prefer the least drafty ?
+ for elt in elts:
+ yield elt
+
+ # Process roots separately: otherwise the alias-processing
+ # is excessive.
+ for i, selector in enumerate(tags):
+ tag, attrs = _parseXPath(selector)
+
+ for alias in tuple(_iterateEach(r.findAllChildren('alias', allDull=True)
+ for r in roots)):
+ if alias.dom.attributes['source'].nodeValue == 'locale':
+ replace = alias.dom.attributes['path'].nodeValue.split('/')
+ retries.append(self.__xpathJoin(tags[:i], replace, tags[i:]))
+
+ roots = tuple(_iterateEach(r.findAllChildren(tag, attrs) for r in roots))
+ if not roots:
+ if retries: # Let outer loop fall back on an alias path:
+ break
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += ' (for {})'.format(xpath)
+ raise Error('All lack child {} for {} in {}'.format(
+ selector, sought, self.name))
+
+ else: # Found matching elements
+ for elt in roots:
+ yield elt
+
+ sought = '/'.join(tags)
+ if sought != xpath:
+ sought += ' (for {})'.format(xpath)
+ raise Error('No {} in {}'.format(sought, self.name))
+
def __findUnit(self, keySuffix, quantify, fallback=''):
# The displayName for a quantified unit in en.xml is kByte
# (even for unitLength[narrow]) instead of kB (etc.), so