diff options
Diffstat (limited to 'util/locale_database/ldml.py')
-rw-r--r-- | util/locale_database/ldml.py | 88 |
1 files changed, 70 insertions, 18 deletions
diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index f292235fb4..219d1f7145 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -21,6 +21,13 @@ See individual classes for further detail. from localetools import Error from dateconverter import convert_date +# The github version of CLDR uses '↑↑↑' to indicate "inherit" +INHERIT = '↑↑↑' + +def _attrsFromDom(dom): + return { k: (v if isinstance(v, str) else v.nodeValue) + for k, v in dom.attributes.items() } + class Node (object): """Wrapper for an arbitrary DOM node. @@ -50,6 +57,9 @@ class Node (object): else: self.draft = max(draft, self.draftScore(attr)) + def attributes(self): + return _attrsFromDom(self.dom) + def findAllChildren(self, tag, wanted = None, allDull = False): """All children that do have the given tag and attributes. @@ -166,17 +176,35 @@ class XmlScanner (object): return elts class Supplement (XmlScanner): - def find(self, xpath): + def find(self, xpath, exclude=()): + """Finds nodes by matching a specified xpath. + + If exclude is passed, it should be a sequence of attribute names (its + default is empty). Any matches to the given xpath that also have any + attribute in this sequence will be excluded. + + For each childless node matching the xpath, or child of a node matching + the xpath, this yields a twople (name, attrs) where name is the + nodeName and attrs is a dict mapping the node's attribute's names to + their values. For attribute values that are not simple strings, the + nodeValue of the attribute node is used.""" elts = self.findNodes(xpath) - for elt in _iterateEach(e.dom.childNodes if e.dom.childNodes else (e.dom,) - for e in elts): + for elt in _iterateEach(e.dom.childNodes or (e.dom,) + for e in elts + if not any(a in e.dom.attributes + for a in exclude)): if elt.attributes: - yield (elt.nodeName, - dict((k, v if isinstance(v, str) else v.nodeValue) - for k, v in elt.attributes.items())) + yield elt.nodeName, _attrsFromDom(elt) class LocaleScanner (object): def __init__(self, name, nodes, root): + """Set up to scan data for a specified locale. + + First parameter is the name of the locale; it will be used in + error messages. Second is a tuple of DOM root-nodes of files + with locale data, later ones serving as fall-backs for data + missing in earlier ones. Third parameter is the root locale's + DOM node.""" self.name, self.nodes, self.base = name, nodes, root def find(self, xpath, default = None, draft = None): @@ -257,7 +285,13 @@ class LocaleScanner (object): stem = f'numbers/symbols[numberSystem={system}]/' decimal = self.find(f'{stem}decimal') group = self.find(f'{stem}group') - assert decimal != group, (self.name, system, decimal) + if decimal == group: + # mn_Mong_MN @v43 :-( + clean = Node.draftScore('approved') + decimal = self.find(f'{stem}decimal', draft=clean) + group = self.find(f'{stem}group', draft=clean) + assert decimal != group, (self.name, system, decimal) + yield 'decimal', decimal yield 'group', group yield 'percent', self.find(f'{stem}percentSign') @@ -324,6 +358,7 @@ class LocaleScanner (object): def endonyms(self, language, script, territory, variant): # TODO: take variant into account ? + # TODO: QTBUG-47892, support query for all combinations for seq in ((language, script, territory), (language, script), (language, territory), (language,)): if not all(seq): @@ -383,10 +418,10 @@ class LocaleScanner (object): ('long', 'format', 'wide'), ('short', 'format', 'abbreviated'), ('narrow', 'format', 'narrow'), - ) # Used for month and day names + ) # Used for month and day names def __find(self, xpath): - retries = [ xpath.split('/') ] + retries, foundNone = [ xpath.split('/') ], True while retries: tags, elts, roots = retries.pop(), self.nodes, (self.base.root,) for selector in tags: @@ -396,6 +431,9 @@ class LocaleScanner (object): break else: # Found matching elements + elts = tuple(self.__skipInheritors(elts)) + if elts: + foundNone = False # Possibly filter elts to prefer the least drafty ? for elt in elts: yield elt @@ -415,26 +453,40 @@ class LocaleScanner (object): if not roots: if retries: # Let outer loop fall back on an alias path: break - sought = '/'.join(tags) - if sought != xpath: - sought += f' (for {xpath})' - raise Error(f'All lack child {selector} for {sought} in {self.name}') + if foundNone: + sought = '/'.join(tags) + if sought != xpath: + sought += f' (for {xpath})' + raise Error(f'All lack child {selector} for {sought} in {self.name}') else: # Found matching elements + roots = tuple(self.__skipInheritors(roots)) + if roots: + foundNone = False for elt in roots: yield elt - sought = '/'.join(tags) - if sought != xpath: - sought += f' (for {xpath})' - raise Error(f'No {sought} in {self.name}') + if foundNone: + sought = '/'.join(tags) + if sought != xpath: + sought += f' (for {xpath})' + raise Error(f'No {sought} in {self.name}') + + @staticmethod + def __skipInheritors(elts): + for elt in elts: + try: + if elt.dom.firstChild.nodeValue != INHERIT: + yield elt + except (AttributeError, KeyError): + yield elt def __currencyDisplayName(self, stem): try: return self.find(stem + 'displayName') except Error: pass - for x in ('zero', 'one', 'two', 'few', 'many', 'other'): + for x in ('zero', 'one', 'two', 'few', 'many', 'other'): try: return self.find(f'{stem}displayName[count={x}]') except Error: |