diff options
Diffstat (limited to 'util/locale_database/qlocalexml.py')
-rw-r--r-- | util/locale_database/qlocalexml.py | 215 |
1 files changed, 192 insertions, 23 deletions
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 87e356b8a7..b64ff56c64 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -1,7 +1,7 @@ # coding=utf8 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -28,11 +28,17 @@ ############################################################################# """Shared serialization-scanning code for QLocaleXML format. -The Locale class is written by cldr2qlocalexml.py and read by qlocalexml2cpp.py +Provides classes: + Locale -- common data-type representing one locale as a namespace + QLocaleXmlWriter -- helper to write a QLocaleXML file + +Support: + Spacer -- provides control over indentation of the output. """ +from __future__ import print_function from xml.sax.saxutils import escape -import xpathlite +from xpathlite import Error # Tools used by Locale: def camel(seq): @@ -43,10 +49,14 @@ def camel(seq): def camelCase(words): return ''.join(camel(iter(words))) +def addEscapes(s): + return ''.join(c if n < 128 else '\\x{:02x}'.format(n) + for n, c in ((ord(c), c) for c in s)) + def ordStr(c): if len(c) == 1: return str(ord(c)) - raise xpathlite.Error('Unable to handle value "%s"' % addEscapes(c)) + raise Error('Unable to handle value "{}"'.format(addEscapes(c))) # Fix for a problem with QLocale returning a character instead of # strings for QLocale::exponential() and others. So we fallback to @@ -69,6 +79,8 @@ def convertFormat(format): * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString() """ + # Compare and contrast dateconverter.py's convert_date(). + # Need to (check consistency and) reduce redundancy ! result = "" i = 0 while i < len(format): @@ -113,7 +125,163 @@ def convertFormat(format): return result -class Locale: +class Spacer (object): + def __init__(self, indent = None, initial = ''): + """Prepare to manage indentation and line breaks. + + Arguments are both optional. + + First argument, indent, is either None (its default, for + 'minifying'), an ingeter (number of spaces) or the unit of + text that is to be used for each indentation level (e.g. '\t' + to use tabs). If indent is None, no indentation is added, nor + are line-breaks; otherwise, self(text), for non-empty text, + shall end with a newline and begin with indentation. + + Second argument, initial, is the initial indentation; it is + ignored if indent is None. Indentation increases after each + call to self(text) in which text starts with a tag and doesn't + include its end-tag; indentation decreases if text starts with + an end-tag. The text is not parsed any more carefully than + just described. + """ + if indent is None: + self.__call = lambda x: x + else: + self.__each = ' ' * indent if isinstance(indent, int) else indent + self.current = initial + self.__call = self.__wrap + + def __wrap(self, line): + if not line: + return '\n' + + indent = self.current + if line.startswith('</'): + indent = self.current = indent[:-len(self.__each)] + elif line.startswith('<') and not line.startswith('<!'): + cut = line.find('>') + tag = (line[1:] if cut < 0 else line[1 : cut]).strip().split()[0] + if '</{}>'.format(tag) not in line: + self.current += self.__each + return indent + line + '\n' + + def __call__(self, line): + return self.__call(line) + +class QLocaleXmlWriter (object): + def __init__(self, save = None, space = Spacer(4)): + """Set up to write digested CLDR data as QLocale XML. + + Arguments are both optional. + + First argument, save, is None (its default) or a callable that + will write content to where you intend to save it. If None, it + is replaced with a callable that prints the given content, + suppressing the newline (but see the following); this is + equivalent to passing sys.stdout.write. + + Second argument, space, is an object to call on each text + output to prepend indentation and append newlines, or not as + the case may be. The default is a Spacer(4), which grows + indent by four spaces after each unmatched new tag and shrinks + back on a close-tag (its parsing is naive, but adequate to how + this class uses it), while adding a newline to each line. + """ + self.__rawOutput = self.__printit if save is None else save + self.__wrap = space + self.__write('<localeDatabase>') + + # Output of various sections, in their usual order: + def enumData(self, languages, scripts, countries): + self.__enumTable('languageList', languages) + self.__enumTable('scriptList', scripts) + self.__enumTable('countryList', countries) + + def likelySubTags(self, entries): + self.__openTag('likelySubtags') + for have, give in entries: + self.__openTag('likelySubtag') + self.__likelySubTag('from', have) + self.__likelySubTag('to', give) + self.__closeTag('likelySubtag') + self.__closeTag('likelySubtags') + + def locales(self, locales, calendars): + self.__openTag('localeList') + self.__openTag('locale') + Locale.C(calendars).toXml(self.inTag, calendars) + self.__closeTag('locale') + keys = locales.keys() + keys.sort() + for key in keys: + self.__openTag('locale') + locales[key].toXml(self.inTag, calendars) + self.__closeTag('locale') + self.__closeTag('localeList') + + def version(self, cldrVersion): + self.inTag('version', cldrVersion) + + def inTag(self, tag, text): + self.__write('<{0}>{1}</{0}>'.format(tag, text)) + + def close(self): + if self.__rawOutput != self.__complain: + self.__write('</localeDatabase>') + self.__rawOutput = self.__complain + + # Implementation details + @staticmethod + def __printit(text): + print(text, end='') + @staticmethod + def __complain(text): + raise Error('Attempted to write data after closing :-(') + + def __enumTable(self, tag, table): + self.__openTag(tag) + for key, value in table.iteritems(): + self.__openTag(tag[:-4]) + self.inTag('name', value[0]) + self.inTag('id', key) + self.inTag('code', value[1]) + self.__closeTag(tag[:-4]) + self.__closeTag(tag) + + def __likelySubTag(self, tag, likely): + self.__openTag(tag) + self.inTag('language', likely[0]) + self.inTag('script', likely[1]) + self.inTag('country', likely[2]) + # self.inTag('variant', likely[3]) + self.__closeTag(tag) + + def __openTag(self, tag): + self.__write('<{}>'.format(tag)) + def __closeTag(self, tag): + self.__write('</{}>'.format(tag)) + + def __write(self, line): + self.__rawOutput(self.__wrap(line)) + +class Locale (object): + """Holder for the assorted data representing one locale. + + Implemented as a namespace; its constructor and update() have the + same signatures as those of a dict, acting on the instance's + __dict__, so the results are accessed as attributes rather than + mapping keys.""" + def __init__(self, data=None, **kw): + self.update(data, **kw) + + def update(self, data=None, **kw): + if data: self.__dict__.update(data) + if kw: self.__dict__.update(kw) + + def __len__(self): # Used when testing as a boolean + return len(self.__dict__) + @staticmethod def propsMonthDay(scale, lengths=('long', 'short', 'narrow')): for L in lengths: @@ -176,19 +344,26 @@ class Locale: return cls(data) - def toXml(self, calendars=('gregorian',), indent=' ', tab=' '): - print indent + '<locale>' - inner = indent + tab + def toXml(self, write, calendars=('gregorian',)): + """Writes its data as QLocale XML. + + First argument, write, is a callable taking the name and + content of an XML element; it is expected to be the inTag + bound method of a QLocaleXmlWriter instance. + + Optional second argument is a list of calendar names, in the + form used by CLDR; its default is ('gregorian',). + """ get = lambda k: getattr(self, k) for key in ('language', 'script', 'country'): - print inner + "<%s>" % key + get(key) + "</%s>" % key - print inner + "<%scode>" % key + get(key + '_code') + "</%scode>" % key + write(key, get(key)) + write('{}code'.format(key), get('{}_code'.format(key))) for key in ('decimal', 'group', 'zero'): - print inner + "<%s>" % key + ordStr(get(key)) + "</%s>" % key + write(key, ordStr(get(key))) for key, std in (('list', ';'), ('percent', '%'), ('minus', '-'), ('plus', '+'), ('exp', 'e')): - print inner + "<%s>" % key + fixOrdStr(get(key), std) + "</%s>" % key + write(key, fixOrdStr(get(key), std)) for key in ('languageEndonym', 'countryEndonym', 'quotationStart', 'quotationEnd', @@ -206,16 +381,10 @@ class Locale: '_'.join((k, cal)) for k in self.propsMonthDay('months') for cal in calendars): - print inner + "<%s>%s</%s>" % (key, escape(get(key)).encode('utf-8'), key) + write(key, escape(get(key)).encode('utf-8')) for key in ('currencyDigits', 'currencyRounding'): - print inner + "<%s>%d</%s>" % (key, get(key), key) - - print indent + "</locale>" - - def __init__(self, data=None, **kw): - if data: self.__dict__.update(data) - if kw: self.__dict__.update(kw) + write(key, get(key)) # Tools used by __monthNames: def fullName(i, name): return name @@ -261,8 +430,8 @@ class Locale: for cal in calendars: try: data = known[cal] - except KeyError: # Need to add an entry to known, above. - print 'Unsupported calendar:', cal + except KeyError as e: # Need to add an entry to known, above. + e.args += ('Unsupported calendar:', cal) raise names, get = data[0] + ('',), data[1:] for n, size in enumerate(sizes): @@ -279,7 +448,7 @@ class Locale: 'Thursday', 'Friday', 'Saturday', ''), quantifiers=('k', 'M', 'G', 'T', 'P', 'E')): """Returns an object representing the C locale.""" - return cls(dict(cls.__monthNames(calendars)), + return cls(cls.__monthNames(calendars), language='C', language_code='0', languageEndonym='', script='AnyScript', script_code='0', country='AnyCountry', country_code='0', countryEndonym='', |