From c3dea1ffca7e46319daed5b44895c6e09f51f3ea Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Wed, 19 Feb 2020 17:18:28 +0100 Subject: Move some shared code to a localetools module The time-zone script was importing two functions from the locale data generation script. Move them to a separate module, to which I'll shortly add some more shared utilities. Cleaned up some imports in the process. Combined qlocalexml2cpp's and xpathlit's error classes into a new Error class in the new module and made it a bit more like a proper python error class. Task-number: QTBUG-81344 Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75 Reviewed-by: Cristian Maureira-Fredes --- util/locale_database/cldr2qlocalexml.py | 54 ++++++++++++++------------- util/locale_database/cldr2qtimezone.py | 20 ++++------ util/locale_database/localetools.py | 65 +++++++++++++++++++++++++++++++++ util/locale_database/qlocalexml.py | 2 +- util/locale_database/qlocalexml2cpp.py | 31 +--------------- util/locale_database/xpathlite.py | 8 +--- 6 files changed, 105 insertions(+), 75 deletions(-) create mode 100644 util/locale_database/localetools.py diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index fba8d7fdd5..41795ff634 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -58,14 +58,14 @@ import re import textwrap import enumdata -import xpathlite -from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile +from localetools import Error +from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \ + _findEntryInFile as findEntryInFile from dateconverter import convert_date from qlocalexml import Locale, QLocaleXmlWriter # TODO: make calendars a command-line option calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew' -findEntryInFile = xpathlite._findEntryInFile def wrappedwarn(err, prefix, tokens): return err.write( '\n'.join(textwrap.wrap(prefix + ', '.join(tokens), @@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}): type of code to look up. Do not pass further parameters (the next will deprive you of the cache). - Raises xpathlite.Error with a suitable message, that includes the - unknown code's full name if found. + Raises localetools.Error with a suitable message, that includes + the unknown code's full name if found. Relies on global cldr_dir being set before it's called; see tail of this file. """ if not cache: - cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) + cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) name = cache[form].get(code) msg = 'unknown %s code "%s"' % (form, code) if name: msg += ' - could use "%s"' % name - raise xpathlite.Error(msg) + raise Error(msg) def parse_list_pattern_part_format(pattern): # This is a very limited parsing of the format for list pattern part only. @@ -182,7 +182,7 @@ def generateLocaleInfo(path): # skip legacy/compatibility ones alias = findAlias(path) if alias: - raise xpathlite.Error('alias to "%s"' % alias) + raise Error('Alias to "{}"'.format(alias)) def code(tag): return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] @@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ # ### actually there is only one locale with variant: en_US_POSIX # does anybody care about it at all? if variant_code: - raise xpathlite.Error('we do not support variants ("%s")' % variant_code) + raise Error('We do not support variants ("{}")'.format(variant_code)) language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: @@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ numbering_system = None try: numbering_system = findEntry(path, "numbers/defaultNumberingSystem") - except xpathlite.Error: + except Error: pass def findEntryDef(path, xpath, value=''): try: return findEntry(path, xpath) - except xpathlite.Error: + except Error: return value def get_number_in_system(path, xpath, numbering_system): if numbering_system: try: return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") - except xpathlite.Error: + except Error: # in CLDR 1.9 number system was refactored for numbers (but not for currency) # so if previous findEntry doesn't work we should try this: try: return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/")) - except xpathlite.Error: + except Error: # fallback to default pass return findEntry(path, xpath) @@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ for count in ('many', 'few', 'two', 'other', 'zero', 'one'): try: ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) - except xpathlite.Error: + except Error: continue # TODO: epxloit count-handling, instead of discarding placeholders @@ -498,7 +498,7 @@ def _parseLocale(l): country = "AnyCountry" if l == "und": - raise xpathlite.Error("we are treating unknown locale like C") + raise Error('We treat unknown locale like C') parsed = splitLocale(l) language_code = parsed.next() @@ -511,19 +511,19 @@ def _parseLocale(l): if language_code != "und": language_id = enumdata.languageCodeToId(language_code) if language_id == -1: - raise xpathlite.Error('unknown language code "%s"' % language_code) + raise Error('Unknown language code "{}"'.format(language_code)) language = enumdata.language_list[language_id][0] if script_code: script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: - raise xpathlite.Error('unknown script code "%s"' % script_code) + raise Error('Unknown script code "{}"'.format(script_code)) script = enumdata.script_list[script_id][0] if country_code: country_id = enumdata.countryCodeToId(country_code) if country_id == -1: - raise xpathlite.Error('unknown country code "%s"' % country_code) + raise Error('Unknown country code "{}"'.format(country_code)) country = enumdata.country_list[country_id][0] return (language, script, country) @@ -538,11 +538,13 @@ def likelySubtags(root, err): try: from_language, from_script, from_country = _parseLocale(tmp[u"from"]) to_language, to_script, to_country = _parseLocale(tmp[u"to"]) - except xpathlite.Error as e: - if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']: - skips.append(tmp[u'to']) + except Error as e: + if (tmp['to'].startswith(tmp['from']) + and e.message == 'Unknown language code "{}"'.format(tmp['from'])): + skips.append(tmp['to']) else: - sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) + sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format( + tmp[u"from"], tmp[u"to"], e.message)) continue # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags if to_country == "AnyCountry" and from_country != to_country: @@ -612,8 +614,8 @@ def main(args, out, err): if not l: skips.append(file) continue - except xpathlite.Error as e: - sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e))) + except Error as e: + sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message)) continue locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l @@ -628,8 +630,8 @@ def main(args, out, err): if not l: skips.append(file) continue - except xpathlite.Error as e: - sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e))) + except Error as e: + sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message)) continue locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index 4c3609056d..7816abc9e1 100755 --- a/util/locale_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -54,20 +54,14 @@ The XML structure is as follows: import os import sys +import re import datetime import tempfile -import enumdata -import xpathlite -from xpathlite import DraftResolution -import re -import qlocalexml2cpp -findAlias = xpathlite.findAlias -findEntry = xpathlite.findEntry -findEntryInFile = xpathlite._findEntryInFile -findTagsInFile = xpathlite.findTagsInFile -unicode2hex = qlocalexml2cpp.unicode2hex -wrap_list = qlocalexml2cpp.wrap_list +import enumdata +from localetools import unicode2hex, wrap_list, Error +from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \ + _findEntryInFile as findEntryInFile class ByteArrayData: def __init__(self): @@ -343,13 +337,13 @@ if mapTimezones: else: data['countryId'] = enumdata.countryCodeToId(data['countryCode']) if data['countryId'] < 0: - raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode']) + raise Error('Unknown Country Code "{}"'.format(data['countryCode'])) data['country'] = enumdata.country_list[data['countryId']][0] windowsIdDict[data['windowsKey'], data['countryId']] = data if badZones: sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones)) + "\nto the windowIdList in cldr2qtimezone.py\n\n") - raise xpathlite.Error("Unknown Windows IDs") + raise Error('Unknown Windows IDs') print "Input file parsed, now writing data" diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py new file mode 100644 index 0000000000..0d5c2acbbb --- /dev/null +++ b/util/locale_database/localetools.py @@ -0,0 +1,65 @@ +############################################################################# +## +## Copyright (C) 2020 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. +## +## $QT_END_LICENSE$ +## +############################################################################# +"""Utilities shared among the CLDR extraction tools. +Functions: + unicode2hex() -- converts unicode text to UCS-2 in hex form. + wrap_list() -- map list to comma-separated string, 20 entries per line. + +Classes: + Error -- A shared error class. +""" + +class Error (StandardError): + __upinit = StandardError.__init__ + def __init__(self, msg, *args): + self.__upinit(msg, *args) + self.message = msg + def __str__(self): + return self.message + +def unicode2hex(s): + lst = [] + for x in s: + v = ord(x) + if v > 0xFFFF: + # make a surrogate pair + # copied from qchar.h + high = (v >> 10) + 0xd7c0 + low = (v % 0x400 + 0xdc00) + lst.append(hex(high)) + lst.append(hex(low)) + else: + lst.append(hex(v)) + return lst + +def wrap_list(lst): + def split(lst, size): + while lst: + head, lst = lst[:size], lst[size:] + yield head + return ",\n".join(", ".join(x) for x in split(lst, 20)) diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py index 8289bd785a..0b962157d2 100644 --- a/util/locale_database/qlocalexml.py +++ b/util/locale_database/qlocalexml.py @@ -39,7 +39,7 @@ Support: from __future__ import print_function from xml.sax.saxutils import escape -from xpathlite import Error +from localetools import Error # Tools used by Locale: def camel(seq): diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index eb76f02faa..c54c3953ae 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -37,9 +37,10 @@ import os import sys import tempfile import datetime -from enumdata import language_aliases, country_aliases, script_aliases from qlocalexml import QLocaleXmlReader +from enumdata import language_aliases, country_aliases, script_aliases +from localetools import unicode2hex, wrap_list, Error # TODO: Make calendars a command-line parameter # map { CLDR name: Qt file name } @@ -59,19 +60,6 @@ generated_template = """ """ -class Error: - def __init__(self, msg): - self.msg = msg - def __str__(self): - return self.msg - -def wrap_list(lst): - def split(lst, size): - while lst: - head, lst = lst[:size], lst[size:] - yield head - return ",\n".join(", ".join(x) for x in split(lst, 20)) - def fixedScriptName(name, dupes): # Don't .capitalize() as some names are already camel-case (see enumdata.py): name = ''.join(word[0].upper() + word[1:] for word in name.split()) @@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2): return key1[1] - key2[1] -def unicode2hex(s): - lst = [] - for x in s: - v = ord(x) - if v > 0xFFFF: - # make a surrogate pair - # copied from qchar.h - high = (v >> 10) + 0xd7c0 - low = (v % 0x400 + 0xdc00) - lst.append(hex(high)) - lst.append(hex(low)) - else: - lst.append(hex(v)) - return lst - class StringDataToken: def __init__(self, index, length): if index > 0xFFFF or length > 0xFFFF: diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py index 97efaaab41..3da8b24656 100644 --- a/util/locale_database/xpathlite.py +++ b/util/locale_database/xpathlite.py @@ -31,6 +31,8 @@ import sys import os import xml.dom.minidom +from localetools import Error + class DraftResolution: # See http://www.unicode.org/cldr/process.html for description unconfirmed = 'unconfirmed' @@ -43,12 +45,6 @@ class DraftResolution: def toInt(self): return DraftResolution._values[self.resolution] -class Error: - def __init__(self, msg): - self.msg = msg - def __str__(self): - return self.msg - doc_cache = {} def parseDoc(file): if not doc_cache.has_key(file): -- cgit v1.2.3