summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2020-02-19 17:18:28 +0100
committerEdward Welbourne <eddy@chaos.org.uk>2020-04-02 19:42:40 +0100
commitc3dea1ffca7e46319daed5b44895c6e09f51f3ea (patch)
tree141da0f6c8cacc0d13459d06ff921594b763b954
parent4d9f1a87de7a6e50e89f96836bc2f0cf6e229dda (diff)
Move some shared code to a localetools module
The time-zone script was importing two functions from the locale data generation script. Move them to a separate module, to which I'll shortly add some more shared utilities. Cleaned up some imports in the process. Combined qlocalexml2cpp's and xpathlite's error classes into a new Error class in the new module and made it a bit more like a proper python error class. Task-number: QTBUG-81344 Change-Id: Idbe0139ba9aaa2f823b8f7216dee1d2539c18b75 Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
-rwxr-xr-xutil/locale_database/cldr2qlocalexml.py54
-rwxr-xr-xutil/locale_database/cldr2qtimezone.py20
-rw-r--r--util/locale_database/localetools.py65
-rw-r--r--util/locale_database/qlocalexml.py2
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py31
-rw-r--r--util/locale_database/xpathlite.py8
6 files changed, 105 insertions, 75 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index fba8d7fdd5..41795ff634 100755
--- a/util/locale_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -58,14 +58,14 @@ import re
import textwrap
import enumdata
-import xpathlite
-from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile
+from localetools import Error
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, codeMapsFromFile, \
+ _findEntryInFile as findEntryInFile
from dateconverter import convert_date
from qlocalexml import Locale, QLocaleXmlWriter
# TODO: make calendars a command-line option
calendars = ['gregorian', 'persian', 'islamic'] # 'hebrew'
-findEntryInFile = xpathlite._findEntryInFile
def wrappedwarn(err, prefix, tokens):
return err.write(
'\n'.join(textwrap.wrap(prefix + ', '.join(tokens),
@@ -116,19 +116,19 @@ def raiseUnknownCode(code, form, cache={}):
type of code to look up. Do not pass further parameters (the next
will deprive you of the cache).
- Raises xpathlite.Error with a suitable message, that includes the
- unknown code's full name if found.
+ Raises localetools.Error with a suitable message, that includes
+ the unknown code's full name if found.
Relies on global cldr_dir being set before it's called; see tail
of this file.
"""
if not cache:
- cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
+ cache.update(codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
name = cache[form].get(code)
msg = 'unknown %s code "%s"' % (form, code)
if name:
msg += ' - could use "%s"' % name
- raise xpathlite.Error(msg)
+ raise Error(msg)
def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only.
@@ -182,7 +182,7 @@ def generateLocaleInfo(path):
# skip legacy/compatibility ones
alias = findAlias(path)
if alias:
- raise xpathlite.Error('alias to "%s"' % alias)
+ raise Error('Alias to "{}"'.format(alias))
def code(tag):
return findEntryInFile(path, 'identity/' + tag, attribute="type")[0]
@@ -224,7 +224,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
- raise xpathlite.Error('we do not support variants ("%s")' % variant_code)
+ raise Error('We do not support variants ("{}")'.format(variant_code))
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
@@ -283,23 +283,23 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
numbering_system = None
try:
numbering_system = findEntry(path, "numbers/defaultNumberingSystem")
- except xpathlite.Error:
+ except Error:
pass
def findEntryDef(path, xpath, value=''):
try:
return findEntry(path, xpath)
- except xpathlite.Error:
+ except Error:
return value
def get_number_in_system(path, xpath, numbering_system):
if numbering_system:
try:
return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]")
- except xpathlite.Error:
+ except Error:
# in CLDR 1.9 number system was refactored for numbers (but not for currency)
# so if previous findEntry doesn't work we should try this:
try:
return findEntry(path, xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/"))
- except xpathlite.Error:
+ except Error:
# fallback to default
pass
return findEntry(path, xpath)
@@ -368,7 +368,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
for count in ('many', 'few', 'two', 'other', 'zero', 'one'):
try:
ans = findEntry(path, stem + 'unitPattern[count=%s]' % count)
- except xpathlite.Error:
+ except Error:
continue
# TODO: epxloit count-handling, instead of discarding placeholders
@@ -498,7 +498,7 @@ def _parseLocale(l):
country = "AnyCountry"
if l == "und":
- raise xpathlite.Error("we are treating unknown locale like C")
+ raise Error('We treat unknown locale like C')
parsed = splitLocale(l)
language_code = parsed.next()
@@ -511,19 +511,19 @@ def _parseLocale(l):
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
- raise xpathlite.Error('unknown language code "%s"' % language_code)
+ raise Error('Unknown language code "{}"'.format(language_code))
language = enumdata.language_list[language_id][0]
if script_code:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error('unknown script code "%s"' % script_code)
+ raise Error('Unknown script code "{}"'.format(script_code))
script = enumdata.script_list[script_id][0]
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
- raise xpathlite.Error('unknown country code "%s"' % country_code)
+ raise Error('Unknown country code "{}"'.format(country_code))
country = enumdata.country_list[country_id][0]
return (language, script, country)
@@ -538,11 +538,13 @@ def likelySubtags(root, err):
try:
from_language, from_script, from_country = _parseLocale(tmp[u"from"])
to_language, to_script, to_country = _parseLocale(tmp[u"to"])
- except xpathlite.Error as e:
- if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code "%s"' % tmp[u'from']:
- skips.append(tmp[u'to'])
+ except Error as e:
+ if (tmp['to'].startswith(tmp['from'])
+ and e.message == 'Unknown language code "{}"'.format(tmp['from'])):
+ skips.append(tmp['to'])
else:
- sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e)))
+ sys.stderr.write('skipping likelySubtag "{}" -> "{}" ({})\n'.format(
+ tmp[u"from"], tmp[u"to"], e.message))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:
@@ -612,8 +614,8 @@ def main(args, out, err):
if not l:
skips.append(file)
continue
- except xpathlite.Error as e:
- sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, str(e)))
+ except Error as e:
+ sys.stderr.write('skipping defaultContent locale "{}" ({})\n'.format(file, e.message))
continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
@@ -628,8 +630,8 @@ def main(args, out, err):
if not l:
skips.append(file)
continue
- except xpathlite.Error as e:
- sys.stderr.write('skipping file "{}" ({})\n'.format(file, str(e)))
+ except Error as e:
+ sys.stderr.write('skipping file "{}" ({})\n'.format(file, e.message))
continue
locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l
diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
index 4c3609056d..7816abc9e1 100755
--- a/util/locale_database/cldr2qtimezone.py
+++ b/util/locale_database/cldr2qtimezone.py
@@ -54,20 +54,14 @@ The XML structure is as follows:
import os
import sys
+import re
import datetime
import tempfile
-import enumdata
-import xpathlite
-from xpathlite import DraftResolution
-import re
-import qlocalexml2cpp
-findAlias = xpathlite.findAlias
-findEntry = xpathlite.findEntry
-findEntryInFile = xpathlite._findEntryInFile
-findTagsInFile = xpathlite.findTagsInFile
-unicode2hex = qlocalexml2cpp.unicode2hex
-wrap_list = qlocalexml2cpp.wrap_list
+import enumdata
+from localetools import unicode2hex, wrap_list, Error
+from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile, \
+ _findEntryInFile as findEntryInFile
class ByteArrayData:
def __init__(self):
@@ -343,13 +337,13 @@ if mapTimezones:
else:
data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
if data['countryId'] < 0:
- raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
+ raise Error('Unknown Country Code "{}"'.format(data['countryCode']))
data['country'] = enumdata.country_list[data['countryId']][0]
windowsIdDict[data['windowsKey'], data['countryId']] = data
if badZones:
sys.stderr.write('\n\t'.join(["\nUnknown Windows ID, please add:"] + sorted(badZones))
+ "\nto the windowIdList in cldr2qtimezone.py\n\n")
- raise xpathlite.Error("Unknown Windows IDs")
+ raise Error('Unknown Windows IDs')
print "Input file parsed, now writing data"
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
new file mode 100644
index 0000000000..0d5c2acbbb
--- /dev/null
+++ b/util/locale_database/localetools.py
@@ -0,0 +1,65 @@
+#############################################################################
+##
+## Copyright (C) 2020 The Qt Company Ltd.
+## Contact: https://www.qt.io/licensing/
+##
+## This file is part of the test suite of the Qt Toolkit.
+##
+## $QT_BEGIN_LICENSE:GPL-EXCEPT$
+## Commercial License Usage
+## Licensees holding valid commercial Qt licenses may use this file in
+## accordance with the commercial license agreement provided with the
+## Software or, alternatively, in accordance with the terms contained in
+## a written agreement between you and The Qt Company. For licensing terms
+## and conditions see https://www.qt.io/terms-conditions. For further
+## information use the contact form at https://www.qt.io/contact-us.
+##
+## GNU General Public License Usage
+## Alternatively, this file may be used under the terms of the GNU
+## General Public License version 3 as published by the Free Software
+## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+## included in the packaging of this file. Please review the following
+## information to ensure the GNU General Public License requirements will
+## be met: https://www.gnu.org/licenses/gpl-3.0.html.
+##
+## $QT_END_LICENSE$
+##
+#############################################################################
+"""Utilities shared among the CLDR extraction tools.
+Functions:
+ unicode2hex() -- converts unicode text to UCS-2 in hex form.
+ wrap_list() -- map list to comma-separated string, 20 entries per line.
+
+Classes:
+ Error -- A shared error class.
+"""
+
+class Error (StandardError):
+ __upinit = StandardError.__init__
+ def __init__(self, msg, *args):
+ self.__upinit(msg, *args)
+ self.message = msg
+ def __str__(self):
+ return self.message
+
+def unicode2hex(s):
+ lst = []
+ for x in s:
+ v = ord(x)
+ if v > 0xFFFF:
+ # make a surrogate pair
+ # copied from qchar.h
+ high = (v >> 10) + 0xd7c0
+ low = (v % 0x400 + 0xdc00)
+ lst.append(hex(high))
+ lst.append(hex(low))
+ else:
+ lst.append(hex(v))
+ return lst
+
+def wrap_list(lst):
+ def split(lst, size):
+ while lst:
+ head, lst = lst[:size], lst[size:]
+ yield head
+ return ",\n".join(", ".join(x) for x in split(lst, 20))
diff --git a/util/locale_database/qlocalexml.py b/util/locale_database/qlocalexml.py
index 8289bd785a..0b962157d2 100644
--- a/util/locale_database/qlocalexml.py
+++ b/util/locale_database/qlocalexml.py
@@ -39,7 +39,7 @@ Support:
from __future__ import print_function
from xml.sax.saxutils import escape
-from xpathlite import Error
+from localetools import Error
# Tools used by Locale:
def camel(seq):
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index eb76f02faa..c54c3953ae 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -37,9 +37,10 @@ import os
import sys
import tempfile
import datetime
-from enumdata import language_aliases, country_aliases, script_aliases
from qlocalexml import QLocaleXmlReader
+from enumdata import language_aliases, country_aliases, script_aliases
+from localetools import unicode2hex, wrap_list, Error
# TODO: Make calendars a command-line parameter
# map { CLDR name: Qt file name }
@@ -59,19 +60,6 @@ generated_template = """
"""
-class Error:
- def __init__(self, msg):
- self.msg = msg
- def __str__(self):
- return self.msg
-
-def wrap_list(lst):
- def split(lst, size):
- while lst:
- head, lst = lst[:size], lst[size:]
- yield head
- return ",\n".join(", ".join(x) for x in split(lst, 20))
-
def fixedScriptName(name, dupes):
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
name = ''.join(word[0].upper() + word[1:] for word in name.split())
@@ -127,21 +115,6 @@ def compareLocaleKeys(key1, key2):
return key1[1] - key2[1]
-def unicode2hex(s):
- lst = []
- for x in s:
- v = ord(x)
- if v > 0xFFFF:
- # make a surrogate pair
- # copied from qchar.h
- high = (v >> 10) + 0xd7c0
- low = (v % 0x400 + 0xdc00)
- lst.append(hex(high))
- lst.append(hex(low))
- else:
- lst.append(hex(v))
- return lst
-
class StringDataToken:
def __init__(self, index, length):
if index > 0xFFFF or length > 0xFFFF:
diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py
index 97efaaab41..3da8b24656 100644
--- a/util/locale_database/xpathlite.py
+++ b/util/locale_database/xpathlite.py
@@ -31,6 +31,8 @@ import sys
import os
import xml.dom.minidom
+from localetools import Error
+
class DraftResolution:
# See http://www.unicode.org/cldr/process.html for description
unconfirmed = 'unconfirmed'
@@ -43,12 +45,6 @@ class DraftResolution:
def toInt(self):
return DraftResolution._values[self.resolution]
-class Error:
- def __init__(self, msg):
- self.msg = msg
- def __str__(self):
- return self.msg
-
doc_cache = {}
def parseDoc(file):
if not doc_cache.has_key(file):