summaryrefslogtreecommitdiffstats
path: root/util/locale_database/qlocalexml2cpp.py
diff options
context:
space:
mode:
author: Edward Welbourne <edward.welbourne@qt.io> 2020-02-25 12:30:06 +0100
committer: Edward Welbourne <eddy@chaos.org.uk> 2020-04-02 19:42:34 +0100
commit: 4d9f1a87de7a6e50e89f96836bc2f0cf6e229dda (patch)
tree: d23d09b398e9950303eab7033a21b4c436cc5f7f /util/locale_database/qlocalexml2cpp.py
parent: a20697a3940ede60b2fd5eac0ffd1a57b132191a (diff)
Move qlocalexml2cpp.py's XML-reading to QLocaleXmlReader
This new class mirrors the existing QLocaleXmlWriter and places the two side-by-side in qlocalexml.py, rather than having the writing and reading in separate places.

Made judicious use of transformed versions of mappings to save repeated iteration of a mapping's entries to do lookups on first entries of pair-values; several (id, name, code) data-sets are sometimes indexed by id, sometimes by name.

Reworked the default_map, that the complicated compareLocaleKeys() used in sorting locale keys, to map IDs instead of names; the function also needed the locale_map so that it could convert IDs to names, which we can skip by going directly with IDs.

Task-number: QTBUG-81344
Change-Id: Iff6a97f7f0755b56dda70d8a6796ec074c558910
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
Diffstat (limited to 'util/locale_database/qlocalexml2cpp.py')
-rwxr-xr-x  util/locale_database/qlocalexml2cpp.py  345
1 files changed, 68 insertions, 277 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 7c00980bc4..eb76f02faa 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python2
#############################################################################
##
-## Copyright (C) 2018 The Qt Company Ltd.
+## Copyright (C) 2020 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -37,10 +37,9 @@ import os
import sys
import tempfile
import datetime
-import xml.dom.minidom
from enumdata import language_aliases, country_aliases, script_aliases
-from qlocalexml import Locale
+from qlocalexml import QLocaleXmlReader
# TODO: Make calendars a command-line parameter
# map { CLDR name: Qt file name }
@@ -73,55 +72,6 @@ def wrap_list(lst):
yield head
return ",\n".join(", ".join(x) for x in split(lst, 20))
-def isNodeNamed(elt, name, TYPE=xml.dom.minidom.Node.ELEMENT_NODE):
- return elt.nodeType == TYPE and elt.nodeName == name
-
-def firstChildElt(parent, name):
- child = parent.firstChild
- while child:
- if isNodeNamed(child, name):
- return child
- child = child.nextSibling
-
- raise Error('No %s child found' % name)
-
-def eachEltInGroup(parent, group, key):
- try:
- element = firstChildElt(parent, group).firstChild
- except Error:
- element = None
-
- while element:
- if isNodeNamed(element, key):
- yield element
- element = element.nextSibling
-
-def eltWords(elt):
- child = elt.firstChild
- while child:
- if child.nodeType == elt.TEXT_NODE:
- yield child.nodeValue
- child = child.nextSibling
-
-def firstChildText(elt, key):
- return ' '.join(eltWords(firstChildElt(elt, key)))
-
-def loadMap(doc, category):
- return dict((int(firstChildText(element, 'id')),
- (firstChildText(element, 'name'),
- firstChildText(element, 'code')))
- for element in eachEltInGroup(doc.documentElement,
- category + 'List', category))
-
-def loadLikelySubtagsMap(doc):
- def triplet(element, keys=('language', 'script', 'country')):
- return tuple(firstChildText(element, key) for key in keys)
-
- return dict((i, {'from': triplet(firstChildElt(elt, "from")),
- 'to': triplet(firstChildElt(elt, "to"))})
- for i, elt in enumerate(eachEltInGroup(doc.documentElement,
- 'likelySubtags', 'likelySubtag')))
-
def fixedScriptName(name, dupes):
# Don't .capitalize() as some names are already camel-case (see enumdata.py):
name = ''.join(word[0].upper() + word[1:] for word in name.split())
@@ -142,106 +92,40 @@ def fixedLanguageName(name, dupes):
return name.replace(" ", "") + "Language"
return name.replace(" ", "")
-def findDupes(country_map, language_map):
- country_set = set(v[0] for a, v in country_map.iteritems())
- language_set = set(v[0] for a, v in language_map.iteritems())
- return country_set & language_set
-
-def languageNameToId(name, language_map):
- for key in language_map.keys():
- if language_map[key][0] == name:
- return key
- return -1
-
-def scriptNameToId(name, script_map):
- for key in script_map.keys():
- if script_map[key][0] == name:
- return key
- return -1
-
-def countryNameToId(name, country_map):
- for key in country_map.keys():
- if country_map[key][0] == name:
- return key
- return -1
-
-def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
- result = {}
-
- for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
- locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k), calendars.keys())
- language_id = languageNameToId(locale.language, language_map)
- if language_id == -1:
- sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
- script_id = scriptNameToId(locale.script, script_map)
- if script_id == -1:
- sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script)
- country_id = countryNameToId(locale.country, country_map)
- if country_id == -1:
- sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
-
- if language_id != 1: # C
- if country_id == 0:
- sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)
-
- if script_id == 0:
- # find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
- for key in likely_subtags_map.keys():
- tmp = likely_subtags_map[key]
- if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
- locale.script = tmp["to"][1]
- script_id = scriptNameToId(locale.script, script_map)
- break
- if script_id == 0 and country_id != 0:
- # try with no country
- for key in likely_subtags_map.keys():
- tmp = likely_subtags_map[key]
- if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
- locale.script = tmp["to"][1]
- script_id = scriptNameToId(locale.script, script_map)
- break
-
- result[(language_id, script_id, country_id)] = locale
-
- return result
-
def compareLocaleKeys(key1, key2):
if key1 == key2:
return 0
- if key1[0] == key2[0]:
- l1 = compareLocaleKeys.locale_map[key1]
- l2 = compareLocaleKeys.locale_map[key2]
-
- if (l1.language, l1.script) in compareLocaleKeys.default_map.keys():
- default = compareLocaleKeys.default_map[(l1.language, l1.script)]
- if l1.country == default:
- return -1
- if l2.country == default:
- return 1
-
- if key1[1] != key2[1]:
- if (l2.language, l2.script) in compareLocaleKeys.default_map.keys():
- default = compareLocaleKeys.default_map[(l2.language, l2.script)]
- if l2.country == default:
- return 1
- if l1.country == default:
- return -1
-
- if key1[1] != key2[1]:
- return key1[1] - key2[1]
- else:
+ if key1[0] != key2[0]: # First sort by language:
return key1[0] - key2[0]
- return key1[2] - key2[2]
+ defaults = compareLocaleKeys.default_map
+ # maps {(language, script): country} by ID
+ try:
+ country = defaults[key1[:2]]
+ except KeyError:
+ pass
+ else:
+ if key1[2] == country:
+ return -1
+ if key2[2] == country:
+ return 1
+ if key1[1] == key2[1]:
+ return key1[2] - key2[2]
+
+ try:
+ country = defaults[key2[:2]]
+ except KeyError:
+ pass
+ else:
+ if key2[2] == country:
+ return 1
+ if key1[2] == country:
+ return -1
+
+ return key1[1] - key2[1]
-def languageCount(language_id, locale_map):
- result = 0
- for key in locale_map.keys():
- if key[0] == language_id:
- result += 1
- return result
def unicode2hex(s):
lst = []
@@ -303,40 +187,6 @@ class StringData:
fd.write(wrap_list(self.data))
fd.write("\n};\n")
-def escapedString(s):
- result = ""
- i = 0
- while i < len(s):
- if s[i] == '"':
- result += '\\"'
- i += 1
- else:
- result += s[i]
- i += 1
- s = result
-
- line = ""
- need_escape = False
- result = ""
- for c in s:
- if ord(c) < 128 and (not need_escape or ord(c.lower()) < ord('a') or ord(c.lower()) > ord('f')):
- line += c
- need_escape = False
- else:
- line += "\\x%02x" % (ord(c))
- need_escape = True
- if len(line) > 80:
- result = result + "\n" + '"' + line + '"'
- line = ""
- line += "\\0"
- result = result + "\n" + '"' + line + '"'
- if result[0] == "\n":
- result = result[1:]
- return result
-
-def printEscapedString(s):
- print escapedString(s)
-
def currencyIsoCodeData(s):
if s:
return '{' + ",".join(str(ord(x)) for x in s) + '}'
@@ -370,83 +220,25 @@ def main():
s = qlocaledata_file.readline()
data_temp_file.write(GENERATED_BLOCK_START)
- doc = xml.dom.minidom.parse(qlocalexml)
- language_map = loadMap(doc, 'language')
- script_map = loadMap(doc, 'script')
- country_map = loadMap(doc, 'country')
- likely_subtags_map = loadLikelySubtagsMap(doc)
- default_map = {}
- for key in likely_subtags_map.keys():
- tmp = likely_subtags_map[key]
- if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry":
- default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2]
- locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
- dupes = findDupes(language_map, country_map)
-
- cldr_version = firstChildText(doc.documentElement, "version")
- data_temp_file.write(generated_template % (datetime.date.today(), cldr_version))
+ reader = QLocaleXmlReader(qlocalexml)
+ locale_map = dict(reader.loadLocaleMap(calendars, sys.stderr.write))
+ data_temp_file.write(generated_template % (datetime.date.today(), reader.cldrVersion))
# Likely subtags map
data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
- index = 0
- for key in likely_subtags_map.keys():
- tmp = likely_subtags_map[key]
- from_language = languageNameToId(tmp["from"][0], language_map)
- from_script = scriptNameToId(tmp["from"][1], script_map)
- from_country = countryNameToId(tmp["from"][2], country_map)
- to_language = languageNameToId(tmp["to"][0], language_map)
- to_script = scriptNameToId(tmp["to"][1], script_map)
- to_country = countryNameToId(tmp["to"][2], country_map)
-
- cmnt_from = ""
- if from_language != 0:
- cmnt_from = cmnt_from + language_map[from_language][1]
- else:
- cmnt_from = cmnt_from + "und"
- if from_script != 0:
- if cmnt_from:
- cmnt_from = cmnt_from + "_"
- cmnt_from = cmnt_from + script_map[from_script][1]
- if from_country != 0:
- if cmnt_from:
- cmnt_from = cmnt_from + "_"
- cmnt_from = cmnt_from + country_map[from_country][1]
- cmnt_to = ""
- if to_language != 0:
- cmnt_to = cmnt_to + language_map[to_language][1]
- else:
- cmnt_to = cmnt_to + "und"
- if to_script != 0:
- if cmnt_to:
- cmnt_to = cmnt_to + "_"
- cmnt_to = cmnt_to + script_map[to_script][1]
- if to_country != 0:
- if cmnt_to:
- cmnt_to = cmnt_to + "_"
- cmnt_to = cmnt_to + country_map[to_country][1]
-
- data_temp_file.write(" ")
- data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country))
- index += 1
- if index != len(likely_subtags_map):
- data_temp_file.write(",")
- else:
- data_temp_file.write(" ")
- data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to))
+ for had, have, got, give, last in reader.likelyMap():
+ data_temp_file.write(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
+ data_temp_file.write(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
+ data_temp_file.write(' ' if last else ',')
+ data_temp_file.write(' // {} -> {}\n'.format(had, got))
data_temp_file.write("};\n")
data_temp_file.write("\n")
# Locale index
data_temp_file.write("static const quint16 locale_index[] = {\n")
- index = 0
- for key in language_map.keys():
- i = 0
- count = languageCount(key, locale_map)
- if count > 0:
- i = index
- index += count
- data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0]))
+ for index, name in reader.languageIndices(tuple(k[0] for k in locale_map)):
+ data_temp_file.write('{:6d}, // {}\n'.format(index, name))
data_temp_file.write(" 0 // trailing 0\n")
data_temp_file.write("};\n\n")
@@ -524,8 +316,7 @@ def main():
+ '\n')
locale_keys = locale_map.keys()
- compareLocaleKeys.default_map = default_map
- compareLocaleKeys.locale_map = locale_map
+ compareLocaleKeys.default_map = dict(reader.defaultMap())
locale_keys.sort(compareLocaleKeys)
line_format = (' { '
@@ -616,10 +407,10 @@ def main():
# Language name list
data_temp_file.write("static const char language_name_list[] =\n")
data_temp_file.write('"Default\\0"\n')
- for key in language_map.keys():
+ for key, value in reader.languages.items():
if key == 0:
continue
- data_temp_file.write('"' + language_map[key][0] + '\\0"\n')
+ data_temp_file.write('"' + value[0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -628,10 +419,10 @@ def main():
data_temp_file.write("static const quint16 language_name_index[] = {\n")
data_temp_file.write(" 0, // AnyLanguage\n")
index = 8
- for key in language_map.keys():
+ for key, value in reader.languages.items():
if key == 0:
continue
- language = language_map[key][0]
+ language = value[0]
data_temp_file.write("%6d, // %s\n" % (index, language))
index += len(language) + 1
data_temp_file.write("};\n")
@@ -641,10 +432,10 @@ def main():
# Script name list
data_temp_file.write("static const char script_name_list[] =\n")
data_temp_file.write('"Default\\0"\n')
- for key in script_map.keys():
+ for key, value in reader.scripts.items():
if key == 0:
continue
- data_temp_file.write('"' + script_map[key][0] + '\\0"\n')
+ data_temp_file.write('"' + value[0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -653,10 +444,10 @@ def main():
data_temp_file.write("static const quint16 script_name_index[] = {\n")
data_temp_file.write(" 0, // AnyScript\n")
index = 8
- for key in script_map.keys():
+ for key, value in reader.scripts.items():
if key == 0:
continue
- script = script_map[key][0]
+ script = value[0]
data_temp_file.write("%6d, // %s\n" % (index, script))
index += len(script) + 1
data_temp_file.write("};\n")
@@ -666,10 +457,10 @@ def main():
# Country name list
data_temp_file.write("static const char country_name_list[] =\n")
data_temp_file.write('"Default\\0"\n')
- for key in country_map.keys():
+ for key, value in reader.countries.items():
if key == 0:
continue
- data_temp_file.write('"' + country_map[key][0] + '\\0"\n')
+ data_temp_file.write('"' + value[0] + '\\0"\n')
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -678,10 +469,10 @@ def main():
data_temp_file.write("static const quint16 country_name_index[] = {\n")
data_temp_file.write(" 0, // AnyCountry\n")
index = 8
- for key in country_map.keys():
+ for key, value in reader.countries.items():
if key == 0:
continue
- country = country_map[key][0]
+ country = value[0]
data_temp_file.write("%6d, // %s\n" % (index, country))
index += len(country) + 1
data_temp_file.write("};\n")
@@ -690,31 +481,31 @@ def main():
# Language code list
data_temp_file.write("static const unsigned char language_code_list[] =\n")
- for key in language_map.keys():
- code = language_map[key][1]
+ for key, value in reader.languages.items():
+ code = value[1]
if len(code) == 2:
code += r"\0"
- data_temp_file.write('"%2s" // %s\n' % (code, language_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, value[0]))
data_temp_file.write(";\n")
data_temp_file.write("\n")
# Script code list
data_temp_file.write("static const unsigned char script_code_list[] =\n")
- for key in script_map.keys():
- code = script_map[key][1]
+ for key, value in reader.scripts.items():
+ code = value[1]
for i in range(4 - len(code)):
code += "\\0"
- data_temp_file.write('"%2s" // %s\n' % (code, script_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, value[0]))
data_temp_file.write(";\n")
# Country code list
data_temp_file.write("static const unsigned char country_code_list[] =\n")
- for key in country_map.keys():
- code = country_map[key][1]
+ for key, value in reader.countries.items():
+ code = value[1]
if len(code) == 2:
code += "\\0"
- data_temp_file.write('"%2s" // %s\n' % (code, country_map[key][0]))
+ data_temp_file.write('"%2s" // %s\n' % (code, value[0]))
data_temp_file.write(";\n")
data_temp_file.write("\n")
@@ -748,7 +539,7 @@ def main():
calendar_temp_file.write(s)
s = calendar_template_file.readline()
calendar_temp_file.write(GENERATED_BLOCK_START)
- calendar_temp_file.write(generated_template % (datetime.date.today(), cldr_version))
+ calendar_temp_file.write(generated_template % (datetime.date.today(), reader.cldrVersion))
calendar_temp_file.write("static const QCalendarLocale locale_data[] = {\n")
calendar_temp_file.write(' // '
# IDs, width 7 (6 + comma)
@@ -805,8 +596,8 @@ def main():
# Language enum
qlocaleh_temp_file.write(" enum Language {\n")
language = None
- for key, value in language_map.items():
- language = fixedLanguageName(value[0], dupes)
+ for key, value in reader.languages.items():
+ language = fixedLanguageName(value[0], reader.dupes)
qlocaleh_temp_file.write(" " + language + " = " + str(key) + ",\n")
qlocaleh_temp_file.write("\n " +
@@ -822,8 +613,8 @@ def main():
# Script enum
qlocaleh_temp_file.write(" enum Script {\n")
script = None
- for key, value in script_map.items():
- script = fixedScriptName(value[0], dupes)
+ for key, value in reader.scripts.items():
+ script = fixedScriptName(value[0], reader.dupes)
qlocaleh_temp_file.write(" " + script + " = " + str(key) + ",\n")
qlocaleh_temp_file.write("\n " +
",\n ".join('%s = %s' % pair
@@ -836,8 +627,8 @@ def main():
# Country enum
qlocaleh_temp_file.write(" enum Country {\n")
country = None
- for key, value in country_map.items():
- country = fixedCountryName(value[0], dupes)
+ for key, value in reader.countries.items():
+ country = fixedCountryName(value[0], reader.dupes)
qlocaleh_temp_file.write(" " + country + " = " + str(key) + ",\n")
qlocaleh_temp_file.write("\n " +
",\n ".join('%s = %s' % pair
@@ -872,7 +663,7 @@ def main():
DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
while s:
if DOCSTRING in s:
- qlocaleqdoc_temp_file.write(DOCSTRING + "v" + cldr_version + ".\n")
+ qlocaleqdoc_temp_file.write(DOCSTRING + "v" + reader.cldrVersion + ".\n")
else:
qlocaleqdoc_temp_file.write(s)
s = qlocaleqdoc_file.readline()