diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2019-05-16 14:31:54 +0200 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2019-05-20 20:42:10 +0200 |
commit | 248b6756da0d31c58672c0e356c3ec16e9088234 (patch) | |
tree | 4869c1ca3d1415a9b259f4afbc61a702fee6812b /util/locale_database | |
parent | cf909f0ef609c4581ebbe2f81c7ae0c5e43d653f (diff) |
Rename util/locale_database/ to include the e that was missing
It was misnamed local_database, quite missing the point of its name.
Change-Id: I73a4fdf24f53daac12304de1f443636d89afacb2
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'util/locale_database')
-rw-r--r-- | util/locale_database/README | 5 | ||||
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 663 | ||||
-rwxr-xr-x | util/locale_database/cldr2qtimezone.py | 431 | ||||
-rwxr-xr-x | util/locale_database/dateconverter.py | 107 | ||||
-rw-r--r-- | util/locale_database/enumdata.py | 878 | ||||
-rw-r--r-- | util/locale_database/formattags.txt | 23 | ||||
-rw-r--r-- | util/locale_database/localexml.py | 263 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 834 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.cpp | 449 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.h | 56 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.cpp | 76 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.h | 46 | ||||
-rw-r--r-- | util/locale_database/testlocales/main.cpp | 38 | ||||
-rw-r--r-- | util/locale_database/testlocales/testlocales.pro | 4 | ||||
-rw-r--r-- | util/locale_database/xpathlite.py | 264 |
15 files changed, 4137 insertions, 0 deletions
diff --git a/util/locale_database/README b/util/locale_database/README new file mode 100644 index 0000000000..8654968d66 --- /dev/null +++ b/util/locale_database/README @@ -0,0 +1,5 @@ +locale_database is used to generate qlocale data from CLDR. + +CLDR is the Common Locale Data Repository, a database for localized +data (like date formats, country names etc). It is provided by the +Unicode consortium. diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py new file mode 100755 index 0000000000..4ce0a6e3b1 --- /dev/null +++ b/util/locale_database/cldr2qlocalexml.py @@ -0,0 +1,663 @@ +#!/usr/bin/env python2 +############################################################################# +## +## Copyright (C) 2017 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. 
+## +## $QT_END_LICENSE$ +## +############################################################################# +"""Convert CLDR data to qLocaleXML + +The CLDR data can be downloaded from CLDR_, which has a sub-directory +for each version; you need the ``core.zip`` file for your version of +choice (typically the latest). This script has had updates to cope up +to v35; for later versions, we may need adaptations. Unpack the +downloaded ``core.zip`` and check it has a common/main/ sub-directory: +pass the path of that sub-directory to this script as its single +command-line argument. Save its standard output (but not error) to a +file for later processing by ``./qlocalexml2cpp.py`` + +When you update the CLDR data, be sure to also update +src/corelib/tools/qt_attribution.json's entry for unicode-cldr. Check +this script's output for unknown language, country or script messages; +if any can be resolved, use their entry in common/main/en.xml to +append new entries to enumdata.py's lists and update documentation in +src/corelib/tools/qlocale.qdoc, adding the new entries in alphabetic +order. + +.. _CLDR: ftp://unicode.org/Public/cldr/ +""" + +import os +import sys +import re +import textwrap + +import enumdata +import xpathlite +from xpathlite import DraftResolution, findAlias, findEntry, findTagsInFile +from dateconverter import convert_date +from localexml import Locale + +findEntryInFile = xpathlite._findEntryInFile +def wrappedwarn(prefix, tokens): + return sys.stderr.write( + '\n'.join(textwrap.wrap(prefix + ', '.join(tokens), + subsequent_indent=' ', width=80)) + '\n') + +def parse_number_format(patterns, data): + # this is a very limited parsing of the number format for currency only. 
+ def skip_repeating_pattern(x): + p = x.replace('0', '#').replace(',', '').replace('.', '') + seen = False + result = '' + for c in p: + if c == '#': + if seen: + continue + seen = True + else: + seen = False + result = result + c + return result + patterns = patterns.split(';') + result = [] + for pattern in patterns: + pattern = skip_repeating_pattern(pattern) + pattern = pattern.replace('#', "%1") + # according to http://www.unicode.org/reports/tr35/#Number_Format_Patterns + # there can be doubled or trippled currency sign, however none of the + # locales use that. + pattern = pattern.replace(u'\xa4', "%2") + pattern = pattern.replace("''", "###").replace("'", '').replace("###", "'") + pattern = pattern.replace('-', data['minus']) + pattern = pattern.replace('+', data['plus']) + result.append(pattern) + return result + +def parse_list_pattern_part_format(pattern): + # This is a very limited parsing of the format for list pattern part only. + return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") + +def unit_quantifiers(find, path, stem, suffix, known, + # Stop at exa/exbi: 16 exbi = 2^{64} < zetta = + # 1000^7 < zebi = 2^{70}, the next quantifiers up: + si_quantifiers = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa')): + """Work out the unit quantifiers. + + Unfortunately, the CLDR data only go up to terabytes and we want + all the way to exabytes; but we can recognize the SI quantifiers + as prefixes, strip and identify the tail as the localized + translation for 'B' (e.g. French has 'octet' for 'byte' and uses + ko, Mo, Go, To from which we can extrapolate Po, Eo). + + Should be called first for the SI quantifiers, with suffix = 'B', + then for the IEC ones, with suffix = 'iB'; the list known + (initially empty before first call) is used to let the second call + know what the first learned about the localized unit. 
+ """ + if suffix == 'B': # first call, known = [] + tail = suffix + for q in si_quantifiers: + it = find(path, stem % q) + # kB for kilobyte, in contrast with KiB for IEC: + q = q[0] if q == 'kilo' else q[0].upper() + if not it: + it = q + tail + elif it.startswith(q): + rest = it[1:] + tail = rest if all(rest == k for k in known) else suffix + known.append(rest) + yield it + else: # second call, re-using first's known + assert suffix == 'iB' + if known: + byte = known.pop() + if all(byte == k for k in known): + suffix = 'i' + byte + for q in si_quantifiers: + yield find(path, stem % q[:2], + # Those don't (yet, v31) exist in CLDR, so we always fall back to: + q[0].upper() + suffix) + +def generateLocaleInfo(path): + if not path.endswith(".xml"): + return {} + + # skip legacy/compatibility ones + alias = findAlias(path) + if alias: + raise xpathlite.Error('alias to "%s"' % alias) + + def code(tag): + return findEntryInFile(path, 'identity/' + tag, attribute="type")[0] + + return _generateLocaleInfo(path, code('language'), code('script'), + code('territory'), code('variant')) + +def getNumberSystems(cache={}): + """Cached look-up of number system information. + + Pass no arguments. 
Returns a mapping from number system names to, + for each system, a mapping with keys u'digits', u'type' and + u'id'\n""" + if not cache: + for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'numberingSystems.xml'), + 'numberingSystems'): + # ns has form: [u'numberingSystem', [(u'digits', u'0123456789'), (u'type', u'numeric'), (u'id', u'latn')]] + entry = dict(ns[1]) + name = entry[u'id'] + if u'digits' in entry and ord(entry[u'digits'][0]) > 0xffff: + # FIXME, QTBUG-69324: make this redundant: + # omit number system if zero doesn't fit in single-char16 UTF-16 :-( + sys.stderr.write('skipping number system "%s" [can\'t represent its zero, U+%X]\n' + % (name, ord(entry[u'digits'][0]))) + else: + cache[name] = entry + return cache + +def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""): + if not path.endswith(".xml"): + return {} + + if language_code == 'root': + # just skip it + return {} + + # we do not support variants + # ### actually there is only one locale with variant: en_US_POSIX + # does anybody care about it at all? + if variant_code: + raise xpathlite.Error('we do not support variants ("%s")' % variant_code) + + language_id = enumdata.languageCodeToId(language_code) + if language_id <= 0: + raise xpathlite.Error('unknown language code "%s"' % language_code) + + script_id = enumdata.scriptCodeToId(script_code) + if script_id == -1: + raise xpathlite.Error('unknown script code "%s"' % script_code) + + # we should handle fully qualified names with the territory + if not country_code: + return {} + country_id = enumdata.countryCodeToId(country_code) + if country_id <= 0: + raise xpathlite.Error('unknown country code "%s"' % country_code) + + # So we say we accept only those values that have "contributed" or + # "approved" resolution. see http://www.unicode.org/cldr/process.html + # But we only respect the resolution for new datas for backward + # compatibility. 
+ draft = DraftResolution.contributed + + result = dict( + language=enumdata.language_list[language_id][0], + language_code=language_code, language_id=language_id, + script=enumdata.script_list[script_id][0], + script_code=script_code, script_id=script_id, + country=enumdata.country_list[country_id][0], + country_code=country_code, country_id=country_id, + variant_code=variant_code) + + (dir_name, file_name) = os.path.split(path) + def from_supplement(tag, + path=os.path.join(dir_name, '..', 'supplemental', + 'supplementalData.xml')): + return findTagsInFile(path, tag) + currencies = from_supplement('currencyData/region[iso3166=%s]' % country_code) + result['currencyIsoCode'] = '' + result['currencyDigits'] = 2 + result['currencyRounding'] = 1 + if currencies: + for e in currencies: + if e[0] == 'currency': + t = [x[1] == 'false' for x in e[1] if x[0] == 'tender'] + if t and t[0]: + pass + elif not any(x[0] == 'to' for x in e[1]): + result['currencyIsoCode'] = (x[1] for x in e[1] if x[0] == 'iso4217').next() + break + if result['currencyIsoCode']: + t = from_supplement("currencyData/fractions/info[iso4217=%s]" + % result['currencyIsoCode']) + if t and t[0][0] == 'info': + result['currencyDigits'] = (int(x[1]) for x in t[0][1] if x[0] == 'digits').next() + result['currencyRounding'] = (int(x[1]) for x in t[0][1] if x[0] == 'rounding').next() + numbering_system = None + try: + numbering_system = findEntry(path, "numbers/defaultNumberingSystem") + except xpathlite.Error: + pass + def findEntryDef(path, xpath, value=''): + try: + return findEntry(path, xpath) + except xpathlite.Error: + return value + def get_number_in_system(path, xpath, numbering_system): + if numbering_system: + try: + return findEntry(path, xpath + "[numberSystem=" + numbering_system + "]") + except xpathlite.Error: + # in CLDR 1.9 number system was refactored for numbers (but not for currency) + # so if previous findEntry doesn't work we should try this: + try: + return findEntry(path, 
xpath.replace("/symbols/", "/symbols[numberSystem=" + numbering_system + "]/")) + except xpathlite.Error: + # fallback to default + pass + return findEntry(path, xpath) + + result['decimal'] = get_number_in_system(path, "numbers/symbols/decimal", numbering_system) + result['group'] = get_number_in_system(path, "numbers/symbols/group", numbering_system) + result['list'] = get_number_in_system(path, "numbers/symbols/list", numbering_system) + result['percent'] = get_number_in_system(path, "numbers/symbols/percentSign", numbering_system) + try: + result['zero'] = getNumberSystems()[numbering_system][u"digits"][0] + except Exception as e: + sys.stderr.write("Native zero detection problem: %s\n" % repr(e)) + result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system) + result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system) + result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system) + result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower() + result['quotationStart'] = findEntry(path, "delimiters/quotationStart") + result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd") + result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart") + result['alternateQuotationEnd'] = findEntry(path, "delimiters/alternateQuotationEnd") + result['listPatternPartStart'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[start]")) + result['listPatternPartMiddle'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[middle]")) + result['listPatternPartEnd'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[end]")) + result['listPatternPartTwo'] = parse_list_pattern_part_format(findEntry(path, "listPatterns/listPattern/listPatternPart[2]")) + result['am'] = findEntry(path, 
"dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[am]", draft) + result['pm'] = findEntry(path, "dates/calendars/calendar[gregorian]/dayPeriods/dayPeriodContext[format]/dayPeriodWidth[wide]/dayPeriod[pm]", draft) + result['longDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[full]/dateFormat/pattern")) + result['shortDateFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/dateFormats/dateFormatLength[short]/dateFormat/pattern")) + result['longTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[full]/timeFormat/pattern")) + result['shortTimeFormat'] = convert_date(findEntry(path, "dates/calendars/calendar[gregorian]/timeFormats/timeFormatLength[short]/timeFormat/pattern")) + + endonym = None + if country_code and script_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s_%s]" % (language_code, script_code, country_code)) + if not endonym and script_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, script_code)) + if not endonym and country_code: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s_%s]" % (language_code, country_code)) + if not endonym: + endonym = findEntryDef(path, "localeDisplayNames/languages/language[type=%s]" % (language_code)) + result['language_endonym'] = endonym + result['country_endonym'] = findEntryDef(path, "localeDisplayNames/territories/territory[type=%s]" % (country_code)) + + currency_format = get_number_in_system(path, "numbers/currencyFormats/currencyFormatLength/currencyFormat/pattern", numbering_system) + currency_format = parse_number_format(currency_format, result) + result['currencyFormat'] = currency_format[0] + result['currencyNegativeFormat'] = '' + if len(currency_format) > 1: + result['currencyNegativeFormat'] = 
currency_format[1] + + result['currencySymbol'] = '' + result['currencyDisplayName'] = '' + if result['currencyIsoCode']: + result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) + result['currencyDisplayName'] = ';'.join( + findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode'] + + ']/displayName' + tail) + for tail in ['',] + [ + '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') + ]) + ';' + + def findUnitDef(path, stem, fallback=''): + # The displayName for a quantified unit in en.xml is kByte + # instead of kB (etc.), so prefer any unitPattern provided: + for count in ('many', 'few', 'two', 'other', 'zero', 'one'): + try: + ans = findEntry(path, stem + 'unitPattern[count=%s]' % count) + except xpathlite.Error: + continue + + # TODO: epxloit count-handling, instead of discarding placeholders + if ans.startswith('{0}'): + ans = ans[3:].lstrip() + if ans: + return ans + + return findEntryDef(path, stem + 'displayName', fallback) + + # First without quantifier, then quantified each way: + result['byte_unit'] = findEntryDef( + path, 'units/unitLength[type=long]/unit[type=digital-byte]/displayName', + 'bytes') + stem = 'units/unitLength[type=short]/unit[type=digital-%sbyte]/' + known = [] # cases where we *do* have a given version: + result['byte_si_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem, 'B', known)) + # IEC 60027-2 + # http://physics.nist.gov/cuu/Units/binary.html + result['byte_iec_quantified'] = ';'.join(unit_quantifiers(findUnitDef, path, stem % '%sbi', 'iB', known)) + + # Used for month and day data: + namings = ( + ('standaloneLong', 'stand-alone', 'wide'), + ('standaloneShort', 'stand-alone', 'abbreviated'), + ('standaloneNarrow', 'stand-alone', 'narrow'), + ('long', 'format', 'wide'), + ('short', 'format', 'abbreviated'), + ('narrow', 'format', 'narrow'), + ) + + # Month data: + for cal in ('gregorian',): # We shall want to add 
to this + stem = 'dates/calendars/calendar[' + cal + ']/months/' + for (key, mode, size) in namings: + prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' + result[key + 'Months'] = ';'.join( + findEntry(path, stem + prop + "month[%d]" % i) + for i in range(1, 13)) + ';' + + # Day data (for Gregorian, at least): + stem = 'dates/calendars/calendar[gregorian]/days/' + days = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat') + for (key, mode, size) in namings: + prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' + result[key + 'Days'] = ';'.join( + findEntry(path, stem + prop + '[' + day + ']') + for day in days) + ';' + + return Locale(result) + +def addEscapes(s): + result = '' + for c in s: + n = ord(c) + if n < 128: + result += c + else: + result += "\\x" + result += "%02x" % (n) + return result + +def unicodeStr(s): + utf8 = s.encode('utf-8') + return "<size>" + str(len(utf8)) + "</size><data>" + addEscapes(utf8) + "</data>" + +def usage(): + print "Usage: cldr2qlocalexml.py <path-to-cldr-main>" + sys.exit() + +def integrateWeekData(filePath): + if not filePath.endswith(".xml"): + return {} + + def lookup(key): + return findEntryInFile(filePath, key, attribute='territories')[0].split() + days = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun') + + firstDayByCountryCode = {} + for day in days: + for countryCode in lookup('weekData/firstDay[day=%s]' % day): + firstDayByCountryCode[countryCode] = day + + weekendStartByCountryCode = {} + for day in days: + for countryCode in lookup('weekData/weekendStart[day=%s]' % day): + weekendStartByCountryCode[countryCode] = day + + weekendEndByCountryCode = {} + for day in days: + for countryCode in lookup('weekData/weekendEnd[day=%s]' % day): + weekendEndByCountryCode[countryCode] = day + + for (key, locale) in locale_database.iteritems(): + countryCode = locale.country_code + if countryCode in firstDayByCountryCode: + locale.firstDayOfWeek = firstDayByCountryCode[countryCode] + else: + 
locale.firstDayOfWeek = firstDayByCountryCode["001"] + + if countryCode in weekendStartByCountryCode: + locale.weekendStart = weekendStartByCountryCode[countryCode] + else: + locale.weekendStart = weekendStartByCountryCode["001"] + + if countryCode in weekendEndByCountryCode: + locale.weekendEnd = weekendEndByCountryCode[countryCode] + else: + locale.weekendEnd = weekendEndByCountryCode["001"] + +def splitLocale(name): + """Split name into (language, script, territory) triple as generator. + + Ignores any trailing fields (with a warning), leaves script (a capitalised + four-letter token) or territory (either a number or an all-uppercase token) + empty if unspecified, returns a single-entry generator if name is a single + tag (i.e. contains no underscores). Always yields 1 or 3 values, never 2.""" + tags = iter(name.split('_')) + yield tags.next() # Language + tag = tags.next() + + # Script is always four letters, always capitalised: + if len(tag) == 4 and tag[0].isupper() and tag[1:].islower(): + yield tag + try: + tag = tags.next() + except StopIteration: + tag = '' + else: + yield '' + + # Territory is upper-case or numeric: + if tag and tag.isupper() or tag.isdigit(): + yield tag + tag = '' + else: + yield '' + + # If nothing is left, StopIteration will avoid the warning: + tag = (tag if tag else tags.next(),) + sys.stderr.write('Ignoring unparsed cruft %s in %s\n' % ('_'.join(tag + tuple(tags)), name)) + +if len(sys.argv) != 2: + usage() + +cldr_dir = sys.argv[1] + +if not os.path.isdir(cldr_dir): + usage() + +cldr_files = os.listdir(cldr_dir) + +locale_database = {} + +# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content +defaultContent_locales = [] +for ns in findTagsInFile(os.path.join(cldr_dir, '..', 'supplemental', + 'supplementalMetadata.xml'), + 'metadata/defaultContent'): + for data in ns[1:][0]: + if data[0] == u"locales": + defaultContent_locales += data[1].split() + +skips = [] +for file in defaultContent_locales: + try: + 
language_code, script_code, country_code = splitLocale(file) + except ValueError: + sys.stderr.write('skipping defaultContent locale "' + file + '" [neither two nor three tags]\n') + continue + + if not (script_code or country_code): + sys.stderr.write('skipping defaultContent locale "' + file + '" [second tag is neither script nor territory]\n') + continue + + try: + l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code) + if not l: + skips.append(file) + continue + except xpathlite.Error as e: + sys.stderr.write('skipping defaultContent locale "%s" (%s)\n' % (file, str(e))) + continue + + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l + +if skips: + wrappedwarn('skipping defaultContent locales [no locale info generated]: ', skips) + skips = [] + +for file in cldr_files: + try: + l = generateLocaleInfo(cldr_dir + "/" + file) + if not l: + skips.append(file) + continue + except xpathlite.Error as e: + sys.stderr.write('skipping file "%s" (%s)\n' % (file, str(e))) + continue + + locale_database[(l.language_id, l.script_id, l.country_id, l.variant_code)] = l + +if skips: + wrappedwarn('skipping files [no locale info generated]: ', skips) + +integrateWeekData(cldr_dir+"/../supplemental/supplementalData.xml") +locale_keys = locale_database.keys() +locale_keys.sort() + +cldr_version = 'unknown' +ldml = open(cldr_dir+"/../dtd/ldml.dtd", "r") +for line in ldml: + if 'version cldrVersion CDATA #FIXED' in line: + cldr_version = line.split('"')[1] + +print "<localeDatabase>" +print " <version>" + cldr_version + "</version>" +print " <languageList>" +for id in enumdata.language_list: + l = enumdata.language_list[id] + print " <language>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </language>" +print " </languageList>" + +print " <scriptList>" +for id in enumdata.script_list: + l = enumdata.script_list[id] + print " <script>" 
+ print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </script>" +print " </scriptList>" + +print " <countryList>" +for id in enumdata.country_list: + l = enumdata.country_list[id] + print " <country>" + print " <name>" + l[0] + "</name>" + print " <id>" + str(id) + "</id>" + print " <code>" + l[1] + "</code>" + print " </country>" +print " </countryList>" + +def _parseLocale(l): + language = "AnyLanguage" + script = "AnyScript" + country = "AnyCountry" + + if l == "und": + raise xpathlite.Error("we are treating unknown locale like C") + + parsed = splitLocale(l) + language_code = parsed.next() + script_code = country_code = '' + try: + script_code, country_code = parsed + except ValueError: + pass + + if language_code != "und": + language_id = enumdata.languageCodeToId(language_code) + if language_id == -1: + raise xpathlite.Error('unknown language code "%s"' % language_code) + language = enumdata.language_list[language_id][0] + + if script_code: + script_id = enumdata.scriptCodeToId(script_code) + if script_id == -1: + raise xpathlite.Error('unknown script code "%s"' % script_code) + script = enumdata.script_list[script_id][0] + + if country_code: + country_id = enumdata.countryCodeToId(country_code) + if country_id == -1: + raise xpathlite.Error('unknown country code "%s"' % country_code) + country = enumdata.country_list[country_id][0] + + return (language, script, country) + +skips = [] +print " <likelySubtags>" +for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likelySubtags"): + tmp = {} + for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]] + tmp[data[0]] = data[1] + + try: + from_language, from_script, from_country = _parseLocale(tmp[u"from"]) + to_language, to_script, to_country = _parseLocale(tmp[u"to"]) + except xpathlite.Error as e: + if tmp[u'to'].startswith(tmp[u'from']) and str(e) == 'unknown language code 
"%s"' % tmp[u'from']: + skips.append(tmp[u'to']) + else: + sys.stderr.write('skipping likelySubtag "%s" -> "%s" (%s)\n' % (tmp[u"from"], tmp[u"to"], str(e))) + continue + # substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags + if to_country == "AnyCountry" and from_country != to_country: + to_country = from_country + if to_script == "AnyScript" and from_script != to_script: + to_script = from_script + + print " <likelySubtag>" + print " <from>" + print " <language>" + from_language + "</language>" + print " <script>" + from_script + "</script>" + print " <country>" + from_country + "</country>" + print " </from>" + print " <to>" + print " <language>" + to_language + "</language>" + print " <script>" + to_script + "</script>" + print " <country>" + to_country + "</country>" + print " </to>" + print " </likelySubtag>" +print " </likelySubtags>" +if skips: + wrappedwarn('skipping likelySubtags (for unknown language codes): ', skips) +print " <localeList>" + +Locale.C().toXml() +for key in locale_keys: + locale_database[key].toXml() + +print " </localeList>" +print "</localeDatabase>" diff --git a/util/locale_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py new file mode 100755 index 0000000000..256839317c --- /dev/null +++ b/util/locale_database/cldr2qtimezone.py @@ -0,0 +1,431 @@ +#!/usr/bin/env python2 +############################################################################# +## +## Copyright (C) 2016 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. 
For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. +## +## $QT_END_LICENSE$ +## +############################################################################# +"""Parse CLDR data for QTimeZone use with MS-Windows + +Script to parse the CLDR supplemental/windowsZones.xml file and encode +for use in QTimeZone. See ``./cldr2qlocalexml.py`` for where to get +the CLDR data. Pass its common/ directory as first parameter to this +script and the qtbase root directory as second parameter. It shall +update qtbase's src/corelib/tools/qtimezoneprivate_data_p.h ready for +use. 
+ +The XML structure is as follows: + + <supplementalData> + <version number="$Revision: 7825 $"/> + <generation date="$Date: 2012-10-10 14:45:31 -0700 (Wed, 10 Oct 2012) $"/> + <windowsZones> + <mapTimezones otherVersion="7dc0101" typeVersion="2012f"> + <!-- (UTC-08:00) Pacific Time (US & Canada) --> + <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/> + <mapZone other="Pacific Standard Time" territory="CA" type="America/Vancouver America/Dawson America/Whitehorse"/> + <mapZone other="Pacific Standard Time" territory="MX" type="America/Tijuana"/> + <mapZone other="Pacific Standard Time" territory="US" type="America/Los_Angeles"/> + <mapZone other="Pacific Standard Time" territory="ZZ" type="PST8PDT"/> + </mapTimezones> + </windowsZones> + </supplementalData> +""" + +import os +import sys +import datetime +import tempfile +import enumdata +import xpathlite +from xpathlite import DraftResolution +import re +import qlocalexml2cpp + +findAlias = xpathlite.findAlias +findEntry = xpathlite.findEntry +findEntryInFile = xpathlite._findEntryInFile +findTagsInFile = xpathlite.findTagsInFile +unicode2hex = qlocalexml2cpp.unicode2hex +wrap_list = qlocalexml2cpp.wrap_list + +class ByteArrayData: + def __init__(self): + self.data = [] + self.hash = {} + def append(self, s): + s = s + '\0' + if s in self.hash: + return self.hash[s] + + lst = unicode2hex(s) + index = len(self.data) + if index > 65535: + print "\n\n\n#error Data index is too big!" + sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) + sys.exit(1) + self.hash[s] = index + self.data += lst + return index + +# List of currently known Windows IDs. If script fails on missing ID please add it here +# Not public so may be safely changed. 
# Windows Key : [ Windows Id, Offset Seconds ]
windowsIdList = {
    1: [u'Afghanistan Standard Time', 16200],
    2: [u'Alaskan Standard Time', -32400],
    3: [u'Arab Standard Time', 10800],
    4: [u'Arabian Standard Time', 14400],
    5: [u'Arabic Standard Time', 10800],
    6: [u'Argentina Standard Time', -10800],
    7: [u'Atlantic Standard Time', -14400],
    8: [u'AUS Central Standard Time', 34200],
    9: [u'AUS Eastern Standard Time', 36000],
    10: [u'Azerbaijan Standard Time', 14400],
    11: [u'Azores Standard Time', -3600],
    12: [u'Bahia Standard Time', -10800],
    13: [u'Bangladesh Standard Time', 21600],
    14: [u'Belarus Standard Time', 10800],
    15: [u'Canada Central Standard Time', -21600],
    16: [u'Cape Verde Standard Time', -3600],
    17: [u'Caucasus Standard Time', 14400],
    18: [u'Cen. Australia Standard Time', 34200],
    19: [u'Central America Standard Time', -21600],
    20: [u'Central Asia Standard Time', 21600],
    21: [u'Central Brazilian Standard Time', -14400],
    22: [u'Central Europe Standard Time', 3600],
    23: [u'Central European Standard Time', 3600],
    24: [u'Central Pacific Standard Time', 39600],
    25: [u'Central Standard Time (Mexico)', -21600],
    26: [u'Central Standard Time', -21600],
    27: [u'China Standard Time', 28800],
    28: [u'Dateline Standard Time', -43200],
    29: [u'E. Africa Standard Time', 10800],
    30: [u'E. Australia Standard Time', 36000],
    31: [u'E. South America Standard Time', -10800],
    32: [u'Eastern Standard Time', -18000],
    33: [u'Eastern Standard Time (Mexico)', -18000],
    34: [u'Egypt Standard Time', 7200],
    35: [u'Ekaterinburg Standard Time', 18000],
    36: [u'Fiji Standard Time', 43200],
    37: [u'FLE Standard Time', 7200],
    38: [u'Georgian Standard Time', 14400],
    39: [u'GMT Standard Time', 0],
    40: [u'Greenland Standard Time', -10800],
    41: [u'Greenwich Standard Time', 0],
    42: [u'GTB Standard Time', 7200],
    43: [u'Hawaiian Standard Time', -36000],
    44: [u'India Standard Time', 19800],
    45: [u'Iran Standard Time', 12600],
    46: [u'Israel Standard Time', 7200],
    47: [u'Jordan Standard Time', 7200],
    48: [u'Kaliningrad Standard Time', 7200],
    49: [u'Korea Standard Time', 32400],
    50: [u'Libya Standard Time', 7200],
    51: [u'Line Islands Standard Time', 50400],
    52: [u'Magadan Standard Time', 36000],
    53: [u'Mauritius Standard Time', 14400],
    54: [u'Middle East Standard Time', 7200],
    55: [u'Montevideo Standard Time', -10800],
    56: [u'Morocco Standard Time', 0],
    57: [u'Mountain Standard Time (Mexico)', -25200],
    58: [u'Mountain Standard Time', -25200],
    59: [u'Myanmar Standard Time', 23400],
    60: [u'N. Central Asia Standard Time', 21600],
    61: [u'Namibia Standard Time', 3600],
    62: [u'Nepal Standard Time', 20700],
    63: [u'New Zealand Standard Time', 43200],
    64: [u'Newfoundland Standard Time', -12600],
    65: [u'North Asia East Standard Time', 28800],
    66: [u'North Asia Standard Time', 25200],
    67: [u'Pacific SA Standard Time', -10800],
    68: [u'E. Europe Standard Time', 7200],
    69: [u'Pacific Standard Time', -28800],
    70: [u'Pakistan Standard Time', 18000],
    71: [u'Paraguay Standard Time', -14400],
    72: [u'Romance Standard Time', 3600],
    73: [u'Russia Time Zone 3', 14400],
    74: [u'Russia Time Zone 10', 39600],
    75: [u'Russia Time Zone 11', 43200],
    76: [u'Russian Standard Time', 10800],
    77: [u'SA Eastern Standard Time', -10800],
    78: [u'SA Pacific Standard Time', -18000],
    79: [u'SA Western Standard Time', -14400],
    80: [u'Samoa Standard Time', 46800],
    81: [u'SE Asia Standard Time', 25200],
    82: [u'Singapore Standard Time', 28800],
    83: [u'South Africa Standard Time', 7200],
    84: [u'Sri Lanka Standard Time', 19800],
    85: [u'Syria Standard Time', 7200],
    86: [u'Taipei Standard Time', 28800],
    87: [u'Tasmania Standard Time', 36000],
    88: [u'Tokyo Standard Time', 32400],
    89: [u'Tonga Standard Time', 46800],
    90: [u'Turkey Standard Time', 7200],
    91: [u'Ulaanbaatar Standard Time', 28800],
    92: [u'US Eastern Standard Time', -18000],
    93: [u'US Mountain Standard Time', -25200],
    94: [u'UTC-02', -7200],
    95: [u'UTC-11', -39600],
    96: [u'UTC', 0],
    97: [u'UTC+12', 43200],
    98: [u'Venezuela Standard Time', -16200],
    99: [u'Vladivostok Standard Time', 36000],
    100: [u'W. Australia Standard Time', 28800],
    101: [u'W. Central Africa Standard Time', 3600],
    102: [u'W. Europe Standard Time', 3600],
    103: [u'West Asia Standard Time', 18000],
    104: [u'West Pacific Standard Time', 36000],
    105: [u'Yakutsk Standard Time', 32400],
    106: [u'North Korea Standard Time', 30600]
}

def windowsIdToKey(windowsId):
    """Map a Windows time-zone ID to its key in windowsIdList.

    Returns 0 when no entry carries that ID; the table's keys start at 1,
    so 0 never names a real entry.
    """
    for key, (name, _offset) in windowsIdList.items():
        if name == windowsId:
            return key
    return 0

# List of standard UTC IDs to use. Not public so may be safely changed.
# Do not remove IDs: they are part of the API/behavior guarantee.
# Key : [ UTC Id, Offset Seconds ]
utcIdList = {
    0 : [ u'UTC', 0 ], # Goes first so is default
    1 : [ u'UTC-14:00', -50400 ],
    2 : [ u'UTC-13:00', -46800 ],
    3 : [ u'UTC-12:00', -43200 ],
    4 : [ u'UTC-11:00', -39600 ],
    5 : [ u'UTC-10:00', -36000 ],
    6 : [ u'UTC-09:00', -32400 ],
    7 : [ u'UTC-08:00', -28800 ],
    8 : [ u'UTC-07:00', -25200 ],
    9 : [ u'UTC-06:00', -21600 ],
    10 : [ u'UTC-05:00', -18000 ],
    11 : [ u'UTC-04:30', -16200 ],
    12 : [ u'UTC-04:00', -14400 ],
    13 : [ u'UTC-03:30', -12600 ],
    14 : [ u'UTC-03:00', -10800 ],
    15 : [ u'UTC-02:00', -7200 ],
    16 : [ u'UTC-01:00', -3600 ],
    17 : [ u'UTC-00:00', 0 ],
    18 : [ u'UTC+00:00', 0 ],
    19 : [ u'UTC+01:00', 3600 ],
    20 : [ u'UTC+02:00', 7200 ],
    21 : [ u'UTC+03:00', 10800 ],
    22 : [ u'UTC+03:30', 12600 ],
    23 : [ u'UTC+04:00', 14400 ],
    24 : [ u'UTC+04:30', 16200 ],
    25 : [ u'UTC+05:00', 18000 ],
    26 : [ u'UTC+05:30', 19800 ],
    27 : [ u'UTC+05:45', 20700 ],
    28 : [ u'UTC+06:00', 21600 ],
    29 : [ u'UTC+06:30', 23400 ],
    30 : [ u'UTC+07:00', 25200 ],
    31 : [ u'UTC+08:00', 28800 ],
    32 : [ u'UTC+09:00', 32400 ],
    33 : [ u'UTC+09:30', 34200 ],
    34 : [ u'UTC+10:00', 36000 ],
    35 : [ u'UTC+11:00', 39600 ],
    36 : [ u'UTC+12:00', 43200 ],
    37 : [ u'UTC+13:00', 46800 ],
    38 : [ u'UTC+14:00', 50400 ],
    39 : [ u'UTC+08:30', 30600 ]
}

def usage():
    """Print the command-line synopsis and exit with a failure status."""
    print("Usage: cldr2qtimezone.py <path to cldr core/common> <path to qtbase>")
    # Every caller below reports a bad invocation or a missing input, so
    # exit non-zero rather than signalling success to the calling shell.
    sys.exit(1)

if len(sys.argv) != 3:
    usage()

cldrPath = sys.argv[1]
qtPath = sys.argv[2]

if not os.path.isdir(cldrPath) or not os.path.isdir(qtPath):
    usage()

windowsZonesPath = cldrPath + "/supplemental/windowsZones.xml"
tempFileDir = qtPath
dataFilePath = qtPath + "/src/corelib/tools/qtimezoneprivate_data_p.h"

if not os.path.isfile(windowsZonesPath):
    usage()

if not os.path.isfile(dataFilePath):
    usage()

# Pick the CLDR version out of the DTD; if several lines match, the last
# one wins.  The with-block closes the file once it has been scanned.
cldr_version = 'unknown'
with open(cldrPath + "/dtd/ldml.dtd", "r") as ldml:
    for line in ldml:
        if 'version cldrVersion CDATA #FIXED' in line:
            cldr_version = line.split('"')[1]

# [[u'version', [(u'number', u'$Revision: 7825 $')]]]
versionNumber = findTagsInFile(windowsZonesPath, "version")[0][1][0][1]

mapTimezones = findTagsInFile(windowsZonesPath, "windowsZones/mapTimezones")

# Windows key -> IANA ID list of the entry for territory "001"
defaultDict = {}
# (Windows key, country id) -> the data gathered for that mapZone
windowsIdDict = {}

if mapTimezones:
    for mapZone in mapTimezones:
        # [u'mapZone', [(u'territory', u'MH'), (u'other', u'UTC+12'), (u'type', u'Pacific/Majuro Pacific/Kwajalein')]]
        if mapZone[0] == u'mapZone':
            data = {}
            for attribute in mapZone[1]:
                if attribute[0] == u'other':
                    data['windowsId'] = attribute[1]
                if attribute[0] == u'territory':
                    data['countryCode'] = attribute[1]
                if attribute[0] == u'type':
                    data['ianaList'] = attribute[1]

            # windowsIdToKey() answers 0 for an ID missing from windowsIdList
            data['windowsKey'] = windowsIdToKey(data['windowsId'])
            if data['windowsKey'] <= 0:
                raise xpathlite.Error("Unknown Windows ID, please add \"%s\"" % data['windowsId'])

            if data['countryCode'] == u'001':
                # Territory 001 supplies the default IANA list for the Windows ID
                defaultDict[data['windowsKey']] = data['ianaList']
            else:
                data['countryId'] = enumdata.countryCodeToId(data['countryCode'])
                # A negative id is treated as "code not known to enumdata"
                if data['countryId'] < 0:
                    raise xpathlite.Error("Unknown Country Code \"%s\"" % data['countryCode'])
                data['country'] = enumdata.country_list[data['countryId']][0]
                windowsIdDict[data['windowsKey'], data['countryId']] = data

print("Input file parsed, now writing data")

GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n"

# Create a temp file to write the new data into
(newTempFile, newTempFilePath) = tempfile.mkstemp("qtimezone_data_p", dir=tempFileDir)
newTempFile = os.fdopen(newTempFile, "w")

# Open the old file and copy over the first non-generated section to the new file
oldDataFile = open(dataFilePath, "r")
s = oldDataFile.readline()
while s and s != GENERATED_BLOCK_START:
    newTempFile.write(s)
    s = oldDataFile.readline()

# Write out generated block start tag and warning
newTempFile.write(GENERATED_BLOCK_START)
newTempFile.write("""
/*
    This part of the file was generated on %s from the
    Common Locale Data Repository v%s supplemental/windowsZones.xml file %s

    http://www.unicode.org/cldr/

    Do not edit this code: run cldr2qtimezone.py on updated (or
    edited) CLDR data; see qtbase/util/locale_database/.
*/

""" % (str(datetime.date.today()), cldr_version, versionNumber) )

# String tables; the numeric tables below refer to entries in them by the
# offsets that append() returns.
windowsIdData = ByteArrayData()
ianaIdData = ByteArrayData()

# Write Windows/IANA table
newTempFile.write("// Windows ID Key, Country Enum, IANA ID Index\n")
newTempFile.write("static const QZoneData zoneDataTable[] = {\n")
for index in windowsIdDict:
    data = windowsIdDict[index]
    newTempFile.write(" { %6d,%6d,%6d }, // %s / %s\n"
                      % (data['windowsKey'],
                         data['countryId'],
                         ianaIdData.append(data['ianaList']),
                         data['windowsId'],
                         data['country']))
newTempFile.write(" { 0, 0, 0 } // Trailing zeroes\n")
newTempFile.write("};\n\n")

print("Done Zone Data")

# Write Windows ID key table
newTempFile.write("// Windows ID Key, Windows ID Index, IANA ID Index, UTC Offset\n")
newTempFile.write("static const QWindowsData windowsDataTable[] = {\n")
for windowsKey in windowsIdList:
    # defaultDict[windowsKey] raises KeyError if the CLDR data gave no
    # territory-001 mapping for this Windows ID.
    newTempFile.write(" { %6d,%6d,%6d,%6d }, // %s\n"
                      % (windowsKey,
                         windowsIdData.append(windowsIdList[windowsKey][0]),
                         ianaIdData.append(defaultDict[windowsKey]),
                         windowsIdList[windowsKey][1],
                         windowsIdList[windowsKey][0]))
newTempFile.write(" { 0, 0, 0, 0 } // Trailing zeroes\n")
newTempFile.write("};\n\n")

print("Done Windows Data Table")

# Write UTC ID key table
newTempFile.write("// IANA ID Index, UTC Offset\n")
newTempFile.write("static const QUtcData utcDataTable[] = {\n")
for index in utcIdList:
    data = utcIdList[index]
    newTempFile.write(" { %6d,%6d }, // %s\n"
                      % (ianaIdData.append(data[0]),
                         data[1],
                         data[0]))
newTempFile.write(" { 0, 0 } // Trailing zeroes\n")
newTempFile.write("};\n\n")

print("Done UTC Data Table")

# Write out Windows ID's data
newTempFile.write("static const char windowsIdData[] = {\n")
newTempFile.write(wrap_list(windowsIdData.data))
newTempFile.write("\n};\n\n")

# Write out IANA ID's data
newTempFile.write("static const char ianaIdData[] = {\n")
newTempFile.write(wrap_list(ianaIdData.data))
newTempFile.write("\n};\n")

print("Done ID Data Table")

# Write out the end of generated block tag
newTempFile.write(GENERATED_BLOCK_END)
s = oldDataFile.readline()

# Skip through the old generated data in the old file
while s and s != GENERATED_BLOCK_END:
    s = oldDataFile.readline()

# Now copy the rest of the original file into the new file
s = oldDataFile.readline()
while s:
    newTempFile.write(s)
    s = oldDataFile.readline()

# Now close the old and new file, delete the old file and copy the new file in its place
newTempFile.close()
oldDataFile.close()
os.remove(dataFilePath)
os.rename(newTempFilePath, dataFilePath)

print("Data generation completed, please check the new file at " + dataFilePath)
import re

def _convert_pattern(pattern):
    """Return the Qt date-format equivalent of one CLDR pattern field.

    ``pattern`` is looked up verbatim in a fixed table first; if it is not
    there, a handful of regular-expression rewrites are applied to it and
    the outcome is returned.  An empty result means the field has no Qt
    equivalent.
    """
    # patterns from http://www.unicode.org/reports/tr35/#Date_Format_Patterns
    qt_regexps = {
        r"yyy{3,}" : "yyyy", # five or more y's: convert to four-digit year
        r"L" : "M",          # stand-alone month names. not supported.
        r"g{1,}": "",        # modified julian day. not supported.
        r"S{1,}" : "",       # fractional seconds. not supported.
        r"A{1,}" : ""        # milliseconds in day. not supported.
    }
    qt_patterns = {
        "G" : "", "GG" : "", "GGG" : "", "GGGG" : "", "GGGGG" : "", # Era. not supported.
        "y" : "yyyy", # four-digit year without leading zeroes
        "Q" : "", "QQ" : "", "QQQ" : "", "QQQQ" : "", # quarter. not supported.
        "q" : "", "qq" : "", "qqq" : "", "qqqq" : "", # quarter. not supported.
        "MMMMM" : "MMM", # narrow month name.
        "LLLLL" : "MMM", # stand-alone narrow month name.
        "l" : "", # special symbol for chinese leap month. not supported.
        "w" : "", "W" : "", # week of year/month. not supported.
        "D" : "", "DD" : "", "DDD" : "", # day of year. not supported.
        "F" : "", # day of week in month. not supported.
        "E" : "ddd", "EE" : "ddd", "EEE" : "ddd", "EEEEE" : "ddd", "EEEE" : "dddd", # day of week
        "e" : "ddd", "ee" : "ddd", "eee" : "ddd", "eeeee" : "ddd", "eeee" : "dddd", # local day of week
        "c" : "ddd", "cc" : "ddd", "ccc" : "ddd", "ccccc" : "ddd", "cccc" : "dddd", # stand-alone local day of week
        "a" : "AP", # AM/PM
        "K" : "h", # Hour 0-11
        "k" : "H", # Hour 1-24
        "j" : "", # special reserved symbol.
        "z" : "t", "zz" : "t", "zzz" : "t", "zzzz" : "t", # timezone
        "Z" : "t", "ZZ" : "t", "ZZZ" : "t", "ZZZZ" : "t", # timezone
        "v" : "t", "vv" : "t", "vvv" : "t", "vvvv" : "t", # timezone
        "V" : "t", "VV" : "t", "VVV" : "t", "VVVV" : "t"  # timezone
    }
    # "in" rather than dict.has_key(): has_key() no longer exists in
    # Python 3, while the membership test works on both major versions.
    if pattern in qt_patterns:
        return qt_patterns[pattern]
    for r,v in qt_regexps.items():
        pattern = re.sub(r, v, pattern)
    return pattern

def _append_converted(result, field, chars_to_strip):
    """Return result extended by the Qt form of the CLDR field.

    When the field converts to nothing, the separators (chars_to_strip)
    that led up to it are trimmed from the end of result instead.
    """
    converted = _convert_pattern(field)
    result += converted
    if not converted:
        result = result.rstrip(chars_to_strip)
    return result

def convert_date(input):
    """Translate a CLDR date/time format string into Qt's format syntax.

    The string is scanned one character at a time.  Outside single-quoted
    text, each run of one repeated pattern letter is a field and is
    converted via _convert_pattern(); everything else, including quoted
    text and the quote characters themselves, is copied through unchanged.
    Spaces and hyphens left dangling before an unsupported field, or at
    the very start of the result, are removed.
    """
    result = ""
    patterns = "GyYuQqMLlwWdDFgEecahHKkjmsSAzZvV"
    last = ""    # the pattern field currently being accumulated, if any
    inquote = 0  # number of quote characters seen so far; odd while inside quotes
    chars_to_strip = " -"
    for c in input:
        if c == "'":
            inquote = inquote + 1
        if inquote % 2 == 0:
            if c in patterns:
                if not last:
                    last = c
                elif c in last:
                    # same letter again: the field grows
                    last += c
                else:
                    # pattern changed
                    result = _append_converted(result, last, chars_to_strip)
                    last = c
                continue
        if last:
            # pattern ended
            result = _append_converted(result, last, chars_to_strip)
            last = ""
        result += c
    if last:
        # input ended in the middle of a field
        result = _append_converted(result, last, chars_to_strip)
    return result.lstrip(chars_to_strip)
+## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. +## +## $QT_END_LICENSE$ +## +############################################################################# + +# Each *_list reflects the current values of its enums in qlocale.h; +# if new xml language files are available in CLDR, these languages and +# countries need to be *appended* to this list (for compatibility +# between versions). Include any spaces present in names (scripts +# shall squish them out for the enum entries) in *_list, but use the +# squished forms of names in the *_aliases mappings. + +### Qt 6: restore alphabetic order in each list. 
+ +language_list = { + 0: ["AnyLanguage", " "], + 1: ["C", " "], + 2: ["Abkhazian", "ab"], + 3: ["Oromo", "om"], # macrolanguage + 4: ["Afar", "aa"], + 5: ["Afrikaans", "af"], + 6: ["Albanian", "sq"], # macrolanguage + 7: ["Amharic", "am"], + 8: ["Arabic", "ar"], # macrolanguage + 9: ["Armenian", "hy"], + 10: ["Assamese", "as"], + 11: ["Aymara", "ay"], # macrolanguage + 12: ["Azerbaijani", "az"], # macrolanguage + 13: ["Bashkir", "ba"], + 14: ["Basque", "eu"], + 15: ["Bengali", "bn"], + 16: ["Dzongkha", "dz"], + 17: ["Bihari", "bh"], + 18: ["Bislama", "bi"], + 19: ["Breton", "br"], + 20: ["Bulgarian", "bg"], + 21: ["Burmese", "my"], + 22: ["Belarusian", "be"], + 23: ["Khmer", "km"], + 24: ["Catalan", "ca"], + 25: ["Chinese", "zh"], # macrolanguage + 26: ["Corsican", "co"], + 27: ["Croatian", "hr"], + 28: ["Czech", "cs"], + 29: ["Danish", "da"], + 30: ["Dutch", "nl"], + 31: ["English", "en"], + 32: ["Esperanto", "eo"], + 33: ["Estonian", "et"], # macrolanguage + 34: ["Faroese", "fo"], + 35: ["Fijian", "fj"], + 36: ["Finnish", "fi"], + 37: ["French", "fr"], + 38: ["Western Frisian", "fy"], + 39: ["Gaelic", "gd"], + 40: ["Galician", "gl"], + 41: ["Georgian", "ka"], + 42: ["German", "de"], + 43: ["Greek", "el"], + 44: ["Greenlandic", "kl"], + 45: ["Guarani", "gn"], # macrolanguage + 46: ["Gujarati", "gu"], + 47: ["Hausa", "ha"], + 48: ["Hebrew", "he"], + 49: ["Hindi", "hi"], + 50: ["Hungarian", "hu"], + 51: ["Icelandic", "is"], + 52: ["Indonesian", "id"], + 53: ["Interlingua", "ia"], + 54: ["Interlingue", "ie"], + 55: ["Inuktitut", "iu"], # macrolanguage + 56: ["Inupiak", "ik"], # macrolanguage + 57: ["Irish", "ga"], + 58: ["Italian", "it"], + 59: ["Japanese", "ja"], + 60: ["Javanese", "jv"], + 61: ["Kannada", "kn"], + 62: ["Kashmiri", "ks"], + 63: ["Kazakh", "kk"], + 64: ["Kinyarwanda", "rw"], + 65: ["Kirghiz", "ky"], + 66: ["Korean", "ko"], + 67: ["Kurdish", "ku"], # macrolanguage + 68: ["Rundi", "rn"], + 69: ["Lao", "lo"], + 70: ["Latin", "la"], + 71: ["Latvian", 
"lv"], # macrolanguage + 72: ["Lingala", "ln"], + 73: ["Lithuanian", "lt"], + 74: ["Macedonian", "mk"], + 75: ["Malagasy", "mg"], # macrolanguage + 76: ["Malay", "ms"], # macrolanguage + 77: ["Malayalam", "ml"], + 78: ["Maltese", "mt"], + 79: ["Maori", "mi"], + 80: ["Marathi", "mr"], + 81: ["Marshallese", "mh"], + 82: ["Mongolian", "mn"], # macrolanguage + 83: ["Nauru", "na"], + 84: ["Nepali", "ne"], # macrolanguage + 85: ["Norwegian Bokmal", "nb"], + 86: ["Occitan", "oc"], + 87: ["Oriya", "or"], # macrolanguage + 88: ["Pashto", "ps"], # macrolanguage + 89: ["Persian", "fa"], # macrolanguage + 90: ["Polish", "pl"], + 91: ["Portuguese", "pt"], + 92: ["Punjabi", "pa"], + 93: ["Quechua", "qu"], # macrolanguage + 94: ["Romansh", "rm"], + 95: ["Romanian", "ro"], + 96: ["Russian", "ru"], + 97: ["Samoan", "sm"], + 98: ["Sango", "sg"], + 99: ["Sanskrit", "sa"], + 100: ["Serbian", "sr"], + 101: ["Ossetic", "os"], + 102: ["Southern Sotho", "st"], + 103: ["Tswana", "tn"], + 104: ["Shona", "sn"], + 105: ["Sindhi", "sd"], + 106: ["Sinhala", "si"], + 107: ["Swati", "ss"], + 108: ["Slovak", "sk"], + 109: ["Slovenian", "sl"], + 110: ["Somali", "so"], + 111: ["Spanish", "es"], + 112: ["Sundanese", "su"], + 113: ["Swahili", "sw"], # macrolanguage + 114: ["Swedish", "sv"], + 115: ["Sardinian", "sc"], # macrolanguage + 116: ["Tajik", "tg"], + 117: ["Tamil", "ta"], + 118: ["Tatar", "tt"], + 119: ["Telugu", "te"], + 120: ["Thai", "th"], + 121: ["Tibetan", "bo"], + 122: ["Tigrinya", "ti"], + 123: ["Tongan", "to"], + 124: ["Tsonga", "ts"], + 125: ["Turkish", "tr"], + 126: ["Turkmen", "tk"], + 127: ["Tahitian", "ty"], + 128: ["Uighur", "ug"], + 129: ["Ukrainian", "uk"], + 130: ["Urdu", "ur"], + 131: ["Uzbek", "uz"], # macrolanguage + 132: ["Vietnamese", "vi"], + 133: ["Volapuk", "vo"], + 134: ["Welsh", "cy"], + 135: ["Wolof", "wo"], + 136: ["Xhosa", "xh"], + 137: ["Yiddish", "yi"], # macrolanguage + 138: ["Yoruba", "yo"], + 139: ["Zhuang", "za"], # macrolanguage + 140: ["Zulu", "zu"], + 
141: ["Norwegian Nynorsk", "nn"], + 142: ["Bosnian", "bs"], + 143: ["Divehi", "dv"], + 144: ["Manx", "gv"], + 145: ["Cornish", "kw"], + 146: ["Akan", "ak"], # macrolanguage + 147: ["Konkani", "kok"], + 148: ["Ga", "gaa"], + 149: ["Igbo", "ig" ], + 150: ["Kamba", "kam"], + 151: ["Syriac", "syr"], + 152: ["Blin", "byn"], + 153: ["Geez", "gez"], + 154: ["Koro", "kfo"], + 155: ["Sidamo", "sid"], + 156: ["Atsam", "cch"], + 157: ["Tigre", "tig"], + 158: ["Jju", "kaj"], + 159: ["Friulian", "fur"], + 160: ["Venda", "ve" ], + 161: ["Ewe", "ee" ], + 162: ["Walamo", "wal"], + 163: ["Hawaiian", "haw"], + 164: ["Tyap", "kcg"], + 165: ["Nyanja", "ny" ], + 166: ["Filipino", "fil"], + 167: ["Swiss German", "gsw"], + 168: ["Sichuan Yi", "ii" ], + 169: ["Kpelle", "kpe"], + 170: ["Low German", "nds"], + 171: ["South Ndebele", "nr" ], + 172: ["Northern Sotho", "nso"], + 173: ["Northern Sami", "se" ], + 174: ["Taroko", "trv"], + 175: ["Gusii", "guz"], + 176: ["Taita", "dav"], + 177: ["Fulah", "ff"], # macrolanguage + 178: ["Kikuyu", "ki"], + 179: ["Samburu", "saq"], + 180: ["Sena", "seh"], + 181: ["North Ndebele", "nd"], + 182: ["Rombo", "rof"], + 183: ["Tachelhit", "shi"], + 184: ["Kabyle", "kab"], + 185: ["Nyankole", "nyn"], + 186: ["Bena", "bez"], + 187: ["Vunjo", "vun"], + 188: ["Bambara", "bm"], + 189: ["Embu", "ebu"], + 190: ["Cherokee", "chr"], + 191: ["Morisyen", "mfe"], + 192: ["Makonde", "kde"], + 193: ["Langi", "lag"], + 194: ["Ganda", "lg"], + 195: ["Bemba", "bem"], + 196: ["Kabuverdianu", "kea"], + 197: ["Meru", "mer"], + 198: ["Kalenjin", "kln"], + 199: ["Nama", "naq"], + 200: ["Machame", "jmc"], + 201: ["Colognian", "ksh"], + 202: ["Masai", "mas"], + 203: ["Soga", "xog"], + 204: ["Luyia", "luy"], + 205: ["Asu", "asa"], + 206: ["Teso", "teo"], + 207: ["Saho", "ssy"], + 208: ["Koyra Chiini", "khq"], + 209: ["Rwa", "rwk"], + 210: ["Luo", "luo"], + 211: ["Chiga", "cgg"], + 212: ["Central Morocco Tamazight", "tzm"], + 213: ["Koyraboro Senni", "ses"], + 214: ["Shambala", 
"ksb"], + 215: ["Bodo", "brx"], + 216: ["Avaric", "av"], + 217: ["Chamorro", "ch"], + 218: ["Chechen", "ce"], + 219: ["Church", "cu"], # macrolanguage + 220: ["Chuvash", "cv"], + 221: ["Cree", "cr"], # macrolanguage + 222: ["Haitian", "ht"], + 223: ["Herero", "hz"], + 224: ["Hiri Motu", "ho"], + 225: ["Kanuri", "kr"], # macrolanguage + 226: ["Komi", "kv"], # macrolanguage + 227: ["Kongo", "kg"], # macrolanguage + 228: ["Kwanyama", "kj"], + 229: ["Limburgish", "li"], + 230: ["Luba Katanga", "lu"], + 231: ["Luxembourgish", "lb"], + 232: ["Navaho", "nv"], + 233: ["Ndonga", "ng"], + 234: ["Ojibwa", "oj"], # macrolanguage + 235: ["Pali", "pi"], # macrolanguage + 236: ["Walloon", "wa"], + 237: ["Aghem", "agq"], + 238: ["Basaa", "bas"], + 239: ["Zarma", "dje"], + 240: ["Duala", "dua"], + 241: ["Jola Fonyi", "dyo"], + 242: ["Ewondo", "ewo"], + 243: ["Bafia", "ksf"], + 244: ["Makhuwa Meetto", "mgh"], + 245: ["Mundang", "mua"], + 246: ["Kwasio", "nmg"], + 247: ["Nuer", "nus"], + 248: ["Sakha", "sah"], + 249: ["Sangu", "sbp"], + 250: ["Congo Swahili", "swc"], + 251: ["Tasawaq", "twq"], + 252: ["Vai", "vai"], + 253: ["Walser", "wae"], + 254: ["Yangben", "yav"], + 255: ["Avestan", "ae"], + 256: ["Asturian", "ast"], + 257: ["Ngomba", "jgo"], + 258: ["Kako", "kkj"], + 259: ["Meta", "mgo"], + 260: ["Ngiemboon", "nnh"], + 261: ["Aragonese", "an"], + 262: ["Akkadian", "akk"], + 263: ["Ancient Egyptian", "egy"], + 264: ["Ancient Greek", "grc"], + 265: ["Aramaic", "arc"], + 266: ["Balinese", "ban"], + 267: ["Bamun", "bax"], + 268: ["Batak Toba", "bbc"], + 269: ["Buginese", "bug"], + 270: ["Buhid", "bku"], + 271: ["Carian", "xcr"], + 272: ["Chakma", "ccp"], + 273: ["Classical Mandaic", "myz"], + 274: ["Coptic", "cop"], + 275: ["Dogri", "doi"], # macrolanguage + 276: ["Eastern Cham", "cjm"], + 277: ["Eastern Kayah", "eky"], + 278: ["Etruscan", "ett"], + 279: ["Gothic", "got"], + 280: ["Hanunoo", "hnn"], + 281: ["Ingush", "inh"], + 282: ["Large Flowery Miao", "hmd"], + 283: ["Lepcha", 
"lep"], + 284: ["Limbu", "lif"], + 285: ["Lisu", "lis"], + 286: ["Lu", "khb"], + 287: ["Lycian", "xlc"], + 288: ["Lydian", "xld"], + 289: ["Mandingo", "man"], # macrolanguage + 290: ["Manipuri", "mni"], + 291: ["Meroitic", "xmr"], + 292: ["Northern Thai", "nod"], + 293: ["Old Irish", "sga"], + 294: ["Old Norse", "non"], + 295: ["Old Persian", "peo"], + 296: ["Old Turkish", "otk"], + 297: ["Pahlavi", "pal"], + 298: ["Parthian", "xpr"], + 299: ["Phoenician", "phn"], + 300: ["Prakrit Language", "pra"], + 301: ["Rejang", "rej"], + 302: ["Sabaean", "xsa"], + 303: ["Samaritan", "smp"], + 304: ["Santali", "sat"], + 305: ["Saurashtra", "saz"], + 306: ["Sora", "srb"], + 307: ["Sylheti", "syl"], + 308: ["Tagbanwa", "tbw"], + 309: ["Tai Dam", "blt"], + 310: ["Tai Nua", "tdd"], + 311: ["Ugaritic", "uga"], + 312: ["Akoose", "bss"], + 313: ["Lakota", "lkt"], + 314: ["Standard Moroccan Tamazight", "zgh"], + 315: ["Mapuche", "arn"], + 316: ["Central Kurdish", "ckb"], + 317: ["Lower Sorbian", "dsb"], + 318: ["Upper Sorbian", "hsb"], + 319: ["Kenyang", "ken"], + 320: ["Mohawk", "moh"], + 321: ["Nko", "nqo"], + 322: ["Prussian", "prg"], + 323: ["Kiche", "quc"], + 324: ["Southern Sami", "sma"], + 325: ["Lule Sami", "smj"], + 326: ["Inari Sami", "smn"], + 327: ["Skolt Sami", "sms"], + 328: ["Warlpiri", "wbp"], + 329: ["Manichaean Middle Persian", "xmn"], + 330: ["Mende", "men"], + 331: ["Ancient North Arabian", "xna"], + 332: ["Linear A", "lab"], + 333: ["Hmong Njua", "hnj"], + 334: ["Ho", "hoc"], + 335: ["Lezghian", "lez"], + 336: ["Bassa", "bsq"], + 337: ["Mono", "mru"], + 338: ["Tedim Chin", "ctd"], + 339: ["Maithili", "mai"], + 340: ["Ahom", "aho"], + 341: ["American Sign Language", "ase"], + 342: ["Ardhamagadhi Prakrit", "pka"], + 343: ["Bhojpuri", "bho"], + 344: ["Hieroglyphic Luwian", "hlu"], + 345: ["Literary Chinese", "lzh"], + 346: ["Mazanderani", "mzn"], + 347: ["Mru", "mro"], + 348: ["Newari", "new"], + 349: ["Northern Luri", "lrc"], + 350: ["Palauan", "pau"], + 351: 
["Papiamento", "pap"], + 352: ["Saraiki", "skr"], + 353: ["Tokelau", "tkl"], + 354: ["Tok Pisin", "tpi"], + 355: ["Tuvalu", "tvl"], + 356: ["Uncoded Languages", "mis"], + 357: ["Cantonese", "yue"], + 358: ["Osage", "osa"], + 359: ["Tangut", "txg"], + 360: ["Ido", "io"], + 361: ["Lojban", "jbo"], + 362: ["Sicilian", "scn"], + 363: ["Southern Kurdish", "sdh"], + 364: ["Western Balochi", "bgn"], +} + +language_aliases = { + # Legacy - should disappear at some point: + 'Norwegian': 'NorwegianBokmal', + 'Moldavian': 'Romanian', + 'SerboCroatian': 'Serbian', + 'Tagalog': 'Filipino', + 'Twi': 'Akan', + # Renamings: + 'Afan': 'Oromo', + 'Byelorussian': 'Belarusian', + 'Bhutani': 'Dzongkha', + 'Cambodian': 'Khmer', + 'Kurundi': 'Rundi', + 'RhaetoRomance': 'Romansh', + 'Chewa': 'Nyanja', + 'Frisian': 'WesternFrisian', + 'Uigur': 'Uighur', +} + +country_list = { + 0: ["AnyCountry", "ZZ"], + 1: ["Afghanistan", "AF"], + 2: ["Albania", "AL"], + 3: ["Algeria", "DZ"], + 4: ["American Samoa", "AS"], + 5: ["Andorra", "AD"], + 6: ["Angola", "AO"], + 7: ["Anguilla", "AI"], + 8: ["Antarctica", "AQ"], + 9: ["Antigua And Barbuda", "AG"], + 10: ["Argentina", "AR"], + 11: ["Armenia", "AM"], + 12: ["Aruba", "AW"], + 13: ["Australia", "AU"], + 14: ["Austria", "AT"], + 15: ["Azerbaijan", "AZ"], + 16: ["Bahamas", "BS"], + 17: ["Bahrain", "BH"], + 18: ["Bangladesh", "BD"], + 19: ["Barbados", "BB"], + 20: ["Belarus", "BY"], + 21: ["Belgium", "BE"], + 22: ["Belize", "BZ"], + 23: ["Benin", "BJ"], + 24: ["Bermuda", "BM"], + 25: ["Bhutan", "BT"], + 26: ["Bolivia", "BO"], + 27: ["Bosnia And Herzegowina", "BA"], + 28: ["Botswana", "BW"], + 29: ["Bouvet Island", "BV"], + 30: ["Brazil", "BR"], + 31: ["British Indian Ocean Territory", "IO"], + 32: ["Brunei", "BN"], + 33: ["Bulgaria", "BG"], + 34: ["Burkina Faso", "BF"], + 35: ["Burundi", "BI"], + 36: ["Cambodia", "KH"], + 37: ["Cameroon", "CM"], + 38: ["Canada", "CA"], + 39: ["Cape Verde", "CV"], + 40: ["Cayman Islands", "KY"], + 41: ["Central African 
Republic", "CF"], + 42: ["Chad", "TD"], + 43: ["Chile", "CL"], + 44: ["China", "CN"], + 45: ["Christmas Island", "CX"], + 46: ["Cocos Islands", "CC"], + 47: ["Colombia", "CO"], + 48: ["Comoros", "KM"], + 49: ["Congo Kinshasa", "CD"], + 50: ["Congo Brazzaville", "CG"], + 51: ["Cook Islands", "CK"], + 52: ["Costa Rica", "CR"], + 53: ["Ivory Coast", "CI"], + 54: ["Croatia", "HR"], + 55: ["Cuba", "CU"], + 56: ["Cyprus", "CY"], + 57: ["Czech Republic", "CZ"], + 58: ["Denmark", "DK"], + 59: ["Djibouti", "DJ"], + 60: ["Dominica", "DM"], + 61: ["Dominican Republic", "DO"], + 62: ["East Timor", "TL"], + 63: ["Ecuador", "EC"], + 64: ["Egypt", "EG"], + 65: ["El Salvador", "SV"], + 66: ["Equatorial Guinea", "GQ"], + 67: ["Eritrea", "ER"], + 68: ["Estonia", "EE"], + 69: ["Ethiopia", "ET"], + 70: ["Falkland Islands", "FK"], + 71: ["Faroe Islands", "FO"], + 72: ["Fiji", "FJ"], + 73: ["Finland", "FI"], + 74: ["France", "FR"], + 75: ["Guernsey", "GG"], + 76: ["French Guiana", "GF"], + 77: ["French Polynesia", "PF"], + 78: ["French Southern Territories", "TF"], + 79: ["Gabon", "GA"], + 80: ["Gambia", "GM"], + 81: ["Georgia", "GE"], + 82: ["Germany", "DE"], + 83: ["Ghana", "GH"], + 84: ["Gibraltar", "GI"], + 85: ["Greece", "GR"], + 86: ["Greenland", "GL"], + 87: ["Grenada", "GD"], + 88: ["Guadeloupe", "GP"], + 89: ["Guam", "GU"], + 90: ["Guatemala", "GT"], + 91: ["Guinea", "GN"], + 92: ["Guinea Bissau", "GW"], + 93: ["Guyana", "GY"], + 94: ["Haiti", "HT"], + 95: ["Heard And McDonald Islands", "HM"], + 96: ["Honduras", "HN"], + 97: ["Hong Kong", "HK"], + 98: ["Hungary", "HU"], + 99: ["Iceland", "IS"], + 100: ["India", "IN"], + 101: ["Indonesia", "ID"], + 102: ["Iran", "IR"], + 103: ["Iraq", "IQ"], + 104: ["Ireland", "IE"], + 105: ["Israel", "IL"], + 106: ["Italy", "IT"], + 107: ["Jamaica", "JM"], + 108: ["Japan", "JP"], + 109: ["Jordan", "JO"], + 110: ["Kazakhstan", "KZ"], + 111: ["Kenya", "KE"], + 112: ["Kiribati", "KI"], + 113: ["North Korea", "KP"], + 114: ["South Korea", "KR"], + 
115: ["Kuwait", "KW"], + 116: ["Kyrgyzstan", "KG"], + 117: ["Laos", "LA"], + 118: ["Latvia", "LV"], + 119: ["Lebanon", "LB"], + 120: ["Lesotho", "LS"], + 121: ["Liberia", "LR"], + 122: ["Libya", "LY"], + 123: ["Liechtenstein", "LI"], + 124: ["Lithuania", "LT"], + 125: ["Luxembourg", "LU"], + 126: ["Macau", "MO"], + 127: ["Macedonia", "MK"], + 128: ["Madagascar", "MG"], + 129: ["Malawi", "MW"], + 130: ["Malaysia", "MY"], + 131: ["Maldives", "MV"], + 132: ["Mali", "ML"], + 133: ["Malta", "MT"], + 134: ["Marshall Islands", "MH"], + 135: ["Martinique", "MQ"], + 136: ["Mauritania", "MR"], + 137: ["Mauritius", "MU"], + 138: ["Mayotte", "YT"], + 139: ["Mexico", "MX"], + 140: ["Micronesia", "FM"], + 141: ["Moldova", "MD"], + 142: ["Monaco", "MC"], + 143: ["Mongolia", "MN"], + 144: ["Montserrat", "MS"], + 145: ["Morocco", "MA"], + 146: ["Mozambique", "MZ"], + 147: ["Myanmar", "MM"], + 148: ["Namibia", "NA"], + 149: ["Nauru", "NR"], + 150: ["Nepal", "NP"], + 151: ["Netherlands", "NL"], + 152: ["Cura Sao", "CW"], + 153: ["New Caledonia", "NC"], + 154: ["New Zealand", "NZ"], + 155: ["Nicaragua", "NI"], + 156: ["Niger", "NE"], + 157: ["Nigeria", "NG"], + 158: ["Niue", "NU"], + 159: ["Norfolk Island", "NF"], + 160: ["Northern Mariana Islands", "MP"], + 161: ["Norway", "NO"], + 162: ["Oman", "OM"], + 163: ["Pakistan", "PK"], + 164: ["Palau", "PW"], + 165: ["Palestinian Territories", "PS"], + 166: ["Panama", "PA"], + 167: ["Papua New Guinea", "PG"], + 168: ["Paraguay", "PY"], + 169: ["Peru", "PE"], + 170: ["Philippines", "PH"], + 171: ["Pitcairn", "PN"], + 172: ["Poland", "PL"], + 173: ["Portugal", "PT"], + 174: ["Puerto Rico", "PR"], + 175: ["Qatar", "QA"], + 176: ["Reunion", "RE"], + 177: ["Romania", "RO"], + 178: ["Russia", "RU"], + 179: ["Rwanda", "RW"], + 180: ["Saint Kitts And Nevis", "KN"], + 181: ["Saint Lucia", "LC"], + 182: ["Saint Vincent And The Grenadines", "VC"], + 183: ["Samoa", "WS"], + 184: ["San Marino", "SM"], + 185: ["Sao Tome And Principe", "ST"], + 186: 
["Saudi Arabia", "SA"], + 187: ["Senegal", "SN"], + 188: ["Seychelles", "SC"], + 189: ["Sierra Leone", "SL"], + 190: ["Singapore", "SG"], + 191: ["Slovakia", "SK"], + 192: ["Slovenia", "SI"], + 193: ["Solomon Islands", "SB"], + 194: ["Somalia", "SO"], + 195: ["South Africa", "ZA"], + 196: ["South Georgia And The South Sandwich Islands", "GS"], + 197: ["Spain", "ES"], + 198: ["Sri Lanka", "LK"], + 199: ["Saint Helena", "SH"], + 200: ["Saint Pierre And Miquelon", "PM"], + 201: ["Sudan", "SD"], + 202: ["Suriname", "SR"], + 203: ["Svalbard And Jan Mayen Islands", "SJ"], + 204: ["Swaziland", "SZ"], + 205: ["Sweden", "SE"], + 206: ["Switzerland", "CH"], + 207: ["Syria", "SY"], + 208: ["Taiwan", "TW"], + 209: ["Tajikistan", "TJ"], + 210: ["Tanzania", "TZ"], + 211: ["Thailand", "TH"], + 212: ["Togo", "TG"], + 213: ["Tokelau", "TK"], + 214: ["Tonga", "TO"], + 215: ["Trinidad And Tobago", "TT"], + 216: ["Tunisia", "TN"], + 217: ["Turkey", "TR"], + 218: ["Turkmenistan", "TM"], + 219: ["Turks And Caicos Islands", "TC"], + 220: ["Tuvalu", "TV"], + 221: ["Uganda", "UG"], + 222: ["Ukraine", "UA"], + 223: ["United Arab Emirates", "AE"], + 224: ["United Kingdom", "GB"], + 225: ["United States", "US"], + 226: ["United States Minor Outlying Islands", "UM"], + 227: ["Uruguay", "UY"], + 228: ["Uzbekistan", "UZ"], + 229: ["Vanuatu", "VU"], + 230: ["Vatican City State", "VA"], + 231: ["Venezuela", "VE"], + 232: ["Vietnam", "VN"], + 233: ["British Virgin Islands", "VG"], + 234: ["United States Virgin Islands", "VI"], + 235: ["Wallis And Futuna Islands", "WF"], + 236: ["Western Sahara", "EH"], + 237: ["Yemen", "YE"], + 238: ["Canary Islands", "IC"], + 239: ["Zambia", "ZM"], + 240: ["Zimbabwe", "ZW"], + 241: ["Clipperton Island", "CP"], + 242: ["Montenegro", "ME"], + 243: ["Serbia", "RS"], + 244: ["Saint Barthelemy", "BL"], + 245: ["Saint Martin", "MF"], + 246: ["Latin America", "419"], + 247: ["Ascension Island", "AC"], + 248: ["Aland Islands", "AX"], + 249: ["Diego Garcia", "DG"], + 250: 
# Old enum-member names mapped to the names that replace them.
country_aliases = {
    # Deprecated:
    'Tokelau': 'TokelauCountry',
    'Tuvalu': 'TuvaluCountry',
    # Renamings:
    'DemocraticRepublicOfCongo': 'CongoKinshasa',
    'PeoplesRepublicOfCongo': 'CongoBrazzaville',
    'DemocraticRepublicOfKorea': 'NorthKorea',
    'RepublicOfKorea': 'SouthKorea',
    'RussianFederation': 'Russia',
    'SyrianArabRepublic': 'Syria',
    'LatinAmericaAndTheCaribbean': 'LatinAmerica',
}

# Numeric script id -> [display name, four-letter script code].
script_list = {
    0: ["AnyScript", "Zzzz"],
    1: ["Arabic", "Arab"],
    2: ["Cyrillic", "Cyrl"],
    3: ["Deseret", "Dsrt"],
    4: ["Gurmukhi", "Guru"],
    5: ["Simplified Han", "Hans"],
    6: ["Traditional Han", "Hant"],
    7: ["Latin", "Latn"],
    8: ["Mongolian", "Mong"],
    9: ["Tifinagh", "Tfng"],
    10: ["Armenian", "Armn"],
    11: ["Bengali", "Beng"],
    12: ["Cherokee", "Cher"],
    13: ["Devanagari", "Deva"],
    14: ["Ethiopic", "Ethi"],
    15: ["Georgian", "Geor"],
    16: ["Greek", "Grek"],
    17: ["Gujarati", "Gujr"],
    18: ["Hebrew", "Hebr"],
    19: ["Japanese", "Jpan"],
    20: ["Khmer", "Khmr"],
    21: ["Kannada", "Knda"],
    22: ["Korean", "Kore"],
    23: ["Lao", "Laoo"],
    24: ["Malayalam", "Mlym"],
    25: ["Myanmar", "Mymr"],
    26: ["Oriya", "Orya"],
    27: ["Tamil", "Taml"],
    28: ["Telugu", "Telu"],
    29: ["Thaana", "Thaa"],
    30: ["Thai", "Thai"],
    31: ["Tibetan", "Tibt"],
    32: ["Sinhala", "Sinh"],
    33: ["Syriac", "Syrc"],
    34: ["Yi", "Yiii"],
    35: ["Vai", "Vaii"],
    36: ["Avestan", "Avst"],
    37: ["Balinese", "Bali"],
    38: ["Bamum", "Bamu"],
    39: ["Batak", "Batk"],
    40: ["Bopomofo", "Bopo"],
    41: ["Brahmi", "Brah"],
    42: ["Buginese", "Bugi"],
    43: ["Buhid", "Buhd"],
    44: ["Canadian Aboriginal", "Cans"],
    45: ["Carian", "Cari"],
    46: ["Chakma", "Cakm"],
    47: ["Cham", "Cham"],
    48: ["Coptic", "Copt"],
    49: ["Cypriot", "Cprt"],
    50: ["Egyptian Hieroglyphs", "Egyp"],
    51: ["Fraser", "Lisu"],
    52: ["Glagolitic", "Glag"],
    53: ["Gothic", "Goth"],
    54: ["Han", "Hani"],
    55: ["Hangul", "Hang"],
    56: ["Hanunoo", "Hano"],
    57: ["Imperial Aramaic", "Armi"],
    58: ["Inscriptional Pahlavi", "Phli"],
    59: ["Inscriptional Parthian", "Prti"],
    60: ["Javanese", "Java"],
    61: ["Kaithi", "Kthi"],
    62: ["Katakana", "Kana"],
    63: ["Kayah Li", "Kali"],
    64: ["Kharoshthi", "Khar"],
    65: ["Lanna", "Lana"],
    66: ["Lepcha", "Lepc"],
    67: ["Limbu", "Limb"],
    68: ["Linear B", "Linb"],
    69: ["Lycian", "Lyci"],
    70: ["Lydian", "Lydi"],
    71: ["Mandaean", "Mand"],
    72: ["Meitei Mayek", "Mtei"],
    73: ["Meroitic", "Mero"],
    74: ["Meroitic Cursive", "Merc"],
    75: ["Nko", "Nkoo"],
    76: ["New Tai Lue", "Talu"],
    77: ["Ogham", "Ogam"],
    78: ["Ol Chiki", "Olck"],
    79: ["Old Italic", "Ital"],
    80: ["Old Persian", "Xpeo"],
    81: ["Old South Arabian", "Sarb"],
    82: ["Orkhon", "Orkh"],
    83: ["Osmanya", "Osma"],
    84: ["Phags Pa", "Phag"],
    85: ["Phoenician", "Phnx"],
    86: ["Pollard Phonetic", "Plrd"],
    87: ["Rejang", "Rjng"],
    88: ["Runic", "Runr"],
    89: ["Samaritan", "Samr"],
    90: ["Saurashtra", "Saur"],
    91: ["Sharada", "Shrd"],
    92: ["Shavian", "Shaw"],
    93: ["Sora Sompeng", "Sora"],
    94: ["Cuneiform", "Xsux"],
    95: ["Sundanese", "Sund"],
    96: ["Syloti Nagri", "Sylo"],
    97: ["Tagalog", "Tglg"],
    98: ["Tagbanwa", "Tagb"],
    99: ["Tai Le", "Tale"],
    100: ["Tai Viet", "Tavt"],
    101: ["Takri", "Takr"],
    102: ["Ugaritic", "Ugar"],
    103: ["Braille", "Brai"],
    104: ["Hiragana", "Hira"],
    105: ["Caucasian Albanian", "Aghb"],
    106: ["Bassa Vah", "Bass"],
    107: ["Duployan", "Dupl"],
    108: ["Elbasan", "Elba"],
    109: ["Grantha", "Gran"],
    110: ["Pahawh Hmong", "Hmng"],
    111: ["Khojki", "Khoj"],
    112: ["Linear A", "Lina"],
    113: ["Mahajani", "Mahj"],
    114: ["Manichaean", "Mani"],
    115: ["Mende Kikakui", "Mend"],
    116: ["Modi", "Modi"],
    117: ["Mro", "Mroo"],
    118: ["Old North Arabian", "Narb"],
    119: ["Nabataean", "Nbat"],
    120: ["Palmyrene", "Palm"],
    121: ["Pau Cin Hau", "Pauc"],
    122: ["Old Permic", "Perm"],
    123: ["Psalter Pahlavi", "Phlp"],
    124: ["Siddham", "Sidd"],
    125: ["Khudawadi", "Sind"],
    126: ["Tirhuta", "Tirh"],
    127: ["Varang Kshiti", "Wara"],
    128: ["Ahom", "Ahom"],
    129: ["Anatolian Hieroglyphs", "Hluw"],
    130: ["Hatran", "Hatr"],
    131: ["Multani", "Mult"],
    132: ["Old Hungarian", "Hung"],
    133: ["Sign Writing", "Sgnw"],
    134: ["Adlam", "Adlm"],
    135: ["Bhaiksuki", "Bhks"],
    136: ["Marchen", "Marc"],
    137: ["Newa", "Newa"],
    138: ["Osage", "Osge"],
    139: ["Tangut", "Tang"],
    140: ["Han with Bopomofo", "Hanb"],
    141: ["Jamo", "Jamo"],
}

# Old enum-member names mapped to the names that replace them.
script_aliases = {
    # Renamings:
    'SimplifiedChineseScript': 'SimplifiedHanScript',
    'TraditionalChineseScript': 'TraditionalHanScript',
}

def _codeToId(table, code):
    """Shared scan behind the three public *CodeToId() lookups.

    table maps a numeric id to a [name, code] pair.  Returns 0 for an
    empty (or None) code, the id whose entry carries that code when one
    exists, and -1 otherwise.
    """
    if not code:
        return 0
    for ident, entry in table.items():
        if entry[1] == code:
            return ident
    return -1

def countryCodeToId(code):
    """Return the country_list id for a territory code (0 if empty, -1 if unknown)."""
    return _codeToId(country_list, code)

def languageCodeToId(code):
    """Return the language_list id for a language code (0 if empty, -1 if unknown)."""
    return _codeToId(language_list, code)

def scriptCodeToId(code):
    """Return the script_list id for a script code (0 if empty, -1 if unknown)."""
    return _codeToId(script_list, code)
with AM/PM display) +m +mm +s +ss +z the milliseconds without leading zeroes (0 to 999) +zzz the milliseconds with leading zeroes (000 to 999) +AP or A interpret as an AM/PM time. AP must be either "AM" or "PM" +ap or a Interpret as an AM/PM time. ap must be either "am" or "pm" +t time zone diff --git a/util/locale_database/localexml.py b/util/locale_database/localexml.py new file mode 100644 index 0000000000..e95b3aebcc --- /dev/null +++ b/util/locale_database/localexml.py @@ -0,0 +1,263 @@ +############################################################################# +## +## Copyright (C) 2017 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. +## +## $QT_END_LICENSE$ +## +############################################################################# +"""Shared serialization-scanning code for QLocaleXML format. 
# Tools used by Locale:
def camel(seq):
    """Yield the words of seq, capitalizing each one except the first.

    Written as a plain loop (rather than pulling the first word off
    with an explicit next-call) so that an empty sequence simply yields
    nothing on every Python version.
    """
    for index, word in enumerate(seq):
        yield word.capitalize() if index else word

def camelCase(words):
    """Join words into one camelCase identifier, e.g. ('long', 'days') -> 'longDays'."""
    return ''.join(camel(iter(words)))

def ordStr(c):
    """Return the code-point of the single character c, as a decimal string.

    Raises xpathlite.Error when c is not exactly one character long.
    """
    if len(c) == 1:
        return str(ord(c))
    # Spell out non-ASCII characters as \xNN so that the message can be
    # shown on any console.
    shown = ''.join(ch if ord(ch) < 128 else '\\x%02x' % ord(ch) for ch in c)
    raise xpathlite.Error('Unable to handle value "%s"' % shown)

# Fix for a problem with QLocale returning a character instead of
# strings for QLocale::exponential() and others. So we fallback to
# default values in these cases.
def fixOrdStr(c, d):
    """Like ordStr(c), but use the default character d when c is not a single character."""
    return str(ord(c if len(c) == 1 else d))

def startCount(c, text): # strspn
    """First index in text where it doesn't have a character in c

    text must be non-empty and start with a character in c.  When every
    character of text is in c, the result is len(text).
    """
    assert text and text[0] in c
    return next((j for j, d in enumerate(text) if d not in c), len(text))

def convertFormat(format):
    """Convert date/time format-specifier from CLDR to Qt

    Match up (as best we can) the differences between:
    * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
    * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()

    Text between single quotes is copied through unchanged (quotes
    included); so is any character that is not one of the handled field
    letters.
    """
    result = ""
    i = 0
    while i < len(format):
        if format[i] == "'":
            # Quoted literal: copy verbatim up to (and including) the
            # closing quote, if there is one.
            result += "'"
            i += 1
            while i < len(format) and format[i] != "'":
                result += format[i]
                i += 1
            if i < len(format):
                result += "'"
                i += 1
        else:
            s = format[i:]
            if s.startswith('E'): # week-day
                n = startCount('E', s)
                if n <= 3: # E, EE and EEE are all the abbreviated name
                    result += 'ddd'
                elif n == 4:
                    result += 'dddd'
                else: # 5: narrow, 6 short; but should be name, not number :-(
                    result += 'd' if n < 6 else 'dd'
                i += n
            elif s[0] in 'ab': # am/pm
                # 'b' should distinguish noon/midnight, too :-(
                result += "AP"
                i += startCount('ab', s)
            elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
                result += 'z'
                i += startCount('S', s)
            elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
                result += 't'
                i += startCount('V', s)
            elif s[0] in 'zv': # zone
                # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
                # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
                result += "t"
                i += startCount('zv', s)
            else:
                result += format[i]
                i += 1

    return result

class Locale:
    """Bag of per-locale attributes shared by the QLocaleXML writer and reader.

    Instances carry whatever attributes they were constructed with: see
    fromXmlData() for the set used when reading, and C() together with
    toXml() for the set used when writing.
    """
    # Tool used during class body (see del below), not method:
    def propsMonthDay(lengths=('long', 'short', 'narrow'), scale=('months', 'days')):
        for L in lengths:
            for S in scale:
                yield camelCase((L, S))
                yield camelCase(('standalone', L, S))

    # Expected to be numbers, read with int():
    __asint = ("decimal", "group", "zero",
               "list", "percent", "minus", "plus", "exp",
               "currencyDigits", "currencyRounding")
    # Single character; use the code-point number for each:
    __asord = ("quotationStart", "quotationEnd",
               "alternateQuotationStart", "alternateQuotationEnd")
    # Convert day-name to Qt day-of-week number:
    __asdow = ("firstDayOfWeek", "weekendStart", "weekendEnd")
    # Convert from CLDR format-strings to QDateTimeParser ones:
    __asfmt = ("longDateFormat", "shortDateFormat", "longTimeFormat", "shortTimeFormat")
    # Just use the raw text:
    __astxt = ("language", "languageEndonym", "script", "country", "countryEndonym",
               "listPatternPartStart", "listPatternPartMiddle",
               "listPatternPartEnd", "listPatternPartTwo", "am", "pm",
               'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
               "currencyIsoCode", "currencySymbol", "currencyDisplayName",
               "currencyFormat", "currencyNegativeFormat"
               ) + tuple(propsMonthDay())
    del propsMonthDay

    # Day-of-Week numbering used by Qt:
    __qDoW = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}

    @classmethod
    def fromXmlData(cls, lookup):
        """Constructor from the contents of XML elements.

        Single parameter, lookup, is called with the names of XML
        elements that should contain the relevant data, within a CLDR
        locale element (within a localeList element); these names are
        used for the attributes of the object constructed. Attribute
        values are obtained by suitably digesting the returned element
        texts.

        The 'list' element is stored as attribute listDelim; every
        other element keeps its own name.
        """
        data = {}
        for k in cls.__asint:
            data['listDelim' if k == 'list' else k] = int(lookup(k))

        for k in cls.__asord:
            value = lookup(k)
            assert len(value) == 1, \
                (k, value, 'value should be exactly one character')
            data[k] = ord(value)

        for k in cls.__asdow:
            data[k] = cls.__qDoW[lookup(k)]

        for k in cls.__asfmt:
            data[k] = convertFormat(lookup(k))

        for k in cls.__astxt:
            data[k] = lookup(k)

        return cls(data)

    def toXml(self, indent=' ', tab=' '):
        """Print this locale to standard output as a <locale> element.

        indent is put before the opening and closing tags, indent + tab
        before each child element.  Reads the attribute names used by
        C() (e.g. 'list', 'language_endonym', 'language_code').
        """
        # print is given a single parenthesised expression throughout,
        # which means the same whether print is a statement or a function.
        print(indent + '<locale>')
        inner = indent + tab
        get = lambda k: getattr(self, k)
        for key in ('language', 'script', 'country'):
            print(inner + "<%s>" % key + get(key) + "</%s>" % key)
            print(inner + "<%scode>" % key + get(key + '_code') + "</%scode>" % key)

        for key in ('decimal', 'group', 'zero'):
            print(inner + "<%s>" % key + ordStr(get(key)) + "</%s>" % key)
        for key, std in (('list', ';'), ('percent', '%'),
                         ('minus', '-'), ('plus', '+'), ('exp', 'e')):
            print(inner + "<%s>" % key + fixOrdStr(get(key), std) + "</%s>" % key)

        for key in ('language_endonym', 'country_endonym',
                    'quotationStart', 'quotationEnd',
                    'alternateQuotationStart', 'alternateQuotationEnd',
                    'listPatternPartStart', 'listPatternPartMiddle',
                    'listPatternPartEnd', 'listPatternPartTwo',
                    'byte_unit', 'byte_si_quantified', 'byte_iec_quantified',
                    'am', 'pm', 'firstDayOfWeek',
                    'weekendStart', 'weekendEnd',
                    'longDateFormat', 'shortDateFormat',
                    'longTimeFormat', 'shortTimeFormat',
                    'standaloneLongMonths', 'standaloneShortMonths',
                    'standaloneNarrowMonths',
                    'longMonths', 'shortMonths', 'narrowMonths',
                    'longDays', 'shortDays', 'narrowDays',
                    'standaloneLongDays', 'standaloneShortDays', 'standaloneNarrowDays',
                    'currencyIsoCode', 'currencySymbol', 'currencyDisplayName',
                    'currencyFormat', 'currencyNegativeFormat'):
            ent = camelCase(key.split('_')) if key.endswith('_endonym') else key
            value = escape(get(key))
            if not isinstance(value, str):
                # Only a Python 2 unicode object gets here; it has to be
                # encoded before being mixed into a byte-string line.
                value = value.encode('utf-8')
            print(inner + "<%s>%s</%s>" % (ent, value, ent))

        for key in ('currencyDigits', 'currencyRounding'):
            print(inner + "<%s>%d</%s>" % (key, get(key), key))

        print(indent + "</locale>")

    def __init__(self, data=None, **kw):
        """Set attributes from the mapping data (if any), then from keyword arguments."""
        if data: self.__dict__.update(data)
        if kw: self.__dict__.update(kw)

    @classmethod
    def C(cls,
          # Empty entries at end to ensure final separator when join()ed:
          months = ('January', 'February', 'March', 'April', 'May', 'June', 'July',
                    'August', 'September', 'October', 'November', 'December', ''),
          days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday',
                  'Thursday', 'Friday', 'Saturday', ''),
          quantifiers=('k', 'M', 'G', 'T', 'P', 'E')):
        """Returns an object representing the C locale."""
        return cls(language='C', language_code='0', language_endonym='',
                   script='AnyScript', script_code='0',
                   country='AnyCountry', country_code='0', country_endonym='',
                   decimal='.', group=',', list=';', percent='%',
                   zero='0', minus='-', plus='+', exp='e',
                   quotationStart='"', quotationEnd='"',
                   alternateQuotationStart='\'', alternateQuotationEnd='\'',
                   listPatternPartStart='%1, %2',
                   listPatternPartMiddle='%1, %2',
                   listPatternPartEnd='%1, %2',
                   listPatternPartTwo='%1, %2',
                   byte_unit='bytes',
                   byte_si_quantified=';'.join(q + 'B' for q in quantifiers),
                   byte_iec_quantified=';'.join(q.upper() + 'iB' for q in quantifiers),
                   am='AM', pm='PM', firstDayOfWeek='mon',
                   weekendStart='sat', weekendEnd='sun',
                   longDateFormat='EEEE, d MMMM yyyy', shortDateFormat='d MMM yyyy',
                   longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss',
                   longMonths=';'.join(months),
                   shortMonths=';'.join(m[:3] for m in months),
                   narrowMonths='1;2;3;4;5;6;7;8;9;10;11;12;',
                   standaloneLongMonths=';'.join(months),
                   standaloneShortMonths=';'.join(m[:3] for m in months),
                   standaloneNarrowMonths=';'.join(m[:1] for m in months),
                   longDays=';'.join(days),
                   shortDays=';'.join(d[:3] for d in days),
                   narrowDays='7;1;2;3;4;5;6;',
                   standaloneLongDays=';'.join(days),
                   standaloneShortDays=';'.join(d[:3] for d in days),
                   standaloneNarrowDays=';'.join(d[:1] for d in days),
                   currencyIsoCode='', currencySymbol='',
                   currencyDisplayName=';' * 7,
                   currencyDigits=2, currencyRounding=1,
                   currencyFormat='%1%2', currencyNegativeFormat='')
class Error(Exception):
    """Failure reported by this script; str() of it is the message.

    Derives from Exception so that it can be raised and caught by every
    Python version, and by generic 'except Exception' handlers.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg

def wrap_list(lst):
    """Join strings with ', ', starting a new line after every 20 entries."""
    def split(lst, size):
        while lst:
            head, lst = lst[:size], lst[size:]
            yield head
    return ",\n".join(", ".join(x) for x in split(lst, 20))

def isNodeNamed(elt, name, TYPE=xml.dom.minidom.Node.ELEMENT_NODE):
    """True precisely if elt is a DOM node of type TYPE whose nodeName is name."""
    return elt.nodeType == TYPE and elt.nodeName == name

def firstChildElt(parent, name):
    """Return the first child element of parent called name.

    Raises Error if parent has no such child.
    """
    child = parent.firstChild
    while child:
        if isNodeNamed(child, name):
            return child
        child = child.nextSibling

    raise Error('No %s child found' % name)

def eachEltInGroup(parent, group, key):
    """Iterate the key elements inside parent's first group child.

    Yields nothing when parent has no group child, or when that child
    is empty.
    """
    try:
        element = firstChildElt(parent, group).firstChild
    except Error:
        element = None

    while element:
        if isNodeNamed(element, key):
            yield element
        element = element.nextSibling

def eltWords(elt):
    """Iterate the values of the text nodes that are direct children of elt."""
    child = elt.firstChild
    while child:
        if child.nodeType == elt.TEXT_NODE:
            yield child.nodeValue
        child = child.nextSibling

def firstChildText(elt, key):
    """Text of elt's first key child: its text nodes joined with single spaces."""
    return ' '.join(eltWords(firstChildElt(elt, key)))

def loadMap(doc, category):
    """Read doc's <categoryList> into a dict: int(id) -> (name, code)."""
    return dict((int(firstChildText(element, 'id')),
                 (firstChildText(element, 'name'),
                  firstChildText(element, 'code')))
                for element in eachEltInGroup(doc.documentElement,
                                              category + 'List', category))

def loadLikelySubtagsMap(doc):
    """Read doc's <likelySubtags> into a dict keyed by position.

    Each value is {'from': triplet, 'to': triplet}, where a triplet is
    the (language, script, country) text of the matching child element.
    """
    def triplet(element, keys=('language', 'script', 'country')):
        return tuple(firstChildText(element, key) for key in keys)

    return dict((i, {'from': triplet(firstChildElt(elt, "from")),
                     'to': triplet(firstChildElt(elt, "to"))})
                for i, elt in enumerate(eachEltInGroup(doc.documentElement,
                                                       'likelySubtags', 'likelySubtag')))

def fixedScriptName(name, dupes):
    """Turn a script's display name into an identifier ending in 'Script'.

    Exits the program if the result clashes with a name in dupes.
    """
    # Don't .capitalize() as some names are already camel-case (see enumdata.py):
    name = ''.join(word[0].upper() + word[1:] for word in name.split())
    if not name.endswith("Script"):
        name = name + "Script"
    if name in dupes:
        sys.stderr.write("\n\n\nERROR: The script name '%s' is messy" % name)
        sys.exit(1)
    return name

def fixedCountryName(name, dupes):
    """Strip spaces from name; append 'Country' if name is listed in dupes."""
    if name in dupes:
        return name.replace(" ", "") + "Country"
    return name.replace(" ", "")

def fixedLanguageName(name, dupes):
    """Strip spaces from name; append 'Language' if name is listed in dupes."""
    if name in dupes:
        return name.replace(" ", "") + "Language"
    return name.replace(" ", "")

def findDupes(country_map, language_map):
    """Return the set of names that appear in both maps.

    Each map's values are sequences whose first entry is the name.
    """
    country_set = set(v[0] for v in country_map.values())
    language_set = set(v[0] for v in language_map.values())
    return country_set & language_set

def _nameToId(name, table):
    """Key of the first entry of table whose name (entry [0]) is name, else -1."""
    for key in table.keys():
        if table[key][0] == name:
            return key
    return -1

def languageNameToId(name, language_map):
    """Id of the language called name in language_map, or -1 if there is none."""
    return _nameToId(name, language_map)

def scriptNameToId(name, script_map):
    """Id of the script called name in script_map, or -1 if there is none."""
    return _nameToId(name, script_map)

def countryNameToId(name, country_map):
    """Id of the country called name in country_map, or -1 if there is none."""
    return _nameToId(name, country_map)

def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
    """Read every <locale> of doc's <localeList> into a dict.

    Keys are (language_id, script_id, country_id); values are the
    Locale objects built by Locale.fromXmlData().  Names with no id in
    the relevant map are reported on stderr (and keep the id -1).  A
    locale with script id 0 has its script filled in from
    likely_subtags_map where that has a matching entry.
    """
    result = {}

    for locale_elt in eachEltInGroup(doc.documentElement, "localeList", "locale"):
        locale = Locale.fromXmlData(lambda k: firstChildText(locale_elt, k))
        language_id = languageNameToId(locale.language, language_map)
        if language_id == -1:
            sys.stderr.write("Cannot find a language id for '%s'\n" % locale.language)
        script_id = scriptNameToId(locale.script, script_map)
        if script_id == -1:
            sys.stderr.write("Cannot find a script id for '%s'\n" % locale.script)
        country_id = countryNameToId(locale.country, country_map)
        if country_id == -1:
            sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)

        if language_id != 1: # C
            if country_id == 0:
                sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)

            if script_id == 0:
                # find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
                for key in likely_subtags_map.keys():
                    tmp = likely_subtags_map[key]
                    if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
                        locale.script = tmp["to"][1]
                        script_id = scriptNameToId(locale.script, script_map)
                        break
            if script_id == 0 and country_id != 0:
                # try with no country
                for key in likely_subtags_map.keys():
                    tmp = likely_subtags_map[key]
                    if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
                        locale.script = tmp["to"][1]
                        script_id = scriptNameToId(locale.script, script_map)
                        break

        result[(language_id, script_id, country_id)] = locale

    return result

def compareLocaleKeys(key1, key2):
    """cmp-style ordering of (language_id, script_id, country_id) keys.

    The caller must first set the function attributes
    compareLocaleKeys.locale_map (key -> locale) and
    compareLocaleKeys.default_map ((language, script) names -> default
    country name); they are only consulted when the two keys share a
    language.  Keys are ordered by language; within one language, a
    locale whose country is the default one for its language and script
    goes first, then order is by script and finally by country.
    """
    if key1 == key2:
        return 0

    if key1[0] == key2[0]:
        l1 = compareLocaleKeys.locale_map[key1]
        l2 = compareLocaleKeys.locale_map[key2]

        if (l1.language, l1.script) in compareLocaleKeys.default_map:
            default = compareLocaleKeys.default_map[(l1.language, l1.script)]
            if l1.country == default:
                return -1
            if l2.country == default:
                return 1

        if key1[1] != key2[1]:
            if (l2.language, l2.script) in compareLocaleKeys.default_map:
                default = compareLocaleKeys.default_map[(l2.language, l2.script)]
                if l2.country == default:
                    return 1
                if l1.country == default:
                    return -1

        if key1[1] != key2[1]:
            return key1[1] - key2[1]
    else:
        return key1[0] - key2[0]

    return key1[2] - key2[2]


def languageCount(language_id, locale_map):
    """Number of keys of locale_map whose first entry is language_id."""
    return sum(1 for key in locale_map if key[0] == language_id)

def unicode2hex(s):
    """List the UTF-16 code units of s, each as a hex() string.

    A code-point above 0xFFFF contributes two entries (its surrogate
    pair); any other contributes one.
    """
    lst = []
    for x in s:
        v = ord(x)
        if v > 0xFFFF:
            # make a surrogate pair
            # copied from qchar.h
            high = (v >> 10) + 0xd7c0
            low = (v % 0x400 + 0xdc00)
            lst.append(hex(high))
            lst.append(hex(low))
        else:
            lst.append(hex(v))
    return lst

class StringDataToken:
    """The (index, length) of one string within a StringData table.

    Construction raises Error if either number is above 0xFFFF.
    """
    def __init__(self, index, length):
        if index > 0xFFFF or length > 0xFFFF:
            raise Error("Position exceeds ushort range: %d,%d " % (index, length))
        self.index = index
        self.length = length
    def __str__(self):
        return " %d,%d " % (self.index, self.length)

class StringData:
    """Accumulates strings, as hex code units, into one shared table.

    data is the flat list of code units so far, hash remembers the
    token already issued for each string and name labels the table.
    """
    def __init__(self, name):
        self.data = []
        self.hash = {}
        self.name = name
    def append(self, s):
        """Add s to the table (once) and return its StringDataToken.

        A string seen before gets the token issued the first time.
        Exits the program if the table or the string is too big for
        16-bit positions.
        """
        if s in self.hash:
            return self.hash[s]

        lst = unicode2hex(s)
        index = len(self.data)
        if index > 65535:
            print("\n\n\n#error Data index is too big!")
            sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index)
            sys.exit(1)
        size = len(lst)
        if size >= 65535:
            print("\n\n\n#error Data is too big!")
            sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size)
            sys.exit(1)
        token = None
        try:
            token = StringDataToken(index, size)
        except Error as e:
            sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s))
            sys.exit(1)
        self.hash[s] = token
        self.data += lst
        return token

def escapedString(s):
    """Return s as one or more C string literals, one per line.

    Double quotes are backslash-escaped, characters outside ASCII are
    written as \\x escapes and the text ends with \\0.  A new literal is
    started once the current one is more than 80 characters long.
    """
    s = s.replace('"', '\\"')

    line = ""
    need_escape = False
    result = ""
    for c in s:
        # A C hex escape swallows every hex digit that follows it, so a
        # hex digit (0-9 as well as a-f) straight after one must itself
        # be written as an escape.
        if ord(c) < 128 and not (need_escape and c in '0123456789abcdefABCDEF'):
            line += c
            need_escape = False
        else:
            line += "\\x%02x" % (ord(c))
            need_escape = True
        if len(line) > 80:
            result = result + "\n" + '"' + line + '"'
            line = ""
    line += "\\0"
    result = result + "\n" + '"' + line + '"'
    if result[0] == "\n":
        result = result[1:]
    return result

def printEscapedString(s):
    """Print escapedString(s) to standard output."""
    print(escapedString(s))

def currencyIsoCodeData(s):
    """Brace-enclosed list of the code-points of s; '{0,0,0}' if s is empty."""
    if s:
        return '{' + ",".join(str(ord(x)) for x in s) + '}'
    return "{0,0,0}"

def usage():
    """Print the command-line synopsis and exit with status 1."""
    print("Usage: qlocalexml2cpp.py <path-to-locale.xml> <path-to-qtbase-src-tree>")
    sys.exit(1)

GENERATED_BLOCK_START = "// GENERATED PART STARTS HERE\n"
GENERATED_BLOCK_END = "// GENERATED PART ENDS HERE\n"
xml.dom.minidom.parse(localexml) + language_map = loadMap(doc, 'language') + script_map = loadMap(doc, 'script') + country_map = loadMap(doc, 'country') + likely_subtags_map = loadLikelySubtagsMap(doc) + default_map = {} + for key in likely_subtags_map.keys(): + tmp = likely_subtags_map[key] + if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry": + default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2] + locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map) + dupes = findDupes(language_map, country_map) + + cldr_version = firstChildText(doc.documentElement, "version") + + data_temp_file.write(""" +/* + This part of the file was generated on %s from the + Common Locale Data Repository v%s + + http://www.unicode.org/cldr/ + + Do not edit this section: instead regenerate it using + cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or + edited) CLDR data; see qtbase/util/locale_database/. +*/ + +""" % (str(datetime.date.today()), cldr_version) ) + + # Likely subtags map + data_temp_file.write("static const QLocaleId likely_subtags[] = {\n") + index = 0 + for key in likely_subtags_map.keys(): + tmp = likely_subtags_map[key] + from_language = languageNameToId(tmp["from"][0], language_map) + from_script = scriptNameToId(tmp["from"][1], script_map) + from_country = countryNameToId(tmp["from"][2], country_map) + to_language = languageNameToId(tmp["to"][0], language_map) + to_script = scriptNameToId(tmp["to"][1], script_map) + to_country = countryNameToId(tmp["to"][2], country_map) + + cmnt_from = "" + if from_language != 0: + cmnt_from = cmnt_from + language_map[from_language][1] + else: + cmnt_from = cmnt_from + "und" + if from_script != 0: + if cmnt_from: + cmnt_from = cmnt_from + "_" + cmnt_from = cmnt_from + script_map[from_script][1] + if from_country != 0: + if cmnt_from: + cmnt_from = cmnt_from + "_" + cmnt_from = cmnt_from + country_map[from_country][1] + cmnt_to = "" + if 
to_language != 0: + cmnt_to = cmnt_to + language_map[to_language][1] + else: + cmnt_to = cmnt_to + "und" + if to_script != 0: + if cmnt_to: + cmnt_to = cmnt_to + "_" + cmnt_to = cmnt_to + script_map[to_script][1] + if to_country != 0: + if cmnt_to: + cmnt_to = cmnt_to + "_" + cmnt_to = cmnt_to + country_map[to_country][1] + + data_temp_file.write(" ") + data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country)) + index += 1 + if index != len(likely_subtags_map): + data_temp_file.write(",") + else: + data_temp_file.write(" ") + data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to)) + data_temp_file.write("};\n") + + data_temp_file.write("\n") + + # Locale index + data_temp_file.write("static const quint16 locale_index[] = {\n") + index = 0 + for key in language_map.keys(): + i = 0 + count = languageCount(key, locale_map) + if count > 0: + i = index + index += count + data_temp_file.write("%6d, // %s\n" % (i, language_map[key][0])) + data_temp_file.write(" 0 // trailing 0\n") + data_temp_file.write("};\n\n") + + list_pattern_part_data = StringData('list_pattern_part_data') + date_format_data = StringData('date_format_data') + time_format_data = StringData('time_format_data') + months_data = StringData('months_data') + days_data = StringData('days_data') + am_data = StringData('am_data') + pm_data = StringData('pm_data') + byte_unit_data = StringData('byte_unit_data') + currency_symbol_data = StringData('currency_symbol_data') + currency_display_name_data = StringData('currency_display_name_data') + currency_format_data = StringData('currency_format_data') + endonyms_data = StringData('endonyms_data') + + # Locale data + data_temp_file.write("static const QLocaleData locale_data[] = {\n") + # Table headings: keep each label centred in its field, matching line_format: + data_temp_file.write(' // ' + # Width 6 + comma: + + ' lang ' # IDs + + 'script ' + + ' terr ' + + ' dec ' # 
Numeric punctuation: + + ' group ' + + ' list ' # List delimiter + + ' prcnt ' # Arithmetic symbols: + + ' zero ' + + ' minus ' + + ' plus ' + + ' exp ' + # Width 8 + comma - to make space for these wide labels ! + + ' quotOpn ' # Quotation marks + + ' quotEnd ' + + 'altQtOpn ' + + 'altQtEnd ' + # Width 11 + comma: + + ' lpStart ' # List pattern + + ' lpMid ' + + ' lpEnd ' + + ' lpTwo ' + + ' sDtFmt ' # Date format + + ' lDtFmt ' + + ' sTmFmt ' # Time format + + ' lTmFmt ' + + ' ssMonth ' # Months + + ' slMonth ' + + ' snMonth ' + + ' sMonth ' + + ' lMonth ' + + ' nMonth ' + + ' ssDays ' # Days + + ' slDays ' + + ' snDays ' + + ' sDays ' + + ' lDays ' + + ' nDays ' + + ' am ' # am/pm indicators + + ' pm ' + # Width 8 + comma + + ' byte ' + + ' siQuant ' + + 'iecQuant ' + # Width 8+4 + comma + + ' currISO ' + # Width 11 + comma: + + ' currSym ' # Currency formatting: + + ' currDsply ' + + ' currFmt ' + + ' currFmtNeg ' + + ' endoLang ' # Name of language in itself, and of country: + + ' endoCntry ' + # Width 6 + comma: + + 'curDgt ' # Currency number representation: + + 'curRnd ' + + 'dow1st ' # First day of week + + ' wknd+ ' # Week-end start/end days: + + ' wknd-' + # No trailing space on last entry (be sure to + # pad before adding anything after it). 
+ + '\n') + + locale_keys = locale_map.keys() + compareLocaleKeys.default_map = default_map + compareLocaleKeys.locale_map = locale_map + locale_keys.sort(compareLocaleKeys) + + line_format = (' { ' + # Locale-identifier: + + '%6d,' * 3 + # Numeric formats, list delimiter: + + '%6d,' * 8 + # Quotation marks: + + '%8d,' * 4 + # List patterns, date/time formats, month/day names, am/pm: + + '%11s,' * 22 + # SI/IEC byte-unit abbreviations: + + '%8s,' * 3 + # Currency ISO code: + + ' %10s, ' + # Currency and endonyms + + '%11s,' * 6 + # Currency formatting: + + '%6d,%6d' + # Day of week and week-end: + + ',%6d' * 3 + + ' }') + for key in locale_keys: + l = locale_map[key] + data_temp_file.write(line_format + % (key[0], key[1], key[2], + l.decimal, + l.group, + l.listDelim, + l.percent, + l.zero, + l.minus, + l.plus, + l.exp, + l.quotationStart, + l.quotationEnd, + l.alternateQuotationStart, + l.alternateQuotationEnd, + list_pattern_part_data.append(l.listPatternPartStart), + list_pattern_part_data.append(l.listPatternPartMiddle), + list_pattern_part_data.append(l.listPatternPartEnd), + list_pattern_part_data.append(l.listPatternPartTwo), + date_format_data.append(l.shortDateFormat), + date_format_data.append(l.longDateFormat), + time_format_data.append(l.shortTimeFormat), + time_format_data.append(l.longTimeFormat), + months_data.append(l.standaloneShortMonths), + months_data.append(l.standaloneLongMonths), + months_data.append(l.standaloneNarrowMonths), + months_data.append(l.shortMonths), + months_data.append(l.longMonths), + months_data.append(l.narrowMonths), + days_data.append(l.standaloneShortDays), + days_data.append(l.standaloneLongDays), + days_data.append(l.standaloneNarrowDays), + days_data.append(l.shortDays), + days_data.append(l.longDays), + days_data.append(l.narrowDays), + am_data.append(l.am), + pm_data.append(l.pm), + byte_unit_data.append(l.byte_unit), + byte_unit_data.append(l.byte_si_quantified), + byte_unit_data.append(l.byte_iec_quantified), + 
currencyIsoCodeData(l.currencyIsoCode), + currency_symbol_data.append(l.currencySymbol), + currency_display_name_data.append(l.currencyDisplayName), + currency_format_data.append(l.currencyFormat), + currency_format_data.append(l.currencyNegativeFormat), + endonyms_data.append(l.languageEndonym), + endonyms_data.append(l.countryEndonym), + l.currencyDigits, + l.currencyRounding, + l.firstDayOfWeek, + l.weekendStart, + l.weekendEnd) + + ", // %s/%s/%s\n" % (l.language, l.script, l.country)) + data_temp_file.write(line_format # All zeros, matching the format: + % ( (0,) * (3 + 8 + 4) + ("0,0",) * (22 + 3) + + (currencyIsoCodeData(0),) + + ("0,0",) * 6 + (0,) * (2 + 3)) + + " // trailing 0s\n") + data_temp_file.write("};\n") + + # StringData tables: + for data in (list_pattern_part_data, date_format_data, + time_format_data, months_data, days_data, + byte_unit_data, am_data, pm_data, currency_symbol_data, + currency_display_name_data, currency_format_data, + endonyms_data): + data_temp_file.write("\nstatic const ushort %s[] = {\n" % data.name) + data_temp_file.write(wrap_list(data.data)) + data_temp_file.write("\n};\n") + + data_temp_file.write("\n") + + # Language name list + data_temp_file.write("static const char language_name_list[] =\n") + data_temp_file.write('"Default\\0"\n') + for key in language_map.keys(): + if key == 0: + continue + data_temp_file.write('"' + language_map[key][0] + '\\0"\n') + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Language name index + data_temp_file.write("static const quint16 language_name_index[] = {\n") + data_temp_file.write(" 0, // AnyLanguage\n") + index = 8 + for key in language_map.keys(): + if key == 0: + continue + language = language_map[key][0] + data_temp_file.write("%6d, // %s\n" % (index, language)) + index += len(language) + 1 + data_temp_file.write("};\n") + + data_temp_file.write("\n") + + # Script name list + data_temp_file.write("static const char script_name_list[] =\n") + 
data_temp_file.write('"Default\\0"\n') + for key in script_map.keys(): + if key == 0: + continue + data_temp_file.write('"' + script_map[key][0] + '\\0"\n') + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Script name index + data_temp_file.write("static const quint16 script_name_index[] = {\n") + data_temp_file.write(" 0, // AnyScript\n") + index = 8 + for key in script_map.keys(): + if key == 0: + continue + script = script_map[key][0] + data_temp_file.write("%6d, // %s\n" % (index, script)) + index += len(script) + 1 + data_temp_file.write("};\n") + + data_temp_file.write("\n") + + # Country name list + data_temp_file.write("static const char country_name_list[] =\n") + data_temp_file.write('"Default\\0"\n') + for key in country_map.keys(): + if key == 0: + continue + data_temp_file.write('"' + country_map[key][0] + '\\0"\n') + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Country name index + data_temp_file.write("static const quint16 country_name_index[] = {\n") + data_temp_file.write(" 0, // AnyCountry\n") + index = 8 + for key in country_map.keys(): + if key == 0: + continue + country = country_map[key][0] + data_temp_file.write("%6d, // %s\n" % (index, country)) + index += len(country) + 1 + data_temp_file.write("};\n") + + data_temp_file.write("\n") + + # Language code list + data_temp_file.write("static const unsigned char language_code_list[] =\n") + for key in language_map.keys(): + code = language_map[key][1] + if len(code) == 2: + code += r"\0" + data_temp_file.write('"%2s" // %s\n' % (code, language_map[key][0])) + data_temp_file.write(";\n") + + data_temp_file.write("\n") + + # Script code list + data_temp_file.write("static const unsigned char script_code_list[] =\n") + for key in script_map.keys(): + code = script_map[key][1] + for i in range(4 - len(code)): + code += "\\0" + data_temp_file.write('"%2s" // %s\n' % (code, script_map[key][0])) + data_temp_file.write(";\n") + + # Country code list + 
data_temp_file.write("static const unsigned char country_code_list[] =\n") + for key in country_map.keys(): + code = country_map[key][1] + if len(code) == 2: + code += "\\0" + data_temp_file.write('"%2s" // %s\n' % (code, country_map[key][0])) + data_temp_file.write(";\n") + + data_temp_file.write("\n") + data_temp_file.write(GENERATED_BLOCK_END) + s = qlocaledata_file.readline() + # skip until end of the old block + while s and s != GENERATED_BLOCK_END: + s = qlocaledata_file.readline() + + s = qlocaledata_file.readline() + while s: + data_temp_file.write(s) + s = qlocaledata_file.readline() + data_temp_file.close() + qlocaledata_file.close() + + os.remove(qtsrcdir + "/src/corelib/tools/qlocale_data_p.h") + os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale_data_p.h") + + # qlocale.h + + (qlocaleh_temp_file, qlocaleh_temp_file_path) = tempfile.mkstemp("qlocale.h", dir=qtsrcdir) + qlocaleh_temp_file = os.fdopen(qlocaleh_temp_file, "w") + qlocaleh_file = open(qtsrcdir + "/src/corelib/tools/qlocale.h", "r") + s = qlocaleh_file.readline() + while s and s != GENERATED_BLOCK_START: + qlocaleh_temp_file.write(s) + s = qlocaleh_file.readline() + qlocaleh_temp_file.write(GENERATED_BLOCK_START) + qlocaleh_temp_file.write("// see qlocale_data_p.h for more info on generated data\n") + + # Language enum + qlocaleh_temp_file.write(" enum Language {\n") + language = None + for key, value in language_map.items(): + language = fixedLanguageName(value[0], dupes) + qlocaleh_temp_file.write(" " + language + " = " + str(key) + ",\n") + + qlocaleh_temp_file.write("\n " + + ",\n ".join('%s = %s' % pair + for pair in sorted(language_aliases.items())) + + ",\n") + qlocaleh_temp_file.write("\n") + qlocaleh_temp_file.write(" LastLanguage = " + language + "\n") + qlocaleh_temp_file.write(" };\n") + + qlocaleh_temp_file.write("\n") + + # Script enum + qlocaleh_temp_file.write(" enum Script {\n") + script = None + for key, value in script_map.items(): + script = 
fixedScriptName(value[0], dupes) + qlocaleh_temp_file.write(" " + script + " = " + str(key) + ",\n") + qlocaleh_temp_file.write("\n " + + ",\n ".join('%s = %s' % pair + for pair in sorted(script_aliases.items())) + + ",\n") + qlocaleh_temp_file.write("\n") + qlocaleh_temp_file.write(" LastScript = " + script + "\n") + qlocaleh_temp_file.write(" };\n") + + # Country enum + qlocaleh_temp_file.write(" enum Country {\n") + country = None + for key, value in country_map.items(): + country = fixedCountryName(value[0], dupes) + qlocaleh_temp_file.write(" " + country + " = " + str(key) + ",\n") + qlocaleh_temp_file.write("\n " + + ",\n ".join('%s = %s' % pair + for pair in sorted(country_aliases.items())) + + ",\n") + qlocaleh_temp_file.write("\n") + qlocaleh_temp_file.write(" LastCountry = " + country + "\n") + qlocaleh_temp_file.write(" };\n") + + qlocaleh_temp_file.write(GENERATED_BLOCK_END) + s = qlocaleh_file.readline() + # skip until end of the old block + while s and s != GENERATED_BLOCK_END: + s = qlocaleh_file.readline() + + s = qlocaleh_file.readline() + while s: + qlocaleh_temp_file.write(s) + s = qlocaleh_file.readline() + qlocaleh_temp_file.close() + qlocaleh_file.close() + + os.remove(qtsrcdir + "/src/corelib/tools/qlocale.h") + os.rename(qlocaleh_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.h") + + # qlocale.qdoc + + (qlocaleqdoc_temp_file, qlocaleqdoc_temp_file_path) = tempfile.mkstemp("qlocale.qdoc", dir=qtsrcdir) + qlocaleqdoc_temp_file = os.fdopen(qlocaleqdoc_temp_file, "w") + qlocaleqdoc_file = open(qtsrcdir + "/src/corelib/tools/qlocale.qdoc", "r") + s = qlocaleqdoc_file.readline() + DOCSTRING = " QLocale's data is based on Common Locale Data Repository " + while s: + if DOCSTRING in s: + qlocaleqdoc_temp_file.write(DOCSTRING + "v" + cldr_version + ".\n") + else: + qlocaleqdoc_temp_file.write(s) + s = qlocaleqdoc_file.readline() + qlocaleqdoc_temp_file.close() + qlocaleqdoc_file.close() + + os.remove(qtsrcdir + 
"/src/corelib/tools/qlocale.qdoc") + os.rename(qlocaleqdoc_temp_file_path, qtsrcdir + "/src/corelib/tools/qlocale.qdoc") + +if __name__ == "__main__": + main() diff --git a/util/locale_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp new file mode 100644 index 0000000000..d380d01e09 --- /dev/null +++ b/util/locale_database/testlocales/localemodel.cpp @@ -0,0 +1,449 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the utils of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#include "localemodel.h" + +#include <QLocale> +#include <QDate> +#include <qdebug.h> + +static const int g_model_cols = 6; + +struct LocaleListItem +{ + int language; + int country; +}; + +const LocaleListItem g_locale_list[] = { + { 1, 0 }, // C/AnyCountry + { 3, 69 }, // Afan/Ethiopia + { 3, 111 }, // Afan/Kenya + { 4, 59 }, // Afar/Djibouti + { 4, 67 }, // Afar/Eritrea + { 4, 69 }, // Afar/Ethiopia + { 5, 195 }, // Afrikaans/SouthAfrica + { 5, 148 }, // Afrikaans/Namibia + { 6, 2 }, // Albanian/Albania + { 7, 69 }, // Amharic/Ethiopia + { 8, 186 }, // Arabic/SaudiArabia + { 8, 3 }, // Arabic/Algeria + { 8, 17 }, // Arabic/Bahrain + { 8, 64 }, // Arabic/Egypt + { 8, 103 }, // Arabic/Iraq + { 8, 109 }, // Arabic/Jordan + { 8, 115 }, // Arabic/Kuwait + { 8, 119 }, // Arabic/Lebanon + { 8, 122 }, // Arabic/LibyanArabJamahiriya + { 8, 145 }, // Arabic/Morocco + { 8, 162 }, // Arabic/Oman + { 8, 175 }, // Arabic/Qatar + { 8, 201 }, // Arabic/Sudan + { 8, 207 }, // Arabic/SyrianArabRepublic + { 8, 216 }, // Arabic/Tunisia + { 8, 223 }, // Arabic/UnitedArabEmirates + { 8, 237 }, // Arabic/Yemen + { 9, 11 }, // Armenian/Armenia + { 10, 100 }, // Assamese/India + { 12, 15 }, // Azerbaijani/Azerbaijan + { 14, 197 }, // Basque/Spain + { 15, 18 }, // Bengali/Bangladesh + { 15, 100 }, // Bengali/India + { 16, 25 }, // Bhutani/Bhutan + { 20, 33 }, // Bulgarian/Bulgaria + { 22, 20 }, // Byelorussian/Belarus + { 23, 36 }, // Cambodian/Cambodia + { 24, 197 }, // Catalan/Spain + { 25, 44 }, // Chinese/China + { 25, 97 }, // Chinese/HongKong + { 25, 126 }, // Chinese/Macau + { 25, 190 }, // Chinese/Singapore + { 25, 208 }, // Chinese/Taiwan + { 27, 54 }, // Croatian/Croatia + { 28, 57 }, // Czech/CzechRepublic + { 29, 58 }, // Danish/Denmark + { 30, 151 }, // Dutch/Netherlands + { 30, 21 }, // Dutch/Belgium + { 31, 225 }, // English/UnitedStates + { 31, 4 }, // 
English/AmericanSamoa + { 31, 13 }, // English/Australia + { 31, 21 }, // English/Belgium + { 31, 22 }, // English/Belize + { 31, 28 }, // English/Botswana + { 31, 38 }, // English/Canada + { 31, 89 }, // English/Guam + { 31, 97 }, // English/HongKong + { 31, 100 }, // English/India + { 31, 104 }, // English/Ireland + { 31, 107 }, // English/Jamaica + { 31, 133 }, // English/Malta + { 31, 134 }, // English/MarshallIslands + { 31, 148 }, // English/Namibia + { 31, 154 }, // English/NewZealand + { 31, 160 }, // English/NorthernMarianaIslands + { 31, 163 }, // English/Pakistan + { 31, 170 }, // English/Philippines + { 31, 190 }, // English/Singapore + { 31, 195 }, // English/SouthAfrica + { 31, 215 }, // English/TrinidadAndTobago + { 31, 224 }, // English/UnitedKingdom + { 31, 226 }, // English/UnitedStatesMinorOutlyingIslands + { 31, 234 }, // English/USVirginIslands + { 31, 240 }, // English/Zimbabwe + { 33, 68 }, // Estonian/Estonia + { 34, 71 }, // Faroese/FaroeIslands + { 36, 73 }, // Finnish/Finland + { 37, 74 }, // French/France + { 37, 21 }, // French/Belgium + { 37, 38 }, // French/Canada + { 37, 125 }, // French/Luxembourg + { 37, 142 }, // French/Monaco + { 37, 206 }, // French/Switzerland + { 40, 197 }, // Galician/Spain + { 41, 81 }, // Georgian/Georgia + { 42, 82 }, // German/Germany + { 42, 14 }, // German/Austria + { 42, 21 }, // German/Belgium + { 42, 123 }, // German/Liechtenstein + { 42, 125 }, // German/Luxembourg + { 42, 206 }, // German/Switzerland + { 43, 85 }, // Greek/Greece + { 43, 56 }, // Greek/Cyprus + { 44, 86 }, // Greenlandic/Greenland + { 46, 100 }, // Gujarati/India + { 47, 83 }, // Hausa/Ghana + { 47, 156 }, // Hausa/Niger + { 47, 157 }, // Hausa/Nigeria + { 48, 105 }, // Hebrew/Israel + { 49, 100 }, // Hindi/India + { 50, 98 }, // Hungarian/Hungary + { 51, 99 }, // Icelandic/Iceland + { 52, 101 }, // Indonesian/Indonesia + { 57, 104 }, // Irish/Ireland + { 58, 106 }, // Italian/Italy + { 58, 206 }, // Italian/Switzerland + { 59, 108 
}, // Japanese/Japan + { 61, 100 }, // Kannada/India + { 63, 110 }, // Kazakh/Kazakhstan + { 64, 179 }, // Kinyarwanda/Rwanda + { 65, 116 }, // Kirghiz/Kyrgyzstan + { 66, 114 }, // Korean/RepublicOfKorea + { 67, 102 }, // Kurdish/Iran + { 67, 103 }, // Kurdish/Iraq + { 67, 207 }, // Kurdish/SyrianArabRepublic + { 67, 217 }, // Kurdish/Turkey + { 69, 117 }, // Laothian/Lao + { 71, 118 }, // Latvian/Latvia + { 72, 49 }, // Lingala/DemocraticRepublicOfCongo + { 72, 50 }, // Lingala/PeoplesRepublicOfCongo + { 73, 124 }, // Lithuanian/Lithuania + { 74, 127 }, // Macedonian/Macedonia + { 76, 130 }, // Malay/Malaysia + { 76, 32 }, // Malay/BruneiDarussalam + { 77, 100 }, // Malayalam/India + { 78, 133 }, // Maltese/Malta + { 80, 100 }, // Marathi/India + { 82, 143 }, // Mongolian/Mongolia + { 84, 150 }, // Nepali/Nepal + { 85, 161 }, // Norwegian/Norway + { 87, 100 }, // Oriya/India + { 88, 1 }, // Pashto/Afghanistan + { 89, 102 }, // Persian/Iran + { 89, 1 }, // Persian/Afghanistan + { 90, 172 }, // Polish/Poland + { 91, 173 }, // Portuguese/Portugal + { 91, 30 }, // Portuguese/Brazil + { 92, 100 }, // Punjabi/India + { 92, 163 }, // Punjabi/Pakistan + { 95, 177 }, // Romanian/Romania + { 96, 178 }, // Russian/RussianFederation + { 96, 222 }, // Russian/Ukraine + { 99, 100 }, // Sanskrit/India + { 100, 241 }, // Serbian/SerbiaAndMontenegro + { 100, 27 }, // Serbian/BosniaAndHerzegowina + { 100, 238 }, // Serbian/Yugoslavia + { 101, 241 }, // SerboCroatian/SerbiaAndMontenegro + { 101, 27 }, // SerboCroatian/BosniaAndHerzegowina + { 101, 238 }, // SerboCroatian/Yugoslavia + { 102, 195 }, // Sesotho/SouthAfrica + { 103, 195 }, // Setswana/SouthAfrica + { 107, 195 }, // Siswati/SouthAfrica + { 108, 191 }, // Slovak/Slovakia + { 109, 192 }, // Slovenian/Slovenia + { 110, 194 }, // Somali/Somalia + { 110, 59 }, // Somali/Djibouti + { 110, 69 }, // Somali/Ethiopia + { 110, 111 }, // Somali/Kenya + { 111, 197 }, // Spanish/Spain + { 111, 10 }, // Spanish/Argentina + { 111, 26 }, 
// Spanish/Bolivia + { 111, 43 }, // Spanish/Chile + { 111, 47 }, // Spanish/Colombia + { 111, 52 }, // Spanish/CostaRica + { 111, 61 }, // Spanish/DominicanRepublic + { 111, 63 }, // Spanish/Ecuador + { 111, 65 }, // Spanish/ElSalvador + { 111, 90 }, // Spanish/Guatemala + { 111, 96 }, // Spanish/Honduras + { 111, 139 }, // Spanish/Mexico + { 111, 155 }, // Spanish/Nicaragua + { 111, 166 }, // Spanish/Panama + { 111, 168 }, // Spanish/Paraguay + { 111, 169 }, // Spanish/Peru + { 111, 174 }, // Spanish/PuertoRico + { 111, 225 }, // Spanish/UnitedStates + { 111, 227 }, // Spanish/Uruguay + { 111, 231 }, // Spanish/Venezuela + { 113, 111 }, // Swahili/Kenya + { 113, 210 }, // Swahili/Tanzania + { 114, 205 }, // Swedish/Sweden + { 114, 73 }, // Swedish/Finland + { 116, 209 }, // Tajik/Tajikistan + { 117, 100 }, // Tamil/India + { 118, 178 }, // Tatar/RussianFederation + { 119, 100 }, // Telugu/India + { 120, 211 }, // Thai/Thailand + { 122, 67 }, // Tigrinya/Eritrea + { 122, 69 }, // Tigrinya/Ethiopia + { 124, 195 }, // Tsonga/SouthAfrica + { 125, 217 }, // Turkish/Turkey + { 129, 222 }, // Ukrainian/Ukraine + { 130, 100 }, // Urdu/India + { 130, 163 }, // Urdu/Pakistan + { 131, 228 }, // Uzbek/Uzbekistan + { 131, 1 }, // Uzbek/Afghanistan + { 132, 232 }, // Vietnamese/VietNam + { 134, 224 }, // Welsh/UnitedKingdom + { 136, 195 }, // Xhosa/SouthAfrica + { 138, 157 }, // Yoruba/Nigeria + { 140, 195 }, // Zulu/SouthAfrica + { 141, 161 }, // Nynorsk/Norway + { 142, 27 }, // Bosnian/BosniaAndHerzegowina + { 143, 131 }, // Divehi/Maldives + { 144, 224 }, // Manx/UnitedKingdom + { 145, 224 }, // Cornish/UnitedKingdom + { 146, 83 }, // Akan/Ghana + { 147, 100 }, // Konkani/India + { 148, 83 }, // Ga/Ghana + { 149, 157 }, // Igbo/Nigeria + { 150, 111 }, // Kamba/Kenya + { 151, 207 }, // Syriac/SyrianArabRepublic + { 152, 67 }, // Blin/Eritrea + { 153, 67 }, // Geez/Eritrea + { 153, 69 }, // Geez/Ethiopia + { 154, 157 }, // Koro/Nigeria + { 155, 69 }, // Sidamo/Ethiopia + { 
156, 157 }, // Atsam/Nigeria + { 157, 67 }, // Tigre/Eritrea + { 158, 157 }, // Jju/Nigeria + { 159, 106 }, // Friulian/Italy + { 160, 195 }, // Venda/SouthAfrica + { 161, 83 }, // Ewe/Ghana + { 161, 212 }, // Ewe/Togo + { 163, 225 }, // Hawaiian/UnitedStates + { 164, 157 }, // Tyap/Nigeria + { 165, 129 }, // Chewa/Malawi +}; +static const int g_locale_list_count = sizeof(g_locale_list)/sizeof(g_locale_list[0]); + +LocaleModel::LocaleModel(QObject *parent) + : QAbstractItemModel(parent) +{ + m_data_list.append(1234.5678); + m_data_list.append(QDate::currentDate()); + m_data_list.append(QDate::currentDate()); + m_data_list.append(QTime::currentTime()); + m_data_list.append(QTime::currentTime()); +} + +QVariant LocaleModel::data(const QModelIndex &index, int role) const +{ + if (!index.isValid() + || role != Qt::DisplayRole && role != Qt::EditRole && role != Qt::ToolTipRole + || index.column() >= g_model_cols + || index.row() >= g_locale_list_count + 2) + return QVariant(); + + QVariant data; + if (index.column() < g_model_cols - 1) + data = m_data_list.at(index.column()); + + if (index.row() == 0) { + if (role == Qt::ToolTipRole) + return QVariant(); + switch (index.column()) { + case 0: + return data.toDouble(); + case 1: + return data.toDate(); + case 2: + return data.toDate(); + case 3: + return data.toTime(); + case 4: + return data.toTime(); + case 5: + return QVariant(); + default: + break; + } + } else { + QLocale locale; + if (index.row() == 1) { + locale = QLocale::system(); + } else { + LocaleListItem item = g_locale_list[index.row() - 2]; + locale = QLocale((QLocale::Language)item.language, (QLocale::Country)item.country); + } + + switch (index.column()) { + case 0: + if (role == Qt::ToolTipRole) + return QVariant(); + return locale.toString(data.toDouble()); + case 1: + if (role == Qt::ToolTipRole) + return locale.dateFormat(QLocale::LongFormat); + return locale.toString(data.toDate(), QLocale::LongFormat); + case 2: + if (role == Qt::ToolTipRole) + 
return locale.dateFormat(QLocale::ShortFormat); + return locale.toString(data.toDate(), QLocale::ShortFormat); + case 3: + if (role == Qt::ToolTipRole) + return locale.timeFormat(QLocale::LongFormat); + return locale.toString(data.toTime(), QLocale::LongFormat); + case 4: + if (role == Qt::ToolTipRole) + return locale.timeFormat(QLocale::ShortFormat); + return locale.toString(data.toTime(), QLocale::ShortFormat); + case 5: + if (role == Qt::ToolTipRole) + return QVariant(); + return locale.name(); + default: + break; + } + } + + return QVariant(); +} + +QVariant LocaleModel::headerData(int section, Qt::Orientation orientation, int role) const +{ + if (role != Qt::DisplayRole) + return QVariant(); + + if (orientation == Qt::Horizontal) { + switch (section) { + case 0: + return QLatin1String("Double"); + case 1: + return QLatin1String("Long Date"); + case 2: + return QLatin1String("Short Date"); + case 3: + return QLatin1String("Long Time"); + case 4: + return QLatin1String("Short Time"); + case 5: + return QLatin1String("Name"); + default: + break; + } + } else { + if (section >= g_locale_list_count + 2) + return QVariant(); + if (section == 0) { + return QLatin1String("Input"); + } else if (section == 1) { + return QLatin1String("System"); + } else { + LocaleListItem item = g_locale_list[section - 2]; + return QLocale::languageToString((QLocale::Language)item.language) + + QLatin1Char('/') + + QLocale::countryToString((QLocale::Country)item.country); + } + } + + return QVariant(); +} + +QModelIndex LocaleModel::index(int row, int column, + const QModelIndex &parent) const +{ + if (parent.isValid() + || row >= g_locale_list_count + 2 + || column >= g_model_cols) + return QModelIndex(); + + return createIndex(row, column); +} + +QModelIndex LocaleModel::parent(const QModelIndex&) const +{ + return QModelIndex(); +} + +int LocaleModel::columnCount(const QModelIndex&) const +{ + return g_model_cols; +} + +int LocaleModel::rowCount(const QModelIndex &parent) const +{ + 
if (parent.isValid()) + return 0; + return g_locale_list_count + 2; +} + +Qt::ItemFlags LocaleModel::flags(const QModelIndex &index) const +{ + if (!index.isValid()) + return 0; + if (index.row() == 0 && index.column() == g_model_cols - 1) + return 0; + if (index.row() == 0) + return QAbstractItemModel::flags(index) | Qt::ItemIsEditable; + return QAbstractItemModel::flags(index); +} + +bool LocaleModel::setData(const QModelIndex &index, const QVariant &value, int role) +{ + if (!index.isValid() + || index.row() != 0 + || index.column() >= g_model_cols - 1 + || role != Qt::EditRole + || m_data_list.at(index.column()).type() != value.type()) + return false; + + m_data_list[index.column()] = value; + emit dataChanged(createIndex(1, index.column()), + createIndex(g_locale_list_count, index.column())); + + return true; +} diff --git a/util/locale_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h new file mode 100644 index 0000000000..b24fc5f4c6 --- /dev/null +++ b/util/locale_database/testlocales/localemodel.h @@ -0,0 +1,56 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the utils of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. 
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef LOCALEMODEL_H +#define LOCALEMODEL_H + +#include <QAbstractItemModel> +#include <QList> +#include <QVariant> + +class LocaleModel : public QAbstractItemModel +{ + Q_OBJECT +public: + LocaleModel(QObject *parent = 0); + + virtual int columnCount(const QModelIndex &parent = QModelIndex()) const; + virtual QVariant data(const QModelIndex &index, int role = Qt::DisplayRole) const; + virtual QModelIndex index(int row, int column, + const QModelIndex &parent = QModelIndex()) const; + virtual QModelIndex parent(const QModelIndex &index) const; + virtual int rowCount(const QModelIndex &parent = QModelIndex()) const; + virtual QVariant headerData(int section, Qt::Orientation orientation, + int role = Qt::DisplayRole ) const; + virtual Qt::ItemFlags flags(const QModelIndex &index) const; + virtual bool setData(const QModelIndex &index, const QVariant &value, + int role = Qt::EditRole); +private: + QList<QVariant> m_data_list; +}; + +#endif // LOCALEMODEL_H diff --git a/util/locale_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp new file mode 100644 index 0000000000..3ff7f73a98 --- /dev/null +++ b/util/locale_database/testlocales/localewidget.cpp @@ -0,0 +1,76 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. 
+** Contact: https://www.qt.io/licensing/ +** +** This file is part of the utils of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. 
+** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#include <QTableView> +#include <QVBoxLayout> +#include <QItemDelegate> +#include <QItemEditorFactory> +#include <QDoubleSpinBox> + +#include "localewidget.h" +#include "localemodel.h" + +class DoubleEditorCreator : public QItemEditorCreatorBase +{ +public: + QWidget *createWidget(QWidget *parent) const { + QDoubleSpinBox *w = new QDoubleSpinBox(parent); + w->setDecimals(4); + w->setRange(-10000.0, 10000.0); + return w; + } + virtual QByteArray valuePropertyName() const { + return QByteArray("value"); + } +}; + +class EditorFactory : public QItemEditorFactory +{ +public: + EditorFactory() { + static DoubleEditorCreator double_editor_creator; + registerEditor(QVariant::Double, &double_editor_creator); + } +}; + +LocaleWidget::LocaleWidget(QWidget *parent) + : QWidget(parent) +{ + m_model = new LocaleModel(this); + m_view = new QTableView(this); + + QItemDelegate *delegate = qobject_cast<QItemDelegate*>(m_view->itemDelegate()); + Q_ASSERT(delegate != 0); + static EditorFactory editor_factory; + delegate->setItemEditorFactory(&editor_factory); + + m_view->setModel(m_model); + + QVBoxLayout *layout = new QVBoxLayout(this); + layout->setMargin(0); + layout->addWidget(m_view); +} diff --git a/util/locale_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h new file mode 100644 index 0000000000..896a6e5229 --- /dev/null +++ b/util/locale_database/testlocales/localewidget.h @@ -0,0 +1,46 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the utils of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef LOCALEWIDGET_H +#define LOCALEWIDGET_H + +#include <QWidget> + +class LocaleModel; +class QTableView; + +class LocaleWidget : public QWidget +{ + Q_OBJECT +public: + LocaleWidget(QWidget *parent = 0); +private: + LocaleModel *m_model; + QTableView *m_view; +}; + +#endif // LOCALEWIDGET_H diff --git a/util/locale_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp new file mode 100644 index 0000000000..0c3c45f989 --- /dev/null +++ b/util/locale_database/testlocales/main.cpp @@ -0,0 +1,38 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the utils of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#include <QApplication> + +#include "localewidget.h" + +int main(int argc, char *argv[]) +{ + QApplication app(argc, argv); + LocaleWidget wgt; + wgt.show(); + return app.exec(); +} diff --git a/util/locale_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro new file mode 100644 index 0000000000..a9a6247f96 --- /dev/null +++ b/util/locale_database/testlocales/testlocales.pro @@ -0,0 +1,4 @@ +TARGET = testlocales +CONFIG += debug +SOURCES += localemodel.cpp localewidget.cpp main.cpp +HEADERS += localemodel.h localewidget.h
\ No newline at end of file diff --git a/util/locale_database/xpathlite.py b/util/locale_database/xpathlite.py new file mode 100644 index 0000000000..218135d7a7 --- /dev/null +++ b/util/locale_database/xpathlite.py @@ -0,0 +1,264 @@ +#!/usr/bin/env python +############################################################################# +## +## Copyright (C) 2016 The Qt Company Ltd. +## Contact: https://www.qt.io/licensing/ +## +## This file is part of the test suite of the Qt Toolkit. +## +## $QT_BEGIN_LICENSE:GPL-EXCEPT$ +## Commercial License Usage +## Licensees holding valid commercial Qt licenses may use this file in +## accordance with the commercial license agreement provided with the +## Software or, alternatively, in accordance with the terms contained in +## a written agreement between you and The Qt Company. For licensing terms +## and conditions see https://www.qt.io/terms-conditions. For further +## information use the contact form at https://www.qt.io/contact-us. +## +## GNU General Public License Usage +## Alternatively, this file may be used under the terms of the GNU +## General Public License version 3 as published by the Free Software +## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +## included in the packaging of this file. Please review the following +## information to ensure the GNU General Public License requirements will +## be met: https://www.gnu.org/licenses/gpl-3.0.html. 
##
## $QT_END_LICENSE$
##
#############################################################################

import sys
import os
import xml.dom.minidom

class DraftResolution:
    """Orders the CLDR draft levels so that they can be compared."""
    # See http://www.unicode.org/cldr/process.html for description
    unconfirmed = 'unconfirmed'
    provisional = 'provisional'
    contributed = 'contributed'
    approved = 'approved'
    _values = { unconfirmed : 1, provisional : 2, contributed : 3, approved : 4 }
    def __init__(self, resolution):
        self.resolution = resolution
    def toInt(self):
        """Return the rank of this level; KeyError for an unknown level."""
        return DraftResolution._values[self.resolution]

class Error(Exception):
    """Fatal lookup error; str() of it is the message it was built with.

    Derives from Exception so that it can be raised on Python 3 as well
    as Python 2 (a plain class cannot be raised on Python 3)."""
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg

def _attributeValue(node, name):
    """Return the value of element node's attribute name, or None if the
    element has no such attribute."""
    attr = node.getAttributeNode(name)
    if attr is None:
        return None
    return attr.nodeValue

def _parseTagSpec(tag_spec):
    """Split one path component into (tag_name, arg_name, arg_value).

    'tag' yields ('tag', 'type', ''), 'tag[value]' yields
    ('tag', 'type', 'value') and 'tag[name=value]' yields
    ('tag', 'name', 'value'); any '@' or quote decoration is stripped
    from the attribute name."""
    tag_name, arg_name, arg_value = tag_spec, 'type', ''
    left_bracket = tag_spec.find('[')
    if left_bracket != -1:
        tag_name = tag_spec[:left_bracket]
        parts = tag_spec[left_bracket+1:-1].split("=")
        if len(parts) == 2:
            arg_name = parts[0].replace("@", "").replace("'", "")
            arg_value = parts[1]
        else:
            arg_value = parts[0]
    return tag_name, arg_name, arg_value

# Parsed documents, keyed by the file name they were requested under.
doc_cache = {}
def parseDoc(file):
    """Return the parsed DOM for file, parsing it only on first request."""
    if file not in doc_cache:
        doc_cache[file] = xml.dom.minidom.parse(file)
    return doc_cache[file]

def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
    """Return the first child element of parent named tag_name, or False.

    If arg_value is non-empty, the child must carry attribute arg_name
    with exactly that value. If draft is given, a child whose 'draft'
    attribute ranks below it is skipped."""
    for node in parent.childNodes:
        if node.nodeType != node.ELEMENT_NODE:
            continue
        if node.nodeName != tag_name:
            continue
        if arg_value and _attributeValue(node, arg_name) != arg_value:
            continue
        if draft:
            value = _attributeValue(node, 'draft')
            # if draft is not specified then it's approved
            if (value is not None
                and DraftResolution(draft).toInt() > DraftResolution(value).toInt()):
                continue
        return node
    return False

def findTagsInFile(file, path):
    """List [nodeName, attribute items] for the children of the element at
    path in file.

    Children without attributes are left out. If the element has no child
    nodes at all, the element itself is listed instead (provided it has
    attributes). Returns None if path does not lead to an element."""
    elt = parseDoc(file).documentElement
    for tag_spec in path.split("/"):
        tag_name, arg_name, arg_value = _parseTagSpec(tag_spec)
        elt = findChild(elt, tag_name, arg_name, arg_value)
        if not elt:
            return None
    if elt.childNodes:
        return [[node.nodeName, node.attributes.items()]
                for node in elt.childNodes if node.attributes]
    if elt.attributes:
        return [[elt.nodeName, elt.attributes.items()]]
    return []

def _findEntryInFile(file, path, draft=None, attribute=None):
    """Look path up in a single file; return a pair (value, aliaspath).

    ("", None)     -- path does not exist in this file
    (None, target) -- an <alias source="locale"> was met on the way; the
                      caller must start again with path target
    (value, None)  -- the requested attribute, or else the value of the
                      element's first child; value is None if absent."""
    elt = parseDoc(file).documentElement
    tag_spec_list = path.split("/")
    for i, tag_spec in enumerate(tag_spec_list):
        tag_name, arg_name, arg_value = _parseTagSpec(tag_spec)
        alias = findChild(elt, 'alias')
        if alias and _attributeValue(alias, 'source') == 'locale':
            target = alias.attributes['path'].nodeValue
            # resolve all dot-dot parts of the path
            aliaspath = []
            for part in tag_spec_list[:i] + target.split("/"):
                if part != '..':
                    aliaspath.append(part)
                elif aliaspath:
                    aliaspath.pop()
            # remove attribute specification that our xpathlite doesnt support
            aliaspath = [part.replace("@type=", "").replace("'", "")
                         for part in aliaspath]
            # append the remaining path
            aliaspath = aliaspath + tag_spec_list[i:]
            # "locale" aliases are special - we need to start lookup from scratch
            return (None, "/".join(aliaspath))
        elt = findChild(elt, tag_name, arg_name, arg_value, draft)
        if not elt:
            return ("", None)
    if attribute is not None:
        return (_attributeValue(elt, attribute), None)
    child = elt.firstChild
    if child is None:
        # empty element: nothing to report
        return (None, None)
    return (child.nodeValue, None)

def findAlias(file):
    """Return the 'source' of a top-level <alias> in file, or False when
    there is no such element or it has no source."""
    alias_elt = findChild(parseDoc(file).documentElement, "alias")
    if not alias_elt:
        return False
    source = _attributeValue(alias_elt, 'source')
    if source is None:
        return False
    return source

# Lookup chains already computed, keyed by locale name.
lookup_chain_cache = {}
# Maps a parent locale to the list of locales that explicitly name it.
parent_locales = {}
def _fixedLookupChain(dirname, name):
    """Return the list of locale names to search for name, most specific
    first, honouring the explicit parents in supplementalData.xml."""
    if name in lookup_chain_cache:
        return lookup_chain_cache[name]

    # see http://www.unicode.org/reports/tr35/#Parent_Locales
    if not parent_locales:
        tags = findTagsInFile(dirname + "/../supplemental/supplementalData.xml", "parentLocales")
        # No parentLocales element means there are no explicit parents.
        for ns in tags or []:
            # ns looks like this: [u'parentLocale', [(u'parent', u'root'), (u'locales', u'az_Cyrl bs_Cyrl en_Dsrt ..')]]
            tmp = dict(ns[1])
            parent_locales[tmp.get(u"parent", "")] = tmp[u"locales"].split(" ")

    # split locale name into items and iterate through them from back to front
    # example: az_Latn_AZ => [az_Latn_AZ, az_Latn, az]
    parts = name.split("_")
    items = ["_".join(parts[:n]) for n in range(len(parts), 0, -1)]

    for i, item in enumerate(items):
        for parent_locale, locales in parent_locales.items():
            if item in locales:
                # Truncation stops here; continue with the parent's own
                # chain unless the parent is root.
                items = items[:i+1]
                if parent_locale != u"root":
                    items = items + _fixedLookupChain(dirname, parent_locale)
                lookup_chain_cache[name] = items
                return items

    lookup_chain_cache[name] = items
    return items

def _findEntry(base, path, draft=None, attribute=None):
    """Search the lookup chain of locale base for path.

    Returns the first non-empty value found, else None. Raises Error if
    a file is an alias for another file that does not exist."""
    if base.endswith(".xml"):
        base = base[:-4]
    (dirname, filename) = os.path.split(base)

    for item in _fixedLookupChain(dirname, filename):
        file = dirname + "/" + item + ".xml"
        if not os.path.isfile(file):
            continue
        alias = findAlias(file)
        if alias:
            # if alias is found we should follow it and stop processing current file
            # see http://www.unicode.org/reports/tr35/#Common_Elements
            aliasfile = os.path.dirname(file) + "/" + alias + ".xml"
            if not os.path.isfile(aliasfile):
                raise Error("findEntry: fatal error: found an alias '%s' to '%s', but the alias file couldn't be found" % (filename, alias))
            # found an alias, recurse into parsing it
            return _findEntry(aliasfile, path, draft, attribute)
        (result, aliaspath) = _findEntryInFile(file, path, draft, attribute)
        if aliaspath:
            # start lookup again because of the alias source="locale"
            return _findEntry(base, aliaspath, draft, attribute)
        if result:
            return result
    return None

def findEntry(base, path, draft=None, attribute=None):
    """Return the value at path for locale base (with or without '.xml').

    The locale's lookup chain is tried first, then root.xml in the same
    directory; a "locale" alias met in root.xml restarts the search with
    the aliased path. Raises Error if the key cannot be found."""
    if base.endswith(".xml"):
        base = base[:-4]
    (dirname, filename) = os.path.split(base)

    result = None
    while path:
        result = _findEntry(base, path, draft, attribute)
        if result:
            return result
        (result, aliaspath) = _findEntryInFile(dirname + "/root.xml", path, draft, attribute)
        if result:
            return result
        if not aliaspath:
            raise Error("findEntry: fatal error: %s: cannot find key %s" % (filename, path))
        path = aliaspath

    return result