diff options
author | The Qt Project <gerrit-noreply@qt-project.org> | 2020-02-12 08:47:47 +0000 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2020-02-12 08:47:47 +0000 |
commit | f4a52b79ced0726b207c1af8c12ada1f8be12ff0 (patch) | |
tree | d11ec8434ee3c61ddc1f0d04009cfc478ec05af5 /util/locale_database | |
parent | 99da0c164b02dca8f556c3186231e6b0723c532c (diff) | |
parent | c53f8c038c0736aeb208bcd69bf3b5e2816c29ae (diff) |
Merge "Merge remote-tracking branch 'origin/dev' into wip/cmake" into wip/cmake
Diffstat (limited to 'util/locale_database')
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py | 28 | ||||
-rw-r--r-- | util/locale_database/localexml.py | 10 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 279 |
3 files changed, 179 insertions, 138 deletions
diff --git a/util/locale_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index 072ea9e4ed..625f1c32c4 100755 --- a/util/locale_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -2,7 +2,7 @@ # coding=utf8 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -314,7 +314,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['zero'] = get_number_in_system(path, "numbers/symbols/nativeZeroDigit", numbering_system) result['minus'] = get_number_in_system(path, "numbers/symbols/minusSign", numbering_system) result['plus'] = get_number_in_system(path, "numbers/symbols/plusSign", numbering_system) - result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system).lower() + result['exp'] = get_number_in_system(path, "numbers/symbols/exponential", numbering_system) result['quotationStart'] = findEntry(path, "delimiters/quotationStart") result['quotationEnd'] = findEntry(path, "delimiters/quotationEnd") result['alternateQuotationStart'] = findEntry(path, "delimiters/alternateQuotationStart") @@ -352,13 +352,15 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ result['currencySymbol'] = '' result['currencyDisplayName'] = '' if result['currencyIsoCode']: - result['currencySymbol'] = findEntryDef(path, "numbers/currencies/currency[%s]/symbol" % result['currencyIsoCode']) - result['currencyDisplayName'] = ';'.join( - findEntryDef(path, 'numbers/currencies/currency[' + result['currencyIsoCode'] - + ']/displayName' + tail) - for tail in ['',] + [ - '[count=%s]' % x for x in ('zero', 'one', 'two', 'few', 'many', 'other') - ]) + ';' + stem = "numbers/currencies/currency[%s]/" % result['currencyIsoCode'] + result['currencySymbol'] = findEntryDef(path, stem + 'symbol') + displays = tuple(findEntryDef(path, stem + 'displayName' + tail) + for tail in ('',) + tuple( + '[count=%s]' % x for x in ('zero', 'one', 'two', + 'few', 'many', 'other'))) + while displays and not displays[-1]: + displays = displays[:-1] + result['currencyDisplayName'] = ';'.join(displays) def findUnitDef(path, stem, fallback=''): # The displayName for a quantified unit in en.xml is kByte @@ -405,7 +407,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ prop = 'monthContext[' + mode + ']/monthWidth[' + size + ']/' result[key + 'Months_' + cal] = ';'.join( findEntry(path, stem + prop + "month[%d]" % i) - for i in range(1, 13)) + ';' + for i in range(1, 13)) # Day data (for Gregorian, at least): stem = 'dates/calendars/calendar[gregorian]/days/' @@ -414,7 +416,7 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ prop = 'dayContext[' + mode + ']/dayWidth[' + size + ']/day' result[key + 'Days'] = ';'.join( findEntry(path, stem + prop + '[' + day + ']') - for day in days) + ';' + for day in days) return Locale(result) @@ -582,6 +584,10 @@ for line in ldml: if 'version cldrVersion CDATA #FIXED' in line: cldr_version = line.split('"')[1] +if sys.stdout.encoding != 'UTF-8' or (sys.stdout.encoding is None and sys.getdefaultencoding() != 'UTF-8'): + reload(sys) # Weirdly, this gets a richer sys module than the plain import got us ! + sys.setdefaultencoding('UTF-8') + print "<localeDatabase>" print " <version>" + cldr_version + "</version>" print " <languageList>" diff --git a/util/locale_database/localexml.py b/util/locale_database/localexml.py index 9b353f5122..9958398938 100644 --- a/util/locale_database/localexml.py +++ b/util/locale_database/localexml.py @@ -1,7 +1,7 @@ # coding=utf8 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -267,7 +267,7 @@ class Locale: except KeyError: # Need to add an entry to known, above. print 'Unsupported calendar:', cal raise - names, get = data[0] + ('',), data[1:] + names, get = data[0], data[1:] for n, size in enumerate(sizes): yield ('_'.join((camelCase((size, 'months')), cal)), ';'.join(get[n][0](i, x) for i, x in enumerate(names))) @@ -279,7 +279,7 @@ class Locale: def C(cls, calendars=('gregorian',), # Empty entry at end to ensure final separator when join()ed: days = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', - 'Thursday', 'Friday', 'Saturday', ''), + 'Thursday', 'Friday', 'Saturday'), quantifiers=('k', 'M', 'G', 'T', 'P', 'E')): """Returns an object representing the C locale.""" return cls(dict(cls.__monthNames(calendars)), @@ -303,11 +303,11 @@ class Locale: longTimeFormat='HH:mm:ss z', shortTimeFormat='HH:mm:ss', longDays=';'.join(days), shortDays=';'.join(d[:3] for d in days), - narrowDays='7;1;2;3;4;5;6;', + narrowDays='7;1;2;3;4;5;6', standaloneLongDays=';'.join(days), standaloneShortDays=';'.join(d[:3] for d in days), standaloneNarrowDays=';'.join(d[:1] for d in days), currencyIsoCode='', currencySymbol='', - currencyDisplayName=';' * 7, + currencyDisplayName='', currencyDigits=2, currencyRounding=1, currencyFormat='%1%2', currencyNegativeFormat='') diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index 52e6331569..0cfa0f03e4 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -259,46 +259,66 @@ def unicode2hex(s): return lst class StringDataToken: - def __init__(self, index, length): - if index > 0xFFFF or length > 0xFFFF: - raise Error("Position exceeds ushort range: %d,%d " % (index, length)) + def __init__(self, index, length, bits): + if index > 0xffff: + print "\n\n\n#error Data index is too big!", index + raise ValueError("Start-index (%d) exceeds the uint16 range!" % index) + if length >= (1 << bits): + print "\n\n\n#error Range length is too big!", length + raise ValueError("Data size (%d) exceeds the %d-bit range!" % (length, bits)) + self.index = index self.length = length - def __str__(self): - return " %d,%d " % (self.index, self.length) class StringData: def __init__(self, name): self.data = [] self.hash = {} self.name = name + self.text = '' # Used in quick-search for matches in data - def append(self, s): - if s in self.hash: - return self.hash[s] - - lst = unicode2hex(s) - index = len(self.data) - if index > 65535: - print "\n\n\n#error Data index is too big!" - sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) - sys.exit(1) - size = len(lst) - if size >= 65535: - print "\n\n\n#error Data is too big!" - sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size) - sys.exit(1) - token = None + def append(self, s, bits=8): try: - token = StringDataToken(index, size) - except Error as e: - sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s)) - sys.exit(1) - self.hash[s] = token - self.data += lst + token = self.hash[s] + except KeyError: + token = self.__store(s, bits) + self.hash[s] = token return token + def __store(self, s, bits): + """Add string s to known data. + + Seeks to avoid duplication, where possible. + For example, short-forms may be prefixes of long-forms. + """ + if not s: + return StringDataToken(0, 0, bits) + ucs2 = unicode2hex(s) + try: + index = self.text.index(s) - 1 + matched = 0 + while matched < len(ucs2): + index, matched = self.data.index(ucs2[0], index + 1), 1 + if index + len(ucs2) >= len(self.data): + raise ValueError # not found after all ! + while matched < len(ucs2) and self.data[index + matched] == ucs2[matched]: + matched += 1 + except ValueError: + index = len(self.data) + self.data += ucs2 + self.text += s + + assert index >= 0 + try: + return StringDataToken(index, len(ucs2), bits) + except ValueError as e: + e.args += (self.name, s) + raise + def write(self, fd): + if len(self.data) > 0xffff: + raise ValueError("Data is too big for quint16 index to its end!" % len(self.data), + self.name) fd.write("\nstatic const ushort %s[] = {\n" % self.name) fd.write(wrap_list(self.data)) fd.write("\n};\n") @@ -319,7 +339,7 @@ def escapedString(s): need_escape = False result = "" for c in s: - if ord(c) < 128 and (not need_escape or ord(c.lower()) < ord('a') or ord(c.lower()) > ord('f')): + if ord(c) < 128 and not (need_escape and ord('a') <= ord(c.lower()) <= ord('f')): line += c need_escape = False else: @@ -361,7 +381,7 @@ def main(): for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))): usage() - (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir) + (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p.h", dir=qtsrcdir) data_temp_file = os.fdopen(data_temp_file, "w") qlocaledata_file = open(qtsrcdir + "/src/corelib/text/qlocale_data_p.h", "r") s = qlocaledata_file.readline() @@ -426,7 +446,8 @@ def main(): cmnt_to = cmnt_to + country_map[to_country][1] data_temp_file.write(" ") - data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country)) + data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % + (from_language, from_script, from_country, to_language, to_script, to_country)) index += 1 if index != len(likely_subtags_map): data_temp_file.write(",") @@ -483,39 +504,43 @@ def main(): + ' quotEnd ' + 'altQtOpn ' + 'altQtEnd ' - # Width 11 + comma: - + ' lpStart ' # List pattern - + ' lpMid ' - + ' lpEnd ' - + ' lpTwo ' - + ' sDtFmt ' # Date format - + ' lDtFmt ' - + ' sTmFmt ' # Time format - + ' lTmFmt ' - + ' ssDays ' # Days - + ' slDays ' - + ' snDays ' - + ' sDays ' - + ' lDays ' - + ' nDays ' - + ' am ' # am/pm indicators - + ' pm ' - # Width 8 + comma - + ' byte ' - + ' siQuant ' - + 'iecQuant ' + + # Range entries (all start-indices, then all sizes): + # Width 5 + comma: + + 'lStrt ' # List pattern + + 'lpMid ' + + 'lpEnd ' + + 'lPair ' + + 'lDFmt ' # Date format + + 'sDFmt ' + + 'lTFmt ' # Time format + + 'sTFmt ' + + 'slDay ' # Day names + + 'lDays ' + + 'ssDys ' + + 'sDays ' + + 'snDay ' + + 'nDays ' + + ' am ' # am/pm indicators + + ' pm ' + + ' byte ' + + 'siQnt ' + + 'iecQn ' + + 'crSym ' # Currency formatting: + + 'crDsp ' + + 'crFmt ' + + 'crFNg ' + + 'ntLng ' # Name of language in itself, and of territory: + + 'ntTer ' + # Width 3 + comma for each size; no header + + ' ' * 25 + + # Strays (char array, bit-fields): # Width 8+4 + comma + ' currISO ' - # Width 11 + comma: - + ' currSym ' # Currency formatting: - + ' currDsply ' - + ' currFmt ' - + ' currFmtNeg ' - + ' endoLang ' # Name of language in itself, and of country: - + ' endoCntry ' # Width 6 + comma: - + 'curDgt ' # Currency number representation: - + 'curRnd ' + + 'curDgt ' # Currency digits + + 'curRnd ' # Currencty rounding (unused: QTBUG-81343) + 'dow1st ' # First day of week + ' wknd+ ' # Week-end start/end days: + ' wknd-' @@ -535,14 +560,16 @@ def main(): + '%6d,' * 8 # Quotation marks: + '%8d,' * 4 + # List patterns, date/time formats, month/day names, am/pm: - + '%11s,' * 16 # SI/IEC byte-unit abbreviations: - + '%8s,' * 3 + # Currency and endonyms + + '%5d,' * 25 + # Sizes for the same: + + '%3d,' * 25 + # Currency ISO code: + ' %10s, ' - # Currency and endonyms - + '%11s,' * 6 # Currency formatting: + '%6d,%6d' # Day of week and week-end: @@ -550,8 +577,32 @@ def main(): + ' }') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries: + (l.listPatternPartStart, l.listPatternPartMiddle, + l.listPatternPartEnd, l.listPatternPartTwo)) + + tuple (date_format_data.append(f) for f in # 2 entries: + (l.longDateFormat, l.shortDateFormat)) + + tuple(time_format_data.append(f) for f in # 2 entries: + (l.longTimeFormat, l.shortTimeFormat)) + + tuple(days_data.append(d) for d in # 6 entries: + (l.standaloneLongDays, l.longDays, + l.standaloneShortDays, l.shortDays, + l.standaloneNarrowDays, l.narrowDays)) + + (am_data.append(l.am), pm_data.append(l.pm)) + # 2 entries: + tuple(byte_unit_data.append(b) for b in # 3 entries: + (l.byte_unit, l.byte_si_quantified, l.byte_iec_quantified)) + + (currency_symbol_data.append(l.currencySymbol), + currency_display_name_data.append(l.currencyDisplayName), + currency_format_data.append(l.currencyFormat), + currency_format_data.append(l.currencyNegativeFormat), + endonyms_data.append(l.languageEndonym), + endonyms_data.append(l.countryEndonym)) # 6 entries + ) # Total: 25 entries + assert len(ranges) == 25 + data_temp_file.write(line_format - % (key[0], key[1], key[2], + % ((key[0], key[1], key[2], l.decimal, l.group, l.listDelim, @@ -563,44 +614,22 @@ def main(): l.quotationStart, l.quotationEnd, l.alternateQuotationStart, - l.alternateQuotationEnd, - list_pattern_part_data.append(l.listPatternPartStart), - list_pattern_part_data.append(l.listPatternPartMiddle), - list_pattern_part_data.append(l.listPatternPartEnd), - list_pattern_part_data.append(l.listPatternPartTwo), - date_format_data.append(l.shortDateFormat), - date_format_data.append(l.longDateFormat), - time_format_data.append(l.shortTimeFormat), - time_format_data.append(l.longTimeFormat), - days_data.append(l.standaloneShortDays), - days_data.append(l.standaloneLongDays), - days_data.append(l.standaloneNarrowDays), - days_data.append(l.shortDays), - days_data.append(l.longDays), - days_data.append(l.narrowDays), - am_data.append(l.am), - pm_data.append(l.pm), - byte_unit_data.append(l.byte_unit), - byte_unit_data.append(l.byte_si_quantified), - byte_unit_data.append(l.byte_iec_quantified), - currencyIsoCodeData(l.currencyIsoCode), - currency_symbol_data.append(l.currencySymbol), - currency_display_name_data.append(l.currencyDisplayName), - currency_format_data.append(l.currencyFormat), - currency_format_data.append(l.currencyNegativeFormat), - endonyms_data.append(l.languageEndonym), - endonyms_data.append(l.countryEndonym), + l.alternateQuotationEnd) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges) + + (currencyIsoCodeData(l.currencyIsoCode), l.currencyDigits, - l.currencyRounding, + l.currencyRounding, # unused (QTBUG-81343) l.firstDayOfWeek, l.weekendStart, - l.weekendEnd) + l.weekendEnd)) + ", // %s/%s/%s\n" % (l.language, l.script, l.country)) data_temp_file.write(line_format # All zeros, matching the format: - % ( (0,) * (3 + 8 + 4) + ("0,0",) * (16 + 3) + % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2 + (currencyIsoCodeData(0),) - + ("0,0",) * 6 + (0,) * (2 + 3)) - + " // trailing 0s\n") + + (0,) * 2 + + (0,) * 3) + + " // trailing zeros\n") data_temp_file.write("};\n") # StringData tables: @@ -735,7 +764,7 @@ def main(): os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/text/qlocale_data_p.h") # Generate calendar data - calendar_format = ' {%6d,%6d,%6d,{%5s},{%5s},{%5s},{%5s},{%5s},{%5s}}, ' + calendar_format = ' {%6d,%6d,%6d' + ',%5d' * 6 + ',%3d' * 6 + ' },' for calendar, stem in calendars.items(): months_data = StringData('months_data') calendar_data_file = "q%scalendar_data_p.h" % stem @@ -755,30 +784,38 @@ def main(): + ' lang ' + ' script' + ' terr ' - # Month-name start-end pairs, width 8 (5 plus '{},'): - + ' sShort ' - + ' sLong ' - + ' sNarrow' - + ' short ' - + ' long ' - + ' narrow' - # No trailing space on last; be sure - # to pad before adding later entries. + # Month-name start-indices, width 6 (5 + comma): + + 'sLng ' + + 'long ' + + 'sSrt ' + + 'shrt ' + + 'sNrw ' + + 'naro ' + # No individual headers for the sizes. + + 'Sizes...' + '\n') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + try: + # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264) + ranges = (tuple(months_data.append(m[calendar], 16) for m in + (l.standaloneLongMonths, l.longMonths)) + + tuple(months_data.append(m[calendar]) for m in + (l.standaloneShortMonths, l.shortMonths, + l.standaloneNarrowMonths, l.narrowMonths))) + except ValueError as e: + e.args += (l.language, l.script, l.country, stem) + raise + calendar_temp_file.write( calendar_format - % (key[0], key[1], key[2], - months_data.append(l.standaloneShortMonths[calendar]), - months_data.append(l.standaloneLongMonths[calendar]), - months_data.append(l.standaloneNarrowMonths[calendar]), - months_data.append(l.shortMonths[calendar]), - months_data.append(l.longMonths[calendar]), - months_data.append(l.narrowMonths[calendar])) - + "// %s/%s/%s\n " % (l.language, l.script, l.country)) - calendar_temp_file.write(calendar_format % ( (0,) * 3 + ('0,0',) * 6 ) - + '// trailing zeros\n') + % ((key[0], key[1], key[2]) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges)) + + "// %s/%s/%s\n" % (l.language, l.script, l.country)) + calendar_temp_file.write(calendar_format % ( (0,) * (3 + 6 * 2) ) + + '// trailing zeros\n') calendar_temp_file.write("};\n") months_data.write(calendar_temp_file) s = calendar_template_file.readline() @@ -815,9 +852,7 @@ def main(): ",\n") qlocaleh_temp_file.write("\n") qlocaleh_temp_file.write(" LastLanguage = " + language + "\n") - qlocaleh_temp_file.write(" };\n") - - qlocaleh_temp_file.write("\n") + qlocaleh_temp_file.write(" };\n\n") # Script enum qlocaleh_temp_file.write(" enum Script {\n") @@ -831,7 +866,7 @@ def main(): ",\n") qlocaleh_temp_file.write("\n") qlocaleh_temp_file.write(" LastScript = " + script + "\n") - qlocaleh_temp_file.write(" };\n") + qlocaleh_temp_file.write(" };\n\n") # Country enum qlocaleh_temp_file.write(" enum Country {\n") |