diff options
Diffstat (limited to 'util/locale_database/qlocalexml2cpp.py')
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py | 279 |
1 files changed, 157 insertions, 122 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index 52e6331569..0cfa0f03e4 100755 --- a/util/locale_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 ############################################################################# ## -## Copyright (C) 2018 The Qt Company Ltd. +## Copyright (C) 2020 The Qt Company Ltd. ## Contact: https://www.qt.io/licensing/ ## ## This file is part of the test suite of the Qt Toolkit. @@ -259,46 +259,66 @@ def unicode2hex(s): return lst class StringDataToken: - def __init__(self, index, length): - if index > 0xFFFF or length > 0xFFFF: - raise Error("Position exceeds ushort range: %d,%d " % (index, length)) + def __init__(self, index, length, bits): + if index > 0xffff: + print "\n\n\n#error Data index is too big!", index + raise ValueError("Start-index (%d) exceeds the uint16 range!" % index) + if length >= (1 << bits): + print "\n\n\n#error Range length is too big!", length + raise ValueError("Data size (%d) exceeds the %d-bit range!" % (length, bits)) + self.index = index self.length = length - def __str__(self): - return " %d,%d " % (self.index, self.length) class StringData: def __init__(self, name): self.data = [] self.hash = {} self.name = name + self.text = '' # Used in quick-search for matches in data - def append(self, s): - if s in self.hash: - return self.hash[s] - - lst = unicode2hex(s) - index = len(self.data) - if index > 65535: - print "\n\n\n#error Data index is too big!" - sys.stderr.write ("\n\n\nERROR: index exceeds the uint16 range! index = %d\n" % index) - sys.exit(1) - size = len(lst) - if size >= 65535: - print "\n\n\n#error Data is too big!" - sys.stderr.write ("\n\n\nERROR: data size exceeds the uint16 range! size = %d\n" % size) - sys.exit(1) - token = None + def append(self, s, bits=8): try: - token = StringDataToken(index, size) - except Error as e: - sys.stderr.write("\n\n\nERROR: %s: on data '%s'" % (e, s)) - sys.exit(1) - self.hash[s] = token - self.data += lst + token = self.hash[s] + except KeyError: + token = self.__store(s, bits) + self.hash[s] = token return token + def __store(self, s, bits): + """Add string s to known data. + + Seeks to avoid duplication, where possible. + For example, short-forms may be prefixes of long-forms. + """ + if not s: + return StringDataToken(0, 0, bits) + ucs2 = unicode2hex(s) + try: + index = self.text.index(s) - 1 + matched = 0 + while matched < len(ucs2): + index, matched = self.data.index(ucs2[0], index + 1), 1 + if index + len(ucs2) >= len(self.data): + raise ValueError # not found after all ! + while matched < len(ucs2) and self.data[index + matched] == ucs2[matched]: + matched += 1 + except ValueError: + index = len(self.data) + self.data += ucs2 + self.text += s + + assert index >= 0 + try: + return StringDataToken(index, len(ucs2), bits) + except ValueError as e: + e.args += (self.name, s) + raise + def write(self, fd): + if len(self.data) > 0xffff: + raise ValueError("Data is too big for quint16 index to its end!" % len(self.data), + self.name) fd.write("\nstatic const ushort %s[] = {\n" % self.name) fd.write(wrap_list(self.data)) fd.write("\n};\n") @@ -319,7 +339,7 @@ def escapedString(s): need_escape = False result = "" for c in s: - if ord(c) < 128 and (not need_escape or ord(c.lower()) < ord('a') or ord(c.lower()) > ord('f')): + if ord(c) < 128 and not (need_escape and ord('a') <= ord(c.lower()) <= ord('f')): line += c need_escape = False else: @@ -361,7 +381,7 @@ def main(): for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))): usage() - (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p", dir=qtsrcdir) + (data_temp_file, data_temp_file_path) = tempfile.mkstemp("qlocale_data_p.h", dir=qtsrcdir) data_temp_file = os.fdopen(data_temp_file, "w") qlocaledata_file = open(qtsrcdir + "/src/corelib/text/qlocale_data_p.h", "r") s = qlocaledata_file.readline() @@ -426,7 +446,8 @@ def main(): cmnt_to = cmnt_to + country_map[to_country][1] data_temp_file.write(" ") - data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country)) + data_temp_file.write("{ %3d, %3d, %3d }, { %3d, %3d, %3d }" % + (from_language, from_script, from_country, to_language, to_script, to_country)) index += 1 if index != len(likely_subtags_map): data_temp_file.write(",") @@ -483,39 +504,43 @@ def main(): + ' quotEnd ' + 'altQtOpn ' + 'altQtEnd ' - # Width 11 + comma: - + ' lpStart ' # List pattern - + ' lpMid ' - + ' lpEnd ' - + ' lpTwo ' - + ' sDtFmt ' # Date format - + ' lDtFmt ' - + ' sTmFmt ' # Time format - + ' lTmFmt ' - + ' ssDays ' # Days - + ' slDays ' - + ' snDays ' - + ' sDays ' - + ' lDays ' - + ' nDays ' - + ' am ' # am/pm indicators - + ' pm ' - # Width 8 + comma - + ' byte ' - + ' siQuant ' - + 'iecQuant ' + + # Range entries (all start-indices, then all sizes): + # Width 5 + comma: + + 'lStrt ' # List pattern + + 'lpMid ' + + 'lpEnd ' + + 'lPair ' + + 'lDFmt ' # Date format + + 'sDFmt ' + + 'lTFmt ' # Time format + + 'sTFmt ' + + 'slDay ' # Day names + + 'lDays ' + + 'ssDys ' + + 'sDays ' + + 'snDay ' + + 'nDays ' + + ' am ' # am/pm indicators + + ' pm ' + + ' byte ' + + 'siQnt ' + + 'iecQn ' + + 'crSym ' # Currency formatting: + + 'crDsp ' + + 'crFmt ' + + 'crFNg ' + + 'ntLng ' # Name of language in itself, and of territory: + + 'ntTer ' + # Width 3 + comma for each size; no header + + ' ' * 25 + + # Strays (char array, bit-fields): # Width 8+4 + comma + ' currISO ' - # Width 11 + comma: - + ' currSym ' # Currency formatting: - + ' currDsply ' - + ' currFmt ' - + ' currFmtNeg ' - + ' endoLang ' # Name of language in itself, and of country: - + ' endoCntry ' # Width 6 + comma: - + 'curDgt ' # Currency number representation: - + 'curRnd ' + + 'curDgt ' # Currency digits + + 'curRnd ' # Currencty rounding (unused: QTBUG-81343) + 'dow1st ' # First day of week + ' wknd+ ' # Week-end start/end days: + ' wknd-' @@ -535,14 +560,16 @@ def main(): + '%6d,' * 8 # Quotation marks: + '%8d,' * 4 + # List patterns, date/time formats, month/day names, am/pm: - + '%11s,' * 16 # SI/IEC byte-unit abbreviations: - + '%8s,' * 3 + # Currency and endonyms + + '%5d,' * 25 + # Sizes for the same: + + '%3d,' * 25 + # Currency ISO code: + ' %10s, ' - # Currency and endonyms - + '%11s,' * 6 # Currency formatting: + '%6d,%6d' # Day of week and week-end: @@ -550,8 +577,32 @@ def main(): + ' }') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + ranges = (tuple(list_pattern_part_data.append(p) for p in # 4 entries: + (l.listPatternPartStart, l.listPatternPartMiddle, + l.listPatternPartEnd, l.listPatternPartTwo)) + + tuple (date_format_data.append(f) for f in # 2 entries: + (l.longDateFormat, l.shortDateFormat)) + + tuple(time_format_data.append(f) for f in # 2 entries: + (l.longTimeFormat, l.shortTimeFormat)) + + tuple(days_data.append(d) for d in # 6 entries: + (l.standaloneLongDays, l.longDays, + l.standaloneShortDays, l.shortDays, + l.standaloneNarrowDays, l.narrowDays)) + + (am_data.append(l.am), pm_data.append(l.pm)) + # 2 entries: + tuple(byte_unit_data.append(b) for b in # 3 entries: + (l.byte_unit, l.byte_si_quantified, l.byte_iec_quantified)) + + (currency_symbol_data.append(l.currencySymbol), + currency_display_name_data.append(l.currencyDisplayName), + currency_format_data.append(l.currencyFormat), + currency_format_data.append(l.currencyNegativeFormat), + endonyms_data.append(l.languageEndonym), + endonyms_data.append(l.countryEndonym)) # 6 entries + ) # Total: 25 entries + assert len(ranges) == 25 + data_temp_file.write(line_format - % (key[0], key[1], key[2], + % ((key[0], key[1], key[2], l.decimal, l.group, l.listDelim, @@ -563,44 +614,22 @@ def main(): l.quotationStart, l.quotationEnd, l.alternateQuotationStart, - l.alternateQuotationEnd, - list_pattern_part_data.append(l.listPatternPartStart), - list_pattern_part_data.append(l.listPatternPartMiddle), - list_pattern_part_data.append(l.listPatternPartEnd), - list_pattern_part_data.append(l.listPatternPartTwo), - date_format_data.append(l.shortDateFormat), - date_format_data.append(l.longDateFormat), - time_format_data.append(l.shortTimeFormat), - time_format_data.append(l.longTimeFormat), - days_data.append(l.standaloneShortDays), - days_data.append(l.standaloneLongDays), - days_data.append(l.standaloneNarrowDays), - days_data.append(l.shortDays), - days_data.append(l.longDays), - days_data.append(l.narrowDays), - am_data.append(l.am), - pm_data.append(l.pm), - byte_unit_data.append(l.byte_unit), - byte_unit_data.append(l.byte_si_quantified), - byte_unit_data.append(l.byte_iec_quantified), - currencyIsoCodeData(l.currencyIsoCode), - currency_symbol_data.append(l.currencySymbol), - currency_display_name_data.append(l.currencyDisplayName), - currency_format_data.append(l.currencyFormat), - currency_format_data.append(l.currencyNegativeFormat), - endonyms_data.append(l.languageEndonym), - endonyms_data.append(l.countryEndonym), + l.alternateQuotationEnd) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges) + + (currencyIsoCodeData(l.currencyIsoCode), l.currencyDigits, - l.currencyRounding, + l.currencyRounding, # unused (QTBUG-81343) l.firstDayOfWeek, l.weekendStart, - l.weekendEnd) + l.weekendEnd)) + ", // %s/%s/%s\n" % (l.language, l.script, l.country)) data_temp_file.write(line_format # All zeros, matching the format: - % ( (0,) * (3 + 8 + 4) + ("0,0",) * (16 + 3) + % ( (0,) * (3 + 8 + 4) + (0,) * 25 * 2 + (currencyIsoCodeData(0),) - + ("0,0",) * 6 + (0,) * (2 + 3)) - + " // trailing 0s\n") + + (0,) * 2 + + (0,) * 3) + + " // trailing zeros\n") data_temp_file.write("};\n") # StringData tables: @@ -735,7 +764,7 @@ def main(): os.rename(data_temp_file_path, qtsrcdir + "/src/corelib/text/qlocale_data_p.h") # Generate calendar data - calendar_format = ' {%6d,%6d,%6d,{%5s},{%5s},{%5s},{%5s},{%5s},{%5s}}, ' + calendar_format = ' {%6d,%6d,%6d' + ',%5d' * 6 + ',%3d' * 6 + ' },' for calendar, stem in calendars.items(): months_data = StringData('months_data') calendar_data_file = "q%scalendar_data_p.h" % stem @@ -755,30 +784,38 @@ def main(): + ' lang ' + ' script' + ' terr ' - # Month-name start-end pairs, width 8 (5 plus '{},'): - + ' sShort ' - + ' sLong ' - + ' sNarrow' - + ' short ' - + ' long ' - + ' narrow' - # No trailing space on last; be sure - # to pad before adding later entries. + # Month-name start-indices, width 6 (5 + comma): + + 'sLng ' + + 'long ' + + 'sSrt ' + + 'shrt ' + + 'sNrw ' + + 'naro ' + # No individual headers for the sizes. + + 'Sizes...' + '\n') for key in locale_keys: l = locale_map[key] + # Sequence of StringDataToken: + try: + # Twelve long month names can add up to more than 256 (e.g. kde_TZ: 264) + ranges = (tuple(months_data.append(m[calendar], 16) for m in + (l.standaloneLongMonths, l.longMonths)) + + tuple(months_data.append(m[calendar]) for m in + (l.standaloneShortMonths, l.shortMonths, + l.standaloneNarrowMonths, l.narrowMonths))) + except ValueError as e: + e.args += (l.language, l.script, l.country, stem) + raise + calendar_temp_file.write( calendar_format - % (key[0], key[1], key[2], - months_data.append(l.standaloneShortMonths[calendar]), - months_data.append(l.standaloneLongMonths[calendar]), - months_data.append(l.standaloneNarrowMonths[calendar]), - months_data.append(l.shortMonths[calendar]), - months_data.append(l.longMonths[calendar]), - months_data.append(l.narrowMonths[calendar])) - + "// %s/%s/%s\n " % (l.language, l.script, l.country)) - calendar_temp_file.write(calendar_format % ( (0,) * 3 + ('0,0',) * 6 ) - + '// trailing zeros\n') + % ((key[0], key[1], key[2]) + + tuple(r.index for r in ranges) + + tuple(r.length for r in ranges)) + + "// %s/%s/%s\n" % (l.language, l.script, l.country)) + calendar_temp_file.write(calendar_format % ( (0,) * (3 + 6 * 2) ) + + '// trailing zeros\n') calendar_temp_file.write("};\n") months_data.write(calendar_temp_file) s = calendar_template_file.readline() @@ -815,9 +852,7 @@ def main(): ",\n") qlocaleh_temp_file.write("\n") qlocaleh_temp_file.write(" LastLanguage = " + language + "\n") - qlocaleh_temp_file.write(" };\n") - - qlocaleh_temp_file.write("\n") + qlocaleh_temp_file.write(" };\n\n") # Script enum qlocaleh_temp_file.write(" enum Script {\n") @@ -831,7 +866,7 @@ def main(): ",\n") qlocaleh_temp_file.write("\n") qlocaleh_temp_file.write(" LastScript = " + script + "\n") - qlocaleh_temp_file.write(" };\n") + qlocaleh_temp_file.write(" };\n\n") # Country enum qlocaleh_temp_file.write(" enum Country {\n") |