diff options
Diffstat (limited to 'util/locale_database/dateconverter.py')
-rw-r--r--[-rwxr-xr-x] | util/locale_database/dateconverter.py | 302 |
1 files changed, 195 insertions, 107 deletions
# Copyright (C) 2016 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

class Converter:
    """Conversion between CLDR and Qt datetime formats.

    Keep in sync with qlocale_mac.mm's macToQtFormat().
    The definitive source of truth is:
    https://www.unicode.org/reports/tr35/tr35-68/tr35-dates.html#Date_Field_Symbol_Table

    See convert() for explanation of the approach taken. Each method
    with a single-letter name is used to scan a prefix of a text,
    presumed to begin with that letter (or one Qt treats as equivalent
    to it) and returns a pair (Qt format, length), to use the given Qt
    format in place of text[:length]. In all cases, length must be
    positive.

    convert() finds these methods by looking up the field letter as an
    attribute of the class, so a single-character attribute name must
    only ever be given to such a scanner: any CLDR field letter with
    no attribute here is treated as unsupported and discarded."""

    @staticmethod
    def __is_reserved(ch):
        """Every ASCII letter is a reserved symbol in CLDR datetime formats"""
        assert len(ch) == 1, ch
        return ch.isascii() and ch.isalpha()

    @staticmethod
    def __count_first(text):
        """How many of text[0] appear at the start of text ?"""
        assert text
        return len(text) - len(text.lstrip(text[0]))

    @classmethod
    def __verbatim(cls, text):
        """Keep the leading run of text[0] unchanged.

        Used where our format coincides with LDML's, including on length.
        Returns the pair (that run, its length)."""
        n = cls.__count_first(text)
        return text[:n], n

    @classmethod
    def __treat_as(cls, mimic, text):
        """Helper for aliases: replace each of the leading run with mimic.

        Returns the pair (mimic repeated once per character of the
        run, length of the run)."""
        n = cls.__count_first(text)
        return mimic * n, n

    # Please follow alphabetic order, with two cases of the same
    # letter adjacent, lower before upper.
    @classmethod
    def a(cls, text): # AM/PM indicator; use locale-appropriate case
        return 'Ap', cls.__count_first(text)

    # A: Milliseconds in day. Not supported.
    b = a # AM/PM/noon/midnight
    B = a # "Flexible day period" (e.g. "at night" / "in the day")
    # (Only zh_Hant_TW affected; zh_Hant_{HK,MO} use 'ah', mapped to
    # 'Aph', so do the same here.)

    @classmethod
    def c(cls, text): # Stand-alone local day of week
        # Has length-variants for several cases Qt doesn't support, as
        # do 'e' and 'E': just map all simply to weekday, abbreviated
        # or full.
        n = cls.__count_first(text)
        return ('dddd' if n == 4 else 'ddd'), n

    # C: Input skeleton symbol
    d = __verbatim # day (of month or of week, depends on length)
    # D: Day of year. Not supported.
    e = c # Local day of week
    E = c # Just plain day of week
    # F: Day of week in month. Not supported.
    # g: Modified julian day. Not supported.
    # G: Era. Not supported.
    h = __verbatim # Hour 1-12, treat as 0-11
    H = __verbatim # Hour 0-23
    # j: Input skeleton symbol
    # J: Input skeleton symbol

    @classmethod
    def k(cls, text): # Hour 1-24, treat as 0-23
        return cls.__treat_as('H', text)

    @classmethod
    def K(cls, text): # Hour 0-11
        return cls.__treat_as('h', text)

    # l: Deprecated Chinese leap month indicator.
    @classmethod
    def L(cls, text): # Stand-alone month names: treat as plain month names.
        n = cls.__count_first(text)
        # Length five is narrow; treat same as abbreviated; anything
        # shorter matches Qt's month forms.
        return ('MMM' if n > 4 else 'M' * n), n

    m = __verbatim # Minute within the hour.
    M = L # Plain month names, possibly abbreviated, and numbers.

    @classmethod
    def O(cls, text): # Localized GMT±offset formats. Map to Z-or-UTC±HH:mm
        return 't', cls.__count_first(text)

    # q: Quarter. Not supported.
    # Q: Quarter. Not supported.

    s = __verbatim # Seconds within the minute.

    @classmethod
    def S(cls, text): # Fractional seconds. Only milliseconds supported.
        # FIXME: spec is unclear, do we need to include the leading
        # dot or not ? For now, no known locale actually exercises
        # this, so stick with what we've done on Darwin since long
        # before adding support here.
        n = cls.__count_first(text)
        return ('z' if n < 3 else 'zzz'), n

    @classmethod
    def u(cls, text): # Extended year (numeric)
        # Officially, 'u' is simply the full year number, zero-padded
        # to the length of the field. Qt's closest to that is four-digit.
        # It explicitly has no special case for two-digit year.
        return 'yyyy', cls.__count_first(text)

    # U: Cyclic Year Name. Not supported
    @classmethod
    def v(cls, text): # Generic non-location format. Map to name.
        return 'tttt', cls.__count_first(text)

    V = v # Zone ID in various forms; VV is IANA ID. Map to name.
    # w: Week of year. Not supported.
    # W: Week of month. Not supported.

    @classmethod
    def x(cls, text): # Variations on offset format.
        n = cls.__count_first(text)
        # Ignore: n == 1 may omit minutes, n > 3 may include seconds.
        # Odd lengths above one map to 'ttt', all other lengths to 'tt'.
        return ('ttt' if n > 1 and n & 1 else 'tt'), n

    X = x # Should use Z for zero offset.

    @classmethod
    def y(cls, text): # Year number.
        n = cls.__count_first(text)
        return ('yy' if n == 2 else 'yyyy'), n

    # Y: Year for Week-of-year calendars

    z = v # Specific (i.e. distinguish standard from DST) non-location format.

    @classmethod
    def Z(cls, text): # Offset format, optionally with GMT (Qt uses UTC) prefix.
        n = cls.__count_first(text)
        # Up to three maps to 'tt', exactly four to 't', more to 'ttt'.
        return ('tt' if n < 4 else 'ttt' if n > 4 else 't'), n

    @staticmethod
    def scanQuote(text): # Can't have ' as a method name, so handle specially
        """Scan the quoted literal that text starts with.

        Returns the pair (literal including its quotes, length). When
        the close-quote is missing, everything to the end of text is
        taken as the literal."""
        assert text.startswith("'")
        i = text.find("'", 1) # Find the next; -1 if not present.
        i = len(text) if i < 0 else i + 1 # Include the close-quote.
        return text[:i], i

    # Now put all of those to use:
    @classmethod
    def convert(cls, text):
        """Convert a CLDR datetime format string into a Qt one.

        Presumes that the caller will ''.join() the fragments it
        yields. Each sequence of CLDR field symbols that corresponds
        to a Qt format token is converted to it; all other CLDR field
        symbols are discarded; the literals in between fields are
        preserved verbatim, except that space and hyphen separators
        immediately before a discarded field are discarded with it.

        The approach is to look at the first symbol of the remainder
        of the text, at each iteration, and use that first symbol to
        select a function that will identify how much of the text to
        consume and what to replace it with."""
        # Literal text accumulated since the last field we emitted; held
        # back so that a following unsupported field can trim its tail.
        sep = ''
        while text:
            ch = text[0]
            if ch == "'":
                quoted, length = cls.scanQuote(text)
                text = text[length:]
                sep += quoted
            elif hasattr(cls, ch):
                # A scanner exists for this symbol (see class comment).
                qtform, length = getattr(cls, ch)(text)
                assert qtform and length > 0, (ch, text, qtform, length)
                text = text[length:]
                if sep:
                    yield sep
                    sep = ''
                yield qtform
            elif cls.__is_reserved(ch):
                # Unsupported field: drop the whole run of this letter.
                text = text[cls.__count_first(text):]
                # Discard space or dash separator that was only there
                # for the sake of the unsupported field:
                sep = sep.rstrip(' -')
                # TODO: should we also strip [ -]* from text
                # immediately following unsupported forms ?
            else:
                sep += ch
                text = text[1:]
        if sep:
            yield sep


def convert_date(text):
    """Return the Qt datetime format equivalent to the CLDR format text.

    See Converter.convert() for the details of the mapping."""
    return ''.join(Converter.convert(text))