diff options
author | Alexandru Croitor <alexandru.croitor@qt.io> | 2019-05-28 16:41:49 +0200 |
---|---|---|
committer | Alexandru Croitor <alexandru.croitor@qt.io> | 2019-06-03 15:14:42 +0200 |
commit | e4079eca49adce16e31dac2a18d49d7a55817891 (patch) | |
tree | 1dfb960ec1115b1f552afe8a013058542389505e /util | |
parent | f32a6cfb6b6236533508901f114ab57396da8ff3 (diff) | |
parent | ec6dc5f78453048c4f0604655a34c6c20c79d819 (diff) |
Merge remote-tracking branch 'origin/dev' into wip/cmake
Take 5.
Change-Id: Ifb2d20e95ba824e45e667fba6c2ba45389991cc3
Diffstat (limited to 'util')
-rw-r--r-- | util/corelib/qurl-generateTLDs/main.cpp | 26 | ||||
-rw-r--r-- | util/local_database/README | 1 | ||||
-rw-r--r-- | util/locale_database/README | 5 | ||||
-rwxr-xr-x | util/locale_database/cldr2qlocalexml.py (renamed from util/local_database/cldr2qlocalexml.py) | 36 | ||||
-rwxr-xr-x | util/locale_database/cldr2qtimezone.py (renamed from util/local_database/cldr2qtimezone.py) | 2 | ||||
-rwxr-xr-x | util/locale_database/dateconverter.py (renamed from util/local_database/dateconverter.py) | 0 | ||||
-rw-r--r-- | util/locale_database/enumdata.py (renamed from util/local_database/enumdata.py) | 2 | ||||
-rw-r--r-- | util/locale_database/formattags.txt (renamed from util/local_database/formattags.txt) | 0 | ||||
-rw-r--r-- | util/locale_database/localexml.py (renamed from util/local_database/localexml.py) | 48 | ||||
-rwxr-xr-x | util/locale_database/qlocalexml2cpp.py (renamed from util/local_database/qlocalexml2cpp.py) | 2 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.cpp (renamed from util/local_database/testlocales/localemodel.cpp) | 0 | ||||
-rw-r--r-- | util/locale_database/testlocales/localemodel.h (renamed from util/local_database/testlocales/localemodel.h) | 0 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.cpp (renamed from util/local_database/testlocales/localewidget.cpp) | 0 | ||||
-rw-r--r-- | util/locale_database/testlocales/localewidget.h (renamed from util/local_database/testlocales/localewidget.h) | 0 | ||||
-rw-r--r-- | util/locale_database/testlocales/main.cpp (renamed from util/local_database/testlocales/main.cpp) | 0 | ||||
-rw-r--r-- | util/locale_database/testlocales/testlocales.pro (renamed from util/local_database/testlocales/testlocales.pro) | 0 | ||||
-rw-r--r-- | util/locale_database/xpathlite.py (renamed from util/local_database/xpathlite.py) | 28 |
17 files changed, 116 insertions, 34 deletions
diff --git a/util/corelib/qurl-generateTLDs/main.cpp b/util/corelib/qurl-generateTLDs/main.cpp index 6fde287049..e458ea9d53 100644 --- a/util/corelib/qurl-generateTLDs/main.cpp +++ b/util/corelib/qurl-generateTLDs/main.cpp @@ -90,15 +90,15 @@ int main(int argc, char **argv) { QCoreApplication app(argc, argv); if (argc < 3) { - printf("\nusage: %s inputFile outputFile\n\n", argv[0]); + printf("\nUsage: ./%s inputFile outputFile\n\n", argv[0]); printf("'inputFile' should be a list of effective TLDs, one per line,\n"); - printf("as obtained from http://publicsuffix.org . To create indices and data file\n"); + printf("as obtained from http://publicsuffix.org/. To create indices and data\n"); printf("file, do the following:\n\n"); - printf(" wget https://publicsuffix.org/list/effective_tld_names.dat -O effective_tld_names.dat\n"); - printf(" grep '^[^\\/\\/]' effective_tld_names.dat > effective_tld_names.dat.trimmed\n"); - printf(" %s effective_tld_names.dat.trimmed effective_tld_names.dat.qt\n\n", argv[0]); - printf("Now copy the data from effective_tld_names.dat.qt to the file src/corelib/io/qurltlds_p.h in your Qt repo\n\n"); - exit(1); + printf(" wget https://publicsuffix.org/list/public_suffix_list.dat -O public_suffix_list.dat\n"); + printf(" grep -v '^//' public_suffix_list.dat | grep . 
> public_suffix_list.dat.trimmed\n"); + printf(" ./%s public_suffix_list.dat.trimmed public_suffix_list.cpp\n\n", argv[0]); + printf("Now replace the code in qtbase/src/corelib/io/qurltlds_p.h with public_suffix_list.cpp's contents\n\n"); + return 1; } QFile file(argv[1]); if (!file.open(QIODevice::ReadOnly)) { @@ -146,7 +146,7 @@ int main(int argc, char **argv) entry.append("\\0"); } outFile.write("static const quint32 tldIndices[] = {\n"); - outDataBuffer.write("\nstatic const char *tldData[] = {\n"); + outDataBuffer.write("\nstatic const char *tldData[] = {"); int totalUtf8Size = 0; int chunkSize = 0; // strlen of the current chunk (sizeof is bigger by 1) @@ -165,22 +165,22 @@ int main(int argc, char **argv) if (chunkSize >= 0xffff) { static int chunkCount = 0; qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size; - outDataBuffer.write(",\n\n"); + outDataBuffer.write(",\n"); chunks.append(QString::number(totalUtf8Size)); chunkSize = 0; } totalUtf8Size += stringUtf8Size; - outDataBuffer.write("\""); + outDataBuffer.write("\n\""); outDataBuffer.write(entry.toUtf8()); - outDataBuffer.write("\"\n"); + outDataBuffer.write("\""); } } chunks.append(QString::number(totalUtf8Size)); outFile.write(QByteArray::number(totalUtf8Size)); - outFile.write("};\n"); + outFile.write("\n};\n"); - outDataBuffer.write("};\n"); + outDataBuffer.write("\n};\n"); outDataBuffer.close(); outFile.write(outDataBufferBA); diff --git a/util/local_database/README b/util/local_database/README deleted file mode 100644 index 23b6a33ad8..0000000000 --- a/util/local_database/README +++ /dev/null @@ -1 +0,0 @@ -local_database is used to generate qlocale data from the Common Locale Data Repository (The database for localized names (like date formats, country names etc)). 
diff --git a/util/locale_database/README b/util/locale_database/README new file mode 100644 index 0000000000..8654968d66 --- /dev/null +++ b/util/locale_database/README @@ -0,0 +1,5 @@ +locale_database is used to generate qlocale data from CLDR. + +CLDR is the Common Locale Data Repository, a database for localized +data (like date formats, country names etc). It is provided by the +Unicode consortium. diff --git a/util/local_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py index bc999e1b65..d75ef282f9 100755 --- a/util/local_database/cldr2qlocalexml.py +++ b/util/locale_database/cldr2qlocalexml.py @@ -31,7 +31,7 @@ The CLDR data can be downloaded from CLDR_, which has a sub-directory for each version; you need the ``core.zip`` file for your version of choice (typically the latest). This script has had updates to cope up -to v29; for later versions, we may need adaptations. Unpack the +to v35; for later versions, we may need adaptations. Unpack the downloaded ``core.zip`` and check it has a common/main/ sub-directory: pass the path of that sub-directory to this script as its single command-line argument. Save its standard output (but not error) to a @@ -95,6 +95,34 @@ def parse_number_format(patterns, data): result.append(pattern) return result +def raiseUnknownCode(code, form, cache={}): + """Check whether an unknown code could be supported. + + We declare a language, script or country code unknown if it's not + known to enumdata.py; however, if it's present in main/en.xml's + mapping of codes to names, we have the option of adding support. + This caches the necessary look-up (so we only read main/en.xml + once) and returns the name we should use if we do add support. + + First parameter, code, is the unknown code. Second parameter, + form, is one of 'language', 'script' or 'country' to select the + type of code to look up. Do not pass further parameters (the next + will deprive you of the cache). 
+ + Raises xpathlite.Error with a suitable message, that includes the + unknown code's full name if found. + + Relies on global cldr_dir being set before it's called; see tail + of this file. + """ + if not cache: + cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml'))) + name = cache[form].get(code) + msg = 'unknown %s code "%s"' % (form, code) + if name: + msg += ' - could use "%s"' % name + raise xpathlite.Error(msg) + def parse_list_pattern_part_format(pattern): # This is a very limited parsing of the format for list pattern part only. return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3") @@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_ language_id = enumdata.languageCodeToId(language_code) if language_id <= 0: - raise xpathlite.Error('unknown language code "%s"' % language_code) + raiseUnknownCode(language_code, 'language') script_id = enumdata.scriptCodeToId(script_code) if script_id == -1: - raise xpathlite.Error('unknown script code "%s"' % script_code) + raiseUnknownCode(script_code, 'script') # we should handle fully qualified names with the territory if not country_code: return {} country_id = enumdata.countryCodeToId(country_code) if country_id <= 0: - raise xpathlite.Error('unknown country code "%s"' % country_code) + raiseUnknownCode(country_code, 'country') # So we say we accept only those values that have "contributed" or # "approved" resolution. see http://www.unicode.org/cldr/process.html diff --git a/util/local_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py index 7c10b1dfd2..256839317c 100755 --- a/util/local_database/cldr2qtimezone.py +++ b/util/locale_database/cldr2qtimezone.py @@ -343,7 +343,7 @@ newTempFile.write(""" http://www.unicode.org/cldr/ Do not edit this code: run cldr2qtimezone.py on updated (or - edited) CLDR data; see qtbase/util/local_database/. + edited) CLDR data; see qtbase/util/locale_database/. 
*/ """ % (str(datetime.date.today()), cldr_version, versionNumber) ) diff --git a/util/local_database/dateconverter.py b/util/locale_database/dateconverter.py index 1990fe0c61..1990fe0c61 100755 --- a/util/local_database/dateconverter.py +++ b/util/locale_database/dateconverter.py diff --git a/util/local_database/enumdata.py b/util/locale_database/enumdata.py index 26bb74d1fe..0e40d8a9ee 100644 --- a/util/local_database/enumdata.py +++ b/util/locale_database/enumdata.py @@ -402,6 +402,8 @@ language_list = { 362: ["Sicilian", "scn"], 363: ["Southern Kurdish", "sdh"], 364: ["Western Balochi", "bgn"], + 365: ["Cebuano", "ceb"], + 366: ["Erzya", "myv"], } language_aliases = { diff --git a/util/local_database/formattags.txt b/util/locale_database/formattags.txt index 5138c37a81..5138c37a81 100644 --- a/util/local_database/formattags.txt +++ b/util/locale_database/formattags.txt diff --git a/util/local_database/localexml.py b/util/locale_database/localexml.py index a47fa6a5ff..e95b3aebcc 100644 --- a/util/local_database/localexml.py +++ b/util/locale_database/localexml.py @@ -53,7 +53,21 @@ def ordStr(c): def fixOrdStr(c, d): return str(ord(c if len(c) == 1 else d)) +def startCount(c, text): # strspn + """First index in text where it doesn't have a character in c""" + assert text and text[0] in c + try: + return (j for j, d in enumerate(text) if d not in c).next() + except StopIteration: + return len(text) + def convertFormat(format): + """Convert date/time format-specifier from CLDR to Qt + + Match up (as best we can) the differences between: + * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table + * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString() + """ result = "" i = 0 while i < len(format): @@ -68,20 +82,30 @@ def convertFormat(format): i += 1 else: s = format[i:] - if s.startswith("EEEE"): - result += "dddd" - i += 4 - elif s.startswith("EEE"): - result += "ddd" - i += 3 - elif s.startswith("a"): + if s.startswith('E'): 
# week-day + n = startCount('E', s) + if n < 3: + result += 'ddd' + elif n == 4: + result += 'dddd' + else: # 5: narrow, 6 short; but should be name, not number :-( + result += 'd' if n < 6 else 'dd' + i += n + elif s[0] in 'ab': # am/pm + # 'b' should distinguish noon/midnight, too :-( result += "AP" - i += 1 - elif s.startswith("z"): + i += startCount('ab', s) + elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show + result += 'z' + i += startCount('S', s) + elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID) + result += 't' + i += startCount('V', s) + elif s[0] in 'zv': # zone + # Should use full name, e.g. "Central European Time", if 'zzzz' :-( + # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator result += "t" - i += 1 - elif s.startswith("v"): - i += 1 + i += startCount('zv', s) else: result += format[i] i += 1 diff --git a/util/local_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py index fb5ae5ba54..2dad2dd57a 100755 --- a/util/local_database/qlocalexml2cpp.py +++ b/util/locale_database/qlocalexml2cpp.py @@ -370,7 +370,7 @@ def main(): Do not edit this section: instead regenerate it using cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or - edited) CLDR data; see qtbase/util/local_database/. + edited) CLDR data; see qtbase/util/locale_database/. 
*/ """ % (str(datetime.date.today()), cldr_version) ) diff --git a/util/local_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp index d380d01e09..d380d01e09 100644 --- a/util/local_database/testlocales/localemodel.cpp +++ b/util/locale_database/testlocales/localemodel.cpp diff --git a/util/local_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h index b24fc5f4c6..b24fc5f4c6 100644 --- a/util/local_database/testlocales/localemodel.h +++ b/util/locale_database/testlocales/localemodel.h diff --git a/util/local_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp index 3ff7f73a98..3ff7f73a98 100644 --- a/util/local_database/testlocales/localewidget.cpp +++ b/util/locale_database/testlocales/localewidget.cpp diff --git a/util/local_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h index 896a6e5229..896a6e5229 100644 --- a/util/local_database/testlocales/localewidget.h +++ b/util/locale_database/testlocales/localewidget.h diff --git a/util/local_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp index 0c3c45f989..0c3c45f989 100644 --- a/util/local_database/testlocales/main.cpp +++ b/util/locale_database/testlocales/main.cpp diff --git a/util/local_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro index a9a6247f96..a9a6247f96 100644 --- a/util/local_database/testlocales/testlocales.pro +++ b/util/locale_database/testlocales/testlocales.pro diff --git a/util/local_database/xpathlite.py b/util/locale_database/xpathlite.py index 218135d7a7..97efaaab41 100644 --- a/util/local_database/xpathlite.py +++ b/util/locale_database/xpathlite.py @@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None): return node return False +def codeMapsFromFile(file): + """Extract mappings of language, script and country codes to names. 
+ + The file shall typically be common/main/en.xml, which contains a + localeDisplayNames element with children languages, scripts and + territories; each element in each of these has a code as its type + attribute and its name as element content. This returns a mapping + with the keys 'language', 'script' and 'country', each of which + has, as value, a mapping of the relevant codes to names. + """ + parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames') + keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {} + for src, dst in keys.items(): + child = findChild(parent, src) + data = result[dst] = {} + for elt in child.childNodes: + if elt.attributes and elt.attributes.has_key('type'): + key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText + # Don't over-write previously-read data for an alt form: + if elt.attributes.has_key('alt') and data.has_key(key): + continue + data[key] = value + + return result + def findTagsInFile(file, path): doc = parseDoc(file) elt = doc.documentElement tag_spec_list = path.split("/") last_entry = None - for i in range(len(tag_spec_list)): - tag_spec = tag_spec_list[i] + for tag_spec in tag_spec_list: tag_name = tag_spec arg_name = 'type' arg_value = '' |