summary | refs | log | tree | commit | diff | stats
path: root/util
diff options
context:
space:
mode:
author: Alexandru Croitor <alexandru.croitor@qt.io> 2019-05-28 16:41:49 +0200
committer: Alexandru Croitor <alexandru.croitor@qt.io> 2019-06-03 15:14:42 +0200
commit: e4079eca49adce16e31dac2a18d49d7a55817891 (patch)
tree: 1dfb960ec1115b1f552afe8a013058542389505e /util
parent: f32a6cfb6b6236533508901f114ab57396da8ff3 (diff)
parent: ec6dc5f78453048c4f0604655a34c6c20c79d819 (diff)
Merge remote-tracking branch 'origin/dev' into wip/cmake
Diffstat (limited to 'util')
-rw-r--r--util/corelib/qurl-generateTLDs/main.cpp26
-rw-r--r--util/local_database/README1
-rw-r--r--util/locale_database/README5
-rwxr-xr-xutil/locale_database/cldr2qlocalexml.py (renamed from util/local_database/cldr2qlocalexml.py)36
-rwxr-xr-xutil/locale_database/cldr2qtimezone.py (renamed from util/local_database/cldr2qtimezone.py)2
-rwxr-xr-xutil/locale_database/dateconverter.py (renamed from util/local_database/dateconverter.py)0
-rw-r--r--util/locale_database/enumdata.py (renamed from util/local_database/enumdata.py)2
-rw-r--r--util/locale_database/formattags.txt (renamed from util/local_database/formattags.txt)0
-rw-r--r--util/locale_database/localexml.py (renamed from util/local_database/localexml.py)48
-rwxr-xr-xutil/locale_database/qlocalexml2cpp.py (renamed from util/local_database/qlocalexml2cpp.py)2
-rw-r--r--util/locale_database/testlocales/localemodel.cpp (renamed from util/local_database/testlocales/localemodel.cpp)0
-rw-r--r--util/locale_database/testlocales/localemodel.h (renamed from util/local_database/testlocales/localemodel.h)0
-rw-r--r--util/locale_database/testlocales/localewidget.cpp (renamed from util/local_database/testlocales/localewidget.cpp)0
-rw-r--r--util/locale_database/testlocales/localewidget.h (renamed from util/local_database/testlocales/localewidget.h)0
-rw-r--r--util/locale_database/testlocales/main.cpp (renamed from util/local_database/testlocales/main.cpp)0
-rw-r--r--util/locale_database/testlocales/testlocales.pro (renamed from util/local_database/testlocales/testlocales.pro)0
-rw-r--r--util/locale_database/xpathlite.py (renamed from util/local_database/xpathlite.py)28
17 files changed, 116 insertions, 34 deletions
diff --git a/util/corelib/qurl-generateTLDs/main.cpp b/util/corelib/qurl-generateTLDs/main.cpp
index 6fde287049..e458ea9d53 100644
--- a/util/corelib/qurl-generateTLDs/main.cpp
+++ b/util/corelib/qurl-generateTLDs/main.cpp
@@ -90,15 +90,15 @@ int main(int argc, char **argv)
{
QCoreApplication app(argc, argv);
if (argc < 3) {
- printf("\nusage: %s inputFile outputFile\n\n", argv[0]);
+ printf("\nUsage: ./%s inputFile outputFile\n\n", argv[0]);
printf("'inputFile' should be a list of effective TLDs, one per line,\n");
- printf("as obtained from http://publicsuffix.org . To create indices and data file\n");
+ printf("as obtained from http://publicsuffix.org/. To create indices and data\n");
printf("file, do the following:\n\n");
- printf(" wget https://publicsuffix.org/list/effective_tld_names.dat -O effective_tld_names.dat\n");
- printf(" grep '^[^\\/\\/]' effective_tld_names.dat > effective_tld_names.dat.trimmed\n");
- printf(" %s effective_tld_names.dat.trimmed effective_tld_names.dat.qt\n\n", argv[0]);
- printf("Now copy the data from effective_tld_names.dat.qt to the file src/corelib/io/qurltlds_p.h in your Qt repo\n\n");
- exit(1);
+ printf(" wget https://publicsuffix.org/list/public_suffix_list.dat -O public_suffix_list.dat\n");
+ printf(" grep -v '^//' public_suffix_list.dat | grep . > public_suffix_list.dat.trimmed\n");
+ printf(" ./%s public_suffix_list.dat.trimmed public_suffix_list.cpp\n\n", argv[0]);
+ printf("Now replace the code in qtbase/src/corelib/io/qurltlds_p.h with public_suffix_list.cpp's contents\n\n");
+ return 1;
}
QFile file(argv[1]);
if (!file.open(QIODevice::ReadOnly)) {
@@ -146,7 +146,7 @@ int main(int argc, char **argv)
entry.append("\\0");
}
outFile.write("static const quint32 tldIndices[] = {\n");
- outDataBuffer.write("\nstatic const char *tldData[] = {\n");
+ outDataBuffer.write("\nstatic const char *tldData[] = {");
int totalUtf8Size = 0;
int chunkSize = 0; // strlen of the current chunk (sizeof is bigger by 1)
@@ -165,22 +165,22 @@ int main(int argc, char **argv)
if (chunkSize >= 0xffff) {
static int chunkCount = 0;
qWarning() << "chunk" << ++chunkCount << "has length" << chunkSize - stringUtf8Size;
- outDataBuffer.write(",\n\n");
+ outDataBuffer.write(",\n");
chunks.append(QString::number(totalUtf8Size));
chunkSize = 0;
}
totalUtf8Size += stringUtf8Size;
- outDataBuffer.write("\"");
+ outDataBuffer.write("\n\"");
outDataBuffer.write(entry.toUtf8());
- outDataBuffer.write("\"\n");
+ outDataBuffer.write("\"");
}
}
chunks.append(QString::number(totalUtf8Size));
outFile.write(QByteArray::number(totalUtf8Size));
- outFile.write("};\n");
+ outFile.write("\n};\n");
- outDataBuffer.write("};\n");
+ outDataBuffer.write("\n};\n");
outDataBuffer.close();
outFile.write(outDataBufferBA);
diff --git a/util/local_database/README b/util/local_database/README
deleted file mode 100644
index 23b6a33ad8..0000000000
--- a/util/local_database/README
+++ /dev/null
@@ -1 +0,0 @@
-local_database is used to generate qlocale data from the Common Locale Data Repository (The database for localized names (like date formats, country names etc)).
diff --git a/util/locale_database/README b/util/locale_database/README
new file mode 100644
index 0000000000..8654968d66
--- /dev/null
+++ b/util/locale_database/README
@@ -0,0 +1,5 @@
+locale_database is used to generate qlocale data from CLDR.
+
+CLDR is the Common Locale Data Repository, a database for localized
+data (like date formats, country names etc). It is provided by the
+Unicode consortium.
diff --git a/util/local_database/cldr2qlocalexml.py b/util/locale_database/cldr2qlocalexml.py
index bc999e1b65..d75ef282f9 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/locale_database/cldr2qlocalexml.py
@@ -31,7 +31,7 @@
The CLDR data can be downloaded from CLDR_, which has a sub-directory
for each version; you need the ``core.zip`` file for your version of
choice (typically the latest). This script has had updates to cope up
-to v29; for later versions, we may need adaptations. Unpack the
+to v35; for later versions, we may need adaptations. Unpack the
downloaded ``core.zip`` and check it has a common/main/ sub-directory:
pass the path of that sub-directory to this script as its single
command-line argument. Save its standard output (but not error) to a
@@ -95,6 +95,34 @@ def parse_number_format(patterns, data):
result.append(pattern)
return result
+def raiseUnknownCode(code, form, cache={}):
+ """Check whether an unknown code could be supported.
+
+ We declare a language, script or country code unknown if it's not
+ known to enumdata.py; however, if it's present in main/en.xml's
+ mapping of codes to names, we have the option of adding support.
+ This caches the necessary look-up (so we only read main/en.xml
+ once) and returns the name we should use if we do add support.
+
+ First parameter, code, is the unknown code. Second parameter,
+ form, is one of 'language', 'script' or 'country' to select the
+ type of code to look up. Do not pass further parameters (the next
+ will deprive you of the cache).
+
+ Raises xpathlite.Error with a suitable message, that includes the
+ unknown code's full name if found.
+
+ Relies on global cldr_dir being set before it's called; see tail
+ of this file.
+ """
+ if not cache:
+ cache.update(xpathlite.codeMapsFromFile(os.path.join(cldr_dir, 'en.xml')))
+ name = cache[form].get(code)
+ msg = 'unknown %s code "%s"' % (form, code)
+ if name:
+ msg += ' - could use "%s"' % name
+ raise xpathlite.Error(msg)
+
def parse_list_pattern_part_format(pattern):
# This is a very limited parsing of the format for list pattern part only.
return pattern.replace("{0}", "%1").replace("{1}", "%2").replace("{2}", "%3")
@@ -193,18 +221,18 @@ def _generateLocaleInfo(path, language_code, script_code, country_code, variant_
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
- raise xpathlite.Error('unknown language code "%s"' % language_code)
+ raiseUnknownCode(language_code, 'language')
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- raise xpathlite.Error('unknown script code "%s"' % script_code)
+ raiseUnknownCode(script_code, 'script')
# we should handle fully qualified names with the territory
if not country_code:
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
- raise xpathlite.Error('unknown country code "%s"' % country_code)
+ raiseUnknownCode(country_code, 'country')
# So we say we accept only those values that have "contributed" or
# "approved" resolution. see http://www.unicode.org/cldr/process.html
diff --git a/util/local_database/cldr2qtimezone.py b/util/locale_database/cldr2qtimezone.py
index 7c10b1dfd2..256839317c 100755
--- a/util/local_database/cldr2qtimezone.py
+++ b/util/locale_database/cldr2qtimezone.py
@@ -343,7 +343,7 @@ newTempFile.write("""
http://www.unicode.org/cldr/
Do not edit this code: run cldr2qtimezone.py on updated (or
- edited) CLDR data; see qtbase/util/local_database/.
+ edited) CLDR data; see qtbase/util/locale_database/.
*/
""" % (str(datetime.date.today()), cldr_version, versionNumber) )
diff --git a/util/local_database/dateconverter.py b/util/locale_database/dateconverter.py
index 1990fe0c61..1990fe0c61 100755
--- a/util/local_database/dateconverter.py
+++ b/util/locale_database/dateconverter.py
diff --git a/util/local_database/enumdata.py b/util/locale_database/enumdata.py
index 26bb74d1fe..0e40d8a9ee 100644
--- a/util/local_database/enumdata.py
+++ b/util/locale_database/enumdata.py
@@ -402,6 +402,8 @@ language_list = {
362: ["Sicilian", "scn"],
363: ["Southern Kurdish", "sdh"],
364: ["Western Balochi", "bgn"],
+ 365: ["Cebuano", "ceb"],
+ 366: ["Erzya", "myv"],
}
language_aliases = {
diff --git a/util/local_database/formattags.txt b/util/locale_database/formattags.txt
index 5138c37a81..5138c37a81 100644
--- a/util/local_database/formattags.txt
+++ b/util/locale_database/formattags.txt
diff --git a/util/local_database/localexml.py b/util/locale_database/localexml.py
index a47fa6a5ff..e95b3aebcc 100644
--- a/util/local_database/localexml.py
+++ b/util/locale_database/localexml.py
@@ -53,7 +53,21 @@ def ordStr(c):
def fixOrdStr(c, d):
return str(ord(c if len(c) == 1 else d))
+def startCount(c, text): # strspn
+ """First index in text where it doesn't have a character in c"""
+ assert text and text[0] in c
+ try:
+ return (j for j, d in enumerate(text) if d not in c).next()
+ except StopIteration:
+ return len(text)
+
def convertFormat(format):
+ """Convert date/time format-specifier from CLDR to Qt
+
+ Match up (as best we can) the differences between:
+ * https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+ * QDateTimeParser::parseFormat() and QLocalePrivate::dateTimeToString()
+ """
result = ""
i = 0
while i < len(format):
@@ -68,20 +82,30 @@ def convertFormat(format):
i += 1
else:
s = format[i:]
- if s.startswith("EEEE"):
- result += "dddd"
- i += 4
- elif s.startswith("EEE"):
- result += "ddd"
- i += 3
- elif s.startswith("a"):
+ if s.startswith('E'): # week-day
+ n = startCount('E', s)
+ if n < 3:
+ result += 'ddd'
+ elif n == 4:
+ result += 'dddd'
+ else: # 5: narrow, 6 short; but should be name, not number :-(
+ result += 'd' if n < 6 else 'dd'
+ i += n
+ elif s[0] in 'ab': # am/pm
+ # 'b' should distinguish noon/midnight, too :-(
result += "AP"
- i += 1
- elif s.startswith("z"):
+ i += startCount('ab', s)
+ elif s.startswith('S'): # fractions of seconds: count('S') == number of decimals to show
+ result += 'z'
+ i += startCount('S', s)
+ elif s.startswith('V'): # long time zone specifiers (and a deprecated short ID)
+ result += 't'
+ i += startCount('V', s)
+ elif s[0] in 'zv': # zone
+ # Should use full name, e.g. "Central European Time", if 'zzzz' :-(
+ # 'v' should get generic non-location format, e.g. PT for "Pacific Time", no DST indicator
result += "t"
- i += 1
- elif s.startswith("v"):
- i += 1
+ i += startCount('zv', s)
else:
result += format[i]
i += 1
diff --git a/util/local_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index fb5ae5ba54..2dad2dd57a 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -370,7 +370,7 @@ def main():
Do not edit this section: instead regenerate it using
cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
- edited) CLDR data; see qtbase/util/local_database/.
+ edited) CLDR data; see qtbase/util/locale_database/.
*/
""" % (str(datetime.date.today()), cldr_version) )
diff --git a/util/local_database/testlocales/localemodel.cpp b/util/locale_database/testlocales/localemodel.cpp
index d380d01e09..d380d01e09 100644
--- a/util/local_database/testlocales/localemodel.cpp
+++ b/util/locale_database/testlocales/localemodel.cpp
diff --git a/util/local_database/testlocales/localemodel.h b/util/locale_database/testlocales/localemodel.h
index b24fc5f4c6..b24fc5f4c6 100644
--- a/util/local_database/testlocales/localemodel.h
+++ b/util/locale_database/testlocales/localemodel.h
diff --git a/util/local_database/testlocales/localewidget.cpp b/util/locale_database/testlocales/localewidget.cpp
index 3ff7f73a98..3ff7f73a98 100644
--- a/util/local_database/testlocales/localewidget.cpp
+++ b/util/locale_database/testlocales/localewidget.cpp
diff --git a/util/local_database/testlocales/localewidget.h b/util/locale_database/testlocales/localewidget.h
index 896a6e5229..896a6e5229 100644
--- a/util/local_database/testlocales/localewidget.h
+++ b/util/locale_database/testlocales/localewidget.h
diff --git a/util/local_database/testlocales/main.cpp b/util/locale_database/testlocales/main.cpp
index 0c3c45f989..0c3c45f989 100644
--- a/util/local_database/testlocales/main.cpp
+++ b/util/locale_database/testlocales/main.cpp
diff --git a/util/local_database/testlocales/testlocales.pro b/util/locale_database/testlocales/testlocales.pro
index a9a6247f96..a9a6247f96 100644
--- a/util/local_database/testlocales/testlocales.pro
+++ b/util/locale_database/testlocales/testlocales.pro
diff --git a/util/local_database/xpathlite.py b/util/locale_database/xpathlite.py
index 218135d7a7..97efaaab41 100644
--- a/util/local_database/xpathlite.py
+++ b/util/locale_database/xpathlite.py
@@ -78,14 +78,38 @@ def findChild(parent, tag_name, arg_name=None, arg_value=None, draft=None):
return node
return False
+def codeMapsFromFile(file):
+ """Extract mappings of language, script and country codes to names.
+
+ The file shall typically be common/main/en.xml, which contains a
+ localeDisplayNames element with children languages, scripts and
+ territories; each element in each of these has a code as its type
+ attribute and its name as element content. This returns a mapping
+ with keys 'language', 'script' and 'country', each of which
+ has, as value, a mapping of the relevant codes to names.
+ """
+ parent = findChild(findChild(parseDoc(file), 'ldml'), 'localeDisplayNames')
+ keys, result = {'languages': 'language', 'scripts': 'script', 'territories': 'country'}, {}
+ for src, dst in keys.items():
+ child = findChild(parent, src)
+ data = result[dst] = {}
+ for elt in child.childNodes:
+ if elt.attributes and elt.attributes.has_key('type'):
+ key, value = elt.attributes['type'].value, elt.childNodes[0].wholeText
+ # Don't over-write previously-read data for an alt form:
+ if elt.attributes.has_key('alt') and data.has_key(key):
+ continue
+ data[key] = value
+
+ return result
+
def findTagsInFile(file, path):
doc = parseDoc(file)
elt = doc.documentElement
tag_spec_list = path.split("/")
last_entry = None
- for i in range(len(tag_spec_list)):
- tag_spec = tag_spec_list[i]
+ for tag_spec in tag_spec_list:
tag_name = tag_spec
arg_name = 'type'
arg_value = ''