summary | refs | log | tree | commit | diff | stats
path: root/util/local_database/cldr2qlocalexml.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/local_database/cldr2qlocalexml.py')
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 119
1 files changed, 80 insertions, 39 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 2377aa0771..400b7a51f8 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -115,8 +115,6 @@ def fixOrdStrList(c):
return str(ord(';'))
def generateLocaleInfo(path):
- (dir_name, file_name) = os.path.split(path)
-
if not path.endswith(".xml"):
return {}
@@ -126,13 +124,20 @@ def generateLocaleInfo(path):
raise xpathlite.Error("alias to \"%s\"" % alias)
language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
- if language_code == 'root':
- # just skip it
- return {}
country_code = findEntryInFile(path, "identity/territory", attribute="type")[0]
script_code = findEntryInFile(path, "identity/script", attribute="type")[0]
variant_code = findEntryInFile(path, "identity/variant", attribute="type")[0]
+ return _generateLocaleInfo(path, language_code, script_code, country_code, variant_code)
+
+def _generateLocaleInfo(path, language_code, script_code, country_code, variant_code=""):
+ if not path.endswith(".xml"):
+ return {}
+
+ if language_code == 'root':
+ # just skip it
+ return {}
+
# we do not support variants
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
@@ -175,6 +180,7 @@ def generateLocaleInfo(path):
result['script_id'] = script_id
result['country_id'] = country_id
+ (dir_name, file_name) = os.path.split(path)
supplementalPath = dir_name + "/../supplemental/supplementalData.xml"
currencies = findTagsInFile(supplementalPath, "currencyData/region[iso3166=%s]"%country_code);
result['currencyIsoCode'] = ''
@@ -562,6 +568,41 @@ if not os.path.isdir(cldr_dir):
cldr_files = os.listdir(cldr_dir)
locale_database = {}
+
+# see http://www.unicode.org/reports/tr35/tr35-info.html#Default_Content
+defaultContent_locales = {}
+for ns in findTagsInFile(cldr_dir + "/../supplemental/supplementalMetadata.xml", "metadata/defaultContent"):
+ for data in ns[1:][0]:
+ if data[0] == u"locales":
+ defaultContent_locales = data[1].split()
+
+for file in defaultContent_locales:
+ items = file.split("_")
+ if len(items) == 3:
+ language_code = items[0]
+ script_code = items[1]
+ country_code = items[2]
+ else:
+ if len(items) != 2:
+ sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ continue
+ language_code = items[0]
+ script_code = ""
+ country_code = items[1]
+ if len(country_code) == 4:
+ sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ continue
+ try:
+ l = _generateLocaleInfo(cldr_dir + "/" + file + ".xml", language_code, script_code, country_code)
+ if not l:
+ sys.stderr.write("skipping defaultContent locale \"" + file + "\"\n")
+ continue
+ except xpathlite.Error as e:
+ sys.stderr.write("skipping defaultContent locale \"%s\" (%s)\n" % (file, str(e)))
+ continue
+
+ locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
+
for file in cldr_files:
try:
l = generateLocaleInfo(cldr_dir + "/" + file)
@@ -762,42 +803,42 @@ for key in locale_keys:
print " <minus>" + fixOrdStrMinus(l['minus']) + "</minus>"
print " <plus>" + fixOrdStrPlus(l['plus']) + "</plus>"
print " <exp>" + fixOrdStrExp(l['exp']) + "</exp>"
- print " <quotationStart>" + l['quotationStart'].encode('utf-8') + "</quotationStart>"
- print " <quotationEnd>" + l['quotationEnd'].encode('utf-8') + "</quotationEnd>"
- print " <alternateQuotationStart>" + l['alternateQuotationStart'].encode('utf-8') + "</alternateQuotationStart>"
- print " <alternateQuotationEnd>" + l['alternateQuotationEnd'].encode('utf-8') + "</alternateQuotationEnd>"
- print " <listPatternPartStart>" + l['listPatternPartStart'].encode('utf-8') + "</listPatternPartStart>"
- print " <listPatternPartMiddle>" + l['listPatternPartMiddle'].encode('utf-8') + "</listPatternPartMiddle>"
- print " <listPatternPartEnd>" + l['listPatternPartEnd'].encode('utf-8') + "</listPatternPartEnd>"
- print " <listPatternPartTwo>" + l['listPatternPartTwo'].encode('utf-8') + "</listPatternPartTwo>"
- print " <am>" + l['am'].encode('utf-8') + "</am>"
- print " <pm>" + l['pm'].encode('utf-8') + "</pm>"
- print " <firstDayOfWeek>" + l['firstDayOfWeek'].encode('utf-8') + "</firstDayOfWeek>"
- print " <weekendStart>" + l['weekendStart'].encode('utf-8') + "</weekendStart>"
- print " <weekendEnd>" + l['weekendEnd'].encode('utf-8') + "</weekendEnd>"
- print " <longDateFormat>" + l['longDateFormat'].encode('utf-8') + "</longDateFormat>"
- print " <shortDateFormat>" + l['shortDateFormat'].encode('utf-8') + "</shortDateFormat>"
- print " <longTimeFormat>" + l['longTimeFormat'].encode('utf-8') + "</longTimeFormat>"
- print " <shortTimeFormat>" + l['shortTimeFormat'].encode('utf-8') + "</shortTimeFormat>"
- print " <standaloneLongMonths>" + l['standaloneLongMonths'].encode('utf-8') + "</standaloneLongMonths>"
- print " <standaloneShortMonths>"+ l['standaloneShortMonths'].encode('utf-8') + "</standaloneShortMonths>"
- print " <standaloneNarrowMonths>"+ l['standaloneNarrowMonths'].encode('utf-8') + "</standaloneNarrowMonths>"
- print " <longMonths>" + l['longMonths'].encode('utf-8') + "</longMonths>"
- print " <shortMonths>" + l['shortMonths'].encode('utf-8') + "</shortMonths>"
- print " <narrowMonths>" + l['narrowMonths'].encode('utf-8') + "</narrowMonths>"
- print " <longDays>" + l['longDays'].encode('utf-8') + "</longDays>"
- print " <shortDays>" + l['shortDays'].encode('utf-8') + "</shortDays>"
- print " <narrowDays>" + l['narrowDays'].encode('utf-8') + "</narrowDays>"
- print " <standaloneLongDays>" + l['standaloneLongDays'].encode('utf-8') + "</standaloneLongDays>"
- print " <standaloneShortDays>" + l['standaloneShortDays'].encode('utf-8') + "</standaloneShortDays>"
- print " <standaloneNarrowDays>" + l['standaloneNarrowDays'].encode('utf-8') + "</standaloneNarrowDays>"
- print " <currencyIsoCode>" + l['currencyIsoCode'].encode('utf-8') + "</currencyIsoCode>"
- print " <currencySymbol>" + l['currencySymbol'].encode('utf-8') + "</currencySymbol>"
- print " <currencyDisplayName>" + l['currencyDisplayName'].encode('utf-8') + "</currencyDisplayName>"
+ print " <quotationStart>" + escape(l['quotationStart']).encode('utf-8') + "</quotationStart>"
+ print " <quotationEnd>" + escape(l['quotationEnd']).encode('utf-8') + "</quotationEnd>"
+ print " <alternateQuotationStart>" + escape(l['alternateQuotationStart']).encode('utf-8') + "</alternateQuotationStart>"
+ print " <alternateQuotationEnd>" + escape(l['alternateQuotationEnd']).encode('utf-8') + "</alternateQuotationEnd>"
+ print " <listPatternPartStart>" + escape(l['listPatternPartStart']).encode('utf-8') + "</listPatternPartStart>"
+ print " <listPatternPartMiddle>" + escape(l['listPatternPartMiddle']).encode('utf-8') + "</listPatternPartMiddle>"
+ print " <listPatternPartEnd>" + escape(l['listPatternPartEnd']).encode('utf-8') + "</listPatternPartEnd>"
+ print " <listPatternPartTwo>" + escape(l['listPatternPartTwo']).encode('utf-8') + "</listPatternPartTwo>"
+ print " <am>" + escape(l['am']).encode('utf-8') + "</am>"
+ print " <pm>" + escape(l['pm']).encode('utf-8') + "</pm>"
+ print " <firstDayOfWeek>" + escape(l['firstDayOfWeek']).encode('utf-8') + "</firstDayOfWeek>"
+ print " <weekendStart>" + escape(l['weekendStart']).encode('utf-8') + "</weekendStart>"
+ print " <weekendEnd>" + escape(l['weekendEnd']).encode('utf-8') + "</weekendEnd>"
+ print " <longDateFormat>" + escape(l['longDateFormat']).encode('utf-8') + "</longDateFormat>"
+ print " <shortDateFormat>" + escape(l['shortDateFormat']).encode('utf-8') + "</shortDateFormat>"
+ print " <longTimeFormat>" + escape(l['longTimeFormat']).encode('utf-8') + "</longTimeFormat>"
+ print " <shortTimeFormat>" + escape(l['shortTimeFormat']).encode('utf-8') + "</shortTimeFormat>"
+ print " <standaloneLongMonths>" + escape(l['standaloneLongMonths']).encode('utf-8') + "</standaloneLongMonths>"
+ print " <standaloneShortMonths>"+ escape(l['standaloneShortMonths']).encode('utf-8') + "</standaloneShortMonths>"
+ print " <standaloneNarrowMonths>"+ escape(l['standaloneNarrowMonths']).encode('utf-8') + "</standaloneNarrowMonths>"
+ print " <longMonths>" + escape(l['longMonths']).encode('utf-8') + "</longMonths>"
+ print " <shortMonths>" + escape(l['shortMonths']).encode('utf-8') + "</shortMonths>"
+ print " <narrowMonths>" + escape(l['narrowMonths']).encode('utf-8') + "</narrowMonths>"
+ print " <longDays>" + escape(l['longDays']).encode('utf-8') + "</longDays>"
+ print " <shortDays>" + escape(l['shortDays']).encode('utf-8') + "</shortDays>"
+ print " <narrowDays>" + escape(l['narrowDays']).encode('utf-8') + "</narrowDays>"
+ print " <standaloneLongDays>" + escape(l['standaloneLongDays']).encode('utf-8') + "</standaloneLongDays>"
+ print " <standaloneShortDays>" + escape(l['standaloneShortDays']).encode('utf-8') + "</standaloneShortDays>"
+ print " <standaloneNarrowDays>" + escape(l['standaloneNarrowDays']).encode('utf-8') + "</standaloneNarrowDays>"
+ print " <currencyIsoCode>" + escape(l['currencyIsoCode']).encode('utf-8') + "</currencyIsoCode>"
+ print " <currencySymbol>" + escape(l['currencySymbol']).encode('utf-8') + "</currencySymbol>"
+ print " <currencyDisplayName>" + escape(l['currencyDisplayName']).encode('utf-8') + "</currencyDisplayName>"
print " <currencyDigits>" + str(l['currencyDigits']) + "</currencyDigits>"
print " <currencyRounding>" + str(l['currencyRounding']) + "</currencyRounding>"
- print " <currencyFormat>" + l['currencyFormat'].encode('utf-8') + "</currencyFormat>"
- print " <currencyNegativeFormat>" + l['currencyNegativeFormat'].encode('utf-8') + "</currencyNegativeFormat>"
+ print " <currencyFormat>" + escape(l['currencyFormat']).encode('utf-8') + "</currencyFormat>"
+ print " <currencyNegativeFormat>" + escape(l['currencyNegativeFormat']).encode('utf-8') + "</currencyNegativeFormat>"
print " </locale>"
print " </localeList>"
print "</localeDatabase>"