summaryrefslogtreecommitdiffstats
path: root/util/locale_database/enumdata.py
diff options
context:
space:
mode:
Diffstat (limited to 'util/locale_database/enumdata.py')
-rw-r--r--util/locale_database/enumdata.py184
1 files changed, 107 insertions, 77 deletions
diff --git a/util/locale_database/enumdata.py b/util/locale_database/enumdata.py
index c3a7f92209..1749b85f63 100644
--- a/util/locale_database/enumdata.py
+++ b/util/locale_database/enumdata.py
@@ -1,56 +1,60 @@
-# -*- coding: utf-8; -*-
-#############################################################################
-##
-## Copyright (C) 2021 The Qt Company Ltd.
-## Contact: https://www.qt.io/licensing/
-##
-## This file is part of the test suite of the Qt Toolkit.
-##
-## $QT_BEGIN_LICENSE:GPL-EXCEPT$
-## Commercial License Usage
-## Licensees holding valid commercial Qt licenses may use this file in
-## accordance with the commercial license agreement provided with the
-## Software or, alternatively, in accordance with the terms contained in
-## a written agreement between you and The Qt Company. For licensing terms
-## and conditions see https://www.qt.io/terms-conditions. For further
-## information use the contact form at https://www.qt.io/contact-us.
-##
-## GNU General Public License Usage
-## Alternatively, this file may be used under the terms of the GNU
-## General Public License version 3 as published by the Free Software
-## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-## included in the packaging of this file. Please review the following
-## information to ensure the GNU General Public License requirements will
-## be met: https://www.gnu.org/licenses/gpl-3.0.html.
-##
-## $QT_END_LICENSE$
-##
-#############################################################################
+# Copyright (C) 2021 The Qt Company Ltd.
+# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
-# A run of cldr2qlocalexml.py will produce output reporting any
-# language, script and territory codes it sees, in data, for which it
-# can find a name (taken always from en.xml) that could potentially be
-# used. There is no point adding a mapping for such a code unless the
-# CLDR's common/main/ contains an XML file for at least one locale
-# that exerciss it.
+"""Assorted enumerations implicated in public API.
-# Each *_list reflects the current values of its enums in qlocale.h;
-# if new xml language files are available in CLDR, these languages and
-# territories need to be *appended* to this list (for compatibility
-# between versions). Include any spaces present in names (scripts
-# shall squish them out for the enum entries) in *_list, but use the
-# squished forms of names in the *_aliases mappings.
+The numberings of these enumerations can only change at major
+versions. When new CLDR data implies adding entries, the new ones must
+go after all existing ones. See also zonedata.py for enumerations
+related to timezones and CLDR, which can more freely be changed
+between versions.
-# For a new major version (and only then), we can change the
-# numbering, so re-sort each list into alphabetic order (e.g. using
-# sort -k2); but keep the Any and C entries first. That's why those
-# are offset with a blank line, below. After doing that, regenerate
-# locale data as usual; this will cause a binary-incompatible change.
+A run of cldr2qlocalexml.py will produce output reporting any
+language, script and territory codes it sees, in data, for which it
+can find a name (taken always from en.xml) that could potentially be
+used. There is no point adding a mapping for such a code unless the
+CLDR's common/main/ contains an XML file for at least one locale that
+exercises it (and little point, even then, absent substantial data,
+ignoring draft='unconfirmed' entries).
-# Note on "macrolanguage" comments: see "ISO 639 macrolanguage" on
-# Wikipedia. A "macrolanguage" is (loosely-speaking) a group of
-# languages so closely related to one another that they could also be
-# regarded as divergent dialects of the macrolanguage.
+Each *_map reflects the current values of its enums in qlocale.h; if
+new xml language files are available in CLDR, these languages and
+territories need to be *appended* to its map (for compatibility
+between versions). Include any spaces and dashes present in names
+(they'll be squished out for the enum entries) in *_map, but use the
+squished forms of names in the *_aliases mappings. The squishing also
+turns the first letter of each word into a capital so you can safely
+preserve the case of en.xml's name; but omit (or replace with space)
+any punctuation aside from dashes and map any accented letters to
+their un-accented plain ASCII. The two tables, for each enum, have
+the forms:
+* map { Numeric value: ("Proper name", "ISO code") }
+* alias { "OldName": "CurrentName" }
+
+TODO: add support for marking entries as deprecated from a specified
+version. For aliases, that merely deprecates the name. Where we have a
+name for which CLDR offers no data, we may also want to deprecate
+entries in the map - although they may be worth keeping for the
+benefit of QLocaleSelector (see QTBUG-112765), if other
+locale-specific resources might have use of them.
+
+For a new major version (and only then), we can change the numbering,
+so re-sort each map into alphabetic order (e.g. using sort -k2); but
+keep the Any and C entries first. That's why those are offset with a
+blank line, below. After doing that, regenerate locale data as usual;
+this will cause a binary-incompatible change.
+
+Note on 'macrolanguage' comments: see QTBUG-107781 and 'ISO 639
+macrolanguage' on Wikipedia. A 'macrolanguage' is (loosely-speaking) a
+group of languages so closely related to one another that they could
+also be regarded as divergent dialects of the macrolanguage. In some
+cases this may mean a resource (such as translation or text-to-speech
+data) may describe itself as pertaining to the macrolanguage, implying
+its suitability for use in any of the languages within the
+macrolanguage. For example, no_NO might be used for a generic
+Norwegian resource, embracing both nb_NO and nn_NO.
+
+"""
language_map = {
0: ("AnyLanguage", " "),
@@ -177,7 +181,7 @@ language_map = {
120: ("Japanese", "ja"),
121: ("Javanese", "jv"),
122: ("Jju", "kaj"),
- 123: ("Jola Fonyi", "dyo"),
+ 123: ("Jola-Fonyi", "dyo"),
124: ("Kabuverdianu", "kea"),
125: ("Kabyle", "kab"),
126: ("Kako", "kkj"),
@@ -218,7 +222,7 @@ language_map = {
161: ("Lojban", "jbo"),
162: ("Lower Sorbian", "dsb"),
163: ("Low German", "nds"),
- 164: ("Luba Katanga", "lu"),
+ 164: ("Luba-Katanga", "lu"),
165: ("Lule Sami", "smj"),
166: ("Luo", "luo"),
167: ("Luxembourgish", "lb"),
@@ -226,7 +230,7 @@ language_map = {
169: ("Macedonian", "mk"),
170: ("Machame", "jmc"),
171: ("Maithili", "mai"),
- 172: ("Makhuwa Meetto", "mgh"),
+ 172: ("Makhuwa-Meetto", "mgh"),
173: ("Makonde", "kde"),
174: ("Malagasy", "mg"), # macrolanguage
175: ("Malayalam", "ml"),
@@ -382,7 +386,31 @@ language_map = {
325: ("Zarma", "dje"),
326: ("Zhuang", "za"), # macrolanguage
327: ("Zulu", "zu"),
+ # added in CLDR v40
+ 328: ("Kaingang", "kgp"),
+ 329: ("Nheengatu", "yrl"),
+ # added in CLDR v42
+ 330: ("Haryanvi", "bgc"),
+ 331: ("Northern Frisian", "frr"),
+ 332: ("Rajasthani", "raj"),
+ 333: ("Moksha", "mdf"),
+ 334: ("Toki Pona", "tok"),
+ 335: ("Pijin", "pis"),
+ 336: ("Obolo", "ann"),
+ # added in CLDR v43
+ 337: ("Baluchi", "bal"),
+ 338: ("Ligurian", "lij"),
+ 339: ("Rohingya", "rhg"),
+ 340: ("Torwali", "trw"),
+ # added in CLDR v44
+ 341: ("Anii", "blo"),
+ 342: ("Kangri", "xnr"),
+ 343: ("Venetian", "vec"),
}
+# Don't add languages just because they exist; check CLDR does provide
+# substantial data for locales using it; and check, once added, they
+# don't show up in cldr2qlocalexml.py's unused listing. Do also check
+# the data's draft status; if it's (nearly) all unconfirmed, leave it.
language_aliases = {
# Renamings prior to Qt 6.0 (CLDR v37):
@@ -420,7 +448,7 @@ territory_map = {
7: ("Angola", "AO"),
8: ("Anguilla", "AI"),
9: ("Antarctica", "AQ"),
- 10: ("Antigua And Barbuda", "AG"),
+ 10: ("Antigua and Barbuda", "AG"),
11: ("Argentina", "AR"),
12: ("Armenia", "AM"),
13: ("Aruba", "AW"),
@@ -439,7 +467,7 @@ territory_map = {
26: ("Bermuda", "BM"),
27: ("Bhutan", "BT"),
28: ("Bolivia", "BO"),
- 29: ("Bosnia And Herzegovina", "BA"),
+ 29: ("Bosnia and Herzegovina", "BA"),
30: ("Botswana", "BW"),
31: ("Bouvet Island", "BV"),
32: ("Brazil", "BR"),
@@ -457,7 +485,7 @@ territory_map = {
44: ("Caribbean Netherlands", "BQ"),
45: ("Cayman Islands", "KY"),
46: ("Central African Republic", "CF"),
- 47: ("Ceuta And Melilla", "EA"),
+ 47: ("Ceuta and Melilla", "EA"),
48: ("Chad", "TD"),
49: ("Chile", "CL"),
50: ("China", "CN"),
@@ -466,8 +494,8 @@ territory_map = {
53: ("Cocos Islands", "CC"),
54: ("Colombia", "CO"),
55: ("Comoros", "KM"),
- 56: ("Congo Brazzaville", "CG"),
- 57: ("Congo Kinshasa", "CD"),
+ 56: ("Congo - Brazzaville", "CG"),
+ 57: ("Congo - Kinshasa", "CD"),
58: ("Cook Islands", "CK"),
59: ("Costa Rica", "CR"),
60: ("Croatia", "HR"),
@@ -511,11 +539,11 @@ territory_map = {
98: ("Guam", "GU"),
99: ("Guatemala", "GT"),
100: ("Guernsey", "GG"),
- 101: ("Guinea Bissau", "GW"),
+ 101: ("Guinea-Bissau", "GW"),
102: ("Guinea", "GN"),
103: ("Guyana", "GY"),
104: ("Haiti", "HT"),
- 105: ("Heard And McDonald Islands", "HM"),
+ 105: ("Heard and McDonald Islands", "HM"),
106: ("Honduras", "HN"),
107: ("Hong Kong", "HK"),
108: ("Hungary", "HU"),
@@ -525,12 +553,12 @@ territory_map = {
112: ("Iran", "IR"),
113: ("Iraq", "IQ"),
114: ("Ireland", "IE"),
- 115: ("Isle Of Man", "IM"),
+ 115: ("Isle of Man", "IM"),
116: ("Israel", "IL"),
117: ("Italy", "IT"),
- # Officially Côte d’Ivoire, which we'd ned to map to CotedIvoire
- # or CoteDIvoire, either failing to make the d' separate from
- # Cote or messing with its case. So stick with Ivory Coast:
+ # Officially Côte d’Ivoire, which we'd need to map to CotedIvoire
+ # or CoteDIvoire, either failing to make the d' separate from Cote
+ # or messing with its case. So stick with Ivory Coast:
118: ("Ivory Coast", "CI"),
119: ("Jamaica", "JM"),
120: ("Japan", "JP"),
@@ -610,14 +638,14 @@ territory_map = {
194: ("Rwanda", "RW"),
195: ("Saint Barthelemy", "BL"),
196: ("Saint Helena", "SH"),
- 197: ("Saint Kitts And Nevis", "KN"),
+ 197: ("Saint Kitts and Nevis", "KN"),
198: ("Saint Lucia", "LC"),
199: ("Saint Martin", "MF"),
- 200: ("Saint Pierre And Miquelon", "PM"),
- 201: ("Saint Vincent And Grenadines", "VC"),
+ 200: ("Saint Pierre and Miquelon", "PM"),
+ 201: ("Saint Vincent and Grenadines", "VC"),
202: ("Samoa", "WS"),
203: ("San Marino", "SM"),
- 204: ("Sao Tome And Principe", "ST"),
+ 204: ("Sao Tome and Principe", "ST"),
205: ("Saudi Arabia", "SA"),
206: ("Senegal", "SN"),
207: ("Serbia", "RS"),
@@ -630,14 +658,14 @@ territory_map = {
214: ("Solomon Islands", "SB"),
215: ("Somalia", "SO"),
216: ("South Africa", "ZA"),
- 217: ("South Georgia And South Sandwich Islands", "GS"),
+ 217: ("South Georgia and South Sandwich Islands", "GS"),
218: ("South Korea", "KR"),
219: ("South Sudan", "SS"),
220: ("Spain", "ES"),
221: ("Sri Lanka", "LK"),
222: ("Sudan", "SD"),
223: ("Suriname", "SR"),
- 224: ("Svalbard And Jan Mayen", "SJ"),
+ 224: ("Svalbard and Jan Mayen", "SJ"),
225: ("Sweden", "SE"),
226: ("Switzerland", "CH"),
227: ("Syria", "SY"),
@@ -649,12 +677,12 @@ territory_map = {
233: ("Togo", "TG"),
234: ("Tokelau", "TK"),
235: ("Tonga", "TO"),
- 236: ("Trinidad And Tobago", "TT"),
- 237: ("Tristan Da Cunha", "TA"),
+ 236: ("Trinidad and Tobago", "TT"),
+ 237: ("Tristan da Cunha", "TA"),
238: ("Tunisia", "TN"),
239: ("Turkey", "TR"),
240: ("Turkmenistan", "TM"),
- 241: ("Turks And Caicos Islands", "TC"),
+ 241: ("Turks and Caicos Islands", "TC"),
242: ("Tuvalu", "TV"),
243: ("Uganda", "UG"),
244: ("Ukraine", "UA"),
@@ -669,9 +697,9 @@ territory_map = {
253: ("Vatican City", "VA"),
254: ("Venezuela", "VE"),
255: ("Vietnam", "VN"),
- 256: ("Wallis And Futuna", "WF"),
+ 256: ("Wallis and Futuna", "WF"),
257: ("Western Sahara", "EH"),
- 258: ("World", "001"),
+ 258: ("world", "001"),
259: ("Yemen", "YE"),
260: ("Zambia", "ZM"),
261: ("Zimbabwe", "ZW"),
@@ -741,7 +769,7 @@ script_map = {
28: ("Deseret", "Dsrt"),
29: ("Devanagari", "Deva"),
30: ("Duployan", "Dupl"),
- 31: ("Egyptian Hieroglyphs", "Egyp"),
+ 31: ("Egyptian hieroglyphs", "Egyp"),
32: ("Elbasan", "Elba"),
33: ("Ethiopic", "Ethi"),
34: ("Fraser", "Lisu"),
@@ -816,7 +844,7 @@ script_map = {
103: ("Pahawh Hmong", "Hmng"),
104: ("Palmyrene", "Palm"),
105: ("Pau Cin Hau", "Pauc"),
- 106: ("Phags Pa", "Phag"),
+ 106: ("Phags-pa", "Phag"),
107: ("Phoenician", "Phnx"),
108: ("Pollard Phonetic", "Plrd"),
109: ("Psalter Pahlavi", "Phlp"),
@@ -827,7 +855,7 @@ script_map = {
114: ("Sharada", "Shrd"),
115: ("Shavian", "Shaw"),
116: ("Siddham", "Sidd"),
- 117: ("Sign Writing", "Sgnw"),
+ 117: ("SignWriting", "Sgnw"), # Oddly, en.xml leaves no space in it.
118: ("Simplified Han", "Hans"),
119: ("Sinhala", "Sinh"),
120: ("Sora Sompeng", "Sora"),
@@ -852,6 +880,8 @@ script_map = {
139: ("Vai", "Vaii"),
140: ("Varang Kshiti", "Wara"),
141: ("Yi", "Yiii"),
+ # Added at CLDR v43
+ 142: ("Hanifi", "Rohg"), # Used for Rohingya
}
script_aliases = {