summary refs log tree commit diff stats
path: root/util
diff options
context:
space:
mode:
author Edward Welbourne <edward.welbourne@qt.io> 2021-07-06 12:33:05 +0200
committer Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> 2021-07-14 18:59:11 +0000
commit 39da496b5e12a0d0505beb9dd04b835f54ed0992 (patch)
tree 1282a72e666bb78d7dbae0d3816c59057f5f7f35 /util
parent c12e241373fce9925d2c062448eceb92188ad0e8 (diff)
Make locale ordering transitive
The ordering function used to sort the locale data generated for QLocale attempted to sort the default territory for a given language and script before other territories, but was too tangled for it to be obvious this is what it was doing. The result turned out to be non-transitive. Replace with code that implements the same preference but only applies it where the result is compatible with transitivity.

This leads to a shuffling of the order of the Serbian-language locales, which sorts the Cyrillic ones before the Latin ones. This is consistent with my reading of the CLDR data, which fills in Cyrillic and Serbia for Serbian; Serbian/Cyrillic/Serbia did previously sort before all other Serbian variants.

Thanks to Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> for discovering the non-transitivity.

Change-Id: I0ce9f78e620e714f980f32b85b7100ed0f92ad74
Reviewed-by: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>
Reviewed-by: Cristian Maureira-Fredes <cristian.maureira-fredes@qt.io>
(cherry picked from commit 7dec56c6a5ee985f91dd2197225131c028b33650)
Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
Diffstat (limited to 'util')
-rwxr-xr-x util/locale_database/qlocalexml2cpp.py 63
1 file changed, 36 insertions, 27 deletions
diff --git a/util/locale_database/qlocalexml2cpp.py b/util/locale_database/qlocalexml2cpp.py
index 8194b15755..65ef43d693 100755
--- a/util/locale_database/qlocalexml2cpp.py
+++ b/util/locale_database/qlocalexml2cpp.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python2
#############################################################################
##
-## Copyright (C) 2020 The Qt Company Ltd.
+## Copyright (C) 2021 The Qt Company Ltd.
## Contact: https://www.qt.io/licensing/
##
## This file is part of the test suite of the Qt Toolkit.
@@ -39,40 +39,49 @@ import datetime
from qlocalexml import QLocaleXmlReader
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
-def compareLocaleKeys(key1, key2):
- if key1 == key2:
+def compareLocaleKeys(left, right):
+ """Compares two (language, script, territory) triples.
+
+ Returns a negative value if left should sort before right,
+ positive if left should sort after right and zero if they are
+ equal.
+
+ Loosely, it sorts by (language, script, territory) *but* sorts a
+ triple with the default territory for its language and script
+ before all other triples for that language, unless they meet the
+ same condition. In the case of the same language in two scripts,
+ if just one triple does have its default territory then it wins;
+ but if both have their respective default the special treatment of
+ default territory is skipped."""
+
+ # TODO: study the relationship between this and CLDR's likely
+ # sub-tags algorithm. Work out how locale sort-order impacts
+ # QLocale's likely sub-tag matching algorithms. Make sure this is
+ # sorting in an order compatible with those algorithms.
+ # TODO: should we compare territory before or after script ?
+
+ if left == right:
return 0
- if key1[0] != key2[0]: # First sort by language:
- return key1[0] - key2[0]
+ if left[0] != right[0]: # First sort by language:
+ return left[0] - right[0]
defaults = compareLocaleKeys.default_map
# maps {(language, script): territory} by ID
- try:
- territory = defaults[key1[:2]]
- except KeyError:
- pass
- else:
- if key1[2] == territory:
- return -1
- if key2[2] == territory:
- return 1
-
- if key1[1] == key2[1]:
- return key1[2] - key2[2]
+ leftLand = defaults.get(left[:2])
+ rightLand = defaults.get(right[:2])
- try:
- territory = defaults[key2[:2]]
- except KeyError:
- pass
- else:
- if key2[2] == territory:
+ # If just one matches its default territory, it wins:
+ if leftLand is None or left[2] != leftLand:
+ if rightLand is not None and right[2] == rightLand:
return 1
- if key1[2] == territory:
- return -1
-
- return key1[1] - key2[1]
+ # else: Neither matches
+ elif rightLand is None or right[2] != rightLand:
+ return -1
+ # else: Both match
+ # Compare script first, territory after:
+ return left[1] - right[1] or left[2] - right[2]
class StringDataToken:
def __init__(self, index, length, bits):