summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEdward Welbourne <edward.welbourne@qt.io>2020-04-16 16:20:43 +0200
committerEdward Welbourne <edward.welbourne@qt.io>2020-04-17 13:43:50 +0200
commit300aaec2f9b72158aa5b9b26a4662b6fa635eec1 (patch)
treec5399b7adc20d5a4a94b72f7c147a3277f050089
parentad5aee2e34fad45d1d90bb059fa00a791d4ba3e2 (diff)
Fix digit grouping when digits are surrogat pairs
This is a follow-up to commit ed2b110b6add650954dc102a0317c14ff826c677 to fix indexing errors. Added the test that should have accompanied that commit, which found some bugs, and refined the Indian number formatting test (on which it's based). Make variable i in the loops that insert grouping characters in a number be consistently a *character* offset - which, when each digit is a surrogate pair, isn't the same as an index into the QString. Apply the needed scaling when indexing with it, not when setting it or decrementing it. Don't assume the separator has the same width as a digit. Differences in index no longer give the number of digits between two points in a string, so actively track how many digits we've seen in a group when converting a numeric string to the C locale. Partially cleaned up the code for that in the process (more shall follow when I sort out digit grouping properly, without special-casing India). Change-Id: I13d0f24efa26e599dfefb5733e062088fa56d375 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/text/qlocale.cpp54
-rw-r--r--tests/auto/corelib/text/qlocale/tst_qlocale.cpp160
2 files changed, 153 insertions, 61 deletions
diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp
index 258e90a06c..467fd31ab6 100644
--- a/src/corelib/text/qlocale.cpp
+++ b/src/corelib/text/qlocale.cpp
@@ -3712,15 +3712,16 @@ QT_WARNING_POP
uint cnt_thousand_sep = 0;
if (base == 10) {
if (flags & ThousandsGroup) {
- for (int i = num_str.length() / digitWidth - 3; i > 0; i -= 3 * digitWidth) {
- num_str.insert(i, group);
+ for (int i = num_str.length() / digitWidth - 3; i > 0; i -= 3) {
+ num_str.insert(i * digitWidth, group);
++cnt_thousand_sep;
}
} else if (flags & IndianNumberGrouping) {
- if (num_str.length() > 3 * digitWidth)
- num_str.insert(num_str.length() - 3 * digitWidth , group);
- for (int i = num_str.length() - 6 * digitWidth; i > 0; i -= 2 * digitWidth) {
- num_str.insert(i, group);
+ const int size = num_str.length();
+ if (size > 3 * digitWidth)
+ num_str.insert(size - 3 * digitWidth , group);
+ for (int i = size / digitWidth - 5; i > 0; i -= 2) {
+ num_str.insert(i * digitWidth, group);
++cnt_thousand_sep;
}
}
@@ -3807,15 +3808,16 @@ QString QLocaleData::unsLongLongToString(const QString &zero, const QString &gro
uint cnt_thousand_sep = 0;
if (base == 10) {
if (flags & ThousandsGroup) {
- for (int i = num_str.length() - 3 * digitWidth; i > 0; i -= 3 * digitWidth) {
- num_str.insert(i, group);
+ for (int i = num_str.length() / digitWidth - 3; i > 0; i -= 3) {
+ num_str.insert(i * digitWidth, group);
++cnt_thousand_sep;
}
} else if (flags & IndianNumberGrouping) {
- if (num_str.length() > 3 * digitWidth)
- num_str.insert(num_str.length() - 3 * digitWidth , group);
- for (int i = num_str.length() - 6 * digitWidth; i > 0; i -= 2 * digitWidth) {
- num_str.insert(i, group);
+ const int size = num_str.length();
+ if (size > 3 * digitWidth)
+ num_str.insert(size - 3 * digitWidth , group);
+ for (int i = size / digitWidth - 5; i > 0; i -= 2) {
+ num_str.insert(i * digitWidth, group);
++cnt_thousand_sep;
}
}
@@ -3886,6 +3888,8 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
auto length = s.size();
decltype(length) idx = 0;
+ const int leadingGroupWidth = (m_country_id == QLocale::India ? 2 : 3);
+ int digitsInGroup = 0;
int group_cnt = 0; // counts number of group chars
int decpt_idx = -1;
int last_separator_idx = -1;
@@ -3937,26 +3941,26 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
if (!(number_options & QLocale::RejectGroupSeparator)) {
if (start_of_digits_idx == -1 && out >= '0' && out <= '9') {
start_of_digits_idx = idx;
+ digitsInGroup++;
} else if (out == ',') {
// Don't allow group chars after the decimal point or exponent
if (decpt_idx != -1 || exponent_idx != -1)
return false;
- // check distance from the last separator or from the beginning of the digits
- // ### FIXME: Some locales allow other groupings!
- // See https://en.wikipedia.org/wiki/Thousands_separator
- if (m_country_id == QLocale::India) {
- if (last_separator_idx != -1 && idx - last_separator_idx != 3)
+ if (last_separator_idx == -1) {
+ if (start_of_digits_idx == -1 || digitsInGroup > leadingGroupWidth)
+ return false;
+ } else {
+ // check distance from the last separator or from the beginning of the digits
+ // ### FIXME: Some locales allow other groupings!
+ // See https://en.wikipedia.org/wiki/Thousands_separator
+ if (digitsInGroup != leadingGroupWidth)
return false;
- } else if (last_separator_idx != -1 && idx - last_separator_idx != 4)
- return false;
- if (last_separator_idx == -1
- && (start_of_digits_idx == -1 || idx - start_of_digits_idx > 3)) {
- return false;
}
last_separator_idx = idx;
++group_cnt;
+ digitsInGroup = 0;
// don't add the group separator
idx += in.size();
@@ -3965,11 +3969,13 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
// check distance from the last separator
// ### FIXME: Some locales allow other groupings!
// See https://en.wikipedia.org/wiki/Thousands_separator
- if (last_separator_idx != -1 && idx - last_separator_idx != 4)
+ if (last_separator_idx != -1 && digitsInGroup != 3)
return false;
// stop processing separators
last_separator_idx = -1;
+ } else if (out >= '0' && out <= '9') {
+ digitsInGroup++;
}
}
@@ -3983,7 +3989,7 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o
if (last_separator_idx + 1 == idx)
return false;
// were there enough digits since the last separator?
- if (last_separator_idx != -1 && idx - last_separator_idx != 4)
+ if (last_separator_idx != -1 && digitsInGroup != 3)
return false;
}
diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp
index 2b06e60c44..e1f9d67ec2 100644
--- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp
+++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp
@@ -149,7 +149,8 @@ private slots:
void systemLocale_data();
void systemLocale();
- void IndianNumberGrouping();
+ void numberGroupingIndia();
+ void numberFormatChakma();
// *** ORDER-DEPENDENCY *** (This Is Bad.)
// Test order is determined by order of declaration here: *all* tests that
@@ -3020,56 +3021,141 @@ void tst_QLocale::systemLocale()
QCOMPARE(QLocale::system(), originalSystemLocale);
}
-void tst_QLocale::IndianNumberGrouping()
+void tst_QLocale::numberGroupingIndia()
{
- QLocale indian(QLocale::Hindi, QLocale::India);
+ const QLocale indian(QLocale::Hindi, QLocale::India);
- qint8 int8 = 100;
- QString strResult8("100");
+ // A 7-bit value (fits in signed 8-bit):
+ const QString strResult8("120");
+ const qint8 int8 = 120;
QCOMPARE(indian.toString(int8), strResult8);
QCOMPARE(indian.toShort(strResult8), short(int8));
- quint8 uint8 = 100;
+ const quint8 uint8 = 120u;
QCOMPARE(indian.toString(uint8), strResult8);
QCOMPARE(indian.toShort(strResult8), short(uint8));
- // Boundary case 1000 for short and ushort
- short shortInt = 1000;
- QString strResult16("1,000");
- QCOMPARE(indian.toString(shortInt), strResult16);
- QCOMPARE(indian.toShort(strResult16), shortInt);
-
- ushort uShortInt = 1000;
- QCOMPARE(indian.toString(uShortInt), strResult16);
- QCOMPARE(indian.toUShort(strResult16), uShortInt);
-
- shortInt = 10000;
- strResult16 = "10,000";
- QCOMPARE(indian.toString(shortInt), strResult16);
- QCOMPARE(indian.toShort(strResult16), shortInt);
-
- uShortInt = 10000;
- QCOMPARE(indian.toString(uShortInt), strResult16);
- QCOMPARE(indian.toUShort(strResult16), uShortInt);
-
- int intInt = 1000000000;
- QString strResult32("1,00,00,00,000");
- QCOMPARE(indian.toString(intInt), strResult32);
- QCOMPARE(indian.toInt(strResult32), intInt);
-
- uint uIntInt = 1000000000;
- QCOMPARE(indian.toString(uIntInt), strResult32);
- QCOMPARE(indian.toUInt(strResult32), uIntInt);
-
- QString strResult64("10,00,00,00,00,00,00,00,000");
- qint64 int64 = Q_INT64_C(1000000000000000000);
+ // Boundary case for needing a first separator:
+ const QString strResultSep("3,210");
+ const short shortSep = 3210;
+ QCOMPARE(indian.toString(shortSep), strResultSep);
+ QCOMPARE(indian.toShort(strResultSep), shortSep);
+
+ const ushort uShortSep = 3210u;
+ QCOMPARE(indian.toString(uShortSep), strResultSep);
+ QCOMPARE(indian.toUShort(strResultSep), uShortSep);
+
+ // 15-bit:
+ const QString strResult16("24,310");
+ const short short16 = 24310;
+ QCOMPARE(indian.toString(short16), strResult16);
+ QCOMPARE(indian.toShort(strResult16), short16);
+
+ const ushort uShort16 = 24310u;
+ QCOMPARE(indian.toString(uShort16), strResult16);
+ QCOMPARE(indian.toUShort(strResult16), uShort16);
+
+ // 31-bit
+ const QString strResult32("2,03,04,05,010");
+ const int integer32 = 2030405010;
+ QCOMPARE(indian.toString(integer32), strResult32);
+ QCOMPARE(indian.toInt(strResult32), integer32);
+
+ const uint uInteger32 = 2030405010u;
+ QCOMPARE(indian.toString(uInteger32), strResult32);
+ QCOMPARE(indian.toUInt(strResult32), uInteger32);
+
+ // 63-bit:
+ const QString strResult64("60,05,00,40,03,00,20,01,000");
+ const qint64 int64 = Q_INT64_C(6005004003002001000);
QCOMPARE(indian.toString(int64), strResult64);
QCOMPARE(indian.toLongLong(strResult64), int64);
- quint64 uint64 = Q_UINT64_C(1000000000000000000);
+ const quint64 uint64 = Q_UINT64_C(6005004003002001000);
QCOMPARE(indian.toString(uint64), strResult64);
QCOMPARE(indian.toULongLong(strResult64), uint64);
}
+void tst_QLocale::numberFormatChakma()
+{
+ // Initially India's flavour, since the number formatting is currently only
+ // done right for India. Should change to Bangladesh once that's fixed.
+ const QLocale chakma(QLocale::Chakma, QLocale::ChakmaScript, QLocale::India);
+ const uint zeroVal = 0x11136; // Unicode's representation of Chakma zero
+ const QChar data[] = {
+ QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal),
+ QChar::highSurrogate(zeroVal + 1), QChar::lowSurrogate(zeroVal + 1),
+ QChar::highSurrogate(zeroVal + 2), QChar::lowSurrogate(zeroVal + 2),
+ QChar::highSurrogate(zeroVal + 3), QChar::lowSurrogate(zeroVal + 3),
+ QChar::highSurrogate(zeroVal + 4), QChar::lowSurrogate(zeroVal + 4),
+ QChar::highSurrogate(zeroVal + 5), QChar::lowSurrogate(zeroVal + 5),
+ QChar::highSurrogate(zeroVal + 6), QChar::lowSurrogate(zeroVal + 6),
+ };
+ const QChar separator(QLatin1Char(','));
+ const QString
+ zero = QString::fromRawData(data, 2),
+ one = QString::fromRawData(data + 2, 2),
+ two = QString::fromRawData(data + 4, 2),
+ three = QString::fromRawData(data + 6, 2),
+ four = QString::fromRawData(data + 8, 2),
+ five = QString::fromRawData(data + 10, 2),
+ six = QString::fromRawData(data + 12, 2);
+
+ // A 7-bit value (fits in signed 8-bit):
+ const QString strResult8 = one + two + zero;
+ const qint8 int8 = 120;
+ QCOMPARE(chakma.toString(int8), strResult8);
+ QCOMPARE(chakma.toShort(strResult8), short(int8));
+
+ const quint8 uint8 = 120;
+ QCOMPARE(chakma.toString(uint8), strResult8);
+ QCOMPARE(chakma.toShort(strResult8), short(uint8));
+
+ // Boundary case for needing a first separator:
+ const QString strResultSep = three + separator + two + one + zero;
+ const short shortSep = 3210;
+ QCOMPARE(chakma.toString(shortSep), strResultSep);
+ QCOMPARE(chakma.toShort(strResultSep), shortSep);
+
+ const ushort uShortSep = 3210u;
+ QCOMPARE(chakma.toString(uShortSep), strResultSep);
+ QCOMPARE(chakma.toUShort(strResultSep), uShortSep);
+
+ // Fifteen-bit value:
+ const QString strResult16 = two + four + separator + three + one + zero;
+ const short short16 = 24310;
+ QCOMPARE(chakma.toString(short16), strResult16);
+ QCOMPARE(chakma.toShort(strResult16), short16);
+
+ const ushort uShort16 = 24310u;
+ QCOMPARE(chakma.toString(uShort16), strResult16);
+ QCOMPARE(chakma.toUShort(strResult16), uShort16);
+
+ // 31-bit
+ const QString strResult32 =
+ two + separator + zero + three + separator + zero + four
+ + separator + zero + five + separator + zero + one + zero;
+ const int integer32 = 2030405010;
+ QCOMPARE(chakma.toString(integer32), strResult32);
+ QCOMPARE(chakma.toInt(strResult32), integer32);
+
+ const uint uInteger32 = 2030405010u;
+ QCOMPARE(chakma.toString(uInteger32), strResult32);
+ QCOMPARE(chakma.toUInt(strResult32), uInteger32);
+
+ // 63-bit:
+ const QString strResult64 =
+ six + zero + separator + zero + five + separator + zero + zero + separator
+ + four + zero + separator + zero + three + separator + zero + zero + separator
+ + two + zero + separator + zero + one + separator + zero + zero + zero;
+ const qint64 int64 = Q_INT64_C(6005004003002001000);
+ QCOMPARE(chakma.toString(int64), strResult64);
+ QCOMPARE(chakma.toLongLong(strResult64), int64);
+
+ const quint64 uint64 = Q_UINT64_C(6005004003002001000);
+ QCOMPARE(chakma.toString(uint64), strResult64);
+ QCOMPARE(chakma.toULongLong(strResult64), uint64);
+}
+
QTEST_MAIN(tst_QLocale)
#include "tst_qlocale.moc"