From d8538163075a2058f110bc77e7d481ef1ab7612c Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Fri, 3 Jul 2020 14:13:15 +0200 Subject: Fix handling of Suzhou numbering system This only arises when the system locale tells us to use its zero as our zero digit, since no CLDR locale uses it by default. Adapt an MS-specific QLocale::system() test to use Suzhou numbering, so as to test this. While updating the locale-restoration code to also restore the digits being set in that test, add restore code for the long time format, where previously only the short time format was restored. Add a comment to make it less likely one of those shall be missed in future. Fixes: QTBUG-85409 Change-Id: I343324bb563ee0e455dfe77d4825bf8c3082ca30 Reviewed-by: Thiago Macieira --- src/corelib/text/qlocale.cpp | 7 ++-- src/corelib/text/qlocale_p.h | 20 +++++++---- src/corelib/text/qlocale_tools.cpp | 4 +-- src/corelib/text/qlocale_tools_p.h | 17 +++++++++ src/corelib/text/qlocale_win.cpp | 16 +++++---- tests/auto/corelib/text/qlocale/tst_qlocale.cpp | 46 +++++++++++++++++-------- util/locale_database/ldml.py | 6 ++-- 7 files changed, 81 insertions(+), 35 deletions(-) diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index 5bf2f7b296..2febee62a9 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -3363,7 +3363,7 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, QString converted; converted.reserve(2 * digits.size()); for (int i = 0; i < digits.length(); ++i) { - const uint digit = zeroUcs4 - '0' + digits.at(i).unicode(); + const uint digit = unicodeForDigit(digits.at(i).unicode() - '0', zeroUcs4); Q_ASSERT(QChar::requiresSurrogates(digit)); converted.append(QChar::highSurrogate(digit)); converted.append(QChar::lowSurrogate(digit)); @@ -3372,9 +3372,10 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, } else { Q_ASSERT(zero.size() == 1); Q_ASSERT(!zero.at(0).isSurrogate()); - 
ushort z = zero.at(0).unicode() - '0'; + ushort z = zero.at(0).unicode(); + ushort *const value = reinterpret_cast<ushort *>(digits.data()); for (int i = 0; i < digits.length(); ++i) - reinterpret_cast<ushort *>(digits.data())[i] += z; + value[i] = unicodeForDigit(value[i] - '0', z); } const bool mustMarkDecimal = flags & ForcePoint; diff --git a/src/corelib/text/qlocale_p.h b/src/corelib/text/qlocale_p.h index 818fdf27a1..d822f86b72 100644 --- a/src/corelib/text/qlocale_p.h +++ b/src/corelib/text/qlocale_p.h @@ -463,14 +463,22 @@ inline char QLocaleData::numericToCLocale(QStringView in) const if ((group == u"\xa0" || group == u"\x202f") && in == u" ") return ','; - const uint zeroUcs4 = zeroUcs(); - const uint tenUcs4 = zeroUcs4 + 10; const uint inUcs4 = in.size() == 2 ? QChar::surrogateToUcs4(in.at(0), in.at(1)) : in.at(0).unicode(); - - if (zeroUcs4 <= inUcs4 && inUcs4 < tenUcs4) - return '0' + inUcs4 - zeroUcs4; - + const uint zeroUcs4 = zeroUcs(); + // Must match qlocale_tools_p.h's unicodeForDigit() + if (zeroUcs4 == 0x3007u) { + // QTBUG-85409: Suzhou's digits aren't contiguous! 
+ if (inUcs4 == zeroUcs4) + return '0'; + if (inUcs4 > 0x3020u && inUcs4 <= 0x3029u) + return '0' + inUcs4 - 0x3020u; + } else { + const uint tenUcs4 = zeroUcs4 + 10; + + if (zeroUcs4 <= inUcs4 && inUcs4 < tenUcs4) + return '0' + inUcs4 - zeroUcs4; + } if ('0' <= inUcs4 && inUcs4 <= '9') return inUcs4; diff --git a/src/corelib/text/qlocale_tools.cpp b/src/corelib/text/qlocale_tools.cpp index 9466edb234..4f3912224b 100644 --- a/src/corelib/text/qlocale_tools.cpp +++ b/src/corelib/text/qlocale_tools.cpp @@ -447,14 +447,14 @@ QString qulltoa(qulonglong number, int base, const QStringView zero) } else if (zero.size() && !zero.at(0).isSurrogate()) { const ushort zeroUcs4 = zero.at(0).unicode(); while (number != 0) { - *(--p) = zeroUcs4 + number % base; + *(--p) = unicodeForDigit(number % base, zeroUcs4); number /= base; } } else if (zero.size() == 2 && zero.at(0).isHighSurrogate()) { const uint zeroUcs4 = QChar::surrogateToUcs4(zero.at(0), zero.at(1)); while (number != 0) { - const uint digit = zeroUcs4 + number % base; + const uint digit = unicodeForDigit(number % base, zeroUcs4); *(--p) = QChar::lowSurrogate(digit); *(--p) = QChar::highSurrogate(digit); diff --git a/src/corelib/text/qlocale_tools_p.h b/src/corelib/text/qlocale_tools_p.h index e2e60b159d..17bfcb2e36 100644 --- a/src/corelib/text/qlocale_tools_p.h +++ b/src/corelib/text/qlocale_tools_p.h @@ -89,6 +89,23 @@ inline int wholePartSpace(double d) return d > (1 << 19) ? std::numeric_limits<double>::max_exponent10 + 1 : 6; } +// Returns code-point of same kind (UCS2 or UCS4) as zero; digit is 0 through 9 +template <typename UcsInt> +inline UcsInt unicodeForDigit(uint digit, UcsInt zero) +{ + // Must match QLocaleData::numericToCLocale()'s digit-digestion. + Q_ASSERT(digit < 10); + if (!digit) + return zero; + + // See QTBUG-85409: Suzhou's digits are U+3007, U+3021, ..., U+3029 + if (zero == 0x3007u) + return 0x3020u + digit; + // At CLDR 36.1, no other number system's digits were discontinuous. 
+ + return zero + digit; +} + Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok); Q_CORE_EXPORT double qstrntod(const char *s00, int len, char const **se, bool *ok); qlonglong qstrtoll(const char *nptr, const char **endptr, int base, bool *ok); diff --git a/src/corelib/text/qlocale_win.cpp b/src/corelib/text/qlocale_win.cpp index a1a1e1c43c..d74ae40313 100644 --- a/src/corelib/text/qlocale_win.cpp +++ b/src/corelib/text/qlocale_win.cpp @@ -235,23 +235,24 @@ QString QSystemLocalePrivate::substituteDigits(QString &&string) zeroDigit(); // Ensure zero is set. switch (zero.size()) { case 1: { - const ushort offset = zero.at(0).unicode() - '0'; - if (!offset) // Nothing to do + ushort z = zero.at(0).unicode(); + if (z == '0') // Nothing to do break; - Q_ASSERT(offset > 9); + Q_ASSERT(z > '9'); ushort *const qch = reinterpret_cast<ushort *>(string.data()); for (int i = 0, stop = string.size(); i < stop; ++i) { ushort &ch = qch[i]; if (ch >= '0' && ch <= '9') - ch += offset; + ch = unicodeForDigit(ch - '0', z); } break; } case 2: { // Surrogate pair (high, low): - uint digit = QChar::surrogateToUcs4(zero.at(0), zero.at(1)); + uint z = QChar::surrogateToUcs4(zero.at(0), zero.at(1)); for (int i = 0; i < 10; i++) { - const QChar s[2] = { QChar::highSurrogate(digit + i), QChar::lowSurrogate(digit + i) }; + uint digit = unicodeForDigit(i, z); + const QChar s[2] = { QChar::highSurrogate(digit), QChar::lowSurrogate(digit) }; string.replace(QString(QLatin1Char('0' + i)), QString(s, 2)); } break; @@ -276,7 +277,8 @@ QVariant QSystemLocalePrivate::zeroDigit() */ wchar_t digits[11]; if (getLocaleInfo(LOCALE_SNATIVEDIGITS, digits, 11)) { - // assert all(digits[i] == i + digits[0] for i in range(1, 10)), assumed above + // assert all(digits[i] == i + digits[0] for i in range(1, 10)), + // assumed above (unless digits[0] is 0x3007; see QTBUG-85409). 
zero = QString::fromWCharArray(digits, 1); } } diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index 1d43e35959..52752690da 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -2072,7 +2072,7 @@ static QString getWinLocaleInfo(LCTYPE type) return QString::fromWCharArray(buf.data()); } -static void setWinLocaleInfo(LCTYPE type, const QString &value) +static void setWinLocaleInfo(LCTYPE type, QStringView value) { LCID id = GetThreadLocale(); SetLocaleInfo(id, type, reinterpret_cast<const wchar_t*>(value.utf16())); @@ -2091,7 +2091,10 @@ public: m_thousand = getWinLocaleInfo(LOCALE_STHOUSAND); m_sdate = getWinLocaleInfo(LOCALE_SSHORTDATE); m_ldate = getWinLocaleInfo(LOCALE_SLONGDATE); - m_time = getWinLocaleInfo(LOCALE_SSHORTTIME); + m_stime = getWinLocaleInfo(LOCALE_SSHORTTIME); + m_ltime = getWinLocaleInfo(LOCALE_STIMEFORMAT); + m_digits = getWinLocaleInfo(LOCALE_SNATIVEDIGITS); + m_subst = getWinLocaleInfo(LOCALE_IDIGITSUBSTITUTION); } ~RestoreLocaleHelper() @@ -2101,20 +2104,23 @@ public: setWinLocaleInfo(LOCALE_STHOUSAND, m_thousand); setWinLocaleInfo(LOCALE_SSHORTDATE, m_sdate); setWinLocaleInfo(LOCALE_SLONGDATE, m_ldate); - setWinLocaleInfo(LOCALE_SSHORTTIME, m_time); + setWinLocaleInfo(LOCALE_SSHORTTIME, m_stime); + setWinLocaleInfo(LOCALE_STIMEFORMAT, m_ltime); + setWinLocaleInfo(LOCALE_SNATIVEDIGITS, m_digits); + setWinLocaleInfo(LOCALE_IDIGITSUBSTITUTION, m_subst); QSystemLocale dummy; // to provoke a refresh of the system locale } - QString m_decimal, m_thousand, m_sdate, m_ldate, m_time; + QString m_decimal, m_thousand, m_sdate, m_ldate, m_stime, m_ltime, m_digits, m_subst; }; void tst_QLocale::windowsDefaultLocale() { RestoreLocaleHelper systemLocale; // set weird system defaults and make sure we're using them - setWinLocaleInfo(LOCALE_SDECIMAL, QLatin1String("@")); - setWinLocaleInfo(LOCALE_STHOUSAND, QLatin1String("?")); + 
setWinLocaleInfo(LOCALE_SDECIMAL, u"@"); + setWinLocaleInfo(LOCALE_STHOUSAND, u"?"); const QString shortDateFormat = QStringLiteral("d*M*yyyy"); setWinLocaleInfo(LOCALE_SSHORTDATE, shortDateFormat); const QString longDateFormat = QStringLiteral("d@M@yyyy"); @@ -2123,11 +2129,17 @@ void tst_QLocale::windowsDefaultLocale() setWinLocaleInfo(LOCALE_SSHORTTIME, shortTimeFormat); const QString longTimeFormat = QStringLiteral("HH%mm%ss"); setWinLocaleInfo(LOCALE_STIMEFORMAT, longTimeFormat); + // Suzhou numerals (QTBUG-85409): + const QStringView digits = u"\u3007\u3021\u3022\u3023\u3024\u3025\u3026\u3027\u3028\u3029"; + setWinLocaleInfo(LOCALE_SNATIVEDIGITS, digits); + setWinLocaleInfo(LOCALE_IDIGITSUBSTITUTION, u"2"); + // NB: when adding to the system things being set, be sure to update RestoreLocaleHelper, too. QSystemLocale dummy; // to provoke a refresh of the system locale QLocale locale = QLocale::system(); - // make sure we are seeing the system's format strings + // Make sure we are seeing the system's format strings + QCOMPARE(locale.zeroDigit(), QStringView(u"\u3007")); QCOMPARE(locale.decimalPoint(), QStringView(u"@")); QCOMPARE(locale.groupSeparator(), QStringView(u"?")); QCOMPARE(locale.dateFormat(QLocale::ShortFormat), shortDateFormat); @@ -2140,24 +2152,28 @@ void tst_QLocale::windowsDefaultLocale() QCOMPARE(locale.dateTimeFormat(QLocale::LongFormat), expectedLongDateTimeFormat); // make sure we are using the system to parse them - QCOMPARE(locale.toString(1234.56), QString("1?234@56")); - QCOMPARE(locale.toString(QDate(1974, 12, 1), QLocale::ShortFormat), QString("1*12*1974")); + QCOMPARE(locale.toString(1234.56), QStringView(u"\u3021?\u3022\u3023\u3024@\u3025\u3026")); + QCOMPARE(locale.toString(QDate(1974, 12, 1), QLocale::ShortFormat), + QStringView(u"\u3021*\u3021\u3022*\u3021\u3029\u3027\u3024")); QCOMPARE(locale.toString(QDate(1974, 12, 1), QLocale::NarrowFormat), locale.toString(QDate(1974, 12, 1), QLocale::ShortFormat)); - 
QCOMPARE(locale.toString(QDate(1974, 12, 1), QLocale::LongFormat), QString("1@12@1974")); - const QString expectedFormattedShortTimeSeconds = QStringLiteral("1^2^3"); - const QString expectedFormattedShortTime = QStringLiteral("1^2"); + QCOMPARE(locale.toString(QDate(1974, 12, 1), QLocale::LongFormat), + QStringView(u"\u3021@\u3021\u3022@\u3021\u3029\u3027\u3024")); + const QString expectedFormattedShortTime = QStringView(u"\u3021^\u3022").toString(); QCOMPARE(locale.toString(QTime(1,2,3), QLocale::ShortFormat), expectedFormattedShortTime); QCOMPARE(locale.toString(QTime(1,2,3), QLocale::NarrowFormat), locale.toString(QTime(1,2,3), QLocale::ShortFormat)); - const QString expectedFormattedLongTime = QStringLiteral("01%02%03"); + const QString expectedFormattedLongTime + = QStringView(u"\u3007\u3021%\u3007\u3022%\u3007\u3023").toString(); QCOMPARE(locale.toString(QTime(1,2,3), QLocale::LongFormat), expectedFormattedLongTime); QCOMPARE(locale.toString(QDateTime(QDate(1974, 12, 1), QTime(1,2,3)), QLocale::ShortFormat), - QStringLiteral("1*12*1974 ") + expectedFormattedShortTime); + QStringView(u"\u3021*\u3021\u3022*\u3021\u3029\u3027\u3024 ").toString() + + expectedFormattedShortTime); QCOMPARE(locale.toString(QDateTime(QDate(1974, 12, 1), QTime(1,2,3)), QLocale::NarrowFormat), locale.toString(QDateTime(QDate(1974, 12, 1), QTime(1,2,3)), QLocale::ShortFormat)); QCOMPARE(locale.toString(QDateTime(QDate(1974, 12, 1), QTime(1,2,3)), QLocale::LongFormat), - QStringLiteral("1@12@1974 ") + expectedFormattedLongTime); + QStringView(u"\u3021@\u3021\u3022@\u3021\u3029\u3027\u3024 ").toString() + + expectedFormattedLongTime); } #endif // Q_OS_WIN diff --git a/util/locale_database/ldml.py b/util/locale_database/ldml.py index 0f1cefc30c..0f08884d4b 100644 --- a/util/locale_database/ldml.py +++ b/util/locale_database/ldml.py @@ -299,8 +299,10 @@ class LocaleScanner (object): digits = lookup(system)['digits'] assert len(digits) == 10 zero = digits[0] - # Qt's number-formatting code 
assumes digits are consecutive: - assert all(ord(c) == i for i, c in enumerate(digits, ord(zero))) + # Qt's number-formatting code assumes digits are consecutive + # (except Suzhou, CLDR's hanidec - see QTBUG-85409): + assert all(ord(c) == i + (0x3020 if ord(zero) == 0x3007 else ord(zero)) + for i, c in enumerate(digits[1:], 1)) yield 'zero', zero plus = self.find(stem + 'plusSign') -- cgit v1.2.3