From ed2b110b6add650954dc102a0317c14ff826c677 Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Mon, 13 Jan 2020 15:46:13 +0100 Subject: Allow surrogate pairs for various "single character" locale data Extract the character in its proper Unicode form and encode it in a new single_character_data table of locale data. Record each entry as the range within that table that encodes it. Also added an assertion in the generator script to check that the digits CLDR gives us are a contiguous sequence in increasing order, as has been assumed by the C++ code for some time. Lots of number-formatting code now has to take account of how wide the digits are. This leaves nowhere for updateSystemPrivate() to record values read from sys_locale->query(), so we must always consult that function when accessing these members of the systemData() object. Various internal users of these single-character fields need the system-or-CLDR value rather than the raw CLDR value, so move QLocalePrivate's methods to supply them down to QLocaleData and ensure they check for system values first, where appropriate. This allows us to finally support the Chakma language and script, for whose number system UTF-16 needs surrogate pairs. Costs 10.8 kB in added data, much of it due to adding two new locales that need surrogates to represent digits. [ChangeLog][QtCore][QLocale] Various QLocale methods that returned single QChar values now return QString values to accommodate those locales which need a surrogate pair to represent the (single character) return value. 
Fixes: QTBUG-69324 Fixes: QTBUG-81053 Change-Id: I481722d6f5ee266164f09031679a851dfa6e7839 Reviewed-by: Thiago Macieira --- src/corelib/text/qlocale_p.h | 97 ++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 43 deletions(-) (limited to 'src/corelib/text/qlocale_p.h') diff --git a/src/corelib/text/qlocale_p.h b/src/corelib/text/qlocale_p.h index 11a42b7fee..a9164c264e 100644 --- a/src/corelib/text/qlocale_p.h +++ b/src/corelib/text/qlocale_p.h @@ -221,21 +221,19 @@ public: typedef QVarLengthArray CharBuff; - static QString doubleToString(const QChar zero, const QChar plus, - const QChar minus, const QChar exponent, - const QChar group, const QChar decimal, - double d, int precision, - DoubleForm form, + static QString doubleToString(const QString &zero, const QString &plus, + const QString &minus, const QString &exponent, + const QString &group, const QString &decimal, + double d, int precision, DoubleForm form, int width, unsigned flags); - static QString longLongToString(const QChar zero, const QChar group, - const QChar plus, const QChar minus, + static QString longLongToString(const QString &zero, const QString &group, + const QString &plus, const QString &minus, qint64 l, int precision, int base, int width, unsigned flags); - static QString unsLongLongToString(const QChar zero, const QChar group, - const QChar plus, + static QString unsLongLongToString(const QString &zero, const QString &group, + const QString &plus, quint64 l, int precision, - int base, int width, - unsigned flags); + int base, int width, unsigned flags); QString doubleToString(double d, int precision = -1, @@ -282,12 +280,22 @@ public: bool numberToCLocale(QStringView s, QLocale::NumberOptions number_options, CharBuff *result) const; - inline char digitToCLocale(QChar c) const; + inline char numericToCLocale(QStringView in) const; // this function is used in QIntValidator (QtGui) Q_CORE_EXPORT bool validateChars(QStringView str, NumberMode numMode, QByteArray 
*buff, int decDigits = -1, QLocale::NumberOptions number_options = QLocale::DefaultNumberOptions) const; + QString decimalPoint() const; + QString groupSeparator() const; + QString listSeparator() const; + QString percentSign() const; + QString zeroDigit() const; + uint zeroUcs() const; + QString positiveSign() const; + QString negativeSign() const; + QString exponentSeparator() const; + struct DataRange { quint16 offset; @@ -310,6 +318,14 @@ public: { return listEntry(table, index).viewData(table); } + uint ucsFirst(const ushort *table) const + { + if (size && !QChar::isSurrogate(table[offset])) + return table[offset]; + if (size > 1 && QChar::isHighSurrogate(table[offset])) + return QChar::surrogateToUcs4(table[offset], table[offset + 1]); + return 0; + } private: DataRange listEntry(const ushort *table, int index) const { @@ -328,7 +344,9 @@ public: }; #define ForEachQLocaleRange(X) \ - X(startListPattern) X(midListPattern) X(endListPattern) X(pairListPattern) \ + X(startListPattern) X(midListPattern) X(endListPattern) X(pairListPattern) X(listDelimit) \ + X(decimalSeparator) X(groupDelim) X(percent) X(zero) X(minus) X(plus) X(exponential) \ + X(quoteStart) X(quoteEnd) X(quoteStartAlternate) X(quoteEndAlternate) \ X(longDateFormat) X(shortDateFormat) X(longTimeFormat) X(shortTimeFormat) \ X(longDayNamesStandalone) X(longDayNames) \ X(shortDayNamesStandalone) X(shortDayNames) \ @@ -347,11 +365,6 @@ public: public: quint16 m_language_id, m_script_id, m_country_id; - // FIXME QTBUG-69324: not all unicode code-points map to single-token UTF-16 :-( - char16_t m_decimal, m_group, m_list, m_percent, m_zero, m_minus, m_plus, m_exponential; - char16_t m_quotation_start, m_quotation_end; - char16_t m_alternate_quotation_start, m_alternate_quotation_end; - // Offsets, then sizes, for each range: #define rangeIndex(name) quint16 m_ ## name ## _idx; ForEachQLocaleRange(rangeIndex) @@ -389,15 +402,6 @@ public: static QLocalePrivate *get(QLocale &l) { return l.d; } static 
const QLocalePrivate *get(const QLocale &l) { return l.d; } - QChar decimal() const { return QChar(m_data->m_decimal); } - QChar group() const { return QChar(m_data->m_group); } - QChar list() const { return QChar(m_data->m_list); } - QChar percent() const { return QChar(m_data->m_percent); } - QChar zero() const { return QChar(m_data->m_zero); } - QChar plus() const { return QChar(m_data->m_plus); } - QChar minus() const { return QChar(m_data->m_minus); } - QChar exponential() const { return QChar(m_data->m_exponential); } - quint16 languageId() const { return m_data->m_language_id; } quint16 countryId() const { return m_data->m_country_id; } @@ -437,37 +441,44 @@ inline QLocalePrivate *QSharedDataPointer::clone() return QLocalePrivate::create(d->m_data, d->m_data_offset, d->m_numberOptions); } -inline char QLocaleData::digitToCLocale(QChar in) const +inline char QLocaleData::numericToCLocale(QStringView in) const { - const ushort tenUnicode = m_zero + 10; - - if (in.unicode() >= m_zero && in.unicode() < tenUnicode) - return '0' + in.unicode() - m_zero; - - if (in.unicode() >= '0' && in.unicode() <= '9') - return in.toLatin1(); + Q_ASSERT(in.size() == 1 || (in.size() == 2 && in.at(0).isHighSurrogate())); - if (in == m_plus || in == QLatin1Char('+')) + if (in == positiveSign() || in == u"+") return '+'; - if (in == m_minus || in == QLatin1Char('-') || in == QChar(0x2212)) + if (in == negativeSign() || in == u"-" || in == u"\x2212") return '-'; - if (in == m_decimal) + if (in == decimalPoint()) return '.'; - if (in == m_group) - return ','; - - if (in == m_exponential || in.toCaseFolded().unicode() == QChar::toCaseFolded(m_exponential)) + if (in.compare(exponentSeparator(), Qt::CaseInsensitive) == 0) return 'e'; + const QString group = groupSeparator(); + if (in == group) + return ','; + // In several languages group() is a non-breaking space (U+00A0) or its thin // version (U+202f), which look like spaces. 
People (and thus some of our // tests) use a regular space instead and complain if it doesn't work. - if ((m_group == 0xA0 || m_group == 0x202f) && in.unicode() == ' ') + // Should this be extended generally to any case where group is a space ? + if ((group == u"\xa0" || group == u"\x202f") && in == u" ") return ','; + const uint zeroUcs4 = zeroUcs(); + const uint tenUcs4 = zeroUcs4 + 10; + const uint inUcs4 = in.size() == 2 + ? QChar::surrogateToUcs4(in.at(0), in.at(1)) : in.at(0).unicode(); + + if (zeroUcs4 <= inUcs4 && inUcs4 < tenUcs4) + return '0' + inUcs4 - zeroUcs4; + + if ('0' <= inUcs4 && inUcs4 <= '9') + return inUcs4; + return 0; } -- cgit v1.2.3