diff options
Diffstat (limited to 'src/corelib/text/qlocale.cpp')
-rw-r--r-- | src/corelib/text/qlocale.cpp | 1840 |
1 files changed, 1115 insertions, 725 deletions
diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index 811df6dcdc..86ab072b73 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -22,6 +22,7 @@ QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking #include "qlocale.h" #include "qlocale_p.h" #include "qlocale_tools_p.h" +#include <private/qtools_p.h> #if QT_CONFIG(datetimeparser) #include "private/qdatetimeparser_p.h" #endif @@ -31,6 +32,9 @@ QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking #include "qvariant.h" #include "qvarlengtharray.h" #include "qstringbuilder.h" +#if QT_CONFIG(timezone) +# include "qtimezone.h" +#endif #include "private/qnumeric_p.h" #include "private/qtools_p.h" #include <cmath> @@ -46,9 +50,12 @@ QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking #include "private/qgregoriancalendar_p.h" #include "qcalendar.h" +#include <q20iterator.h> + QT_BEGIN_NAMESPACE -QT_IMPL_METATYPE_EXTERN(QStringView) +constexpr int QLocale::DefaultTwoDigitBaseYear; + QT_IMPL_METATYPE_EXTERN_TAGGED(QList<Qt::DayOfWeek>, QList_Qt__DayOfWeek) #ifndef QT_NO_SYSTEMLOCALE QT_IMPL_METATYPE_EXTERN_TAGGED(QSystemLocale::CurrencyToStringArgument, @@ -56,19 +63,27 @@ QT_IMPL_METATYPE_EXTERN_TAGGED(QSystemLocale::CurrencyToStringArgument, #endif using namespace Qt::StringLiterals; +using namespace QtMiscUtils; #ifndef QT_NO_SYSTEMLOCALE Q_CONSTINIT static QSystemLocale *_systemLocale = nullptr; -class QSystemLocaleSingleton: public QSystemLocale -{ -public: - QSystemLocaleSingleton() : QSystemLocale(true) {} -}; - -Q_GLOBAL_STATIC(QSystemLocaleSingleton, QSystemLocale_globalSystemLocale) Q_CONSTINIT static QLocaleData systemLocaleData = {}; #endif +static_assert(ascii_isspace(' ')); +static_assert(ascii_isspace('\t')); +static_assert(ascii_isspace('\n')); +static_assert(ascii_isspace('\v')); +static_assert(ascii_isspace('\f')); +static_assert(ascii_isspace('\r')); +static_assert(!ascii_isspace('\0')); 
+static_assert(!ascii_isspace('\a')); +static_assert(!ascii_isspace('a')); +static_assert(!ascii_isspace('\177')); +static_assert(!ascii_isspace(uchar('\200'))); +static_assert(!ascii_isspace(uchar('\xA0'))); // NBSP (is a space but Latin 1, not ASCII) +static_assert(!ascii_isspace(uchar('\377'))); + /****************************************************************************** ** Helpers for accessing Qt locale database */ @@ -92,22 +107,22 @@ QLocale::Language QLocalePrivate::codeToLanguage(QStringView code, if (uc1 > 0x7F || uc2 > 0x7F || uc3 > 0x7F) return QLocale::AnyLanguage; - const AlphaCode codeBuf = { { char(uc1), char(uc2), char(uc3) } }; + const AlphaCode codeBuf = { char(uc1), char(uc2), char(uc3) }; auto searchCode = [codeBuf](auto f) { return std::find_if(languageCodeList.begin(), languageCodeList.end(), - [=](const LanguageCodeEntry &i) { return f(i) == codeBuf; }); + [=](LanguageCodeEntry i) { return f(i) == codeBuf; }); }; if (codeTypes.testFlag(QLocale::ISO639Part1) && uc3 == 0) { - auto i = searchCode([](const LanguageCodeEntry &i) { return i.part1; }); + auto i = searchCode([](LanguageCodeEntry i) { return i.part1; }); if (i != languageCodeList.end()) return QLocale::Language(std::distance(languageCodeList.begin(), i)); } if (uc3 != 0) { if (codeTypes.testFlag(QLocale::ISO639Part2B)) { - auto i = searchCode([](const LanguageCodeEntry &i) { return i.part2B; }); + auto i = searchCode([](LanguageCodeEntry i) { return i.part2B; }); if (i != languageCodeList.end()) return QLocale::Language(std::distance(languageCodeList.begin(), i)); } @@ -116,13 +131,13 @@ QLocale::Language QLocalePrivate::codeToLanguage(QStringView code, // This is asserted in iso639_3.LanguageCodeData. 
if (codeTypes.testFlag(QLocale::ISO639Part2T) && !codeTypes.testFlag(QLocale::ISO639Part3)) { - auto i = searchCode([](const LanguageCodeEntry &i) { return i.part2T; }); + auto i = searchCode([](LanguageCodeEntry i) { return i.part2T; }); if (i != languageCodeList.end()) return QLocale::Language(std::distance(languageCodeList.begin(), i)); } if (codeTypes.testFlag(QLocale::ISO639Part3)) { - auto i = searchCode([](const LanguageCodeEntry &i) { return i.part3; }); + auto i = searchCode([](LanguageCodeEntry i) { return i.part3; }); if (i != languageCodeList.end()) return QLocale::Language(std::distance(languageCodeList.begin(), i)); } @@ -162,7 +177,7 @@ QLocale::Script QLocalePrivate::codeToScript(QStringView code) noexcept unsigned char c3 = code[3].toLower().toLatin1(); const unsigned char *c = script_code_list; - for (int i = 0; i < QLocale::LastScript; ++i, c += 4) { + for (qsizetype i = 0; i < QLocale::LastScript; ++i, c += 4) { if (c0 == c[0] && c1 == c[1] && c2 == c[2] && c3 == c[3]) return QLocale::Script(i); } @@ -188,27 +203,27 @@ QLocale::Territory QLocalePrivate::codeToTerritory(QStringView code) noexcept return QLocale::AnyTerritory; } -QLatin1StringView QLocalePrivate::languageToCode(QLocale::Language language, - QLocale::LanguageCodeTypes codeTypes) +std::array<char, 4> QLocalePrivate::languageToCode(QLocale::Language language, + QLocale::LanguageCodeTypes codeTypes) { if (language == QLocale::AnyLanguage || language > QLocale::LastLanguage) return {}; if (language == QLocale::C) - return "C"_L1; + return {'C'}; const LanguageCodeEntry &i = languageCodeList[language]; if (codeTypes.testFlag(QLocale::ISO639Part1) && i.part1.isValid()) - return {i.part1.code, 2}; + return i.part1.decode(); if (codeTypes.testFlag(QLocale::ISO639Part2B) && i.part2B.isValid()) - return {i.part2B.code, 3}; + return i.part2B.decode(); if (codeTypes.testFlag(QLocale::ISO639Part2T) && i.part2T.isValid()) - return {i.part2T.code, 3}; + return i.part2T.decode(); if 
(codeTypes.testFlag(QLocale::ISO639Part3)) - return {i.part3.code, 3}; + return i.part3.decode(); return {}; } @@ -237,7 +252,7 @@ struct LikelyPair QLocaleId value = QLocaleId { 0, 0, 0 }; }; -bool operator<(const LikelyPair &lhs, const LikelyPair &rhs) +bool operator<(LikelyPair lhs, LikelyPair rhs) { // Must match the comparison LocaleDataWriter.likelySubtags() uses when // sorting, see qtbase/util/locale_database.qlocalexml2cpp.py @@ -396,14 +411,14 @@ QByteArray QLocaleId::name(char separator) const return QByteArrayLiteral("C"); const LanguageCodeEntry &language = languageCodeList[language_id]; - const char *lang; + AlphaCode lang; qsizetype langLen; if (language.part1.isValid()) { - lang = language.part1.code; + lang = language.part1; langLen = 2; } else { - lang = language.part2B.isValid() ? language.part2B.code : language.part3.code; + lang = language.part2B.isValid() ? language.part2B : language.part3; langLen = 3; } @@ -416,10 +431,12 @@ QByteArray QLocaleId::name(char separator) const QByteArray name(len, Qt::Uninitialized); char *uc = name.data(); - *uc++ = lang[0]; - *uc++ = lang[1]; + auto langArray = lang.decode(); + + *uc++ = langArray[0]; + *uc++ = langArray[1]; if (langLen > 2) - *uc++ = lang[2]; + *uc++ = langArray[2]; if (script) { *uc++ = separator; @@ -448,9 +465,9 @@ QByteArray QLocalePrivate::bcp47Name(char separator) const return m_data->id().withLikelySubtagsRemoved().name(separator); } -static int findLocaleIndexById(const QLocaleId &localeId) +static qsizetype findLocaleIndexById(QLocaleId localeId) { - quint16 idx = locale_index[localeId.language_id]; + qsizetype idx = locale_index[localeId.language_id]; // If there are no locales for specified language (so we've got the // default language, which has no associated script or country), give up: if (localeId.language_id && idx == 0) @@ -467,14 +484,14 @@ static int findLocaleIndexById(const QLocaleId &localeId) return -1; } -int QLocaleData::findLocaleIndex(QLocaleId lid) +qsizetype 
QLocaleData::findLocaleIndex(QLocaleId lid) { QLocaleId localeId = lid; QLocaleId likelyId = localeId.withLikelySubtagsAdded(); const ushort fallback = likelyId.language_id; // Try a straight match with the likely data: - int index = findLocaleIndexById(likelyId); + qsizetype index = findLocaleIndexById(likelyId); if (index >= 0) return index; QVarLengthArray<QLocaleId, 6> tried; @@ -517,13 +534,13 @@ int QLocaleData::findLocaleIndex(QLocaleId lid) return locale_index[fallback]; } -static QStringView findTag(QStringView name) +static QStringView findTag(QStringView name) noexcept { - const QString separators = QStringLiteral("_-.@"); - int i = 0; - while (i < name.size() && !separators.contains(name[i])) - i++; - return name.first(i); + const std::u16string_view v(name.utf16(), size_t(name.size())); + const auto i = v.find_first_of(u"_-.@"); + if (i == std::string_view::npos) + return name; + return name.first(qsizetype(i)); } static bool validTag(QStringView tag) @@ -531,7 +548,7 @@ static bool validTag(QStringView tag) // Is tag a non-empty sequence of ASCII letters and/or digits ? 
for (QChar uc : tag) { const char16_t ch = uc.unicode(); - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))) + if (!isAsciiLetterOrNumber(ch)) return false; } return tag.size() > 0; @@ -544,7 +561,7 @@ static bool isScript(QStringView tag) static const QString allScripts = QString::fromLatin1(reinterpret_cast<const char *>(script_code_list), sizeof(script_code_list) - 1); - return tag.length() == 4 && allScripts.indexOf(tag) % 4 == 0; + return tag.size() == 4 && allScripts.indexOf(tag) % 4 == 0; } bool qt_splitLocaleName(QStringView name, QStringView *lang, QStringView *script, QStringView *land) @@ -605,9 +622,9 @@ QLocaleId QLocaleId::fromName(QStringView name) return { langId, QLocalePrivate::codeToScript(script), QLocalePrivate::codeToTerritory(land) }; } -QString qt_readEscapedFormatString(QStringView format, int *idx) +QString qt_readEscapedFormatString(QStringView format, qsizetype *idx) { - int &i = *idx; + qsizetype &i = *idx; Q_ASSERT(format.at(i) == u'\''); ++i; @@ -655,7 +672,7 @@ QString qt_readEscapedFormatString(QStringView format, int *idx) qt_repeatCount(u"aab"); // == 2 \endcode */ -int qt_repeatCount(QStringView s) +qsizetype qt_repeatCount(QStringView s) { if (s.isEmpty()) return 0; @@ -663,15 +680,15 @@ int qt_repeatCount(QStringView s) qsizetype j = 1; while (j < s.size() && s.at(j) == c) ++j; - return int(j); + return j; } Q_CONSTINIT static const QLocaleData *default_data = nullptr; +Q_CONSTINIT QBasicAtomicInt QLocalePrivate::s_generation = Q_BASIC_ATOMIC_INITIALIZER(0); -static const QLocaleData *const c_data = locale_data; static QLocalePrivate *c_private() { - static QLocalePrivate c_locale(c_data, 0, QLocale::OmitGroupSeparator, 1); + static QLocalePrivate c_locale(locale_data, 0, QLocale::OmitGroupSeparator, 1); return &c_locale; } @@ -681,37 +698,48 @@ static QLocalePrivate *c_private() */ /*! - Constructs a QSystemLocale object. + \internal + Constructs a QSystemLocale object. 
+ + The constructor will automatically install this object as the system locale. + It and the destructor maintain a stack of system locales, with the + most-recently-created instance (that hasn't yet been deleted) used as the + system locale. This is only intended as a way to let a platform plugin + install its own system locale, overriding what might otherwise be provided + for its class of platform (as Android does, differing from Linux), and to + let tests transiently override the system or plugin-supplied one. As such, + there should not be diverse threads creating and destroying QSystemLocale + instances concurrently, so no attempt is made at thread-safety in managing + the stack. - The constructor will automatically install this object as the system locale, - if there's not one active. It also resets the flag that'll prompt - QLocale::system() to re-initialize its data, so that instantiating a - QSystemLocale transiently (doesn't install the transient as system locale if - there was one already and) triggers an update to the system locale's data. + This constructor also resets the flag that'll prompt QLocale::system() to + re-initialize its data, so that instantiating a QSystemLocale (even + transiently) triggers a refresh of the system locale's data. This is + exploited by some test code. */ -QSystemLocale::QSystemLocale() +QSystemLocale::QSystemLocale() : next(_systemLocale) { - if (!_systemLocale) - _systemLocale = this; + _systemLocale = this; systemLocaleData.m_language_id = 0; } /*! \internal -*/ -QSystemLocale::QSystemLocale(bool) -{ } - -/*! - Deletes the object. + Deletes the object. */ QSystemLocale::~QSystemLocale() { if (_systemLocale == this) { - _systemLocale = nullptr; + _systemLocale = next; + // Change to system locale => force refresh. 
systemLocaleData.m_language_id = 0; + } else { + for (QSystemLocale *p = _systemLocale; p; p = p->next) { + if (p->next == this) + p->next = next; + } } } @@ -719,7 +747,12 @@ static const QSystemLocale *systemLocale() { if (_systemLocale) return _systemLocale; - return QSystemLocale_globalSystemLocale(); + + // As this is only ever instantiated with _systemLocale null, it is + // necessarily the ->next-most in any chain that may subsequently develop; + // and it won't be destructed until exit()-time. + static QSystemLocale globalInstance; + return &globalInstance; } static void updateSystemPrivate() @@ -750,28 +783,49 @@ static void updateSystemPrivate() systemLocaleData.m_script_id = res.toInt(); // Should we replace Any values based on likely sub-tags ? + + // If system locale is default locale, update the default collator's generation: + if (default_data == &systemLocaleData) + QLocalePrivate::s_generation.fetchAndAddRelaxed(1); } #endif // !QT_NO_SYSTEMLOCALE -static const QLocaleData *systemData() +static const QLocaleData *systemData(qsizetype *sysIndex = nullptr) { #ifndef QT_NO_SYSTEMLOCALE /* Copy over the information from the fallback locale and modify. - This modifies (cross-thread) global state, so take care to only call it in - one thread. + If sysIndex is passed, it should be the m_index of the system locale's + QLocalePrivate, which we'll update if it needs it. + + This modifies (cross-thread) global state, so is mutex-protected. */ { + Q_CONSTINIT static QLocaleId sysId; + bool updated = false; + Q_CONSTINIT static QBasicMutex systemDataMutex; systemDataMutex.lock(); - if (systemLocaleData.m_language_id == 0) + if (systemLocaleData.m_language_id == 0) { updateSystemPrivate(); + updated = true; + } + // Initialization of system private has *sysIndex == -1 to hit this. 
+ if (sysIndex && (updated || *sysIndex < 0)) { + const QLocaleId nowId = systemLocaleData.id(); + if (sysId != nowId || *sysIndex < 0) { + // This look-up may be expensive: + *sysIndex = QLocaleData::findLocaleIndex(nowId); + sysId = nowId; + } + } systemDataMutex.unlock(); } return &systemLocaleData; #else + Q_UNUSED(sysIndex); return locale_data; #endif } @@ -783,7 +837,7 @@ static const QLocaleData *defaultData() return default_data; } -static uint defaultIndex() +static qsizetype defaultIndex() { const QLocaleData *const data = defaultData(); #ifndef QT_NO_SYSTEMLOCALE @@ -794,15 +848,15 @@ static uint defaultIndex() } #endif - Q_ASSERT(data >= locale_data); - Q_ASSERT(data < locale_data + std::size(locale_data)); + using QtPrivate::q_points_into_range; + Q_ASSERT(q_points_into_range(data, locale_data)); return data - locale_data; } const QLocaleData *QLocaleData::c() { Q_ASSERT(locale_index[QLocale::C] == 0); - return c_data; + return locale_data; } #ifndef QT_NO_DATASTREAM @@ -821,9 +875,8 @@ QDataStream &operator>>(QDataStream &ds, QLocale &l) } #endif // QT_NO_DATASTREAM -static const int locale_data_size = sizeof(locale_data)/sizeof(QLocaleData) - 1; +static constexpr qsizetype locale_data_size = q20::ssize(locale_data) - 1; // trailing guard -Q_CONSTINIT QBasicAtomicInt QLocalePrivate::s_generation = Q_BASIC_ATOMIC_INITIALIZER(0); Q_GLOBAL_STATIC(QSharedDataPointer<QLocalePrivate>, defaultLocalePrivate, new QLocalePrivate(defaultData(), defaultIndex())) @@ -831,8 +884,8 @@ static QLocalePrivate *localePrivateByName(QStringView name) { if (name == u"C") return c_private(); - const int index = QLocaleData::findLocaleIndex(QLocaleId::fromName(name)); - Q_ASSERT(index >= 0 && size_t(index) < std::size(locale_data) - 1); + const qsizetype index = QLocaleData::findLocaleIndex(QLocaleId::fromName(name)); + Q_ASSERT(index >= 0 && index < locale_data_size); return new QLocalePrivate(locale_data + index, index, locale_data[index].m_language_id == QLocale::C ? 
QLocale::OmitGroupSeparator : QLocale::DefaultNumberOptions); @@ -844,8 +897,8 @@ static QLocalePrivate *findLocalePrivate(QLocale::Language language, QLocale::Sc if (language == QLocale::C) return c_private(); - int index = QLocaleData::findLocaleIndex(QLocaleId { language, script, territory }); - Q_ASSERT(index >= 0 && size_t(index) < std::size(locale_data) - 1); + qsizetype index = QLocaleData::findLocaleIndex(QLocaleId { language, script, territory }); + Q_ASSERT(index >= 0 && index < locale_data_size); const QLocaleData *data = locale_data + index; QLocale::NumberOptions numberOptions = QLocale::DefaultNumberOptions; @@ -860,29 +913,41 @@ static QLocalePrivate *findLocalePrivate(QLocale::Language language, QLocale::Sc return new QLocalePrivate(data, index, numberOptions); } -QString QLocaleData::decimalPoint() const +static std::optional<QString> +systemLocaleString(const QLocaleData *that, QSystemLocale::QueryType type) { #ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::DecimalPoint).toString(); - if (!res.isEmpty()) - return res; - } + if (that != &systemLocaleData) + return std::nullopt; + + QVariant v = systemLocale()->query(type); + if (v.metaType() != QMetaType::fromType<QString>()) + return std::nullopt; + + return v.toString(); +#else + Q_UNUSED(that) + Q_UNUSED(type) + return std::nullopt; #endif - return decimalSeparator().getData(single_character_data); +} + +static QString localeString(const QLocaleData *that, QSystemLocale::QueryType type, + QLocaleData::DataRange range) +{ + if (auto opt = systemLocaleString(that, type)) + return *opt; + return range.getData(single_character_data); +} + +QString QLocaleData::decimalPoint() const +{ + return localeString(this, QSystemLocale::DecimalPoint, decimalSeparator()); } QString QLocaleData::groupSeparator() const { - // Empty => don't do grouping -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - QVariant res = 
systemLocale()->query(QSystemLocale::GroupSeparator); - if (!res.isNull()) - return res.toString(); - } -#endif - return groupDelim().getData(single_character_data); + return localeString(this, QSystemLocale::GroupSeparator, groupDelim()); } QString QLocaleData::percentSign() const @@ -897,14 +962,7 @@ QString QLocaleData::listSeparator() const QString QLocaleData::zeroDigit() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::ZeroDigit).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return zero().getData(single_character_data); + return localeString(this, QSystemLocale::ZeroDigit, zero()); } char32_t QLocaleData::zeroUcs() const @@ -925,26 +983,12 @@ char32_t QLocaleData::zeroUcs() const QString QLocaleData::negativeSign() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::NegativeSign).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return minus().getData(single_character_data); + return localeString(this, QSystemLocale::NegativeSign, minus()); } QString QLocaleData::positiveSign() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::PositiveSign).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return plus().getData(single_character_data); + return localeString(this, QSystemLocale::PositiveSign, plus()); } QString QLocaleData::exponentSeparator() const @@ -959,6 +1003,21 @@ QLocale::QLocale(QLocalePrivate &dd) : d(&dd) {} +/*! + \variable QLocale::DefaultTwoDigitBaseYear + \since 6.7 + + \brief The default start year of the century within which a format taking + a two-digit year will select. The value of the constant is \c {1900}. + + Some locales use, particularly for ShortFormat, only the last two digits of + the year. Prior to 6.7 the year 1900 was always used as a base year for + such cases. 
Now various QLocale and QDate functions have the overloads that + allow callers to specify the base year, and this constant is used as its + default value. + + \sa toDate(), toDateTime(), QDate::fromString(), QDateTime::fromString() +*/ /*! \since 6.3 @@ -994,12 +1053,10 @@ QLocale::QLocale(QStringView name) { } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn QLocale::QLocale(const QString &name) \overload */ -#endif /*! Constructs a QLocale object initialized with the default locale. @@ -1030,7 +1087,7 @@ QLocale::QLocale() */ QLocale::QLocale(Language language, Territory territory) - : d(findLocalePrivate(language, QLocale::AnyScript, territory)) + : d(findLocalePrivate(language, AnyScript, territory)) { } @@ -1062,10 +1119,7 @@ QLocale::QLocale(Language language, Script script, Territory territory) Constructs a QLocale object as a copy of \a other. */ -QLocale::QLocale(const QLocale &other) -{ - d = other.d; -} +QLocale::QLocale(const QLocale &other) noexcept = default; /*! Destructor @@ -1080,11 +1134,7 @@ QLocale::~QLocale() to this QLocale object. */ -QLocale &QLocale::operator=(const QLocale &other) -{ - d = other.d; - return *this; -} +QLocale &QLocale::operator=(const QLocale &other) noexcept = default; /*! 
\internal @@ -1164,10 +1214,10 @@ QString QLocale::quoteString(QStringView str, QuotationStyle style) const #ifndef QT_NO_SYSTEMLOCALE if (d->m_data == &systemLocaleData) { QVariant res; - if (style == QLocale::AlternateQuotation) + if (style == AlternateQuotation) res = systemLocale()->query(QSystemLocale::StringToAlternateQuotation, QVariant::fromValue(str)); - if (res.isNull() || style == QLocale::StandardQuotation) + if (res.isNull() || style == StandardQuotation) res = systemLocale()->query(QSystemLocale::StringToStandardQuotation, QVariant::fromValue(str)); if (!res.isNull()) @@ -1176,7 +1226,7 @@ QString QLocale::quoteString(QStringView str, QuotationStyle style) const #endif QLocaleData::DataRange start, end; - if (style == QLocale::StandardQuotation) { + if (style == StandardQuotation) { start = d->m_data->quoteStart(); end = d->m_data->quoteEnd(); } else { @@ -1206,7 +1256,7 @@ QString QLocale::createSeparatedList(const QStringList &list) const } #endif - const int size = list.size(); + const qsizetype size = list.size(); if (size < 1) return QString(); @@ -1221,7 +1271,7 @@ QString QLocale::createSeparatedList(const QStringList &list) const QStringView formatMid = d->m_data->midListPattern().viewData(list_pattern_part_data); QStringView formatEnd = d->m_data->endListPattern().viewData(list_pattern_part_data); QString result = formatStart.arg(list.at(0), list.at(1)); - for (int i = 2; i < size - 1; ++i) + for (qsizetype i = 2; i < size - 1; ++i) result = formatMid.arg(result, list.at(i)); result = formatEnd.arg(result, list.at(size - 1)); return result; @@ -1308,51 +1358,94 @@ QLocale::Country QLocale::country() const #endif /*! - Returns the language and country of this locale as a - string of the form "language_country", where - language is a lowercase, two-letter ISO 639 language code, - and country is an uppercase, two- or three-letter ISO 3166 country code. 
+ \since 6.7 + \enum QLocale::TagSeparator + + Indicate how to combine the parts that make up a locale identifier. + + A locale identifier may be made up of several tags, indicating language, + script and territory (plus, potentially, other details), joined together to + form the identifier. Various standards and conventional forms use either a + dash (the Unicode HYPHEN-MINUS, U+002D) or an underscore (LOW LINE, U+005F). + Different clients of QLocale may thus need one or the other. + + \value Dash Use \c{'-'}, the dash or hyphen character. + \value Underscore Use \c{'_'}, the underscore character. + + \note Although dash and underscore are the only separators used in public + standards (as at 2023), it is possible to cast any \l + {https://en.cppreference.com/w/cpp/language/ascii} {ASCII} character to this + type if a non-standard ASCII separator is needed. Casting a non-ASCII + character (with decimal value above 127) is not supported: such values are + reserved for future use as enum members if some public standard ever uses a + non-ASCII separator. It is, of course, possible to use QString::replace() to + replace the separator used by a function taking a parameter of this type + with an arbitrary Unicode character or string. +*/ + +Q_DECL_COLD_FUNCTION static void badSeparatorWarning(const char *method, char sep) +{ + qWarning("QLocale::%s(): Using non-ASCII separator '%c' (%02x) is unsupported", + method, sep, uint(uchar(sep))); +} + +/*! + \brief The short name of this locale. + + Returns the language and territory of this locale as a string of the form + "language_territory", where language is a lowercase, two-letter ISO 639 + language code, and territory is an uppercase, two- or three-letter ISO 3166 + territory code. If the locale has no specified territory, only the language + name is returned. Since Qt 6.7 an optional \a separator parameter can be + supplied to override the default underscore character separating the two + tags. 
- Note that even if QLocale object was constructed with an explicit script, - name() will not contain it for compatibility reasons. Use \l bcp47Name() instead - if you need a full locale name. + Even if the QLocale object was constructed with an explicit script, name() + will not contain it for compatibility reasons. Use \l bcp47Name() instead if + you need a full locale name, or construct the string you want to identify a + locale by from those returned by passing its \l language() to \l + languageToCode() and similar for the script and territory. - \sa QLocale(), language(), script(), territory(), bcp47Name() + \sa QLocale(), language(), script(), territory(), bcp47Name(), uiLanguages() */ -QString QLocale::name() const +QString QLocale::name(TagSeparator separator) const { + const char sep = char(separator); + if (uchar(sep) > 0x7f) { + badSeparatorWarning("name", sep); + return {}; + } + const auto code = d->languageCode(); + QLatin1StringView view{code.data()}; + Language l = language(); if (l == C) - return d->languageCode(); + return view; Territory c = territory(); if (c == AnyTerritory) - return d->languageCode(); + return view; - return d->languageCode() + u'_' + d->territoryCode(); -} - -static qlonglong toIntegral_helper(const QLocaleData *d, QStringView str, bool *ok, - QLocale::NumberOptions mode, qlonglong) -{ - return d->stringToLongLong(str, 10, ok, mode); -} - -static qulonglong toIntegral_helper(const QLocaleData *d, QStringView str, bool *ok, - QLocale::NumberOptions mode, qulonglong) -{ - return d->stringToUnsLongLong(str, 10, ok, mode); + return view + QLatin1Char(sep) + d->territoryCode(); } template <typename T> static inline T toIntegral_helper(const QLocalePrivate *d, QStringView str, bool *ok) { - using Int64 = - typename std::conditional<std::is_unsigned<T>::value, qulonglong, qlonglong>::type; + constexpr bool isUnsigned = std::is_unsigned_v<T>; + using Int64 = typename std::conditional_t<isUnsigned, quint64, qint64>; - // we select the 
right overload by the last, unused parameter - Int64 val = toIntegral_helper(d->m_data, str, ok, d->m_numberOptions, Int64()); + QSimpleParsedNumber<Int64> r{}; + if constexpr (isUnsigned) + r = d->m_data->stringToUnsLongLong(str, 10, d->m_numberOptions); + else + r = d->m_data->stringToLongLong(str, 10, d->m_numberOptions); + + if (ok) + *ok = r.ok(); + + Int64 val = r.result; if (T(val) != val) { if (ok != nullptr) *ok = false; @@ -1365,20 +1458,35 @@ T toIntegral_helper(const QLocalePrivate *d, QStringView str, bool *ok) /*! \since 4.8 - Returns the dash-separated language, script and country (and possibly other - BCP47 fields) of this locale as a string. + \brief Returns the BCP47 field names joined with dashes. + + This combines as many of language, script and territory (and possibly other + BCP47 fields) for this locale as are needed to uniquely specify it. Note + that fields may be omitted if the Unicode consortium's \l {Matching + combinations of language, script and territory}{Likely Subtag Rules} imply + the omitted fields when given those retained. See \l name() for how to + construct a string from individual fields, if some other format is needed. - Unlike the uiLanguages() the returned value of the bcp47Name() represents - the locale name of the QLocale data but not the language the user-interface - should be in. + Unlike uiLanguages(), the value returned by bcp47Name() represents the + locale name of the QLocale data; this need not be the language the + user-interface should be in. - This function tries to conform the locale name to BCP47. + This function tries to conform the locale name to the IETF Best Common + Practice 47, defined by RFC 5646. Since Qt 6.7, it supports an optional \a + separator parameter which can be used to override the BCP47-specified use of + a hyphen to separate the tags. For use in IETF-defined protocols, however, + the default, QLocale::TagSeparator::Dash, should be retained. 
- \sa language(), territory(), script(), uiLanguages() + \sa name(), language(), territory(), script(), uiLanguages() */ -QString QLocale::bcp47Name() const +QString QLocale::bcp47Name(TagSeparator separator) const { - return QString::fromLatin1(d->bcp47Name()); + const char sep = char(separator); + if (uchar(sep) > 0x7f) { + badSeparatorWarning("bcp47Name", sep); + return {}; + } + return QString::fromLatin1(d->bcp47Name(sep)); } /*! @@ -1401,7 +1509,8 @@ QString QLocale::bcp47Name() const */ QString QLocale::languageToCode(Language language, LanguageCodeTypes codeTypes) { - return QLocalePrivate::languageToCode(language, codeTypes); + const auto code = QLocalePrivate::languageToCode(language, codeTypes); + return QLatin1StringView{code.data()}; } /*! @@ -1522,9 +1631,9 @@ QLocale::Script QLocale::codeToScript(QStringView scriptCode) noexcept QString QLocale::languageToString(Language language) { - if (language > QLocale::LastLanguage) + if (language > LastLanguage) return "Unknown"_L1; - return QLatin1StringView(language_name_list + language_name_index[language]); + return QString::fromUtf8(language_name_list + language_name_index[language]); } /*! 
@@ -1534,11 +1643,11 @@ QString QLocale::languageToString(Language language) \sa languageToString(), scriptToString(), territory(), bcp47Name() */ -QString QLocale::territoryToString(QLocale::Territory territory) +QString QLocale::territoryToString(Territory territory) { - if (territory > QLocale::LastTerritory) + if (territory > LastTerritory) return "Unknown"_L1; - return QLatin1StringView(territory_name_list + territory_name_index[territory]); + return QString::fromUtf8(territory_name_list + territory_name_index[territory]); } #if QT_DEPRECATED_SINCE(6, 6) @@ -1562,14 +1671,13 @@ QString QLocale::countryToString(Country country) \sa languageToString(), territoryToString(), script(), bcp47Name() */ -QString QLocale::scriptToString(QLocale::Script script) +QString QLocale::scriptToString(Script script) { - if (script > QLocale::LastScript) + if (script > LastScript) return "Unknown"_L1; - return QLatin1StringView(script_name_list + script_name_index[script]); + return QString::fromUtf8(script_name_list + script_name_index[script]); } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn short QLocale::toShort(const QString &s, bool *ok) const @@ -1702,9 +1810,6 @@ QString QLocale::scriptToString(QLocale::Script script) If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - This function does not fall back to the 'C' locale if the string - cannot be interpreted in this locale. - This function ignores leading and trailing whitespace. \sa toDouble(), toInt(), toString() @@ -1720,9 +1825,6 @@ QString QLocale::scriptToString(QLocale::Script script) If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - This function does not fall back to the 'C' locale if the string - cannot be interpreted in this locale. - \snippet code/src_corelib_text_qlocale.cpp 3 Notice that the last conversion returns 1234.0, because '.' 
is the @@ -1732,7 +1834,6 @@ QString QLocale::scriptToString(QLocale::Script script) \sa toFloat(), toInt(), toString() */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! Returns the short int represented by the localized string \a s. @@ -1925,10 +2026,6 @@ float QLocale::toFloat(QStringView s, bool *ok) const If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - Unlike QString::toDouble(), this function does not fall back to - the "C" locale if the string cannot be interpreted in this - locale. - \snippet code/src_corelib_text_qlocale.cpp 3-qstringview Notice that the last conversion returns 1234.0, because '.' is the @@ -1974,7 +2071,6 @@ QString QLocale::toString(qulonglong i) const return d->m_data->unsLongLongToString(i, -1, 10, -1, flags); } -#if QT_STRINGVIEW_LEVEL < 2 /*! Returns a localized string representation of the given \a date in the specified \a format. @@ -2011,7 +2107,6 @@ QString QLocale::toString(QTime time, const QString &format) const \sa QDateTime::toString(), QDate::toString(), QTime::toString() */ -#endif /*! \since 5.14 @@ -2092,7 +2187,7 @@ QString QLocale::toString(QDate date, FormatType format) const static bool timeFormatContainsAP(QStringView format) { - int i = 0; + qsizetype i = 0; while (i < format.size()) { if (format.at(i).unicode() == '\'') { qt_readEscapedFormatString(format, &i); @@ -2343,6 +2438,16 @@ QTime QLocale::toTime(const QString &string, FormatType format) const Parses \a string and returns the date it represents. The format of the date string is chosen according to the \a format parameter (see dateFormat()). +//! [base-year-for-short] + Some locales use, particularly for ShortFormat, only the last two digits of + the year. In such a case, the 100 years starting at \a baseYear are the + candidates first considered. Prior to 6.7 there was no \a baseYear parameter + and 1900 was always used. 
This is the default for \a baseYear, selecting a + year from then to 1999. In some cases, other fields may lead to the next or + previous century being selected, to get a result consistent with all fields + given. See \l QDate::fromString() for details. +//! [base-year-for-short] + \note Month and day names, where used, must be given in the locale's language. @@ -2350,18 +2455,18 @@ QTime QLocale::toTime(const QString &string, FormatType format) const \sa dateFormat(), toTime(), toDateTime(), QDate::fromString() */ -QDate QLocale::toDate(const QString &string, FormatType format) const +QDate QLocale::toDate(const QString &string, FormatType format, int baseYear) const { - return toDate(string, dateFormat(format)); + return toDate(string, dateFormat(format), baseYear); } /*! \since 5.14 \overload */ -QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal) const +QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal, int baseYear) const { - return toDate(string, dateFormat(format), cal); + return toDate(string, dateFormat(format), cal, baseYear); } /*! @@ -2373,6 +2478,8 @@ QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal) c date string is chosen according to the \a format parameter (see dateFormat()). + \include qlocale.cpp base-year-for-short + \note Month and day names, where used, must be given in the locale's language. Any am/pm indicators used must match \l amText() or \l pmText(), ignoring case. @@ -2381,18 +2488,19 @@ QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal) c \sa dateTimeFormat(), toTime(), toDate(), QDateTime::fromString() */ -QDateTime QLocale::toDateTime(const QString &string, FormatType format) const +QDateTime QLocale::toDateTime(const QString &string, FormatType format, int baseYear) const { - return toDateTime(string, dateTimeFormat(format)); + return toDateTime(string, dateTimeFormat(format), baseYear); } /*! 
\since 5.14 \overload */ -QDateTime QLocale::toDateTime(const QString &string, FormatType format, QCalendar cal) const +QDateTime QLocale::toDateTime(const QString &string, FormatType format, QCalendar cal, + int baseYear) const { - return toDateTime(string, dateTimeFormat(format), cal); + return toDateTime(string, dateTimeFormat(format), cal, baseYear); } /*! @@ -2433,6 +2541,16 @@ QTime QLocale::toTime(const QString &string, const QString &format) const Parses \a string and returns the date it represents. See QDate::fromString() for the interpretation of \a format. +//! [base-year-for-two-digit] + When \a format only specifies the last two digits of a year, the 100 years + starting at \a baseYear are the candidates first considered. Prior to 6.7 + there was no \a baseYear parameter and 1900 was always used. This is the + default for \a baseYear, selecting a year from then to 1999. In some cases, + other fields may lead to the next or previous century being selected, to get + a result consistent with all fields given. See \l QDate::fromString() for + details. +//! [base-year-for-two-digit] + \note Month and day names, where used, must be given in the locale's language. @@ -2440,26 +2558,27 @@ QTime QLocale::toTime(const QString &string, const QString &format) const \sa dateFormat(), toTime(), toDateTime(), QDate::fromString() */ -QDate QLocale::toDate(const QString &string, const QString &format) const +QDate QLocale::toDate(const QString &string, const QString &format, int baseYear) const { - return toDate(string, format, QCalendar()); + return toDate(string, format, QCalendar(), baseYear); } /*! 
\since 5.14 \overload */ -QDate QLocale::toDate(const QString &string, const QString &format, QCalendar cal) const +QDate QLocale::toDate(const QString &string, const QString &format, QCalendar cal, int baseYear) const { QDate date; #if QT_CONFIG(datetimeparser) QDateTimeParser dt(QMetaType::QDate, QDateTimeParser::FromString, cal); dt.setDefaultLocale(*this); if (dt.parseFormat(format)) - dt.fromString(string, &date, nullptr); + dt.fromString(string, &date, nullptr, baseYear); #else Q_UNUSED(string); Q_UNUSED(format); + Q_UNUSED(baseYear); Q_UNUSED(cal); #endif return date; @@ -2473,6 +2592,8 @@ QDate QLocale::toDate(const QString &string, const QString &format, QCalendar ca Parses \a string and returns the date-time it represents. See QDateTime::fromString() for the interpretation of \a format. + \include qlocale.cpp base-year-for-two-digit + \note Month and day names, where used, must be given in the locale's language. Any am/pm indicators used must match \l amText() or \l pmText(), ignoring case. @@ -2486,27 +2607,31 @@ QDate QLocale::toDate(const QString &string, const QString &format, QCalendar ca \sa dateTimeFormat(), toTime(), toDate(), QDateTime::fromString() */ -QDateTime QLocale::toDateTime(const QString &string, const QString &format) const +QDateTime QLocale::toDateTime(const QString &string, const QString &format, int baseYear) const { - return toDateTime(string, format, QCalendar()); + return toDateTime(string, format, QCalendar(), baseYear); } /*! 
\since 5.14 \overload */ -QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCalendar cal) const +QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCalendar cal, + int baseYear) const { #if QT_CONFIG(datetimeparser) QDateTime datetime; QDateTimeParser dt(QMetaType::QDateTime, QDateTimeParser::FromString, cal); dt.setDefaultLocale(*this); - if (dt.parseFormat(format) && (dt.fromString(string, &datetime) || !datetime.isValid())) + if (dt.parseFormat(format) && (dt.fromString(string, &datetime, baseYear) + || !datetime.isValid())) { return datetime; + } #else Q_UNUSED(string); Q_UNUSED(format); + Q_UNUSED(baseYear); Q_UNUSED(cal); #endif return QDateTime(); @@ -2516,7 +2641,14 @@ QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCal /*! \since 4.1 - Returns the decimal point character of this locale. + Returns the fractional part separator for this locale. + + This is the token that separates the whole number part from the fractional + part in the representation of a number which has a fractional part. This is + commonly called the "decimal point character" - even though, in many + locales, it is not a "point" (or similar dot). It is (since Qt 6.0) returned + as a string in case some locale needs more than one UTF-16 code-point to + represent its separator. \sa groupSeparator(), toString() */ @@ -2528,7 +2660,14 @@ QString QLocale::decimalPoint() const /*! \since 4.1 - Returns the group separator character of this locale. + Returns the digit-grouping separator for this locale. + + This is a token used to break up long sequences of digits, in the + representation of a number, to make it easier to read. In some locales it + may be empty, indicating that digits should not be broken up into groups in + this way. In others it may be a spacing character. It is (since Qt 6.0) + returned as a string in case some locale needs more than one UTF-16 + code-point to represent its separator. 
\sa decimalPoint(), toString() */ @@ -2540,7 +2679,12 @@ QString QLocale::groupSeparator() const /*! \since 4.1 - Returns the percent character of this locale. + Returns the percent marker of this locale. + + This is a token presumed to be appended to a number to indicate a + percentage. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. \sa toString() */ @@ -2554,6 +2698,13 @@ QString QLocale::percent() const Returns the zero digit character of this locale. + This is a single Unicode character but may be encoded as a surrogate pair, + so is (since Qt 6.0) returned as a string. In most locales, other digits + follow it in Unicode ordering - however, some number systems, notably those + using U+3007 as zero, do not have contiguous digits. Use toString() to + obtain suitable representations of numbers, rather than trying to construct + them from this zero digit. + \sa toString() */ QString QLocale::zeroDigit() const @@ -2564,7 +2715,12 @@ QString QLocale::zeroDigit() const /*! \since 4.1 - Returns the negative sign character of this locale. + Returns the negative sign indicator of this locale. + + This is a token presumed to be used as a prefix to a number to indicate that + it is negative. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. \sa positiveSign(), toString() */ @@ -2576,7 +2732,12 @@ QString QLocale::negativeSign() const /*! \since 4.5 - Returns the positive sign character of this locale. + Returns the positive sign indicator of this locale. + + This is a token presumed to be used as a prefix to a number to indicate that + it is positive. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. 
\sa negativeSign(), toString() */ @@ -2588,8 +2749,13 @@ QString QLocale::positiveSign() const /*! \since 4.1 - Returns the exponential character of this locale, used to separate exponent - from mantissa in some floating-point numeric representations. + Returns the exponent separator for this locale. + + This is a token used to separate mantissa from exponent in some + floating-point numeric representations. It is (since Qt 6.0) returned as a + string because, in some locales, it is not a single character - for example, + it may consist of a multiplication sign and a representation of the "ten to + the power" operator. \sa toString(double, char, int) */ @@ -2598,11 +2764,6 @@ QString QLocale::exponential() const return d->m_data->exponentSeparator(); } -static bool qIsUpper(char c) -{ - return c >= 'A' && c <= 'Z'; -} - /*! \overload Returns a string representing the floating-point number \a f. @@ -2613,21 +2774,17 @@ static bool qIsUpper(char c) The \a format defaults to \c{'g'}. It can be any of the following: \table - \header \li Format \li Meaning - \row \li \c 'e' \li format as [-]9.9e[+|-]999 - \row \li \c 'E' \li format as [-]9.9E[+|-]999 - \row \li \c 'f' \li format as [-]9.9 - \row \li \c 'F' \li same as \c 'f' except for INF and NAN (see below) - \row \li \c 'g' \li use \c 'e' or \c 'f' format, whichever is more concise - \row \li \c 'G' \li use \c 'E' or \c 'F' format, whichever is more concise + \header \li Format \li Meaning \li Meaning of \a precision + \row \li \c 'e' \li format as [-]9.9e[+|-]999 \li number of digits \e after the decimal point + \row \li \c 'E' \li format as [-]9.9E[+|-]999 \li " + \row \li \c 'f' \li format as [-]9.9 \li " + \row \li \c 'F' \li same as \c 'f' except for INF and NAN (see below) \li " + \row \li \c 'g' \li use \c 'e' or \c 'f' format, whichever is more concise \li maximum number of significant digits (trailing zeroes are omitted) + \row \li \c 'G' \li use \c 'E' or \c 'F' format, whichever is more concise \li " 
\endtable - For the \c 'e', \c 'E', \c 'f' and \c 'F' formats, the \a precision - represents the number of digits \e after the decimal point. For the \c 'g' - and \c 'G' formats, the \a precision represents the maximum number of - significant digits (trailing zeroes are omitted). The special \a precision - value QLocale::FloatingPointShortest selects the shortest representation - that, when read as a number, gets back the original floating-point + The special \a precision value QLocale::FloatingPointShortest selects the + shortest representation that, when read as a number, gets back the original floating-point value. Aside from that, any negative \a precision is ignored in favor of the default, 6. @@ -2644,20 +2801,20 @@ static bool qIsUpper(char c) QString QLocale::toString(double f, char format, int precision) const { QLocaleData::DoubleForm form = QLocaleData::DFDecimal; - uint flags = qIsUpper(format) ? QLocaleData::CapitalEorX : 0; + uint flags = isAsciiUpper(format) ? QLocaleData::CapitalEorX : 0; switch (QtMiscUtils::toAsciiLower(format)) { - case 'f': - form = QLocaleData::DFDecimal; - break; - case 'e': - form = QLocaleData::DFExponent; - break; - case 'g': - form = QLocaleData::DFSignificantDigits; - break; - default: - break; + case 'f': + form = QLocaleData::DFDecimal; + break; + case 'e': + form = QLocaleData::DFExponent; + break; + case 'g': + form = QLocaleData::DFSignificantDigits; + break; + default: + break; } if (!(d->m_numberOptions & OmitGroupSeparator)) @@ -2704,8 +2861,19 @@ QString QLocale::toString(double f, char format, int precision) const QLocale QLocale::system() { - QT_PREPEND_NAMESPACE(systemData)(); // Ensure system data is up to date. - static QLocalePrivate locale(systemData(), defaultIndex(), DefaultNumberOptions, 1); + constexpr auto sysData = []() { + // Same return as systemData(), but leave the setup to the actual call to it. 
+#ifdef QT_NO_SYSTEMLOCALE + return locale_data; +#else + return &systemLocaleData; +#endif + }; + Q_CONSTINIT static QLocalePrivate locale(sysData(), -1, DefaultNumberOptions, 1); + // Calling systemData() ensures system data is up to date; we also need it + // to ensure that locale's index stays up to date: + systemData(&locale.m_index); + Q_ASSERT(locale.m_index >= 0 && locale.m_index < locale_data_size); return QLocale(locale); } @@ -2722,15 +2890,14 @@ QLocale QLocale::system() QList<QLocale> locales = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, QLocale::Russia); */ -QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Script script, - QLocale::Territory territory) +QList<QLocale> QLocale::matchingLocales(Language language, Script script, Territory territory) { const QLocaleId filter { language, script, territory }; if (!filter.isValid()) return QList<QLocale>(); - if (language == QLocale::C) - return QList<QLocale>() << QLocale(QLocale::C); + if (language == C) + return QList<QLocale>{QLocale(C)}; QList<QLocale> result; if (filter.matchesAll()) @@ -2747,6 +2914,15 @@ QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Scr ++index; } + // Add current system locale, if it matches + const auto syslocaledata = systemData(); + + if (filter.acceptLanguage(syslocaledata->m_language_id)) { + const QLocaleId id = syslocaledata->id(); + if (filter.acceptScriptTerritory(id)) + result.append(system()); + } + return result; } @@ -2764,7 +2940,7 @@ QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Scr QList<QLocale::Country> QLocale::countriesForLanguage(Language language) { const auto locales = matchingLocales(language, AnyScript, AnyCountry); - QList<QLocale::Country> result; + QList<Country> result; result.reserve(locales.size()); for (const auto &locale : locales) result.append(locale.territory()); @@ -2842,6 +3018,14 @@ QString QLocale::standaloneDayName(int day, 
FormatType type) const // Calendar look-up of month and day names: +// Only used in assertions +[[maybe_unused]] static bool sameLocale(const QLocaleData *locale, const QCalendarLocale &calendar) +{ + return locale->m_language_id == calendar.m_language_id + && locale->m_script_id == calendar.m_script_id + && locale->m_territory_id == calendar.m_territory_id; +} + /*! \internal */ @@ -2950,12 +3134,13 @@ QString QCalendarBackend::monthName(const QLocale &locale, int month, int, QLocale::FormatType format) const { Q_ASSERT(month >= 1 && month <= maximumMonthsInYear()); - return rawMonthName(localeMonthIndexData()[locale.d->m_index], - localeMonthData(), month, format); + const QCalendarLocale &monthly = localeMonthIndexData()[locale.d->m_index]; + Q_ASSERT(sameLocale(locale.d->m_data, monthly)); + return rawMonthName(monthly, localeMonthData(), month, format); } -QString QGregorianCalendar::monthName(const QLocale &locale, int month, int year, - QLocale::FormatType format) const +QString QRomanCalendar::monthName(const QLocale &locale, int month, int year, + QLocale::FormatType format) const { #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data == &systemLocaleData) { @@ -2985,12 +3170,13 @@ QString QCalendarBackend::standaloneMonthName(const QLocale &locale, int month, QLocale::FormatType format) const { Q_ASSERT(month >= 1 && month <= maximumMonthsInYear()); - return rawStandaloneMonthName(localeMonthIndexData()[locale.d->m_index], - localeMonthData(), month, format); + const QCalendarLocale &monthly = localeMonthIndexData()[locale.d->m_index]; + Q_ASSERT(sameLocale(locale.d->m_data, monthly)); + return rawStandaloneMonthName(monthly, localeMonthData(), month, format); } -QString QGregorianCalendar::standaloneMonthName(const QLocale &locale, int month, int year, - QLocale::FormatType format) const +QString QRomanCalendar::standaloneMonthName(const QLocale &locale, int month, int year, + QLocale::FormatType format) const { #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data 
== &systemLocaleData) { @@ -3097,10 +3283,10 @@ Qt::DayOfWeek QLocale::firstDayOfWeek() const QLocale::MeasurementSystem QLocalePrivate::measurementSystem() const { - for (int i = 0; i < ImperialMeasurementSystemsCount; ++i) { - if (ImperialMeasurementSystems[i].languageId == m_data->m_language_id - && ImperialMeasurementSystems[i].territoryId == m_data->m_territory_id) { - return ImperialMeasurementSystems[i].system; + for (const auto &system : ImperialMeasurementSystems) { + if (system.languageId == m_data->m_language_id + && system.territoryId == m_data->m_territory_id) { + return system.system; } } return QLocale::MetricSystem; @@ -3158,34 +3344,34 @@ QLocale::MeasurementSystem QLocale::measurementSystem() const Qt::LayoutDirection QLocale::textDirection() const { switch (script()) { - case QLocale::AdlamScript: - case QLocale::ArabicScript: - case QLocale::AvestanScript: - case QLocale::CypriotScript: - case QLocale::HatranScript: - case QLocale::HebrewScript: - case QLocale::ImperialAramaicScript: - case QLocale::InscriptionalPahlaviScript: - case QLocale::InscriptionalParthianScript: - case QLocale::KharoshthiScript: - case QLocale::LydianScript: - case QLocale::MandaeanScript: - case QLocale::ManichaeanScript: - case QLocale::MendeKikakuiScript: - case QLocale::MeroiticCursiveScript: - case QLocale::MeroiticScript: - case QLocale::NabataeanScript: - case QLocale::NkoScript: - case QLocale::OldHungarianScript: - case QLocale::OldNorthArabianScript: - case QLocale::OldSouthArabianScript: - case QLocale::OrkhonScript: - case QLocale::PalmyreneScript: - case QLocale::PhoenicianScript: - case QLocale::PsalterPahlaviScript: - case QLocale::SamaritanScript: - case QLocale::SyriacScript: - case QLocale::ThaanaScript: + case AdlamScript: + case ArabicScript: + case AvestanScript: + case CypriotScript: + case HatranScript: + case HebrewScript: + case ImperialAramaicScript: + case InscriptionalPahlaviScript: + case InscriptionalParthianScript: + case KharoshthiScript: 
+ case LydianScript: + case MandaeanScript: + case ManichaeanScript: + case MendeKikakuiScript: + case MeroiticCursiveScript: + case MeroiticScript: + case NabataeanScript: + case NkoScript: + case OldHungarianScript: + case OldNorthArabianScript: + case OldSouthArabianScript: + case OrkhonScript: + case PalmyreneScript: + case PhoenicianScript: + case PsalterPahlaviScript: + case SamaritanScript: + case SyriacScript: + case ThaanaScript: return Qt::RightToLeft; default: break; @@ -3318,7 +3504,19 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & day = parts.day; } - int i = 0; + auto appendToResult = [&](int t, int repeat) { + auto data = locale.d->m_data; + if (repeat > 1) + result.append(data->longLongToString(t, -1, 10, repeat, QLocaleData::ZeroPadded)); + else + result.append(data->longLongToString(t)); + }; + + auto formatType = [](int repeat) { + return repeat == 3 ? QLocale::ShortFormat : QLocale::LongFormat; + }; + + qsizetype i = 0; while (i < format.size()) { if (format.at(i).unicode() == '\'') { result.append(qt_readEscapedFormatString(format, &i)); @@ -3326,7 +3524,9 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & } const QChar c = format.at(i); - int repeat = qt_repeatCount(format.mid(i)); + qsizetype rep = qt_repeatCount(format.mid(i)); + Q_ASSERT(rep < std::numeric_limits<int>::max()); + int repeat = int(rep); bool used = false; if (formatDate) { switch (c.unicode()) { @@ -3338,15 +3538,11 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & repeat = 2; switch (repeat) { - case 4: { - const int len = (year < 0) ? 5 : 4; - result.append(locale.d->m_data->longLongToString(year, -1, 10, len, - QLocaleData::ZeroPadded)); + case 4: + appendToResult(year, (year < 0) ? 
5 : 4); break; - } case 2: - result.append(locale.d->m_data->longLongToString(year % 100, -1, 10, 2, - QLocaleData::ZeroPadded)); + appendToResult(year % 100, 2); break; default: repeat = 1; @@ -3358,43 +3554,20 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & case 'M': used = true; repeat = qMin(repeat, 4); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(month)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(month, -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - case 3: - result.append(monthName(locale, month, year, QLocale::ShortFormat)); - break; - case 4: - result.append(monthName(locale, month, year, QLocale::LongFormat)); - break; - } + if (repeat <= 2) + appendToResult(month, repeat); + else + result.append(monthName(locale, month, year, formatType(repeat))); break; case 'd': used = true; repeat = qMin(repeat, 4); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(day)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(day, -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - case 3: - result.append(locale.dayName( - dayOfWeek(date.toJulianDay()), QLocale::ShortFormat)); - break; - case 4: - result.append(locale.dayName( - dayOfWeek(date.toJulianDay()), QLocale::LongFormat)); - break; - } + if (repeat <= 2) + appendToResult(day, repeat); + else + result.append( + locale.dayName(dayOfWeek(date.toJulianDay()), formatType(repeat))); break; default: @@ -3413,58 +3586,25 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & else if (hour == 0) hour = 12; } - - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(hour)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(hour, -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(hour, repeat); break; } case 'H': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - 
result.append(locale.d->m_data->longLongToString(time.hour())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.hour(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(time.hour(), repeat); break; case 'm': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(time.minute())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.minute(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(time.minute(), repeat); break; case 's': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(time.second())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.second(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(time.second(), repeat); break; case 'A': @@ -3485,13 +3625,12 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & case 'z': used = true; - repeat = (repeat >= 3) ? 3 : 1; + repeat = qMin(repeat, 3); // note: the millisecond component is treated like the decimal part of the seconds // so ms == 2 is always printed as "002", but ms == 200 can be either "2" or "200" - result.append(locale.d->m_data->longLongToString(time.msec(), -1, 10, 3, - QLocaleData::ZeroPadded)); - if (repeat == 1) { + appendToResult(time.msec(), 3); + if (repeat != 3) { if (result.endsWith(locale.zeroDigit())) result.chop(1); if (result.endsWith(locale.zeroDigit())) @@ -3499,13 +3638,56 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & } break; - case 't': + case 't': { + enum AbbrType { Long, Offset, Short }; + const auto tzAbbr = [locale](const QDateTime &when, AbbrType type) { +#if QT_CONFIG(timezone) + if (type != Short || locale != QLocale::system()) { + QTimeZone::NameType mode = + type == Short ? QTimeZone::ShortName + : type == Long ? 
QTimeZone::LongName : QTimeZone::OffsetName; + return when.timeRepresentation().displayName(when, mode, locale); + } // else: prefer QDateTime's abbreviation, for backwards-compatibility. +#endif // else, make do with non-localized abbreviation: + if (type != Offset) + return when.timeZoneAbbreviation(); + // For Offset, we can coerce to a UTC-based zone's abbreviation: + return when.toOffsetFromUtc(when.offsetFromUtc()).timeZoneAbbreviation(); + }; used = true; - repeat = 1; - // If we have a QDateTime use the time spec otherwise use the current system tzname - result.append(formatDate ? datetime.timeZoneAbbreviation() - : QDateTime::currentDateTime().timeZoneAbbreviation()); + repeat = qMin(repeat, 4); + // If we don't have a date-time, use the current system time: + const QDateTime when = formatDate ? datetime : QDateTime::currentDateTime(); + QString text; + switch (repeat) { + case 4: + text = tzAbbr(when, Long); + break; + case 3: // ±hh:mm + case 2: // ±hhmm (we'll remove the ':' at the end) + text = tzAbbr(when, Offset); + Q_ASSERT(text.startsWith("UTC"_L1)); // Need to strip this. + // The Qt::UTC case omits the zero offset: + text = (text.size() == 3 + ? u"+00:00"_s + : (text.size() <= 6 + // Whole-hour offsets may lack the zero minutes: + ? QStringView{text}.sliced(3) + ":00"_L1 + : std::move(text).sliced(3))); + if (repeat == 2) + text = text.remove(u':'); + break; + default: + text = tzAbbr(when, Short); + // UTC-offset zones only include minutes if non-zero. 
+ if (text.startsWith("UTC"_L1) && text.size() == 6) + text += ":00"_L1; + break; + } + if (!text.isEmpty()) + result.append(text); break; + } default: break; @@ -3535,10 +3717,10 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, width = 0; int decpt; - int bufSize = 1; + qsizetype bufSize = 1; if (precision == QLocale::FloatingPointShortest) bufSize += std::numeric_limits<double>::max_digits10; - else if (form == DFDecimal && qIsFinite(d)) + else if (form == DFDecimal && qt_is_finite(d)) bufSize += wholePartSpace(qAbs(d)) + precision; else // Add extra digit due to different interpretations of precision. bufSize += qMax(2, precision) + 1; // Must also be big enough for "nan" or "inf" @@ -3560,15 +3742,14 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, if (zero == u"0") { // No need to convert digits. - Q_ASSERT(std::all_of(buf.cbegin(), buf.cbegin() + length, [](char ch) - { return '0' <= ch && ch <= '9'; })); + Q_ASSERT(std::all_of(buf.cbegin(), buf.cbegin() + length, isAsciiDigit)); // That check is taken care of in unicodeForDigits, below. 
} else if (zero.size() == 2 && zero.at(0).isHighSurrogate()) { const char32_t zeroUcs4 = QChar::surrogateToUcs4(zero.at(0), zero.at(1)); QString converted; converted.reserve(2 * digits.size()); - for (int i = 0; i < digits.length(); ++i) { - const char32_t digit = unicodeForDigit(digits.at(i).unicode() - '0', zeroUcs4); + for (QChar ch : std::as_const(digits)) { + const char32_t digit = unicodeForDigit(ch.unicode() - '0', zeroUcs4); Q_ASSERT(QChar::requiresSurrogates(digit)); converted.append(QChar::highSurrogate(digit)); converted.append(QChar::lowSurrogate(digit)); @@ -3579,7 +3760,7 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, Q_ASSERT(!zero.at(0).isSurrogate()); char16_t z = zero.at(0).unicode(); char16_t *const value = reinterpret_cast<char16_t *>(digits.data()); - for (int i = 0; i < digits.length(); ++i) + for (qsizetype i = 0; i < digits.size(); ++i) value[i] = unicodeForDigit(value[i] - '0', z); } @@ -3587,81 +3768,78 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, const bool groupDigits = flags & GroupDigits; const int minExponentDigits = flags & ZeroPadExponent ? 2 : 1; switch (form) { - case DFExponent: - numStr = exponentForm(std::move(digits), decpt, precision, PMDecimalDigits, - mustMarkDecimal, minExponentDigits); - break; - case DFDecimal: - numStr = decimalForm(std::move(digits), decpt, precision, PMDecimalDigits, - mustMarkDecimal, groupDigits); - break; - case DFSignificantDigits: { - PrecisionMode mode = (flags & AddTrailingZeroes) ? 
- PMSignificantDigits : PMChopTrailingZeros; - - /* POSIX specifies sprintf() to follow fprintf(), whose 'g/G' - format says; with P = 6 if precision unspecified else 1 if - precision is 0 else precision; when 'e/E' would have exponent - X, use: - * 'f/F' if P > X >= -4, with precision P-1-X - * 'e/E' otherwise, with precision P-1 - Helpfully, we already have mapped precision < 0 to 6 - except - for F.P.Shortest mode, which is its own story - and those of - our callers with unspecified precision either used 6 or -1 - for it. - */ - bool useDecimal; - if (precision == QLocale::FloatingPointShortest) { - // Find out which representation is shorter. - // Set bias to everything added to exponent form but not - // decimal, minus the converse. - - // Exponent adds separator, sign and digits: - int bias = 2 + minExponentDigits; - // Decimal form may get grouping separators inserted: - if (groupDigits && decpt >= m_grouping_top + m_grouping_least) - bias -= (decpt - m_grouping_top - m_grouping_least) / m_grouping_higher + 1; - // X = decpt - 1 needs two digits if decpt > 10: - if (decpt > 10 && minExponentDigits == 1) - ++bias; - // Assume digitCount < 95, so we can ignore the 3-digit - // exponent case (we'll set useDecimal false anyway). - - const int digitCount = digits.length() / zero.size(); - if (!mustMarkDecimal) { - // Decimal separator is skipped if at end; adjust if - // that happens for only one form: - if (digitCount <= decpt && digitCount > 1) - ++bias; // decimal but not exponent - else if (digitCount == 1 && decpt <= 0) - --bias; // exponent but not decimal - } - // When 0 < decpt <= digitCount, the forms have equal digit - // counts, plus things bias has taken into account; - // otherwise decimal form's digit count is right-padded with - // zeros to decpt, when decpt is positive, otherwise it's - // left-padded with 1 - decpt zeros. - useDecimal = (decpt <= 0 ? 1 - decpt <= bias - : decpt <= digitCount ? 
0 <= bias - : decpt <= digitCount + bias); - } else { - // X == decpt - 1, POSIX's P; -4 <= X < P iff -4 < decpt <= P - Q_ASSERT(precision >= 0); - useDecimal = decpt > -4 && decpt <= (precision ? precision : 1); + case DFExponent: + numStr = exponentForm(std::move(digits), decpt, precision, PMDecimalDigits, + mustMarkDecimal, minExponentDigits); + break; + case DFDecimal: + numStr = decimalForm(std::move(digits), decpt, precision, PMDecimalDigits, + mustMarkDecimal, groupDigits); + break; + case DFSignificantDigits: { + PrecisionMode mode + = (flags & AddTrailingZeroes) ? PMSignificantDigits : PMChopTrailingZeros; + + /* POSIX specifies sprintf() to follow fprintf(), whose 'g/G' format + says; with P = 6 if precision unspecified else 1 if precision is + 0 else precision; when 'e/E' would have exponent X, use: + * 'f/F' if P > X >= -4, with precision P-1-X + * 'e/E' otherwise, with precision P-1 + Helpfully, we already have mapped precision < 0 to 6 - except for + F.P.Shortest mode, which is its own story - and those of our + callers with unspecified precision either used 6 or -1 for it. + */ + bool useDecimal; + if (precision == QLocale::FloatingPointShortest) { + // Find out which representation is shorter. + // Set bias to everything added to exponent form but not + // decimal, minus the converse. + + // Exponent adds separator, sign and digits: + int bias = 2 + minExponentDigits; + // Decimal form may get grouping separators inserted: + if (groupDigits && decpt >= m_grouping_top + m_grouping_least) + bias -= (decpt - m_grouping_least) / m_grouping_higher + 1; + // X = decpt - 1 needs two digits if decpt > 10: + if (decpt > 10 && minExponentDigits == 1) + ++bias; + // Assume digitCount < 95, so we can ignore the 3-digit + // exponent case (we'll set useDecimal false anyway). 
+ + const qsizetype digitCount = digits.size() / zero.size(); + if (!mustMarkDecimal) { + // Decimal separator is skipped if at end; adjust if + // that happens for only one form: + if (digitCount <= decpt && digitCount > 1) + ++bias; // decimal but not exponent + else if (digitCount == 1 && decpt <= 0) + --bias; // exponent but not decimal } - - numStr = useDecimal - ? decimalForm(std::move(digits), decpt, precision, mode, - mustMarkDecimal, groupDigits) - : exponentForm(std::move(digits), decpt, precision, mode, - mustMarkDecimal, minExponentDigits); - break; + // When 0 < decpt <= digitCount, the forms have equal digit + // counts, plus things bias has taken into account; otherwise + // decimal form's digit count is right-padded with zeros to + // decpt, when decpt is positive, otherwise it's left-padded + // with 1 - decpt zeros. + useDecimal = (decpt <= 0 ? 1 - decpt <= bias + : decpt <= digitCount ? 0 <= bias : decpt <= digitCount + bias); + } else { + // X == decpt - 1, POSIX's P; -4 <= X < P iff -4 < decpt <= P + Q_ASSERT(precision >= 0); + useDecimal = decpt > -4 && decpt <= (precision ? precision : 1); } + + numStr = useDecimal + ? decimalForm(std::move(digits), decpt, precision, mode, + mustMarkDecimal, groupDigits) + : exponentForm(std::move(digits), decpt, precision, mode, + mustMarkDecimal, minExponentDigits); + break; + } } // Pad with zeros. LeftAdjusted overrides ZeroPadded. 
if (flags & ZeroPadded && !(flags & LeftAdjusted)) { - for (int i = numStr.length() / zero.length() + prefix.size(); i < width; ++i) + for (qsizetype i = numStr.size() / zero.size() + prefix.size(); i < width; ++i) numStr.prepend(zero); } } @@ -3684,33 +3862,33 @@ QString QLocaleData::decimalForm(QString &&digits, int decpt, int precision, for (; decpt < 0; ++decpt) digits.prepend(zero); } else { - for (int i = digits.length() / digitWidth; i < decpt; ++i) + for (qsizetype i = digits.size() / digitWidth; i < decpt; ++i) digits.append(zero); } switch (pm) { case PMDecimalDigits: - for (int i = digits.length() / digitWidth - decpt; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth - decpt; i < precision; ++i) digits.append(zero); break; case PMSignificantDigits: - for (int i = digits.length() / digitWidth; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision; ++i) digits.append(zero); break; case PMChopTrailingZeros: - Q_ASSERT(digits.length() / digitWidth <= qMax(decpt, 1) || !digits.endsWith(zero)); + Q_ASSERT(digits.size() / digitWidth <= qMax(decpt, 1) || !digits.endsWith(zero)); break; } - if (mustMarkDecimal || decpt < digits.length() / digitWidth) + if (mustMarkDecimal || decpt < digits.size() / digitWidth) digits.insert(decpt * digitWidth, decimalPoint()); if (groupDigits) { const QString group = groupSeparator(); - int i = decpt - m_grouping_least; + qsizetype i = decpt - m_grouping_least; if (i >= m_grouping_top) { digits.insert(i * digitWidth, group); - while ((i -= m_grouping_higher) >= m_grouping_top) + while ((i -= m_grouping_higher) > 0) digits.insert(i * digitWidth, group); } } @@ -3732,19 +3910,19 @@ QString QLocaleData::exponentForm(QString &&digits, int decpt, int precision, switch (pm) { case PMDecimalDigits: - for (int i = digits.length() / digitWidth; i < precision + 1; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision + 1; ++i) digits.append(zero); break; case 
PMSignificantDigits: - for (int i = digits.length() / digitWidth; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision; ++i) digits.append(zero); break; case PMChopTrailingZeros: - Q_ASSERT(digits.length() / digitWidth <= 1 || !digits.endsWith(zero)); + Q_ASSERT(digits.size() / digitWidth <= 1 || !digits.endsWith(zero)); break; } - if (mustMarkDecimal || digits.length() > digitWidth) + if (mustMarkDecimal || digits.size() > digitWidth) digits.insert(digitWidth, decimalPoint()); digits.append(exponentSeparator()); @@ -3793,7 +3971,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int { const QString zero = base == 10 ? zeroDigit() : QStringLiteral("0"); const auto digitWidth = zero.size(); - const auto digitCount = numStr.length() / digitWidth; + const auto digitCount = numStr.size() / digitWidth; const auto basePrefix = [&] () -> QStringView { if (flags & ShowBase) { @@ -3810,15 +3988,15 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int const QString prefix = signPrefix(negative, flags) + basePrefix; // Count how much of width we've used up. 
Each digit counts as one - int usedWidth = digitCount + prefix.size(); + qsizetype usedWidth = digitCount + prefix.size(); if (base == 10 && flags & GroupDigits) { const QString group = groupSeparator(); - int i = digitCount - m_grouping_least; + qsizetype i = digitCount - m_grouping_least; if (i >= m_grouping_top) { numStr.insert(i * digitWidth, group); ++usedWidth; - while ((i -= m_grouping_higher) >= m_grouping_top) { + while ((i -= m_grouping_higher) > 0) { numStr.insert(i * digitWidth, group); ++usedWidth; } @@ -3830,7 +4008,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int if (noPrecision) precision = 1; - for (int i = numStr.length(); i < precision; ++i) { + for (qsizetype i = numStr.size(); i < precision; ++i) { numStr.prepend(zero); usedWidth++; } @@ -3838,7 +4016,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int // LeftAdjusted overrides ZeroPadded; and sprintf() only pads when // precision is not specified in the format string. if (noPrecision && flags & ZeroPadded && !(flags & LeftAdjusted)) { - for (int i = usedWidth; i < width; ++i) + for (qsizetype i = usedWidth; i < width; ++i) numStr.prepend(zero); } @@ -3848,49 +4026,259 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int return result; } +inline QLocaleData::NumericData QLocaleData::numericData(QLocaleData::NumberMode mode) const +{ + NumericData result; + if (this == c()) { + result.isC = true; + return result; + } + result.setZero(zero().viewData(single_character_data)); + result.group = groupDelim().viewData(single_character_data); + // Note: minus, plus and exponent might not actually be single characters. 
+ result.minus = minus().viewData(single_character_data); + result.plus = plus().viewData(single_character_data); + if (mode != IntegerMode) + result.decimal = decimalSeparator().viewData(single_character_data); + if (mode == DoubleScientificMode) { + result.exponent = exponential().viewData(single_character_data); + // exponentCyrillic means "apply the Cyrrilic-specific exponent hack" + result.exponentCyrillic = m_script_id == QLocale::CyrillicScript; + } +#ifndef QT_NO_SYSTEMLOCALE + if (this == &systemLocaleData) { + const auto getString = [sys = systemLocale()](QSystemLocale::QueryType query) { + return sys->query(query).toString(); + }; + if (mode != IntegerMode) { + result.sysDecimal = getString(QSystemLocale::DecimalPoint); + if (result.sysDecimal.size()) + result.decimal = QStringView{result.sysDecimal}; + } + result.sysGroup = getString(QSystemLocale::GroupSeparator); + if (result.sysGroup.size()) + result.group = QStringView{result.sysGroup}; + result.sysMinus = getString(QSystemLocale::NegativeSign); + if (result.sysMinus.size()) + result.minus = QStringView{result.sysMinus}; + result.sysPlus = getString(QSystemLocale::PositiveSign); + if (result.sysPlus.size()) + result.plus = QStringView{result.sysPlus}; + result.setZero(getString(QSystemLocale::ZeroDigit)); + } +#endif + + return result; +} + +namespace { +// A bit like QStringIterator but rather specialized ... and some of the tokens +// it recognizes aren't single Unicode code-points (but it does map each to a +// single character). +class NumericTokenizer +{ + // TODO: use deterministic finite-state-automata. + // TODO QTBUG-95460: CLDR has Inf/NaN representations per locale. 
+ static constexpr char lettersInfNaN[] = "afin"; // Letters of Inf, NaN + static constexpr auto matchInfNaN = QtPrivate::makeCharacterSetMatch<lettersInfNaN>(); + const QStringView m_text; + const QLocaleData::NumericData m_guide; + qsizetype m_index = 0; + const QLocaleData::NumberMode m_mode; + static_assert('+' + 1 == ',' && ',' + 1 == '-' && '-' + 1 == '.'); + char lastMark; // C locale accepts '+' through lastMark. +public: + NumericTokenizer(QStringView text, QLocaleData::NumericData &&guide, + QLocaleData::NumberMode mode) + : m_text(text), m_guide(guide), m_mode(mode), + lastMark(mode == QLocaleData::IntegerMode ? '-' : '.') + { + Q_ASSERT(m_guide.isValid(mode)); + } + bool done() const { return !(m_index < m_text.size()); } + qsizetype index() const { return m_index; } + inline int asBmpDigit(char16_t digit) const; + char nextToken(); +}; + +int NumericTokenizer::asBmpDigit(char16_t digit) const +{ + // If digit *is* a digit, result will be in range 0 through 9; otherwise not. + // Must match qlocale_tools.h's unicodeForDigit() + if (m_guide.zeroUcs != u'\u3007' || digit == m_guide.zeroUcs) + return digit - m_guide.zeroUcs; + + // QTBUG-85409: Suzhou's digits aren't contiguous ! + if (digit == u'\u3020') // U+3020 POSTAL MARK FACE is not a digit. + return -1; + // ... but is followed by digits 1 through 9. + return digit - u'\u3020'; +} + +char NumericTokenizer::nextToken() +{ + // As long as caller stops iterating on a zero return, those don't need to + // keep m_index correctly updated. + Q_ASSERT(!done()); + // Mauls non-letters above 'Z' but we don't care: + const auto asciiLower = [](unsigned char c) { return c >= 'A' ? c | 0x20 : c; }; + const QStringView tail = m_text.sliced(m_index); + const QChar ch = tail.front(); + if (ch == u'\u2212') { + // Special case: match the "proper" minus sign, for all locales. 
+ ++m_index; + return '-'; + } + if (m_guide.isC) { + // "Conversion" to C locale is just a filter: + ++m_index; + if (Q_LIKELY(ch.unicode() < 256)) { + unsigned char ascii = asciiLower(ch.toLatin1()); + if (Q_LIKELY(isAsciiDigit(ascii) || ('+' <= ascii && ascii <= lastMark) + // No caller presently (6.5) passes DoubleStandardMode, + // so !IntegerMode implies scientific, for now. + || (m_mode != QLocaleData::IntegerMode + && matchInfNaN.matches(ascii)) + || (m_mode == QLocaleData::DoubleScientificMode + && ascii == 'e'))) { + return ascii; + } + } + return 0; + } + if (ch.unicode() < 256) { + // Accept the C locale's digits and signs in all locales: + char ascii = asciiLower(ch.toLatin1()); + if (isAsciiDigit(ascii) || ascii == '-' || ascii == '+' + // Also its Inf and NaN letters: + || (m_mode != QLocaleData::IntegerMode && matchInfNaN.matches(ascii))) { + ++m_index; + return ascii; + } + } + + // Other locales may be trickier: + if (tail.startsWith(m_guide.minus)) { + m_index += m_guide.minus.size(); + return '-'; + } + if (tail.startsWith(m_guide.plus)) { + m_index += m_guide.plus.size(); + return '+'; + } + if (!m_guide.group.isEmpty() && tail.startsWith(m_guide.group)) { + m_index += m_guide.group.size(); + return ','; + } + if (m_mode != QLocaleData::IntegerMode && tail.startsWith(m_guide.decimal)) { + m_index += m_guide.decimal.size(); + return '.'; + } + if (m_mode == QLocaleData::DoubleScientificMode + && tail.startsWith(m_guide.exponent, Qt::CaseInsensitive)) { + m_index += m_guide.exponent.size(); + return 'e'; + } + + // Must match qlocale_tools.h's unicodeForDigit() + if (m_guide.zeroLen == 1) { + if (!ch.isSurrogate()) { + const uint gap = asBmpDigit(ch.unicode()); + if (gap < 10u) { + ++m_index; + return '0' + gap; + } + } else if (ch.isHighSurrogate() && tail.size() > 1 && tail.at(1).isLowSurrogate()) { + return 0; + } + } else if (ch.isHighSurrogate()) { + // None of the corner cases below matches a surrogate, so (update + // already and) return 
early if we don't have a digit. + if (tail.size() > 1) { + QChar low = tail.at(1); + if (low.isLowSurrogate()) { + m_index += 2; + const uint gap = QChar::surrogateToUcs4(ch, low) - m_guide.zeroUcs; + return gap < 10u ? '0' + gap : 0; + } + } + return 0; + } + + // All cases where tail starts with properly-matched surrogate pair + // have been handled by this point. + Q_ASSERT(!(ch.isHighSurrogate() && tail.size() > 1 && tail.at(1).isLowSurrogate())); + + // Weird corner cases follow (code above assumes these match no surrogates). + + // Some locales use a non-breaking space (U+00A0) or its thin version + // (U+202f) for grouping. These look like spaces, so people (and thus some + // of our tests) use a regular space instead and complain if it doesn't + // work. + // Should this be extended generally to any case where group is a space ? + if ((m_guide.group == u"\u00a0" || m_guide.group == u"\u202f") && tail.startsWith(u' ')) { + ++m_index; + return ','; + } + + // Cyrillic has its own E, used by Ukrainian as exponent; but others + // writing Cyrillic may well use that; and Ukrainians might well use E. + // All other Cyrillic locales (officially) use plain ASCII E. + if (m_guide.exponentCyrillic // Only true in scientific float mode. + && (tail.startsWith(u"\u0415", Qt::CaseInsensitive) + || tail.startsWith(u"E", Qt::CaseInsensitive))) { + ++m_index; + return 'e'; + } + + return 0; +} +} // namespace with no name + /* - Converts a number in locale to its representation in the C locale. - Only has to guarantee that a string that is a correct representation of - a number will be converted. If junk is passed in, junk will be passed - out and the error will be detected during the actual conversion to a - number. We can't detect junk here, since we don't even know the base - of the number. + Converts a number in locale representation to the C locale equivalent. + + Only has to guarantee that a string that is a correct representation of a + number will be converted. 
Checks signs, separators and digits appear in all + the places they should, and nowhere else. + + Returns true precisely if the number appears to be well-formed, modulo + things a parser for C Locale strings (without digit-grouping separators; + they're stripped) will catch. When it returns true, it records (and + '\0'-terminates) the C locale representation in *result. + + Note: only QString integer-parsing methods have a base parameter (hence need + to cope with letters as possible digits); but these are now all routed via + byteArrayToU?LongLong(), so no longer come via here. The QLocale + number-parsers only work in decimal, so don't have to cope with any digits + other than 0 through 9. */ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_options, - CharBuff *result) const + NumberMode mode, CharBuff *result) const { s = s.trimmed(); if (s.size() < 1) return false; + NumericTokenizer tokens(s, numericData(mode), mode); + + // Digit-grouping details (all modes): + qsizetype digitsInGroup = 0; + qsizetype last_separator_idx = -1; + qsizetype start_of_digits_idx = -1; + + // Floating-point details (non-integer modes): + qsizetype decpt_idx = -1; + qsizetype exponent_idx = -1; + + char last = '\0'; + while (!tokens.done()) { + qsizetype idx = tokens.index(); // before nextToken() advances + char out = tokens.nextToken(); + if (out == 0) + return false; + Q_ASSERT(tokens.index() > idx); // it always *should* advance (except on zero return) - const QChar *uc = s.data(); - auto length = s.size(); - decltype(length) idx = 0; - - int digitsInGroup = 0; - int decpt_idx = -1; - int last_separator_idx = -1; - int start_of_digits_idx = -1; - int exponent_idx = -1; - - while (idx < length) { - const QStringView in = QStringView(uc + idx, uc[idx].isHighSurrogate() ? 2 : 1); - - char out = numericToCLocale(in); - if (out == 0) { - const QChar simple = in.size() == 1 ? 
in.front() : QChar::Null; - if (in == listSeparator()) - out = ';'; - else if (in == percentSign()) - out = '%'; - // for handling base-x numbers - else if (simple.toLatin1() >= 'A' && simple.toLatin1() <= 'Z') - out = simple.toLower().toLatin1(); - else if (simple.toLatin1() >= 'a' && simple.toLatin1() <= 'z') - out = simple.toLatin1(); - else - break; - } else if (out == '.') { + if (out == '.') { // Fail if more than one decimal point or point after e if (decpt_idx != -1 || exponent_idx != -1) return false; @@ -3899,26 +4287,26 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o exponent_idx = idx; } - if (number_options & QLocale::RejectLeadingZeroInExponent) { - if (exponent_idx != -1 && out == '0' && idx < length - 1) { - // After the exponent there can only be '+', '-' or digits. - // If we find a '0' directly after some non-digit, then that is a leading zero. - if (result->last() < '0' || result->last() > '9') - return false; - } + if (number_options.testFlag(QLocale::RejectLeadingZeroInExponent) + && exponent_idx != -1 && out == '0') { + // After the exponent there can only be '+', '-' or digits. + // If we find a '0' directly after some non-digit, then that is a + // leading zero, acceptable only if it is the whole exponent. + if (!tokens.done() && !isAsciiDigit(last)) + return false; } - if (number_options & QLocale::RejectTrailingZeroesAfterDot) { - // If we've seen a decimal point and the last character after the exponent is 0, then - // that is a trailing zero. 
- if (decpt_idx >= 0 && idx == exponent_idx && result->last() == '0') - return false; + if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot) && decpt_idx >= 0) { + // In a fractional part, a 0 just before the exponent is trailing: + if (idx == exponent_idx && last == '0') + return false; } - if (!(number_options & QLocale::RejectGroupSeparator)) { - if (start_of_digits_idx == -1 && out >= '0' && out <= '9') { - start_of_digits_idx = idx; - digitsInGroup++; + if (!number_options.testFlag(QLocale::RejectGroupSeparator)) { + if (isAsciiDigit(out)) { + if (start_of_digits_idx == -1) + start_of_digits_idx = idx; + ++digitsInGroup; } else if (out == ',') { // Don't allow group chars after the decimal point or exponent if (decpt_idx != -1 || exponent_idx != -1) @@ -3927,7 +4315,7 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o if (last_separator_idx == -1) { // Check distance from the beginning of the digits: if (start_of_digits_idx == -1 || m_grouping_top > digitsInGroup - || digitsInGroup >= m_grouping_higher + m_grouping_top) { + || digitsInGroup >= m_grouping_least + m_grouping_top) { return false; } } else { @@ -3938,65 +4326,57 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o last_separator_idx = idx; digitsInGroup = 0; - - // don't add the group separator - idx += in.size(); - continue; - } else if (out == '.' || idx == exponent_idx) { - // Were there enough digits since the last separator? - if (last_separator_idx != -1 && digitsInGroup != m_grouping_least) + } else if (mode != IntegerMode && (out == '.' || idx == exponent_idx) + && last_separator_idx != -1) { + // Were there enough digits since the last group separator? + if (digitsInGroup != m_grouping_least) return false; - // If we saw no separator, should we fail if - // digitsInGroup > m_grouping_top + m_grouping_least ? 
// stop processing separators last_separator_idx = -1; - } else if (out >= '0' && out <= '9') { - digitsInGroup++; } + } else if (out == ',') { + return false; } - result->append(out); - idx += in.size(); + last = out; + if (out != ',') // Leave group separators out of the result. + result->append(out); } - if (!(number_options & QLocale::RejectGroupSeparator)) { - // group separator post-processing - // did we end in a separator? - if (last_separator_idx + 1 == idx) - return false; - // Were there enough digits since the last separator? - if (last_separator_idx != -1 && digitsInGroup != m_grouping_least) + if (!number_options.testFlag(QLocale::RejectGroupSeparator) && last_separator_idx != -1) { + // Were there enough digits since the last group separator? + if (digitsInGroup != m_grouping_least) return false; - // If we saw no separator, and no decimal point, should we fail if - // digitsInGroup > m_grouping_top + m_grouping_least ? } - if (number_options & QLocale::RejectTrailingZeroesAfterDot) { - // In decimal form, the last character can be a trailing zero if we've seen a decpt. 
- if (decpt_idx != -1 && exponent_idx == -1 && result->last() == '0') + if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot) + && decpt_idx != -1 && exponent_idx == -1) { + // In the fractional part, a final zero is trailing: + if (last == '0') return false; } result->append('\0'); - return idx == length; + return true; } -bool QLocaleData::validateChars(QStringView str, NumberMode numMode, QByteArray *buff, - int decDigits, QLocale::NumberOptions number_options) const +ParsingResult +QLocaleData::validateChars(QStringView str, NumberMode numMode, int decDigits, + QLocale::NumberOptions number_options) const { - buff->clear(); - buff->reserve(str.length()); + ParsingResult result; + result.buff.reserve(str.size()); enum { Whole, Fractional, Exponent } state = Whole; const bool scientific = numMode == DoubleScientificMode; - char last = 0; + NumericTokenizer tokens(str, numericData(numMode), numMode); + char last = '\0'; - for (qsizetype i = 0; i < str.size();) { - const QStringView in = str.mid(i, str.at(i).isHighSurrogate() ? 2 : 1); - char c = numericToCLocale(in); + while (!tokens.done()) { + char c = tokens.nextToken(); - if (c >= '0' && c <= '9') { + if (isAsciiDigit(c)) { switch (state) { case Whole: // Nothing special to do (unless we want to check grouping sizes). @@ -4004,185 +4384,149 @@ bool QLocaleData::validateChars(QStringView str, NumberMode numMode, QByteArray case Fractional: // If a double has too many digits in its fractional part it is Invalid. if (decDigits-- == 0) - return false; + return {}; break; case Exponent: - if (last < '0' || last > '9') { + if (!isAsciiDigit(last)) { // This is the first digit in the exponent (there may have beena '+' // or '-' in before). If it's a zero, the exponent is zero-padded. if (c == '0' && (number_options & QLocale::RejectLeadingZeroInExponent)) - return false; + return {}; } break; } } else { switch (c) { - case '.': - // If an integer has a decimal point, it is Invalid. 
- // A double can only have one, at the end of its whole-number part. - if (numMode == IntegerMode || state != Whole) - return false; - // Even when decDigits is 0, we do allow the decimal point to be - // present - just as long as no digits follow it. - - state = Fractional; - break; + case '.': + // If an integer has a decimal point, it is Invalid. + // A double can only have one, at the end of its whole-number part. + if (numMode == IntegerMode || state != Whole) + return {}; + // Even when decDigits is 0, we do allow the decimal point to be + // present - just as long as no digits follow it. + + state = Fractional; + break; - case '+': - case '-': - // A sign can only appear at the start or after the e of scientific: - if (i != 0 && !(scientific && last == 'e')) - return false; - break; + case '+': + case '-': + // A sign can only appear at the start or after the e of scientific: + if (last != '\0' && !(scientific && last == 'e')) + return {}; + break; - case ',': - // Grouping is only allowed after a digit in the whole-number portion: - if ((number_options & QLocale::RejectGroupSeparator) || state != Whole - || last < '0' || last > '9') { - return false; - } - // We could check grouping sizes are correct, but fixup()s are - // probably better off correcting any misplacement instead. - break; + case ',': + // Grouping is only allowed after a digit in the whole-number portion: + if ((number_options & QLocale::RejectGroupSeparator) || state != Whole + || !isAsciiDigit(last)) { + return {}; + } + // We could check grouping sizes are correct, but fixup()s are + // probably better off correcting any misplacement instead. 
+ break; - case 'e': - // Only one e is allowed and only in scientific: - if (!scientific || state == Exponent) - return false; - state = Exponent; - break; + case 'e': + // Only one e is allowed and only in scientific: + if (!scientific || state == Exponent) + return {}; + state = Exponent; + break; - default: - // Nothing else can validly appear in a number. - // In fact, numericToCLocale() must have returned 0. If anyone changes - // it to return something else, we probably need to handle it here ! - Q_ASSERT(!c); - return false; + default: + // Nothing else can validly appear in a number. + // NumericTokenizer allows letters of "inf" and "nan", but + // validators don't accept those values. + // For anything else, tokens.nextToken() must have returned 0. + Q_ASSERT(!c || c == 'a' || c == 'f' || c == 'i' || c == 'n'); + return {}; } } last = c; if (c != ',') // Skip grouping - buff->append(c); - i += in.size(); + result.buff.append(c); } - return true; + result.state = ParsingResult::Acceptable; + + // Intermediate if it ends with any character that requires a digit after + // it to be valid e.g. 
group separator, sign, or exponent + if (last == ',' || last == '-' || last == '+' || last == 'e') + result.state = ParsingResult::Intermediate; + + return result; } double QLocaleData::stringToDouble(QStringView str, bool *ok, QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { + if (!numberToCLocale(str, number_options, DoubleScientificMode, &buff)) { if (ok != nullptr) *ok = false; return 0.0; } - int processed = 0; - bool nonNullOk = false; - double d = qt_asciiToDouble(buff.constData(), buff.length() - 1, nonNullOk, processed); + auto r = qt_asciiToDouble(buff.constData(), buff.size() - 1); if (ok != nullptr) - *ok = nonNullOk; - return d; + *ok = r.ok(); + return r.result; } -qlonglong QLocaleData::stringToLongLong(QStringView str, int base, bool *ok, - QLocale::NumberOptions number_options) const +QSimpleParsedNumber<qint64> +QLocaleData::stringToLongLong(QStringView str, int base, + QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (!numberToCLocale(str, number_options, IntegerMode, &buff)) + return {}; - return bytearrayToLongLong(QByteArrayView(buff.constData(), buff.size()), base, ok); + return bytearrayToLongLong(QByteArrayView(buff), base); } -qulonglong QLocaleData::stringToUnsLongLong(QStringView str, int base, bool *ok, - QLocale::NumberOptions number_options) const +QSimpleParsedNumber<quint64> +QLocaleData::stringToUnsLongLong(QStringView str, int base, + QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (!numberToCLocale(str, number_options, IntegerMode, &buff)) + return {}; - return bytearrayToUnsLongLong(QByteArrayView(buff.constData(), buff.size()), base, ok); + return bytearrayToUnsLongLong(QByteArrayView(buff), base); } -qlonglong 
QLocaleData::bytearrayToLongLong(QByteArrayView num, int base, bool *ok) +static bool checkParsed(QByteArrayView num, qsizetype used) { - if (num.isEmpty() || num.at(0) == '\0') { - if (ok != nullptr) - *ok = false; - return 0; - } - - bool _ok; - const char *endptr; - const qlonglong l = qstrntoll(num.data(), num.size(), &endptr, base, &_ok); - - if (!_ok || endptr == num.data()) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (used <= 0) + return false; - const char *const stop = num.end(); - if (endptr < stop && *endptr != '\0') { - while (endptr < stop && ascii_isspace(*endptr)) - ++endptr; + const qsizetype len = num.size(); + if (used < len && num[used] != '\0') { + while (used < len && ascii_isspace(num[used])) + ++used; } - if (endptr < stop && *endptr != '\0') { + if (used < len && num[used] != '\0') // we stopped at a non-digit character after converting some digits - if (ok != nullptr) - *ok = false; - return 0; - } + return false; - if (ok != nullptr) - *ok = true; - return l; + return true; } -qulonglong QLocaleData::bytearrayToUnsLongLong(QByteArrayView num, int base, bool *ok) +QSimpleParsedNumber<qint64> QLocaleData::bytearrayToLongLong(QByteArrayView num, int base) { - if (num.isEmpty() || num.at(0) == '\0') { - if (ok != nullptr) - *ok = false; - return 0; - } - - bool _ok; - const char *endptr; - const qulonglong l = qstrntoull(num.data(), num.size(), &endptr, base, &_ok); - - if (!_ok || endptr == num.data()) { - if (ok != nullptr) - *ok = false; - return 0; - } - - const char *const stop = num.end(); - if (endptr < stop && *endptr != '\0') { - while (endptr < stop && ascii_isspace(*endptr)) - ++endptr; - } - - if (endptr < stop && *endptr != '\0') { - if (ok != nullptr) - *ok = false; - return 0; - } + auto r = qstrntoll(num.data(), num.size(), base); + if (!checkParsed(num, r.used)) + return {}; + return r; +} - if (ok != nullptr) - *ok = true; - return l; +QSimpleParsedNumber<quint64> 
QLocaleData::bytearrayToUnsLongLong(QByteArrayView num, int base) +{ + auto r = qstrntoull(num.data(), num.size(), base); + if (!checkParsed(num, r.used)) + return {}; + return r; } /*! @@ -4201,7 +4545,7 @@ qulonglong QLocaleData::bytearrayToUnsLongLong(QByteArrayView num, int base, boo \since 4.8 Returns a currency symbol according to the \a format. */ -QString QLocale::currencySymbol(QLocale::CurrencySymbolFormat format) const +QString QLocale::currencySymbol(CurrencySymbolFormat format) const { #ifndef QT_NO_SYSTEMLOCALE if (d->m_data == &systemLocaleData) { @@ -4218,7 +4562,7 @@ QString QLocale::currencySymbol(QLocale::CurrencySymbolFormat format) const case CurrencyIsoCode: { const char *code = d->m_data->m_currency_iso_code; if (auto len = qstrnlen(code, 3)) - return QString::fromLatin1(code, int(len)); + return QString::fromLatin1(code, qsizetype(len)); break; } } @@ -4252,7 +4596,7 @@ QString QLocale::toCurrencyString(qlonglong value, const QString &symbol) const QString str = toString(value); QString sym = symbol.isNull() ? currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); + sym = currencySymbol(CurrencyIsoCode); return range.viewData(currency_format_data).arg(str, sym); } @@ -4274,7 +4618,7 @@ QString QLocale::toCurrencyString(qulonglong value, const QString &symbol) const QString str = toString(value); QString sym = symbol.isNull() ? currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); + sym = currencySymbol(CurrencyIsoCode); return d->m_data->currencyFormat().getData(currency_format_data).arg(str, sym); } @@ -4307,7 +4651,7 @@ QString QLocale::toCurrencyString(double value, const QString &symbol, int preci QString str = toString(value, 'f', precision == -1 ? d->m_data->m_currency_digits : precision); QString sym = symbol.isNull() ? 
currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); + sym = currencySymbol(CurrencyIsoCode); return range.viewData(currency_format_data).arg(str, sym); } @@ -4381,46 +4725,78 @@ QString QLocale::formattedDataSize(qint64 bytes, int precision, DataSizeFormats /*! \since 4.8 + \brief List of locale names for use in selecting translations - Returns an ordered list of locale names for translation purposes in - preference order (like "en-Latn-US", "en-US", "en"). + Each entry in the returned list is the name of a locale suitable to the + user's preferences for what to translate the UI into. Where a name in the + list is composed of several tags, they are joined as indicated by \a + separator. Prior to Qt 6.7 a dash was used as separator. - The return value represents locale names that the user expects to see the - UI translation in. + For example, using the default separator QLocale::TagSeparator::Dash, if the + user has configured their system to use English as used in the USA, the list + would be "en-Latn-US", "en-US", "en". The order of entries is the order in + which to check for translations; earlier items in the list are to be + preferred over later ones. If your translation files use underscores, rather + than dashes, to separate locale tags, pass QLocale::TagSeparator::Underscore + as \a separator. Most likely you do not need to use this function directly, but just pass the QLocale object to the QTranslator::load() function. - Earlier items in the list are to be preferred over later ones. 
- \sa QTranslator, bcp47Name() */ -QStringList QLocale::uiLanguages() const +QStringList QLocale::uiLanguages(TagSeparator separator) const { + const char sep = char(separator); QStringList uiLanguages; - QList<QLocale> locales; + if (uchar(sep) > 0x7f) { + badSeparatorWarning("uiLanguages", sep); + return uiLanguages; + } + QList<QLocaleId> localeIds; #ifdef QT_NO_SYSTEMLOCALE constexpr bool isSystem = false; #else const bool isSystem = d->m_data == &systemLocaleData; if (isSystem) { uiLanguages = systemLocale()->query(QSystemLocale::UILanguages).toStringList(); - // ... but we need to include likely-adjusted forms of each of those, too: + if (separator != TagSeparator::Dash) { + // Map from default separator, Dash, used by backends: + const QChar join = QLatin1Char(sep); + uiLanguages = uiLanguages.replaceInStrings(u"-", QStringView(&join, 1)); + } + // ... but we need to include likely-adjusted forms of each of those, too. + // For now, collect up locale Ids representing the entries, for later processing: for (const auto &entry : std::as_const(uiLanguages)) - locales.append(QLocale(entry)); - if (locales.isEmpty()) - locales.append(systemLocale()->fallbackLocale()); + localeIds.append(QLocaleId::fromName(entry)); + if (localeIds.isEmpty()) + localeIds.append(systemLocale()->fallbackLocale().d->m_data->id()); + // If the system locale (isn't C and) didn't include itself in the list, + // or as fallback, presume to know better than it and put its name + // first. (Known issue, QTBUG-104930, on some macOS versions when in + // locale en_DE.) Our translation system might have a translation for a + // locale the platform doesn't believe in. + const QString name = bcp47Name(separator); + if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) { + // That uses contains(name) as a cheap pre-test, but there may be an + // entry that matches this on purging likely subtags. 
+ const QLocaleId mine = d->m_data->id().withLikelySubtagsRemoved(); + const auto isMine = [mine](const QString &entry) { + return QLocaleId::fromName(entry).withLikelySubtagsRemoved() == mine; + }; + if (std::none_of(uiLanguages.constBegin(), uiLanguages.constEnd(), isMine)) { + localeIds.prepend(d->m_data->id()); + uiLanguages.prepend(name); + } + } } else #endif { - locales.append(*this); + localeIds.append(d->m_data->id()); } - for (int i = locales.size(); i-- > 0; ) { - const QLocale &locale = locales.at(i); - const auto data = locale.d->m_data; - QLocaleId id = data->id(); - - int j; + for (qsizetype i = localeIds.size(); i-- > 0; ) { + QLocaleId id = localeIds.at(i); + qsizetype j; QByteArray prior; if (isSystem && i < uiLanguages.size()) { // Adding likely-adjusted forms to system locale's list. @@ -4430,35 +4806,49 @@ QStringList QLocale::uiLanguages() const j = i + 1; } else if (id.language_id == C) { // Attempt no likely sub-tag amendments to C: - uiLanguages.append(locale.name()); + uiLanguages.append(QString::fromLatin1(id.name(sep))); continue; } else { // Plain locale or empty system uiLanguages; just append. - const QString name = locale.bcp47Name(); - uiLanguages.append(name); - prior = name.toLatin1(); + prior = id.name(sep); + uiLanguages.append(QString::fromLatin1(prior)); j = uiLanguages.size(); } const QLocaleId max = id.withLikelySubtagsAdded(); const QLocaleId min = max.withLikelySubtagsRemoved(); - id.script_id = 0; // For re-use as script-less variant. // Include minimal version (last) unless it's what our locale is derived from: - if (min.name() != prior) - uiLanguages.insert(j, QString::fromLatin1(min.name())); + if (auto name = min.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); else if (!isSystem) --j; // bcp47Name() matches min(): put more specific forms *before* it. 
- // Include scriptless version if likely-equivalent and distinct: - if (data->m_script_id && id != min && id.name() != prior - && id.withLikelySubtagsAdded() == max) { - uiLanguages.insert(j, QString::fromLatin1(id.name())); + if (id.script_id) { + // Include scriptless version if likely-equivalent and distinct: + id.script_id = 0; + if (id != min && id.withLikelySubtagsAdded() == max) { + if (auto name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } + } + + if (!id.territory_id) { + Q_ASSERT(!min.territory_id); + Q_ASSERT(!id.script_id); // because we just cleared it. + // Include version with territory if it is likely-equivalent and distinct: + id.territory_id = max.territory_id; + if (id != max && id.withLikelySubtagsAdded() == max) { + if (auto name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } } // Include version with all likely sub-tags (first) if distinct from the rest: - if (max != min && max != id && max.name() != prior) - uiLanguages.insert(j, QString::fromLatin1(max.name())); + if (max != min && max != id) { + if (auto name = max.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } } return uiLanguages; } @@ -4490,7 +4880,7 @@ QLocale QLocale::collation() const \since 4.8 Returns a native name of the language for the locale. For example - "Schwiizertüütsch" for Swiss-German locale. + "Schweizer Hochdeutsch" for the Swiss-German locale. \sa nativeTerritoryName(), languageToString() */ |