diff options
Diffstat (limited to 'src/corelib/text/qlocale.cpp')
-rw-r--r-- | src/corelib/text/qlocale.cpp | 2366 |
1 files changed, 1450 insertions, 916 deletions
diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index d5cde8a69d..ab95b300eb 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -1,45 +1,13 @@ -/**************************************************************************** -** -** Copyright (C) 2021 The Qt Company Ltd. -** Copyright (C) 2019 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2022 The Qt Company Ltd. +// Copyright (C) 2021 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qglobal.h" +#if (defined(QT_STATIC) || defined(QT_BOOTSTRAPPED)) && defined(Q_CC_GNU_ONLY) && Q_CC_GNU >= 1000 +QT_WARNING_DISABLE_GCC("-Wfree-nonheap-object") // false positive tracking +#endif + #if defined(Q_OS_MACOS) # include "private/qcore_mac_p.h" # include <CoreFoundation/CoreFoundation.h> @@ -54,6 +22,7 @@ #include "qlocale.h" #include "qlocale_p.h" #include "qlocale_tools_p.h" +#include <private/qtools_p.h> #if QT_CONFIG(datetimeparser) #include "private/qdatetimeparser_p.h" #endif @@ -61,8 +30,13 @@ #include "qdatetime.h" #include "qstringlist.h" #include "qvariant.h" +#include "qvarlengtharray.h" #include "qstringbuilder.h" +#if QT_CONFIG(timezone) +# include "qtimezone.h" +#endif #include "private/qnumeric_p.h" +#include "private/qtools_p.h" #include <cmath> #ifndef QT_NO_SYSTEMLOCALE # include "qmutex.h" @@ -76,20 +50,40 @@ #include "private/qgregoriancalendar_p.h" #include "qcalendar.h" +#include <q20iterator.h> + QT_BEGIN_NAMESPACE +constexpr int QLocale::DefaultTwoDigitBaseYear; + +QT_IMPL_METATYPE_EXTERN_TAGGED(QList<Qt::DayOfWeek>, QList_Qt__DayOfWeek) #ifndef QT_NO_SYSTEMLOCALE -static QSystemLocale *_systemLocale = nullptr; -class QSystemLocaleSingleton: public QSystemLocale -{ -public: - QSystemLocaleSingleton() : QSystemLocale(true) {} -}; +QT_IMPL_METATYPE_EXTERN_TAGGED(QSystemLocale::CurrencyToStringArgument, + QSystemLocale__CurrencyToStringArgument) +#endif -Q_GLOBAL_STATIC(QSystemLocaleSingleton, QSystemLocale_globalSystemLocale) 
-static QLocaleData systemLocaleData; +using namespace Qt::StringLiterals; +using namespace QtMiscUtils; + +#ifndef QT_NO_SYSTEMLOCALE +Q_CONSTINIT static QSystemLocale *_systemLocale = nullptr; +Q_CONSTINIT static QLocaleData systemLocaleData = {}; #endif +static_assert(ascii_isspace(' ')); +static_assert(ascii_isspace('\t')); +static_assert(ascii_isspace('\n')); +static_assert(ascii_isspace('\v')); +static_assert(ascii_isspace('\f')); +static_assert(ascii_isspace('\r')); +static_assert(!ascii_isspace('\0')); +static_assert(!ascii_isspace('\a')); +static_assert(!ascii_isspace('a')); +static_assert(!ascii_isspace('\177')); +static_assert(!ascii_isspace(uchar('\200'))); +static_assert(!ascii_isspace(uchar('\xA0'))); // NBSP (is a space but Latin 1, not ASCII) +static_assert(!ascii_isspace(uchar('\377'))); + /****************************************************************************** ** Helpers for accessing Qt locale database */ @@ -98,22 +92,58 @@ QT_BEGIN_INCLUDE_NAMESPACE #include "qlocale_data_p.h" QT_END_INCLUDE_NAMESPACE -QLocale::Language QLocalePrivate::codeToLanguage(QStringView code) noexcept +QLocale::Language QLocalePrivate::codeToLanguage(QStringView code, + QLocale::LanguageCodeTypes codeTypes) noexcept { const auto len = code.size(); if (len != 2 && len != 3) return QLocale::AnyLanguage; - char16_t uc1 = code[0].toLower().unicode(); - char16_t uc2 = code[1].toLower().unicode(); - char16_t uc3 = len > 2 ? code[2].toLower().unicode() : 0; - const unsigned char *c = language_code_list; - for (; *c != 0; c += 3) { - if (uc1 == c[0] && uc2 == c[1] && uc3 == c[2]) - return QLocale::Language((c - language_code_list)/3); + const char16_t uc1 = code[0].toLower().unicode(); + const char16_t uc2 = code[1].toLower().unicode(); + const char16_t uc3 = len > 2 ? code[2].toLower().unicode() : 0; + + // All language codes are ASCII. 
+ if (uc1 > 0x7F || uc2 > 0x7F || uc3 > 0x7F) + return QLocale::AnyLanguage; + + const AlphaCode codeBuf = { char(uc1), char(uc2), char(uc3) }; + + auto searchCode = [codeBuf](auto f) { + return std::find_if(languageCodeList.begin(), languageCodeList.end(), + [=](LanguageCodeEntry i) { return f(i) == codeBuf; }); + }; + + if (codeTypes.testFlag(QLocale::ISO639Part1) && uc3 == 0) { + auto i = searchCode([](LanguageCodeEntry i) { return i.part1; }); + if (i != languageCodeList.end()) + return QLocale::Language(std::distance(languageCodeList.begin(), i)); + } + + if (uc3 != 0) { + if (codeTypes.testFlag(QLocale::ISO639Part2B)) { + auto i = searchCode([](LanguageCodeEntry i) { return i.part2B; }); + if (i != languageCodeList.end()) + return QLocale::Language(std::distance(languageCodeList.begin(), i)); + } + + // Optimization: Part 2T code if present is always the same as Part 3 code. + // This is asserted in iso639_3.LanguageCodeData. + if (codeTypes.testFlag(QLocale::ISO639Part2T) + && !codeTypes.testFlag(QLocale::ISO639Part3)) { + auto i = searchCode([](LanguageCodeEntry i) { return i.part2T; }); + if (i != languageCodeList.end()) + return QLocale::Language(std::distance(languageCodeList.begin(), i)); + } + + if (codeTypes.testFlag(QLocale::ISO639Part3)) { + auto i = searchCode([](LanguageCodeEntry i) { return i.part3; }); + if (i != languageCodeList.end()) + return QLocale::Language(std::distance(languageCodeList.begin(), i)); + } } - if (uc3 == 0) { + if (codeTypes.testFlag(QLocale::LegacyLanguageCode) && uc3 == 0) { // legacy codes if (uc1 == 'n' && uc2 == 'o') // no -> nb return QLocale::NorwegianBokmal; @@ -147,7 +177,7 @@ QLocale::Script QLocalePrivate::codeToScript(QStringView code) noexcept unsigned char c3 = code[3].toLower().toLatin1(); const unsigned char *c = script_code_list; - for (int i = 0; i < QLocale::LastScript; ++i, c += 4) { + for (qsizetype i = 0; i < QLocale::LastScript; ++i, c += 4) { if (c0 == c[0] && c1 == c[1] && c2 == c[2] && c3 == c[3]) 
return QLocale::Script(i); } @@ -173,32 +203,46 @@ QLocale::Territory QLocalePrivate::codeToTerritory(QStringView code) noexcept return QLocale::AnyTerritory; } -QLatin1String QLocalePrivate::languageToCode(QLocale::Language language) +std::array<char, 4> QLocalePrivate::languageToCode(QLocale::Language language, + QLocale::LanguageCodeTypes codeTypes) { if (language == QLocale::AnyLanguage || language > QLocale::LastLanguage) - return QLatin1String(); + return {}; if (language == QLocale::C) - return QLatin1String("C"); + return {'C'}; - const unsigned char *c = language_code_list + 3 * language; - return QLatin1String(reinterpret_cast<const char*>(c), c[2] == 0 ? 2 : 3); + const LanguageCodeEntry &i = languageCodeList[language]; + + if (codeTypes.testFlag(QLocale::ISO639Part1) && i.part1.isValid()) + return i.part1.decode(); + + if (codeTypes.testFlag(QLocale::ISO639Part2B) && i.part2B.isValid()) + return i.part2B.decode(); + + if (codeTypes.testFlag(QLocale::ISO639Part2T) && i.part2T.isValid()) + return i.part2T.decode(); + + if (codeTypes.testFlag(QLocale::ISO639Part3)) + return i.part3.decode(); + + return {}; } -QLatin1String QLocalePrivate::scriptToCode(QLocale::Script script) +QLatin1StringView QLocalePrivate::scriptToCode(QLocale::Script script) { if (script == QLocale::AnyScript || script > QLocale::LastScript) - return QLatin1String(); + return {}; const unsigned char *c = script_code_list + 4 * script; - return QLatin1String(reinterpret_cast<const char *>(c), 4); + return {reinterpret_cast<const char *>(c), 4}; } -QLatin1String QLocalePrivate::territoryToCode(QLocale::Territory territory) +QLatin1StringView QLocalePrivate::territoryToCode(QLocale::Territory territory) { if (territory == QLocale::AnyTerritory || territory > QLocale::LastTerritory) - return QLatin1String(); + return {}; const unsigned char *c = territory_code_list + 3 * territory; - return QLatin1String(reinterpret_cast<const char*>(c), c[2] == 0 ? 
2 : 3); + return {reinterpret_cast<const char*>(c), c[2] == 0 ? 2 : 3}; } namespace { @@ -208,7 +252,7 @@ struct LikelyPair QLocaleId value = QLocaleId { 0, 0, 0 }; }; -bool operator<(const LikelyPair &lhs, const LikelyPair &rhs) +bool operator<(LikelyPair lhs, LikelyPair rhs) { // Must match the comparison LocaleDataWriter.likelySubtags() uses when // sorting, see qtbase/util/locale_database.qlocalexml2cpp.py @@ -366,20 +410,34 @@ QByteArray QLocaleId::name(char separator) const if (language_id == QLocale::C) return QByteArrayLiteral("C"); - const unsigned char *lang = language_code_list + 3 * language_id; + const LanguageCodeEntry &language = languageCodeList[language_id]; + AlphaCode lang; + qsizetype langLen; + + if (language.part1.isValid()) { + lang = language.part1; + langLen = 2; + } else { + lang = language.part2B.isValid() ? language.part2B : language.part3; + langLen = 3; + } + const unsigned char *script = (script_id != QLocale::AnyScript ? script_code_list + 4 * script_id : nullptr); const unsigned char *country = (territory_id != QLocale::AnyTerritory ? territory_code_list + 3 * territory_id : nullptr); - char len = (lang[2] != 0 ? 3 : 2) + (script ? 4 + 1 : 0) - + (country ? (country[2] != 0 ? 3 : 2) + 1 : 0); + qsizetype len = langLen + (script ? 4 + 1 : 0) + (country ? (country[2] != 0 ? 
3 : 2) + 1 : 0); QByteArray name(len, Qt::Uninitialized); char *uc = name.data(); - *uc++ = lang[0]; - *uc++ = lang[1]; - if (lang[2] != 0) - *uc++ = lang[2]; + + auto langArray = lang.decode(); + + *uc++ = langArray[0]; + *uc++ = langArray[1]; + if (langLen > 2) + *uc++ = langArray[2]; + if (script) { *uc++ = separator; *uc++ = script[0]; @@ -407,9 +465,9 @@ QByteArray QLocalePrivate::bcp47Name(char separator) const return m_data->id().withLikelySubtagsRemoved().name(separator); } -static int findLocaleIndexById(const QLocaleId &localeId) +static qsizetype findLocaleIndexById(QLocaleId localeId) { - quint16 idx = locale_index[localeId.language_id]; + qsizetype idx = locale_index[localeId.language_id]; // If there are no locales for specified language (so we we've got the // default language, which has no associated script or country), give up: if (localeId.language_id && idx == 0) @@ -426,17 +484,17 @@ static int findLocaleIndexById(const QLocaleId &localeId) return -1; } -int QLocaleData::findLocaleIndex(QLocaleId lid) +qsizetype QLocaleData::findLocaleIndex(QLocaleId lid) { QLocaleId localeId = lid; QLocaleId likelyId = localeId.withLikelySubtagsAdded(); const ushort fallback = likelyId.language_id; // Try a straight match with the likely data: - int index = findLocaleIndexById(likelyId); + qsizetype index = findLocaleIndexById(likelyId); if (index >= 0) return index; - QList<QLocaleId> tried; + QVarLengthArray<QLocaleId, 6> tried; tried.push_back(likelyId); #define CheckCandidate(id) do { \ @@ -476,13 +534,13 @@ int QLocaleData::findLocaleIndex(QLocaleId lid) return locale_index[fallback]; } -static QStringView findTag(QStringView name) +static QStringView findTag(QStringView name) noexcept { - const QString separators = QStringLiteral("_-.@"); - int i = 0; - while (i < name.size() && !separators.contains(name[i])) - i++; - return name.first(i); + const std::u16string_view v(name.utf16(), size_t(name.size())); + const auto i = v.find_first_of(u"_-.@"); + if (i 
== std::string_view::npos) + return name; + return name.first(qsizetype(i)); } static bool validTag(QStringView tag) @@ -490,7 +548,7 @@ static bool validTag(QStringView tag) // Is tag is a non-empty sequence of ASCII letters and/or digits ? for (QChar uc : tag) { const char16_t ch = uc.unicode(); - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9'))) + if (!isAsciiLetterOrNumber(ch)) return false; } return tag.size() > 0; @@ -503,7 +561,7 @@ static bool isScript(QStringView tag) static const QString allScripts = QString::fromLatin1(reinterpret_cast<const char *>(script_code_list), sizeof(script_code_list) - 1); - return tag.length() == 4 && allScripts.indexOf(tag) % 4 == 0; + return tag.size() == 4 && allScripts.indexOf(tag) % 4 == 0; } bool qt_splitLocaleName(QStringView name, QStringView *lang, QStringView *script, QStringView *land) @@ -550,7 +608,7 @@ bool qt_splitLocaleName(QStringView name, QStringView *lang, QStringView *script return state != LangState; } -QLocaleId QLocaleId::fromName(const QString &name) +QLocaleId QLocaleId::fromName(QStringView name) { QStringView lang; QStringView script; @@ -564,26 +622,26 @@ QLocaleId QLocaleId::fromName(const QString &name) return { langId, QLocalePrivate::codeToScript(script), QLocalePrivate::codeToTerritory(land) }; } -QString qt_readEscapedFormatString(QStringView format, int *idx) +QString qt_readEscapedFormatString(QStringView format, qsizetype *idx) { - int &i = *idx; + qsizetype &i = *idx; - Q_ASSERT(format.at(i) == QLatin1Char('\'')); + Q_ASSERT(format.at(i) == u'\''); ++i; if (i == format.size()) return QString(); - if (format.at(i).unicode() == '\'') { // "''" outside of a quoted stirng + if (format.at(i).unicode() == '\'') { // "''" outside of a quoted string ++i; - return QLatin1String("'"); + return "'"_L1; } QString result; while (i < format.size()) { if (format.at(i).unicode() == '\'') { - if (format.mid(i + 1).startsWith(QLatin1Char('\''))) { + if (format.mid(i + 
1).startsWith(u'\'')) { // "''" inside a quoted string - result.append(QLatin1Char('\'')); + result.append(u'\''); i += 2; } else { break; @@ -614,7 +672,7 @@ QString qt_readEscapedFormatString(QStringView format, int *idx) qt_repeatCount(u"aab"); // == 2 \endcode */ -int qt_repeatCount(QStringView s) +qsizetype qt_repeatCount(QStringView s) { if (s.isEmpty()) return 0; @@ -622,15 +680,15 @@ int qt_repeatCount(QStringView s) qsizetype j = 1; while (j < s.size() && s.at(j) == c) ++j; - return int(j); + return j; } -static const QLocaleData *default_data = nullptr; +Q_CONSTINIT static const QLocaleData *default_data = nullptr; +Q_CONSTINIT QBasicAtomicInt QLocalePrivate::s_generation = Q_BASIC_ATOMIC_INITIALIZER(0); -static const QLocaleData *const c_data = locale_data; static QLocalePrivate *c_private() { - static QLocalePrivate c_locale(c_data, 0, QLocale::OmitGroupSeparator, 1); + static QLocalePrivate c_locale(locale_data, 0, QLocale::OmitGroupSeparator, 1); return &c_locale; } @@ -640,37 +698,48 @@ static QLocalePrivate *c_private() */ /*! - Constructs a QSystemLocale object. - - The constructor will automatically install this object as the system locale, - if there's not one active. It also resets the flag that'll prompt - QLocale::system() to re-initialize its data, so that instantiating a - QSystemLocale transiently (doesn't install the transient as system locale if - there was one already and) triggers an update to the system locale's data. + \internal + Constructs a QSystemLocale object. + + The constructor will automatically install this object as the system locale. + It and the destructor maintain a stack of system locales, with the + most-recently-created instance (that hasn't yet been deleted) used as the + system locale. 
This is only intended as a way to let a platform plugin + install its own system locale, overriding what might otherwise be provided + for its class of platform (as Android does, differing from Linux), and to + let tests transiently override the system or plugin-supplied one. As such, + there should not be diverse threads creating and destroying QSystemLocale + instances concurrently, so no attempt is made at thread-safety in managing + the stack. + + This constructor also resets the flag that'll prompt QLocale::system() to + re-initialize its data, so that instantiating a QSystemLocale (even + transiently) triggers a refresh of the system locale's data. This is + exploited by some test code. */ -QSystemLocale::QSystemLocale() +QSystemLocale::QSystemLocale() : next(_systemLocale) { - if (!_systemLocale) - _systemLocale = this; + _systemLocale = this; systemLocaleData.m_language_id = 0; } /*! \internal -*/ -QSystemLocale::QSystemLocale(bool) -{ } - -/*! - Deletes the object. + Deletes the object. */ QSystemLocale::~QSystemLocale() { if (_systemLocale == this) { - _systemLocale = nullptr; + _systemLocale = next; + // Change to system locale => force refresh. systemLocaleData.m_language_id = 0; + } else { + for (QSystemLocale *p = _systemLocale; p; p = p->next) { + if (p->next == this) + p->next = next; + } } } @@ -678,7 +747,12 @@ static const QSystemLocale *systemLocale() { if (_systemLocale) return _systemLocale; - return QSystemLocale_globalSystemLocale(); + + // As this is only ever instantiated with _systemLocale null, it is + // necessarily the ->next-most in any chain that may subsequently develop; + // and it won't be destructed until exit()-time. + static QSystemLocale globalInstance; + return &globalInstance; } static void updateSystemPrivate() @@ -709,28 +783,49 @@ static void updateSystemPrivate() systemLocaleData.m_script_id = res.toInt(); // Should we replace Any values based on likely sub-tags ? 
+ + // If system locale is default locale, update the default collator's generation: + if (default_data == &systemLocaleData) + QLocalePrivate::s_generation.fetchAndAddRelaxed(1); } #endif // !QT_NO_SYSTEMLOCALE -static const QLocaleData *systemData() +static const QLocaleData *systemData(qsizetype *sysIndex = nullptr) { #ifndef QT_NO_SYSTEMLOCALE /* Copy over the information from the fallback locale and modify. - This modifies (cross-thread) global state, so take care to only call it in - one thread. + If sysIndex is passed, it should be the m_index of the system locale's + QLocalePrivate, which we'll update if it needs it. + + This modifies (cross-thread) global state, so is mutex-protected. */ { - static QBasicMutex systemDataMutex; + Q_CONSTINIT static QLocaleId sysId; + bool updated = false; + + Q_CONSTINIT static QBasicMutex systemDataMutex; systemDataMutex.lock(); - if (systemLocaleData.m_language_id == 0) + if (systemLocaleData.m_language_id == 0) { updateSystemPrivate(); + updated = true; + } + // Initialization of system private has *sysIndex == -1 to hit this. 
+ if (sysIndex && (updated || *sysIndex < 0)) { + const QLocaleId nowId = systemLocaleData.id(); + if (sysId != nowId || *sysIndex < 0) { + // This look-up may be expensive: + *sysIndex = QLocaleData::findLocaleIndex(nowId); + sysId = nowId; + } + } systemDataMutex.unlock(); } return &systemLocaleData; #else + Q_UNUSED(sysIndex); return locale_data; #endif } @@ -742,7 +837,7 @@ static const QLocaleData *defaultData() return default_data; } -static uint defaultIndex() +static qsizetype defaultIndex() { const QLocaleData *const data = defaultData(); #ifndef QT_NO_SYSTEMLOCALE @@ -753,15 +848,15 @@ static uint defaultIndex() } #endif - Q_ASSERT(data >= locale_data); - Q_ASSERT(data < locale_data + std::size(locale_data)); + using QtPrivate::q_points_into_range; + Q_ASSERT(q_points_into_range(data, locale_data)); return data - locale_data; } const QLocaleData *QLocaleData::c() { Q_ASSERT(locale_index[QLocale::C] == 0); - return c_data; + return locale_data; } #ifndef QT_NO_DATASTREAM @@ -780,18 +875,17 @@ QDataStream &operator>>(QDataStream &ds, QLocale &l) } #endif // QT_NO_DATASTREAM +static constexpr qsizetype locale_data_size = q20::ssize(locale_data) - 1; // trailing guard -static const int locale_data_size = sizeof(locale_data)/sizeof(QLocaleData) - 1; +Q_GLOBAL_STATIC(QSharedDataPointer<QLocalePrivate>, defaultLocalePrivate, + new QLocalePrivate(defaultData(), defaultIndex())) -Q_GLOBAL_STATIC_WITH_ARGS(QSharedDataPointer<QLocalePrivate>, defaultLocalePrivate, - (new QLocalePrivate(defaultData(), defaultIndex()))) - -static QLocalePrivate *localePrivateByName(const QString &name) +static QLocalePrivate *localePrivateByName(QStringView name) { - if (name == QLatin1String("C")) + if (name == u"C") return c_private(); - const int index = QLocaleData::findLocaleIndex(QLocaleId::fromName(name)); - Q_ASSERT(index >= 0 && size_t(index) < std::size(locale_data) - 1); + const qsizetype index = QLocaleData::findLocaleIndex(QLocaleId::fromName(name)); + Q_ASSERT(index >= 0 
&& index < locale_data_size); return new QLocalePrivate(locale_data + index, index, locale_data[index].m_language_id == QLocale::C ? QLocale::OmitGroupSeparator : QLocale::DefaultNumberOptions); @@ -803,8 +897,8 @@ static QLocalePrivate *findLocalePrivate(QLocale::Language language, QLocale::Sc if (language == QLocale::C) return c_private(); - int index = QLocaleData::findLocaleIndex(QLocaleId { language, script, territory }); - Q_ASSERT(index >= 0 && size_t(index) < std::size(locale_data) - 1); + qsizetype index = QLocaleData::findLocaleIndex(QLocaleId { language, script, territory }); + Q_ASSERT(index >= 0 && index < locale_data_size); const QLocaleData *data = locale_data + index; QLocale::NumberOptions numberOptions = QLocale::DefaultNumberOptions; @@ -819,29 +913,41 @@ static QLocalePrivate *findLocalePrivate(QLocale::Language language, QLocale::Sc return new QLocalePrivate(data, index, numberOptions); } -QString QLocaleData::decimalPoint() const +static std::optional<QString> +systemLocaleString(const QLocaleData *that, QSystemLocale::QueryType type) { #ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::DecimalPoint).toString(); - if (!res.isEmpty()) - return res; - } + if (that != &systemLocaleData) + return std::nullopt; + + QVariant v = systemLocale()->query(type); + if (v.metaType() != QMetaType::fromType<QString>()) + return std::nullopt; + + return v.toString(); +#else + Q_UNUSED(that) + Q_UNUSED(type) + return std::nullopt; #endif - return decimalSeparator().getData(single_character_data); +} + +static QString localeString(const QLocaleData *that, QSystemLocale::QueryType type, + QLocaleData::DataRange range) +{ + if (auto opt = systemLocaleString(that, type)) + return *opt; + return range.getData(single_character_data); +} + +QString QLocaleData::decimalPoint() const +{ + return localeString(this, QSystemLocale::DecimalPoint, decimalSeparator()); } QString QLocaleData::groupSeparator() const 
{ - // Empty => don't do grouping -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - QVariant res = systemLocale()->query(QSystemLocale::GroupSeparator); - if (!res.isNull()) - return res.toString(); - } -#endif - return groupDelim().getData(single_character_data); + return localeString(this, QSystemLocale::GroupSeparator, groupDelim()); } QString QLocaleData::percentSign() const @@ -856,14 +962,7 @@ QString QLocaleData::listSeparator() const QString QLocaleData::zeroDigit() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::ZeroDigit).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return zero().getData(single_character_data); + return localeString(this, QSystemLocale::ZeroDigit, zero()); } char32_t QLocaleData::zeroUcs() const @@ -884,26 +983,12 @@ char32_t QLocaleData::zeroUcs() const QString QLocaleData::negativeSign() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::NegativeSign).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return minus().getData(single_character_data); + return localeString(this, QSystemLocale::NegativeSign, minus()); } QString QLocaleData::positiveSign() const { -#ifndef QT_NO_SYSTEMLOCALE - if (this == &systemLocaleData) { - auto res = systemLocale()->query(QSystemLocale::PositiveSign).toString(); - if (!res.isEmpty()) - return res; - } -#endif - return plus().getData(single_character_data); + return localeString(this, QSystemLocale::PositiveSign, plus()); } QString QLocaleData::exponentSeparator() const @@ -918,11 +1003,29 @@ QLocale::QLocale(QLocalePrivate &dd) : d(&dd) {} +/*! + \variable QLocale::DefaultTwoDigitBaseYear + \since 6.7 + + \brief The default start year of the century within which a format taking + a two-digit year will select. The value of the constant is \c {1900}. 
+ + Some locales use, particularly for ShortFormat, only the last two digits of + the year. Prior to 6.7 the year 1900 was always used as a base year for + such cases. Now various QLocale and QDate functions have the overloads that + allow callers to specify the base year, and this constant is used as its + default value. + + \sa toDate(), toDateTime(), QDate::fromString(), QDateTime::fromString() +*/ /*! - Constructs a QLocale object with the specified \a name, - which has the format - "language[_script][_territory][.codeset][@modifier]" or "C", where: + \since 6.3 + + Constructs a QLocale object with the specified \a name. + + The name has the format "language[_script][_territory][.codeset][@modifier]" + or "C", where: \list \li language is a lowercase, two-letter, ISO 639 language code (some @@ -934,30 +1037,34 @@ QLocale::QLocale(QLocalePrivate &dd) \endlist The separator can be either underscore \c{'_'} (U+005F, "low line") or a - dash \c{'-'} (U+002D, "hyphen-minus"). If the string violates the locale + dash \c{'-'} (U+002D, "hyphen-minus"). If QLocale has no data for the + specified combination of language, script, and territory, then it uses the + most suitable match it can find instead. If the string violates the locale format, or no suitable data can be found for the specified keys, the "C" - locale is used instead. If QLocale has no data for the specified combination - of language, script and territory, the it uses most suitable match it can - find instead. + locale is used instead. This constructor is much slower than QLocale(Language, Script, Territory) or QLocale(Language, Territory). \sa bcp47Name(), {Matching combinations of language, script and territory} */ - -QLocale::QLocale(const QString &name) +QLocale::QLocale(QStringView name) : d(localePrivateByName(name)) { } /*! + \fn QLocale::QLocale(const QString &name) + \overload +*/ + +/*! Constructs a QLocale object initialized with the default locale. 
If no default locale was set using setDefault(), this locale will be the same as the one returned by system(). - \sa setDefault() + \sa setDefault(), system() */ QLocale::QLocale() @@ -980,7 +1087,7 @@ QLocale::QLocale() */ QLocale::QLocale(Language language, Territory territory) - : d(findLocalePrivate(language, QLocale::AnyScript, territory)) + : d(findLocalePrivate(language, AnyScript, territory)) { } @@ -1012,10 +1119,7 @@ QLocale::QLocale(Language language, Script script, Territory territory) Constructs a QLocale object as a copy of \a other. */ -QLocale::QLocale(const QLocale &other) -{ - d = other.d; -} +QLocale::QLocale(const QLocale &other) noexcept = default; /*! Destructor @@ -1030,11 +1134,7 @@ QLocale::~QLocale() to this QLocale object. */ -QLocale &QLocale::operator=(const QLocale &other) -{ - d = other.d; - return *this; -} +QLocale &QLocale::operator=(const QLocale &other) noexcept = default; /*! \internal @@ -1072,7 +1172,7 @@ size_t qHash(const QLocale &key, size_t seed) noexcept Sets the \a options related to number conversions for this QLocale instance. - \sa numberOptions() + \sa numberOptions(), FloatingPointPrecisionOption */ void QLocale::setNumberOptions(NumberOptions options) { @@ -1088,7 +1188,7 @@ void QLocale::setNumberOptions(NumberOptions options) By default, no options are set for the standard locales, except for the "C" locale, which has OmitGroupSeparator set by default. 
- \sa setNumberOptions(), toString(), groupSeparator() + \sa setNumberOptions(), toString(), groupSeparator(), FloatingPointPrecisionOption */ QLocale::NumberOptions QLocale::numberOptions() const { @@ -1114,10 +1214,10 @@ QString QLocale::quoteString(QStringView str, QuotationStyle style) const #ifndef QT_NO_SYSTEMLOCALE if (d->m_data == &systemLocaleData) { QVariant res; - if (style == QLocale::AlternateQuotation) + if (style == AlternateQuotation) res = systemLocale()->query(QSystemLocale::StringToAlternateQuotation, QVariant::fromValue(str)); - if (res.isNull() || style == QLocale::StandardQuotation) + if (res.isNull() || style == StandardQuotation) res = systemLocale()->query(QSystemLocale::StringToStandardQuotation, QVariant::fromValue(str)); if (!res.isNull()) @@ -1126,7 +1226,7 @@ QString QLocale::quoteString(QStringView str, QuotationStyle style) const #endif QLocaleData::DataRange start, end; - if (style == QLocale::StandardQuotation) { + if (style == StandardQuotation) { start = d->m_data->quoteStart(); end = d->m_data->quoteEnd(); } else { @@ -1156,7 +1256,7 @@ QString QLocale::createSeparatedList(const QStringList &list) const } #endif - const int size = list.size(); + const qsizetype size = list.size(); if (size < 1) return QString(); @@ -1171,7 +1271,7 @@ QString QLocale::createSeparatedList(const QStringList &list) const QStringView formatMid = d->m_data->midListPattern().viewData(list_pattern_part_data); QStringView formatEnd = d->m_data->endListPattern().viewData(list_pattern_part_data); QString result = formatStart.arg(list.at(0), list.at(1)); - for (int i = 2; i < size - 1; ++i) + for (qsizetype i = 2; i < size - 1; ++i) result = formatMid.arg(result, list.at(i)); result = formatEnd.arg(result, list.at(size - 1)); return result; @@ -1206,6 +1306,7 @@ void QLocale::setDefault(const QLocale &locale) // update the cached private *defaultLocalePrivate = locale.d; + QLocalePrivate::s_generation.fetchAndAddRelaxed(1); } /*! 
@@ -1257,51 +1358,94 @@ QLocale::Country QLocale::country() const #endif /*! - Returns the language and country of this locale as a - string of the form "language_country", where - language is a lowercase, two-letter ISO 639 language code, - and country is an uppercase, two- or three-letter ISO 3166 country code. + \since 6.7 + \enum QLocale::TagSeparator + + Indicate how to combine the parts that make up a locale identifier. + + A locale identifier may be made up of several tags, indicating language, + script and territory (plus, potentially, other details), joined together to + form the identifier. Various standards and conventional forms use either a + dash (the Unicode HYPHEN-MINUS, U+002D) or an underscore (LOW LINE, U+005F). + Different clients of QLocale may thus need one or the other. + + \value Dash Use \c{'-'}, the dash or hyphen character. + \value Underscore Use \c{'_'}, the underscore character. + + \note Although dash and underscore are the only separators used in public + standards (as at 2023), it is possible to cast any \l + {https://en.cppreference.com/w/cpp/language/ascii} {ASCII} character to this + type if a non-standard ASCII separator is needed. Casting a non-ASCII + character (with decimal value above 127) is not supported: such values are + reserved for future use as enum members if some public standard ever uses a + non-ASCII separator. It is, of course, possible to use QString::replace() to + replace the separator used by a function taking a parameter of this type + with an arbitrary Unicode character or string. +*/ + +Q_DECL_COLD_FUNCTION static void badSeparatorWarning(const char *method, char sep) +{ + qWarning("QLocale::%s(): Using non-ASCII separator '%c' (%02x) is unsupported", + method, sep, uint(uchar(sep))); +} + +/*! + \brief The short name of this locale. - Note that even if QLocale object was constructed with an explicit script, - name() will not contain it for compatibility reasons. 
Use \l bcp47Name() instead - if you need a full locale name. + Returns the language and territory of this locale as a string of the form + "language_territory", where language is a lowercase, two-letter ISO 639 + language code, and territory is an uppercase, two- or three-letter ISO 3166 + territory code. If the locale has no specified territory, only the language + name is returned. Since Qt 6.7 an optional \a separator parameter can be + supplied to override the default underscore character separating the two + tags. - \sa QLocale(), language(), script(), territory(), bcp47Name() + Even if the QLocale object was constructed with an explicit script, name() + will not contain it for compatibility reasons. Use \l bcp47Name() instead if + you need a full locale name, or construct the string you want to identify a + locale by from those returned by passing its \l language() to \l + languageToCode() and similar for the script and territory. + + \sa QLocale(), language(), script(), territory(), bcp47Name(), uiLanguages() */ -QString QLocale::name() const +QString QLocale::name(TagSeparator separator) const { + const char sep = char(separator); + if (uchar(sep) > 0x7f) { + badSeparatorWarning("name", sep); + return {}; + } + const auto code = d->languageCode(); + QLatin1StringView view{code.data()}; + Language l = language(); if (l == C) - return d->languageCode(); + return view; Territory c = territory(); if (c == AnyTerritory) - return d->languageCode(); - - return d->languageCode() + QLatin1Char('_') + d->territoryCode(); -} - -static qlonglong toIntegral_helper(const QLocaleData *d, QStringView str, bool *ok, - QLocale::NumberOptions mode, qlonglong) -{ - return d->stringToLongLong(str, 10, ok, mode); -} + return view; -static qulonglong toIntegral_helper(const QLocaleData *d, QStringView str, bool *ok, - QLocale::NumberOptions mode, qulonglong) -{ - return d->stringToUnsLongLong(str, 10, ok, mode); + return view + QLatin1Char(sep) + d->territoryCode(); } template 
<typename T> static inline T toIntegral_helper(const QLocalePrivate *d, QStringView str, bool *ok) { - using Int64 = - typename std::conditional<std::is_unsigned<T>::value, qulonglong, qlonglong>::type; + constexpr bool isUnsigned = std::is_unsigned_v<T>; + using Int64 = typename std::conditional_t<isUnsigned, quint64, qint64>; + + QSimpleParsedNumber<Int64> r{}; + if constexpr (isUnsigned) + r = d->m_data->stringToUnsLongLong(str, 10, d->m_numberOptions); + else + r = d->m_data->stringToLongLong(str, 10, d->m_numberOptions); + + if (ok) + *ok = r.ok(); - // we select the right overload by the last, unused parameter - Int64 val = toIntegral_helper(d->m_data, str, ok, d->m_numberOptions, Int64()); + Int64 val = r.result; if (T(val) != val) { if (ok != nullptr) *ok = false; @@ -1314,49 +1458,79 @@ T toIntegral_helper(const QLocalePrivate *d, QStringView str, bool *ok) /*! \since 4.8 - Returns the dash-separated language, script and country (and possibly other - BCP47 fields) of this locale as a string. + \brief Returns the BCP47 field names joined with dashes. - Unlike the uiLanguages() the returned value of the bcp47Name() represents - the locale name of the QLocale data but not the language the user-interface - should be in. + This combines as many of language, script and territory (and possibly other + BCP47 fields) for this locale as are needed to uniquely specify it. Note + that fields may be omitted if the Unicode consortium's \l {Matching + combinations of language, script and territory}{Likely Subtag Rules} imply + the omitted fields when given those retained. See \l name() for how to + construct a string from individual fields, if some other format is needed. - This function tries to conform the locale name to BCP47. + Unlike uiLanguages(), the value returned by bcp47Name() represents the + locale name of the QLocale data; this need not be the language the + user-interface should be in. 
- \sa language(), territory(), script(), uiLanguages() + This function tries to conform the locale name to the IETF Best Common + Practice 47, defined by RFC 5646. Since Qt 6.7, it supports an optional \a + separator parameter which can be used to override the BCP47-specified use of + a hyphen to separate the tags. For use in IETF-defined protocols, however, + the default, QLocale::TagSeparator::Dash, should be retained. + + \sa name(), language(), territory(), script(), uiLanguages() */ -QString QLocale::bcp47Name() const +QString QLocale::bcp47Name(TagSeparator separator) const { - return QString::fromLatin1(d->bcp47Name()); + const char sep = char(separator); + if (uchar(sep) > 0x7f) { + badSeparatorWarning("bcp47Name", sep); + return {}; + } + return QString::fromLatin1(d->bcp47Name(sep)); } /*! Returns the two- or three-letter language code for \a language, as defined in the ISO 639 standards. + If specified, \a codeTypes selects which set of codes to consider. The first + code from the set that is defined for \a language is returned. Otherwise, + all ISO-639 codes are considered. The codes are considered in the following + order: \c ISO639Part1, \c ISO639Part2B, \c ISO639Part2T, \c ISO639Part3. + \c LegacyLanguageCode is ignored by this function. + \note For \c{QLocale::C} the function returns \c{"C"}. For \c QLocale::AnyLanguage an empty string is returned. + If the language has no code in any selected code set, an empty string + is returned. - \since 6.1 + \since 6.3 \sa codeToLanguage(), language(), name(), bcp47Name(), territoryToCode(), scriptToCode() */ -QString QLocale::languageToCode(Language language) +QString QLocale::languageToCode(Language language, LanguageCodeTypes codeTypes) { - return QLocalePrivate::languageToCode(language); + const auto code = QLocalePrivate::languageToCode(language, codeTypes); + return QLatin1StringView{code.data()}; } /*! 
Returns the QLocale::Language enum corresponding to the two- or three-letter \a languageCode, as defined in the ISO 639 standards. - If the code is invalid or not known QLocale::AnyLanguage is returned. + If specified, \a codeTypes selects which set of codes to consider for + conversion. By default all codes known to Qt are considered. The codes are + matched in the following order: \c ISO639Part1, \c ISO639Part2B, + \c ISO639Part2T, \c ISO639Part3, \c LegacyLanguageCode. - \since 6.1 + If the code is invalid or not known \c QLocale::AnyLanguage is returned. + + \since 6.3 \sa languageToCode(), codeToTerritory(), codeToScript() */ -QLocale::Language QLocale::codeToLanguage(QStringView languageCode) noexcept +QLocale::Language QLocale::codeToLanguage(QStringView languageCode, + LanguageCodeTypes codeTypes) noexcept { - return QLocalePrivate::codeToLanguage(languageCode); + return QLocalePrivate::codeToLanguage(languageCode, codeTypes); } /*! @@ -1457,9 +1631,9 @@ QLocale::Script QLocale::codeToScript(QStringView scriptCode) noexcept QString QLocale::languageToString(Language language) { - if (language > QLocale::LastLanguage) - return QLatin1String("Unknown"); - return QLatin1String(language_name_list + language_name_index[language]); + if (language > LastLanguage) + return "Unknown"_L1; + return QString::fromUtf8(language_name_list + language_name_index[language]); } /*! 
@@ -1469,11 +1643,11 @@ QString QLocale::languageToString(Language language) \sa languageToString(), scriptToString(), territory(), bcp47Name() */ -QString QLocale::territoryToString(QLocale::Territory territory) +QString QLocale::territoryToString(Territory territory) { - if (territory > QLocale::LastTerritory) - return QLatin1String("Unknown"); - return QLatin1String(territory_name_list + territory_name_index[territory]); + if (territory > LastTerritory) + return "Unknown"_L1; + return QString::fromUtf8(territory_name_list + territory_name_index[territory]); } #if QT_DEPRECATED_SINCE(6, 6) @@ -1497,14 +1671,13 @@ QString QLocale::countryToString(Country country) \sa languageToString(), territoryToString(), script(), bcp47Name() */ -QString QLocale::scriptToString(QLocale::Script script) +QString QLocale::scriptToString(Script script) { - if (script > QLocale::LastScript) - return QLatin1String("Unknown"); - return QLatin1String(script_name_list + script_name_index[script]); + if (script > LastScript) + return "Unknown"_L1; + return QString::fromUtf8(script_name_list + script_name_index[script]); } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn short QLocale::toShort(const QString &s, bool *ok) const @@ -1637,9 +1810,6 @@ QString QLocale::scriptToString(QLocale::Script script) If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - This function does not fall back to the 'C' locale if the string - cannot be interpreted in this locale. - This function ignores leading and trailing whitespace. \sa toDouble(), toInt(), toString() @@ -1655,9 +1825,6 @@ QString QLocale::scriptToString(QLocale::Script script) If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - This function does not fall back to the 'C' locale if the string - cannot be interpreted in this locale. 
- \snippet code/src_corelib_text_qlocale.cpp 3 Notice that the last conversion returns 1234.0, because '.' is the @@ -1667,7 +1834,6 @@ QString QLocale::scriptToString(QLocale::Script script) \sa toFloat(), toInt(), toString() */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! Returns the short int represented by the localized string \a s. @@ -1860,10 +2026,6 @@ float QLocale::toFloat(QStringView s, bool *ok) const If \a ok is not \nullptr, failure is reported by setting *\a{ok} to \c false, and success by setting *\a{ok} to \c true. - Unlike QString::toDouble(), this function does not fall back to - the "C" locale if the string cannot be interpreted in this - locale. - \snippet code/src_corelib_text_qlocale.cpp 3-qstringview Notice that the last conversion returns 1234.0, because '.' is the @@ -1909,7 +2071,6 @@ QString QLocale::toString(qulonglong i) const return d->m_data->unsLongLongToString(i, -1, 10, -1, flags); } -#if QT_STRINGVIEW_LEVEL < 2 /*! Returns a localized string representation of the given \a date in the specified \a format. @@ -1946,7 +2107,6 @@ QString QLocale::toString(QTime time, const QString &format) const \sa QDateTime::toString(), QDate::toString(), QTime::toString() */ -#endif /*! \since 5.14 @@ -2027,7 +2187,7 @@ QString QLocale::toString(QDate date, FormatType format) const static bool timeFormatContainsAP(QStringView format) { - int i = 0; + qsizetype i = 0; while (i < format.size()) { if (format.at(i).unicode() == '\'') { qt_readEscapedFormatString(format, &i); @@ -2246,16 +2406,20 @@ QString QLocale::dateTimeFormat(FormatType format) const } } #endif - return dateFormat(format) + QLatin1Char(' ') + timeFormat(format); + return dateFormat(format) + u' ' + timeFormat(format); } #if QT_CONFIG(datestring) /*! \since 4.4 - Parses the time string given in \a string and returns the - time. The format of the time string is chosen according to the - \a format parameter (see timeFormat()). + Reads \a string as a time in a locale-specific \a format. 
+ + Parses \a string and returns the time it represents. The format of the time + string is chosen according to the \a format parameter (see timeFormat()). + + \note Any am/pm indicators used must match \l amText() or \l pmText(), + ignoring case. If the time could not be parsed, returns an invalid time. @@ -2269,59 +2433,86 @@ QTime QLocale::toTime(const QString &string, FormatType format) const /*! \since 4.4 - Parses the date string given in \a string and returns the - date. The format of the date string is chosen according to the - \a format parameter (see dateFormat()). + Reads \a string as a date in a locale-specific \a format. + + Parses \a string and returns the date it represents. The format of the date + string is chosen according to the \a format parameter (see dateFormat()). + +//! [base-year-for-short] + Some locales use, particularly for ShortFormat, only the last two digits of + the year. In such a case, the 100 years starting at \a baseYear are the + candidates first considered. Prior to 6.7 there was no \a baseYear parameter + and 1900 was always used. This is the default for \a baseYear, selecting a + year from then to 1999. In some cases, other fields may lead to the next or + previous century being selected, to get a result consistent with all fields + given. See \l QDate::fromString() for details. +//! [base-year-for-short] + + \note Month and day names, where used, must be given in the locale's + language. If the date could not be parsed, returns an invalid date. \sa dateFormat(), toTime(), toDateTime(), QDate::fromString() */ -QDate QLocale::toDate(const QString &string, FormatType format) const +QDate QLocale::toDate(const QString &string, FormatType format, int baseYear) const { - return toDate(string, dateFormat(format)); + return toDate(string, dateFormat(format), baseYear); } /*! 
\since 5.14 \overload */ -QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal) const +QDate QLocale::toDate(const QString &string, FormatType format, QCalendar cal, int baseYear) const { - return toDate(string, dateFormat(format), cal); + return toDate(string, dateFormat(format), cal, baseYear); } /*! \since 4.4 - Parses the date/time string given in \a string and returns the - time. The format of the date/time string is chosen according to the - \a format parameter (see dateTimeFormat()). + Reads \a string as a date-time in a locale-specific \a format. + + Parses \a string and returns the date-time it represents. The format of the + date-time string is chosen according to the \a format parameter (see + dateTimeFormat()). + + \include qlocale.cpp base-year-for-short + + \note Month and day names, where used, must be given in the locale's + language. Any am/pm indicators used must match \l amText() or \l pmText(), + ignoring case. If the string could not be parsed, returns an invalid QDateTime. \sa dateTimeFormat(), toTime(), toDate(), QDateTime::fromString() */ -QDateTime QLocale::toDateTime(const QString &string, FormatType format) const +QDateTime QLocale::toDateTime(const QString &string, FormatType format, int baseYear) const { - return toDateTime(string, dateTimeFormat(format)); + return toDateTime(string, dateTimeFormat(format), baseYear); } /*! \since 5.14 \overload */ -QDateTime QLocale::toDateTime(const QString &string, FormatType format, QCalendar cal) const +QDateTime QLocale::toDateTime(const QString &string, FormatType format, QCalendar cal, + int baseYear) const { - return toDateTime(string, dateTimeFormat(format), cal); + return toDateTime(string, dateTimeFormat(format), cal, baseYear); } /*! \since 4.4 - Parses the time string given in \a string and returns the - time. See QTime::fromString() for information on what is a valid - format string. + Reads \a string as a time in the given \a format. 
+ + Parses \a string and returns the time it represents. See QTime::fromString() + for the interpretation of \a format. + + \note Any am/pm indicators used must match \l amText() or \l pmText(), + ignoring case. If the time could not be parsed, returns an invalid time. @@ -2345,37 +2536,49 @@ QTime QLocale::toTime(const QString &string, const QString &format) const /*! \since 4.4 - Parses the date string given in \a string and returns the - date. See QDate::fromString() for information on the expressions - that can be used with this function. + Reads \a string as a date in the given \a format. + + Parses \a string and returns the date it represents. See QDate::fromString() + for the interpretation of \a format. - This function searches month names and the names of the days of - the week in the current locale. +//! [base-year-for-two-digit] + When \a format only specifies the last two digits of a year, the 100 years + starting at \a baseYear are the candidates first considered. Prior to 6.7 + there was no \a baseYear parameter and 1900 was always used. This is the + default for \a baseYear, selecting a year from then to 1999. In some cases, + other fields may lead to the next or previous century being selected, to get + a result consistent with all fields given. See \l QDate::fromString() for + details. +//! [base-year-for-two-digit] + + \note Month and day names, where used, must be given in the locale's + language. If the date could not be parsed, returns an invalid date. \sa dateFormat(), toTime(), toDateTime(), QDate::fromString() */ -QDate QLocale::toDate(const QString &string, const QString &format) const +QDate QLocale::toDate(const QString &string, const QString &format, int baseYear) const { - return toDate(string, format, QCalendar()); + return toDate(string, format, QCalendar(), baseYear); } /*! 
\since 5.14 \overload */ -QDate QLocale::toDate(const QString &string, const QString &format, QCalendar cal) const +QDate QLocale::toDate(const QString &string, const QString &format, QCalendar cal, int baseYear) const { QDate date; #if QT_CONFIG(datetimeparser) QDateTimeParser dt(QMetaType::QDate, QDateTimeParser::FromString, cal); dt.setDefaultLocale(*this); if (dt.parseFormat(format)) - dt.fromString(string, &date, nullptr); + dt.fromString(string, &date, nullptr, baseYear); #else Q_UNUSED(string); Q_UNUSED(format); + Q_UNUSED(baseYear); Q_UNUSED(cal); #endif return date; @@ -2384,12 +2587,16 @@ QDate QLocale::toDate(const QString &string, const QString &format, QCalendar ca /*! \since 4.4 - Parses the date/time string given in \a string and returns the - time. See QDateTime::fromString() for information on the expressions - that can be used with this function. + Reads \a string as a date-time in the given \a format. - \note The month and day names used must be given in the user's local - language. + Parses \a string and returns the date-time it represents. See + QDateTime::fromString() for the interpretation of \a format. + + \include qlocale.cpp base-year-for-two-digit + + \note Month and day names, where used, must be given in the locale's + language. Any am/pm indicators used must match \l amText() or \l pmText(), + ignoring case. If the string could not be parsed, returns an invalid QDateTime. If the string can be parsed and represents an invalid date-time (e.g. in a gap @@ -2400,27 +2607,31 @@ QDate QLocale::toDate(const QString &string, const QString &format, QCalendar ca \sa dateTimeFormat(), toTime(), toDate(), QDateTime::fromString() */ -QDateTime QLocale::toDateTime(const QString &string, const QString &format) const +QDateTime QLocale::toDateTime(const QString &string, const QString &format, int baseYear) const { - return toDateTime(string, format, QCalendar()); + return toDateTime(string, format, QCalendar(), baseYear); } /*! 
\since 5.14 \overload */ -QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCalendar cal) const +QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCalendar cal, + int baseYear) const { #if QT_CONFIG(datetimeparser) QDateTime datetime; QDateTimeParser dt(QMetaType::QDateTime, QDateTimeParser::FromString, cal); dt.setDefaultLocale(*this); - if (dt.parseFormat(format) && (dt.fromString(string, &datetime) || !datetime.isValid())) + if (dt.parseFormat(format) && (dt.fromString(string, &datetime, baseYear) + || !datetime.isValid())) { return datetime; + } #else Q_UNUSED(string); Q_UNUSED(format); + Q_UNUSED(baseYear); Q_UNUSED(cal); #endif return QDateTime(); @@ -2430,7 +2641,14 @@ QDateTime QLocale::toDateTime(const QString &string, const QString &format, QCal /*! \since 4.1 - Returns the decimal point character of this locale. + Returns the fractional part separator for this locale. + + This is the token that separates the whole number part from the fractional + part in the representation of a number which has a fractional part. This is + commonly called the "decimal point character" - even though, in many + locales, it is not a "point" (or similar dot). It is (since Qt 6.0) returned + as a string in case some locale needs more than one UTF-16 code-point to + represent its separator. \sa groupSeparator(), toString() */ @@ -2442,7 +2660,14 @@ QString QLocale::decimalPoint() const /*! \since 4.1 - Returns the group separator character of this locale. + Returns the digit-grouping separator for this locale. + + This is a token used to break up long sequences of digits, in the + representation of a number, to make it easier to read. In some locales it + may be empty, indicating that digits should not be broken up into groups in + this way. In others it may be a spacing character. It is (since Qt 6.0) + returned as a string in case some locale needs more than one UTF-16 + code-point to represent its separator. 
\sa decimalPoint(), toString() */ @@ -2454,7 +2679,12 @@ QString QLocale::groupSeparator() const /*! \since 4.1 - Returns the percent character of this locale. + Returns the percent marker of this locale. + + This is a token presumed to be appended to a number to indicate a + percentage. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. \sa toString() */ @@ -2468,6 +2698,13 @@ QString QLocale::percent() const Returns the zero digit character of this locale. + This is a single Unicode character but may be encoded as a surrogate pair, + so is (since Qt 6.0) returned as a string. In most locales, other digits + follow it in Unicode ordering - however, some number systems, notably those + using U+3007 as zero, do not have contiguous digits. Use toString() to + obtain suitable representations of numbers, rather than trying to construct + them from this zero digit. + \sa toString() */ QString QLocale::zeroDigit() const @@ -2478,7 +2715,12 @@ QString QLocale::zeroDigit() const /*! \since 4.1 - Returns the negative sign character of this locale. + Returns the negative sign indicator of this locale. + + This is a token presumed to be used as a prefix to a number to indicate that + it is negative. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. \sa positiveSign(), toString() */ @@ -2490,7 +2732,12 @@ QString QLocale::negativeSign() const /*! \since 4.5 - Returns the positive sign character of this locale. + Returns the positive sign indicator of this locale. + + This is a token presumed to be used as a prefix to a number to indicate that + it is positive. It is (since Qt 6.0) returned as a string because, in some + locales, it is not a single character - for example, because it includes a + text-direction-control character. 
\sa negativeSign(), toString() */ @@ -2502,8 +2749,13 @@ QString QLocale::positiveSign() const /*! \since 4.1 - Returns the exponential character of this locale, used to separate exponent - from mantissa in some floating-point numeric representations. + Returns the exponent separator for this locale. + + This is a token used to separate mantissa from exponent in some + floating-point numeric representations. It is (since Qt 6.0) returned as a + string because, in some locales, it is not a single character - for example, + it may consist of a multiplication sign and a representation of the "ten to + the power" operator. \sa toString(double, char, int) */ @@ -2512,44 +2764,57 @@ QString QLocale::exponential() const return d->m_data->exponentSeparator(); } -static bool qIsUpper(char c) -{ - return c >= 'A' && c <= 'Z'; -} - -static char qToLower(char c) -{ - if (c >= 'A' && c <= 'Z') - return c - 'A' + 'a'; - else - return c; -} - /*! \overload - - \a f and \a prec have the same meaning as in QString::number(double, char, int). - - \sa toDouble(), numberOptions(), exponential(), decimalPoint(), zeroDigit(), positiveSign(), percent() + Returns a string representing the floating-point number \a f. + + The form of the representation is controlled by the \a format and \a + precision parameters. + + The \a format defaults to \c{'g'}. 
It can be any of the following: + + \table + \header \li Format \li Meaning \li Meaning of \a precision + \row \li \c 'e' \li format as [-]9.9e[+|-]999 \li number of digits \e after the decimal point + \row \li \c 'E' \li format as [-]9.9E[+|-]999 \li " + \row \li \c 'f' \li format as [-]9.9 \li " + \row \li \c 'F' \li same as \c 'f' except for INF and NAN (see below) \li " + \row \li \c 'g' \li use \c 'e' or \c 'f' format, whichever is more concise \li maximum number of significant digits (trailing zeroes are omitted) + \row \li \c 'G' \li use \c 'E' or \c 'F' format, whichever is more concise \li " + \endtable + + The special \a precision value QLocale::FloatingPointShortest selects the + shortest representation that, when read as a number, gets back the original floating-point + value. Aside from that, any negative \a precision is ignored in favor of the + default, 6. + + For the \c 'e', \c 'f' and \c 'g' formats, positive infinity is represented + as "inf", negative infinity as "-inf" and floating-point NaN (not-a-number) + values are represented as "nan". For the \c 'E', \c 'F' and \c 'G' formats, + "INF" and "NAN" are used instead. This does not vary with locale. + + \sa toDouble(), numberOptions(), exponential(), decimalPoint(), zeroDigit(), + positiveSign(), percent(), toCurrencyString(), formattedDataSize(), + QLocale::FloatingPointPrecisionOption */ -QString QLocale::toString(double i, char f, int prec) const +QString QLocale::toString(double f, char format, int precision) const { QLocaleData::DoubleForm form = QLocaleData::DFDecimal; - uint flags = qIsUpper(f) ? QLocaleData::CapitalEorX : 0; + uint flags = isAsciiUpper(format) ? 
QLocaleData::CapitalEorX : 0; - switch (qToLower(f)) { - case 'f': - form = QLocaleData::DFDecimal; - break; - case 'e': - form = QLocaleData::DFExponent; - break; - case 'g': - form = QLocaleData::DFSignificantDigits; - break; - default: - break; + switch (QtMiscUtils::toAsciiLower(format)) { + case 'f': + form = QLocaleData::DFDecimal; + break; + case 'e': + form = QLocaleData::DFExponent; + break; + case 'g': + form = QLocaleData::DFSignificantDigits; + break; + default: + break; } if (!(d->m_numberOptions & OmitGroupSeparator)) @@ -2558,7 +2823,7 @@ QString QLocale::toString(double i, char f, int prec) const flags |= QLocaleData::ZeroPadExponent; if (d->m_numberOptions & IncludeTrailingZeroesAfterDot) flags |= QLocaleData::AddTrailingZeroes; - return d->m_data->doubleToString(i, prec, form, -1, flags); + return d->m_data->doubleToString(f, precision, form, -1, flags); } /*! @@ -2583,16 +2848,32 @@ QString QLocale::toString(double i, char f, int prec) const /*! Returns a QLocale object initialized to the system locale. - On Windows and Mac, this locale will use the decimal/grouping characters and - date/time formats specified in the system configuration panel. + The system locale may use system-specific sources for locale data, where + available, otherwise falling back on QLocale's built-in database entry for + the language, script and territory the system reports. + + For example, on Windows and Mac, this locale will use the decimal/grouping + characters and date/time formats specified in the system configuration + panel. \sa c() */ QLocale QLocale::system() { - static QLocalePrivate locale(systemData(), defaultIndex(), DefaultNumberOptions, 1); - QT_PREPEND_NAMESPACE(systemData)(); // trigger updating of the system data if necessary + constexpr auto sysData = []() { + // Same return as systemData(), but leave the setup to the actual call to it. 
+#ifdef QT_NO_SYSTEMLOCALE + return locale_data; +#else + return &systemLocaleData; +#endif + }; + Q_CONSTINIT static QLocalePrivate locale(sysData(), -1, DefaultNumberOptions, 1); + // Calling systemData() ensures system data is up to date; we also need it + // to ensure that locale's index stays up to date: + systemData(&locale.m_index); + Q_ASSERT(locale.m_index >= 0 && locale.m_index < locale_data_size); return QLocale(locale); } @@ -2609,15 +2890,14 @@ QLocale QLocale::system() QList<QLocale> locales = QLocale::matchingLocales(QLocale::AnyLanguage, QLocale::AnyScript, QLocale::Russia); */ -QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Script script, - QLocale::Territory territory) +QList<QLocale> QLocale::matchingLocales(Language language, Script script, Territory territory) { const QLocaleId filter { language, script, territory }; if (!filter.isValid()) return QList<QLocale>(); - if (language == QLocale::C) - return QList<QLocale>() << QLocale(QLocale::C); + if (language == C) + return QList<QLocale>{QLocale(C)}; QList<QLocale> result; if (filter.matchesAll()) @@ -2634,6 +2914,15 @@ QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Scr ++index; } + // Add current system locale, if it matches + const auto syslocaledata = systemData(); + + if (filter.acceptLanguage(syslocaledata->m_language_id)) { + const QLocaleId id = syslocaledata->id(); + if (filter.acceptScriptTerritory(id)) + result.append(system()); + } + return result; } @@ -2651,7 +2940,7 @@ QList<QLocale> QLocale::matchingLocales(QLocale::Language language, QLocale::Scr QList<QLocale::Country> QLocale::countriesForLanguage(Language language) { const auto locales = matchingLocales(language, AnyScript, AnyCountry); - QList<QLocale::Country> result; + QList<Country> result; result.reserve(locales.size()); for (const auto &locale : locales) result.append(locale.territory()); @@ -2729,6 +3018,14 @@ QString QLocale::standaloneDayName(int day, 
FormatType type) const // Calendar look-up of month and day names: +// Only used in assertions +[[maybe_unused]] static bool sameLocale(const QLocaleData *locale, const QCalendarLocale &calendar) +{ + return locale->m_language_id == calendar.m_language_id + && locale->m_script_id == calendar.m_script_id + && locale->m_territory_id == calendar.m_territory_id; +} + /*! \internal */ @@ -2837,20 +3134,30 @@ QString QCalendarBackend::monthName(const QLocale &locale, int month, int, QLocale::FormatType format) const { Q_ASSERT(month >= 1 && month <= maximumMonthsInYear()); - return rawMonthName(localeMonthIndexData()[locale.d->m_index], - localeMonthData(), month, format); + const QCalendarLocale &monthly = localeMonthIndexData()[locale.d->m_index]; + Q_ASSERT(sameLocale(locale.d->m_data, monthly)); + return rawMonthName(monthly, localeMonthData(), month, format); } -QString QGregorianCalendar::monthName(const QLocale &locale, int month, int year, - QLocale::FormatType format) const +QString QRomanCalendar::monthName(const QLocale &locale, int month, int year, + QLocale::FormatType format) const { #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data == &systemLocaleData) { Q_ASSERT(month >= 1 && month <= 12); - QVariant res = systemLocale()->query(format == QLocale::LongFormat - ? 
QSystemLocale::MonthNameLong - : QSystemLocale::MonthNameShort, - month); + QSystemLocale::QueryType queryType = QSystemLocale::MonthNameLong; + switch (format) { + case QLocale::LongFormat: + queryType = QSystemLocale::MonthNameLong; + break; + case QLocale::ShortFormat: + queryType = QSystemLocale::MonthNameShort; + break; + case QLocale::NarrowFormat: + queryType = QSystemLocale::MonthNameNarrow; + break; + } + QVariant res = systemLocale()->query(queryType, month); if (!res.isNull()) return res.toString(); } @@ -2863,20 +3170,30 @@ QString QCalendarBackend::standaloneMonthName(const QLocale &locale, int month, QLocale::FormatType format) const { Q_ASSERT(month >= 1 && month <= maximumMonthsInYear()); - return rawStandaloneMonthName(localeMonthIndexData()[locale.d->m_index], - localeMonthData(), month, format); + const QCalendarLocale &monthly = localeMonthIndexData()[locale.d->m_index]; + Q_ASSERT(sameLocale(locale.d->m_data, monthly)); + return rawStandaloneMonthName(monthly, localeMonthData(), month, format); } -QString QGregorianCalendar::standaloneMonthName(const QLocale &locale, int month, int year, - QLocale::FormatType format) const +QString QRomanCalendar::standaloneMonthName(const QLocale &locale, int month, int year, + QLocale::FormatType format) const { #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data == &systemLocaleData) { Q_ASSERT(month >= 1 && month <= 12); - QVariant res = systemLocale()->query(format == QLocale::LongFormat - ? 
QSystemLocale::StandaloneMonthNameLong - : QSystemLocale::StandaloneMonthNameShort, - month); + QSystemLocale::QueryType queryType = QSystemLocale::StandaloneMonthNameLong; + switch (format) { + case QLocale::LongFormat: + queryType = QSystemLocale::StandaloneMonthNameLong; + break; + case QLocale::ShortFormat: + queryType = QSystemLocale::StandaloneMonthNameShort; + break; + case QLocale::NarrowFormat: + queryType = QSystemLocale::StandaloneMonthNameNarrow; + break; + } + QVariant res = systemLocale()->query(queryType, month); if (!res.isNull()) return res.toString(); } @@ -2895,10 +3212,19 @@ QString QCalendarBackend::weekDayName(const QLocale &locale, int day, #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data == &systemLocaleData) { - QVariant res = systemLocale()->query(format == QLocale::LongFormat - ? QSystemLocale::DayNameLong - : QSystemLocale::DayNameShort, - day); + QSystemLocale::QueryType queryType = QSystemLocale::DayNameLong; + switch (format) { + case QLocale::LongFormat: + queryType = QSystemLocale::DayNameLong; + break; + case QLocale::ShortFormat: + queryType = QSystemLocale::DayNameShort; + break; + case QLocale::NarrowFormat: + queryType = QSystemLocale::DayNameNarrow; + break; + } + QVariant res = systemLocale()->query(queryType, day); if (!res.isNull()) return res.toString(); } @@ -2915,10 +3241,19 @@ QString QCalendarBackend::standaloneWeekDayName(const QLocale &locale, int day, #ifndef QT_NO_SYSTEMLOCALE if (locale.d->m_data == &systemLocaleData) { - QVariant res = systemLocale()->query(format == QLocale::LongFormat - ? 
QSystemLocale::DayNameLong - : QSystemLocale::DayNameShort, - day); + QSystemLocale::QueryType queryType = QSystemLocale::StandaloneDayNameLong; + switch (format) { + case QLocale::LongFormat: + queryType = QSystemLocale::StandaloneDayNameLong; + break; + case QLocale::ShortFormat: + queryType = QSystemLocale::StandaloneDayNameShort; + break; + case QLocale::NarrowFormat: + queryType = QSystemLocale::StandaloneDayNameNarrow; + break; + } + QVariant res = systemLocale()->query(queryType, day); if (!res.isNull()) return res.toString(); } @@ -2948,10 +3283,10 @@ Qt::DayOfWeek QLocale::firstDayOfWeek() const QLocale::MeasurementSystem QLocalePrivate::measurementSystem() const { - for (int i = 0; i < ImperialMeasurementSystemsCount; ++i) { - if (ImperialMeasurementSystems[i].languageId == m_data->m_language_id - && ImperialMeasurementSystems[i].territoryId == m_data->m_territory_id) { - return ImperialMeasurementSystems[i].system; + for (const auto &system : ImperialMeasurementSystems) { + if (system.languageId == m_data->m_language_id + && system.territoryId == m_data->m_territory_id) { + return system.system; } } return QLocale::MetricSystem; @@ -3009,34 +3344,34 @@ QLocale::MeasurementSystem QLocale::measurementSystem() const Qt::LayoutDirection QLocale::textDirection() const { switch (script()) { - case QLocale::AdlamScript: - case QLocale::ArabicScript: - case QLocale::AvestanScript: - case QLocale::CypriotScript: - case QLocale::HatranScript: - case QLocale::HebrewScript: - case QLocale::ImperialAramaicScript: - case QLocale::InscriptionalPahlaviScript: - case QLocale::InscriptionalParthianScript: - case QLocale::KharoshthiScript: - case QLocale::LydianScript: - case QLocale::MandaeanScript: - case QLocale::ManichaeanScript: - case QLocale::MendeKikakuiScript: - case QLocale::MeroiticCursiveScript: - case QLocale::MeroiticScript: - case QLocale::NabataeanScript: - case QLocale::NkoScript: - case QLocale::OldHungarianScript: - case QLocale::OldNorthArabianScript: - 
case QLocale::OldSouthArabianScript: - case QLocale::OrkhonScript: - case QLocale::PalmyreneScript: - case QLocale::PhoenicianScript: - case QLocale::PsalterPahlaviScript: - case QLocale::SamaritanScript: - case QLocale::SyriacScript: - case QLocale::ThaanaScript: + case AdlamScript: + case ArabicScript: + case AvestanScript: + case CypriotScript: + case HatranScript: + case HebrewScript: + case ImperialAramaicScript: + case InscriptionalPahlaviScript: + case InscriptionalParthianScript: + case KharoshthiScript: + case LydianScript: + case MandaeanScript: + case ManichaeanScript: + case MendeKikakuiScript: + case MeroiticCursiveScript: + case MeroiticScript: + case NabataeanScript: + case NkoScript: + case OldHungarianScript: + case OldNorthArabianScript: + case OldSouthArabianScript: + case OrkhonScript: + case PalmyreneScript: + case PhoenicianScript: + case PsalterPahlaviScript: + case SamaritanScript: + case SyriacScript: + case ThaanaScript: return Qt::RightToLeft; default: break; @@ -3169,7 +3504,19 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & day = parts.day; } - int i = 0; + auto appendToResult = [&](int t, int repeat) { + auto data = locale.d->m_data; + if (repeat > 1) + result.append(data->longLongToString(t, -1, 10, repeat, QLocaleData::ZeroPadded)); + else + result.append(data->longLongToString(t)); + }; + + auto formatType = [](int repeat) { + return repeat == 3 ? 
QLocale::ShortFormat : QLocale::LongFormat; + }; + + qsizetype i = 0; while (i < format.size()) { if (format.at(i).unicode() == '\'') { result.append(qt_readEscapedFormatString(format, &i)); @@ -3177,7 +3524,9 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & } const QChar c = format.at(i); - int repeat = qt_repeatCount(format.mid(i)); + qsizetype rep = qt_repeatCount(format.mid(i)); + Q_ASSERT(rep < std::numeric_limits<int>::max()); + int repeat = int(rep); bool used = false; if (formatDate) { switch (c.unicode()) { @@ -3189,15 +3538,11 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & repeat = 2; switch (repeat) { - case 4: { - const int len = (year < 0) ? 5 : 4; - result.append(locale.d->m_data->longLongToString(year, -1, 10, len, - QLocaleData::ZeroPadded)); + case 4: + appendToResult(year, (year < 0) ? 5 : 4); break; - } case 2: - result.append(locale.d->m_data->longLongToString(year % 100, -1, 10, 2, - QLocaleData::ZeroPadded)); + appendToResult(year % 100, 2); break; default: repeat = 1; @@ -3209,43 +3554,20 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & case 'M': used = true; repeat = qMin(repeat, 4); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(month)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(month, -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - case 3: - result.append(monthName(locale, month, year, QLocale::ShortFormat)); - break; - case 4: - result.append(monthName(locale, month, year, QLocale::LongFormat)); - break; - } + if (repeat <= 2) + appendToResult(month, repeat); + else + result.append(monthName(locale, month, year, formatType(repeat))); break; case 'd': used = true; repeat = qMin(repeat, 4); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(day)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(day, -1, 10, 2, - 
QLocaleData::ZeroPadded)); - break; - case 3: - result.append(locale.dayName( - dayOfWeek(date.toJulianDay()), QLocale::ShortFormat)); - break; - case 4: - result.append(locale.dayName( - dayOfWeek(date.toJulianDay()), QLocale::LongFormat)); - break; - } + if (repeat <= 2) + appendToResult(day, repeat); + else + result.append( + locale.dayName(dayOfWeek(date.toJulianDay()), formatType(repeat))); break; default: @@ -3264,83 +3586,51 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & else if (hour == 0) hour = 12; } - - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(hour)); - break; - case 2: - result.append(locale.d->m_data->longLongToString(hour, -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(hour, repeat); break; } case 'H': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(time.hour())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.hour(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(time.hour(), repeat); break; case 'm': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(time.minute())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.minute(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } + appendToResult(time.minute(), repeat); break; case 's': used = true; repeat = qMin(repeat, 2); - switch (repeat) { - case 1: - result.append(locale.d->m_data->longLongToString(time.second())); - break; - case 2: - result.append(locale.d->m_data->longLongToString(time.second(), -1, 10, 2, - QLocaleData::ZeroPadded)); - break; - } - break; - - case 'a': - used = true; - repeat = format.mid(i + 1).startsWith(QLatin1Char('p')) ? 2 : 1; - result.append(time.hour() < 12 ? 
locale.amText().toLower() - : locale.pmText().toLower()); + appendToResult(time.second(), repeat); break; case 'A': + case 'a': { + QString text = time.hour() < 12 ? locale.amText() : locale.pmText(); used = true; - repeat = format.mid(i + 1).startsWith(QLatin1Char('P')) ? 2 : 1; - result.append(time.hour() < 12 ? locale.amText().toUpper() - : locale.pmText().toUpper()); + repeat = 1; + if (format.mid(i + 1).startsWith(u'p', Qt::CaseInsensitive)) + ++repeat; + if (c.unicode() == 'A' && (repeat == 1 || format.at(i + 1).unicode() == 'P')) + text = std::move(text).toUpper(); + else if (c.unicode() == 'a' && (repeat == 1 || format.at(i + 1).unicode() == 'p')) + text = std::move(text).toLower(); + // else 'Ap' or 'aP' => use CLDR text verbatim, preserving case + result.append(text); break; + } case 'z': used = true; - repeat = (repeat >= 3) ? 3 : 1; + repeat = qMin(repeat, 3); // note: the millisecond component is treated like the decimal part of the seconds // so ms == 2 is always printed as "002", but ms == 200 can be either "2" or "200" - result.append(locale.d->m_data->longLongToString(time.msec(), -1, 10, 3, - QLocaleData::ZeroPadded)); - if (repeat == 1) { + appendToResult(time.msec(), 3); + if (repeat != 3) { if (result.endsWith(locale.zeroDigit())) result.chop(1); if (result.endsWith(locale.zeroDigit())) @@ -3348,20 +3638,63 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & } break; - case 't': + case 't': { + enum AbbrType { Long, Offset, Short }; + const auto tzAbbr = [locale](const QDateTime &when, AbbrType type) { +#if QT_CONFIG(timezone) + if (type != Short || locale != QLocale::system()) { + QTimeZone::NameType mode = + type == Short ? QTimeZone::ShortName + : type == Long ? QTimeZone::LongName : QTimeZone::OffsetName; + return when.timeRepresentation().displayName(when, mode, locale); + } // else: prefer QDateTime's abbreviation, for backwards-compatibility. 
+#endif // else, make do with non-localized abbreviation: + if (type != Offset) + return when.timeZoneAbbreviation(); + // For Offset, we can coerce to a UTC-based zone's abbreviation: + return when.toOffsetFromUtc(when.offsetFromUtc()).timeZoneAbbreviation(); + }; used = true; - repeat = 1; - // If we have a QDateTime use the time spec otherwise use the current system tzname - result.append(formatDate ? datetime.timeZoneAbbreviation() - : QDateTime::currentDateTime().timeZoneAbbreviation()); + repeat = qMin(repeat, 4); + // If we don't have a date-time, use the current system time: + const QDateTime when = formatDate ? datetime : QDateTime::currentDateTime(); + QString text; + switch (repeat) { + case 4: + text = tzAbbr(when, Long); + break; + case 3: // ±hh:mm + case 2: // ±hhmm (we'll remove the ':' at the end) + text = tzAbbr(when, Offset); + Q_ASSERT(text.startsWith("UTC"_L1)); // Need to strip this. + // The Qt::UTC case omits the zero offset: + text = (text.size() == 3 + ? u"+00:00"_s + : (text.size() <= 6 + // Whole-hour offsets may lack the zero minutes: + ? QStringView{text}.sliced(3) + ":00"_L1 + : std::move(text).sliced(3))); + if (repeat == 2) + text = text.remove(u':'); + break; + default: + text = tzAbbr(when, Short); + // UTC-offset zones only include minutes if non-zero. + if (text.startsWith("UTC"_L1) && text.size() == 6) + text += ":00"_L1; + break; + } + if (!text.isEmpty()) + result.append(text); break; + } default: break; } } if (!used) - result.append(QString(repeat, c)); + result.resize(result.size() + repeat, c); i += repeat; } @@ -3373,21 +3706,24 @@ QString QCalendarBackend::dateTimeToString(QStringView format, const QDateTime & QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, int width, unsigned flags) const { - // Undocumented: aside from F.P.Shortest, precision < 0 is treated as - // default, 6 - same as printf(). 
+ // Although the special handling of F.P.Shortest below is limited to + // DFSignificantDigits, the double-conversion library does treat it + // specially for the other forms, shedding trailing zeros for DFDecimal and + // using the shortest mantissa that faithfully represents the value for + // DFExponent. if (precision != QLocale::FloatingPointShortest && precision < 0) precision = 6; if (width < 0) width = 0; int decpt; - int bufSize = 1; + qsizetype bufSize = 1; if (precision == QLocale::FloatingPointShortest) bufSize += std::numeric_limits<double>::max_digits10; - else if (form == DFDecimal && qIsFinite(d)) + else if (form == DFDecimal && qt_is_finite(d)) bufSize += wholePartSpace(qAbs(d)) + precision; - else // Add extra digit due to different interpretations of precision. Also, "nan" has to fit. - bufSize += qMax(2, precision) + 1; + else // Add extra digit due to different interpretations of precision. + bufSize += qMax(2, precision) + 1; // Must also be big enough for "nan" or "inf" QVarLengthArray<char> buf(bufSize); int length; @@ -3397,7 +3733,8 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, const QString prefix = signPrefix(negative && !isZero(d), flags); QString numStr; - if (qstrncmp(buf.data(), "inf", 3) == 0 || qstrncmp(buf.data(), "nan", 3) == 0) { + if (length == 3 + && (qstrncmp(buf.data(), "inf", 3) == 0 || qstrncmp(buf.data(), "nan", 3) == 0)) { numStr = QString::fromLatin1(buf.data(), length); } else { // Handle finite values const QString zero = zeroDigit(); @@ -3405,12 +3742,14 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, if (zero == u"0") { // No need to convert digits. + Q_ASSERT(std::all_of(buf.cbegin(), buf.cbegin() + length, isAsciiDigit)); + // That check is taken care of in unicodeForDigit(), below. 
} else if (zero.size() == 2 && zero.at(0).isHighSurrogate()) { const char32_t zeroUcs4 = QChar::surrogateToUcs4(zero.at(0), zero.at(1)); QString converted; converted.reserve(2 * digits.size()); - for (int i = 0; i < digits.length(); ++i) { - const char32_t digit = unicodeForDigit(digits.at(i).unicode() - '0', zeroUcs4); + for (QChar ch : std::as_const(digits)) { + const char32_t digit = unicodeForDigit(ch.unicode() - '0', zeroUcs4); Q_ASSERT(QChar::requiresSurrogates(digit)); converted.append(QChar::highSurrogate(digit)); converted.append(QChar::lowSurrogate(digit)); @@ -3421,7 +3760,7 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, Q_ASSERT(!zero.at(0).isSurrogate()); char16_t z = zero.at(0).unicode(); char16_t *const value = reinterpret_cast<char16_t *>(digits.data()); - for (int i = 0; i < digits.length(); ++i) + for (qsizetype i = 0; i < digits.size(); ++i) value[i] = unicodeForDigit(value[i] - '0', z); } @@ -3429,81 +3768,78 @@ QString QLocaleData::doubleToString(double d, int precision, DoubleForm form, const bool groupDigits = flags & GroupDigits; const int minExponentDigits = flags & ZeroPadExponent ? 2 : 1; switch (form) { - case DFExponent: - numStr = exponentForm(std::move(digits), decpt, precision, PMDecimalDigits, - mustMarkDecimal, minExponentDigits); - break; - case DFDecimal: - numStr = decimalForm(std::move(digits), decpt, precision, PMDecimalDigits, - mustMarkDecimal, groupDigits); - break; - case DFSignificantDigits: { - PrecisionMode mode = (flags & AddTrailingZeroes) ? 
- PMSignificantDigits : PMChopTrailingZeros; - - /* POSIX specifies sprintf() to follow fprintf(), whose 'g/G' - format says; with P = 6 if precision unspecified else 1 if - precision is 0 else precision; when 'e/E' would have exponent - X, use: - * 'f/F' if P > X >= -4, with precision P-1-X - * 'e/E' otherwise, with precision P-1 - Helpfully, we already have mapped precision < 0 to 6 - except - for F.P.Shortest mode, which is its own story - and those of - our callers with unspecified precision either used 6 or -1 - for it. - */ - bool useDecimal; - if (precision == QLocale::FloatingPointShortest) { - // Find out which representation is shorter. - // Set bias to everything added to exponent form but not - // decimal, minus the converse. - - // Exponent adds separator, sign and digits: - int bias = 2 + minExponentDigits; - // Decimal form may get grouping separators inserted: - if (groupDigits && decpt >= m_grouping_top + m_grouping_least) - bias -= (decpt - m_grouping_top - m_grouping_least) / m_grouping_higher + 1; - // X = decpt - 1 needs two digits if decpt > 10: - if (decpt > 10 && minExponentDigits == 1) - ++bias; - // Assume digitCount < 95, so we can ignore the 3-digit - // exponent case (we'll set useDecimal false anyway). - - const int digitCount = digits.length() / zero.size(); - if (!mustMarkDecimal) { - // Decimal separator is skipped if at end; adjust if - // that happens for only one form: - if (digitCount <= decpt && digitCount > 1) - ++bias; // decimal but not exponent - else if (digitCount == 1 && decpt <= 0) - --bias; // exponent but not decimal - } - // When 0 < decpt <= digitCount, the forms have equal digit - // counts, plus things bias has taken into account; - // otherwise decimal form's digit count is right-padded with - // zeros to decpt, when decpt is positive, otherwise it's - // left-padded with 1 - decpt zeros. - useDecimal = (decpt <= 0 ? 1 - decpt <= bias - : decpt <= digitCount ? 
0 <= bias - : decpt <= digitCount + bias); - } else { - // X == decpt - 1, POSIX's P; -4 <= X < P iff -4 < decpt <= P - Q_ASSERT(precision >= 0); - useDecimal = decpt > -4 && decpt <= (precision ? precision : 1); + case DFExponent: + numStr = exponentForm(std::move(digits), decpt, precision, PMDecimalDigits, + mustMarkDecimal, minExponentDigits); + break; + case DFDecimal: + numStr = decimalForm(std::move(digits), decpt, precision, PMDecimalDigits, + mustMarkDecimal, groupDigits); + break; + case DFSignificantDigits: { + PrecisionMode mode + = (flags & AddTrailingZeroes) ? PMSignificantDigits : PMChopTrailingZeros; + + /* POSIX specifies sprintf() to follow fprintf(), whose 'g/G' format + says; with P = 6 if precision unspecified else 1 if precision is + 0 else precision; when 'e/E' would have exponent X, use: + * 'f/F' if P > X >= -4, with precision P-1-X + * 'e/E' otherwise, with precision P-1 + Helpfully, we already have mapped precision < 0 to 6 - except for + F.P.Shortest mode, which is its own story - and those of our + callers with unspecified precision either used 6 or -1 for it. + */ + bool useDecimal; + if (precision == QLocale::FloatingPointShortest) { + // Find out which representation is shorter. + // Set bias to everything added to exponent form but not + // decimal, minus the converse. + + // Exponent adds separator, sign and digits: + int bias = 2 + minExponentDigits; + // Decimal form may get grouping separators inserted: + if (groupDigits && decpt >= m_grouping_top + m_grouping_least) + bias -= (decpt - m_grouping_least) / m_grouping_higher + 1; + // X = decpt - 1 needs two digits if decpt > 10: + if (decpt > 10 && minExponentDigits == 1) + ++bias; + // Assume digitCount < 95, so we can ignore the 3-digit + // exponent case (we'll set useDecimal false anyway). 
+ + const qsizetype digitCount = digits.size() / zero.size(); + if (!mustMarkDecimal) { + // Decimal separator is skipped if at end; adjust if + // that happens for only one form: + if (digitCount <= decpt && digitCount > 1) + ++bias; // decimal but not exponent + else if (digitCount == 1 && decpt <= 0) + --bias; // exponent but not decimal } - - numStr = useDecimal - ? decimalForm(std::move(digits), decpt, precision, mode, - mustMarkDecimal, groupDigits) - : exponentForm(std::move(digits), decpt, precision, mode, - mustMarkDecimal, minExponentDigits); - break; + // When 0 < decpt <= digitCount, the forms have equal digit + // counts, plus things bias has taken into account; otherwise + // decimal form's digit count is right-padded with zeros to + // decpt, when decpt is positive, otherwise it's left-padded + // with 1 - decpt zeros. + useDecimal = (decpt <= 0 ? 1 - decpt <= bias + : decpt <= digitCount ? 0 <= bias : decpt <= digitCount + bias); + } else { + // X == decpt - 1, POSIX's P; -4 <= X < P iff -4 < decpt <= P + Q_ASSERT(precision >= 0); + useDecimal = decpt > -4 && decpt <= (precision ? precision : 1); } + + numStr = useDecimal + ? decimalForm(std::move(digits), decpt, precision, mode, + mustMarkDecimal, groupDigits) + : exponentForm(std::move(digits), decpt, precision, mode, + mustMarkDecimal, minExponentDigits); + break; + } } // Pad with zeros. LeftAdjusted overrides ZeroPadded. 
if (flags & ZeroPadded && !(flags & LeftAdjusted)) { - for (int i = numStr.length() / zero.length() + prefix.size(); i < width; ++i) + for (qsizetype i = numStr.size() / zero.size() + prefix.size(); i < width; ++i) numStr.prepend(zero); } } @@ -3526,33 +3862,33 @@ QString QLocaleData::decimalForm(QString &&digits, int decpt, int precision, for (; decpt < 0; ++decpt) digits.prepend(zero); } else { - for (int i = digits.length() / digitWidth; i < decpt; ++i) + for (qsizetype i = digits.size() / digitWidth; i < decpt; ++i) digits.append(zero); } switch (pm) { case PMDecimalDigits: - for (int i = digits.length() / digitWidth - decpt; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth - decpt; i < precision; ++i) digits.append(zero); break; case PMSignificantDigits: - for (int i = digits.length() / digitWidth; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision; ++i) digits.append(zero); break; case PMChopTrailingZeros: - Q_ASSERT(digits.length() / digitWidth <= qMax(decpt, 1) || !digits.endsWith(zero)); + Q_ASSERT(digits.size() / digitWidth <= qMax(decpt, 1) || !digits.endsWith(zero)); break; } - if (mustMarkDecimal || decpt < digits.length() / digitWidth) + if (mustMarkDecimal || decpt < digits.size() / digitWidth) digits.insert(decpt * digitWidth, decimalPoint()); if (groupDigits) { const QString group = groupSeparator(); - int i = decpt - m_grouping_least; + qsizetype i = decpt - m_grouping_least; if (i >= m_grouping_top) { digits.insert(i * digitWidth, group); - while ((i -= m_grouping_higher) >= m_grouping_top) + while ((i -= m_grouping_higher) > 0) digits.insert(i * digitWidth, group); } } @@ -3574,19 +3910,19 @@ QString QLocaleData::exponentForm(QString &&digits, int decpt, int precision, switch (pm) { case PMDecimalDigits: - for (int i = digits.length() / digitWidth; i < precision + 1; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision + 1; ++i) digits.append(zero); break; case 
PMSignificantDigits: - for (int i = digits.length() / digitWidth; i < precision; ++i) + for (qsizetype i = digits.size() / digitWidth; i < precision; ++i) digits.append(zero); break; case PMChopTrailingZeros: - Q_ASSERT(digits.length() / digitWidth <= 1 || !digits.endsWith(zero)); + Q_ASSERT(digits.size() / digitWidth <= 1 || !digits.endsWith(zero)); break; } - if (mustMarkDecimal || digits.length() > digitWidth) + if (mustMarkDecimal || digits.size() > digitWidth) digits.insert(digitWidth, decimalPoint()); digits.append(exponentSeparator()); @@ -3606,20 +3942,17 @@ QString QLocaleData::signPrefix(bool negative, unsigned flags) const return {}; } -QString QLocaleData::longLongToString(qlonglong l, int precision, +QString QLocaleData::longLongToString(qlonglong n, int precision, int base, int width, unsigned flags) const { - bool negative = l < 0; + bool negative = n < 0; -QT_WARNING_PUSH - /* "unary minus operator applied to unsigned type, result still unsigned" */ -QT_WARNING_DISABLE_MSVC(4146) /* Negating std::numeric_limits<qlonglong>::min() hits undefined behavior, so - taking an absolute value has to cast to unsigned to change sign. + taking an absolute value has to take a slight detour. */ - QString numStr = qulltoa(negative ? -qulonglong(l) : qulonglong(l), base, zeroDigit()); -QT_WARNING_POP + QString numStr = qulltoa(negative ? 1u + qulonglong(-(n + 1)) : qulonglong(n), + base, zeroDigit()); return applyIntegerFormatting(std::move(numStr), negative, precision, base, width, flags); } @@ -3638,7 +3971,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int { const QString zero = base == 10 ? 
zeroDigit() : QStringLiteral("0"); const auto digitWidth = zero.size(); - const auto digitCount = numStr.length() / digitWidth; + const auto digitCount = numStr.size() / digitWidth; const auto basePrefix = [&] () -> QStringView { if (flags & ShowBase) { @@ -3655,15 +3988,15 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int const QString prefix = signPrefix(negative, flags) + basePrefix; // Count how much of width we've used up. Each digit counts as one - int usedWidth = digitCount + prefix.size(); + qsizetype usedWidth = digitCount + prefix.size(); if (base == 10 && flags & GroupDigits) { const QString group = groupSeparator(); - int i = digitCount - m_grouping_least; + qsizetype i = digitCount - m_grouping_least; if (i >= m_grouping_top) { numStr.insert(i * digitWidth, group); ++usedWidth; - while ((i -= m_grouping_higher) >= m_grouping_top) { + while ((i -= m_grouping_higher) > 0) { numStr.insert(i * digitWidth, group); ++usedWidth; } @@ -3675,7 +4008,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int if (noPrecision) precision = 1; - for (int i = numStr.length(); i < precision; ++i) { + for (qsizetype i = numStr.size(); i < precision; ++i) { numStr.prepend(zero); usedWidth++; } @@ -3683,7 +4016,7 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int // LeftAdjusted overrides ZeroPadded; and sprintf() only pads when // precision is not specified in the format string. 
if (noPrecision && flags & ZeroPadded && !(flags & LeftAdjusted)) { - for (int i = usedWidth; i < width; ++i) + for (qsizetype i = usedWidth; i < width; ++i) numStr.prepend(zero); } @@ -3693,50 +4026,259 @@ QString QLocaleData::applyIntegerFormatting(QString &&numStr, bool negative, int return result; } +inline QLocaleData::NumericData QLocaleData::numericData(QLocaleData::NumberMode mode) const +{ + NumericData result; + if (this == c()) { + result.isC = true; + return result; + } + result.setZero(zero().viewData(single_character_data)); + result.group = groupDelim().viewData(single_character_data); + // Note: minus, plus and exponent might not actually be single characters. + result.minus = minus().viewData(single_character_data); + result.plus = plus().viewData(single_character_data); + if (mode != IntegerMode) + result.decimal = decimalSeparator().viewData(single_character_data); + if (mode == DoubleScientificMode) { + result.exponent = exponential().viewData(single_character_data); + // exponentCyrillic means "apply the Cyrillic-specific exponent hack" + result.exponentCyrillic = m_script_id == QLocale::CyrillicScript; + } +#ifndef QT_NO_SYSTEMLOCALE + if (this == &systemLocaleData) { + const auto getString = [sys = systemLocale()](QSystemLocale::QueryType query) { + return sys->query(query).toString(); + }; + if (mode != IntegerMode) { + result.sysDecimal = getString(QSystemLocale::DecimalPoint); + if (result.sysDecimal.size()) + result.decimal = QStringView{result.sysDecimal}; + } + result.sysGroup = getString(QSystemLocale::GroupSeparator); + if (result.sysGroup.size()) + result.group = QStringView{result.sysGroup}; + result.sysMinus = getString(QSystemLocale::NegativeSign); + if (result.sysMinus.size()) + result.minus = QStringView{result.sysMinus}; + result.sysPlus = getString(QSystemLocale::PositiveSign); + if (result.sysPlus.size()) + result.plus = QStringView{result.sysPlus}; + result.setZero(getString(QSystemLocale::ZeroDigit)); + } +#endif + + return 
result; +} + +namespace { +// A bit like QStringIterator but rather specialized ... and some of the tokens +// it recognizes aren't single Unicode code-points (but it does map each to a +// single character). +class NumericTokenizer +{ + // TODO: use deterministic finite-state-automata. + // TODO QTBUG-95460: CLDR has Inf/NaN representations per locale. + static constexpr char lettersInfNaN[] = "afin"; // Letters of Inf, NaN + static constexpr auto matchInfNaN = QtPrivate::makeCharacterSetMatch<lettersInfNaN>(); + const QStringView m_text; + const QLocaleData::NumericData m_guide; + qsizetype m_index = 0; + const QLocaleData::NumberMode m_mode; + static_assert('+' + 1 == ',' && ',' + 1 == '-' && '-' + 1 == '.'); + char lastMark; // C locale accepts '+' through lastMark. +public: + NumericTokenizer(QStringView text, QLocaleData::NumericData &&guide, + QLocaleData::NumberMode mode) + : m_text(text), m_guide(guide), m_mode(mode), + lastMark(mode == QLocaleData::IntegerMode ? '-' : '.') + { + Q_ASSERT(m_guide.isValid(mode)); + } + bool done() const { return !(m_index < m_text.size()); } + qsizetype index() const { return m_index; } + inline int asBmpDigit(char16_t digit) const; + char nextToken(); +}; + +int NumericTokenizer::asBmpDigit(char16_t digit) const +{ + // If digit *is* a digit, result will be in range 0 through 9; otherwise not. + // Must match qlocale_tools.h's unicodeForDigit() + if (m_guide.zeroUcs != u'\u3007' || digit == m_guide.zeroUcs) + return digit - m_guide.zeroUcs; + + // QTBUG-85409: Suzhou's digits aren't contiguous ! + if (digit == u'\u3020') // U+3020 POSTAL MARK FACE is not a digit. + return -1; + // ... but is followed by digits 1 through 9. + return digit - u'\u3020'; +} + +char NumericTokenizer::nextToken() +{ + // As long as caller stops iterating on a zero return, those don't need to + // keep m_index correctly updated. 
+ Q_ASSERT(!done()); + // Mauls non-letters above 'Z' but we don't care: + const auto asciiLower = [](unsigned char c) { return c >= 'A' ? c | 0x20 : c; }; + const QStringView tail = m_text.sliced(m_index); + const QChar ch = tail.front(); + if (ch == u'\u2212') { + // Special case: match the "proper" minus sign, for all locales. + ++m_index; + return '-'; + } + if (m_guide.isC) { + // "Conversion" to C locale is just a filter: + ++m_index; + if (Q_LIKELY(ch.unicode() < 256)) { + unsigned char ascii = asciiLower(ch.toLatin1()); + if (Q_LIKELY(isAsciiDigit(ascii) || ('+' <= ascii && ascii <= lastMark) + // No caller presently (6.5) passes DoubleStandardMode, + // so !IntegerMode implies scientific, for now. + || (m_mode != QLocaleData::IntegerMode + && matchInfNaN.matches(ascii)) + || (m_mode == QLocaleData::DoubleScientificMode + && ascii == 'e'))) { + return ascii; + } + } + return 0; + } + if (ch.unicode() < 256) { + // Accept the C locale's digits and signs in all locales: + char ascii = asciiLower(ch.toLatin1()); + if (isAsciiDigit(ascii) || ascii == '-' || ascii == '+' + // Also its Inf and NaN letters: + || (m_mode != QLocaleData::IntegerMode && matchInfNaN.matches(ascii))) { + ++m_index; + return ascii; + } + } + + // Other locales may be trickier: + if (tail.startsWith(m_guide.minus)) { + m_index += m_guide.minus.size(); + return '-'; + } + if (tail.startsWith(m_guide.plus)) { + m_index += m_guide.plus.size(); + return '+'; + } + if (!m_guide.group.isEmpty() && tail.startsWith(m_guide.group)) { + m_index += m_guide.group.size(); + return ','; + } + if (m_mode != QLocaleData::IntegerMode && tail.startsWith(m_guide.decimal)) { + m_index += m_guide.decimal.size(); + return '.'; + } + if (m_mode == QLocaleData::DoubleScientificMode + && tail.startsWith(m_guide.exponent, Qt::CaseInsensitive)) { + m_index += m_guide.exponent.size(); + return 'e'; + } + + // Must match qlocale_tools.h's unicodeForDigit() + if (m_guide.zeroLen == 1) { + if (!ch.isSurrogate()) { + 
const uint gap = asBmpDigit(ch.unicode()); + if (gap < 10u) { + ++m_index; + return '0' + gap; + } + } else if (ch.isHighSurrogate() && tail.size() > 1 && tail.at(1).isLowSurrogate()) { + return 0; + } + } else if (ch.isHighSurrogate()) { + // None of the corner cases below matches a surrogate, so (update + // already and) return early if we don't have a digit. + if (tail.size() > 1) { + QChar low = tail.at(1); + if (low.isLowSurrogate()) { + m_index += 2; + const uint gap = QChar::surrogateToUcs4(ch, low) - m_guide.zeroUcs; + return gap < 10u ? '0' + gap : 0; + } + } + return 0; + } + + // All cases where tail starts with properly-matched surrogate pair + // have been handled by this point. + Q_ASSERT(!(ch.isHighSurrogate() && tail.size() > 1 && tail.at(1).isLowSurrogate())); + + // Weird corner cases follow (code above assumes these match no surrogates). + + // Some locales use a non-breaking space (U+00A0) or its thin version + // (U+202f) for grouping. These look like spaces, so people (and thus some + // of our tests) use a regular space instead and complain if it doesn't + // work. + // Should this be extended generally to any case where group is a space ? + if ((m_guide.group == u"\u00a0" || m_guide.group == u"\u202f") && tail.startsWith(u' ')) { + ++m_index; + return ','; + } + + // Cyrillic has its own E, used by Ukrainian as exponent; but others + // writing Cyrillic may well use that; and Ukrainians might well use E. + // All other Cyrillic locales (officially) use plain ASCII E. + if (m_guide.exponentCyrillic // Only true in scientific float mode. + && (tail.startsWith(u"\u0415", Qt::CaseInsensitive) + || tail.startsWith(u"E", Qt::CaseInsensitive))) { + ++m_index; + return 'e'; + } + + return 0; +} +} // namespace with no name + /* - Converts a number in locale to its representation in the C locale. - Only has to guarantee that a string that is a correct representation of - a number will be converted. 
If junk is passed in, junk will be passed - out and the error will be detected during the actual conversion to a - number. We can't detect junk here, since we don't even know the base - of the number. + Converts a number in locale representation to the C locale equivalent. + + Only has to guarantee that a string that is a correct representation of a + number will be converted. Checks signs, separators and digits appear in all + the places they should, and nowhere else. + + Returns true precisely if the number appears to be well-formed, modulo + things a parser for C Locale strings (without digit-grouping separators; + they're stripped) will catch. When it returns true, it records (and + '\0'-terminates) the C locale representation in *result. + + Note: only QString integer-parsing methods have a base parameter (hence need + to cope with letters as possible digits); but these are now all routed via + byteArrayToU?LongLong(), so no longer come via here. The QLocale + number-parsers only work in decimal, so don't have to cope with any digits + other than 0 through 9. 
*/ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_options, - CharBuff *result) const + NumberMode mode, CharBuff *result) const { s = s.trimmed(); if (s.size() < 1) return false; + NumericTokenizer tokens(s, numericData(mode), mode); + + // Digit-grouping details (all modes): + qsizetype digitsInGroup = 0; + qsizetype last_separator_idx = -1; + qsizetype start_of_digits_idx = -1; + + // Floating-point details (non-integer modes): + qsizetype decpt_idx = -1; + qsizetype exponent_idx = -1; + + char last = '\0'; + while (!tokens.done()) { + qsizetype idx = tokens.index(); // before nextToken() advances + char out = tokens.nextToken(); + if (out == 0) + return false; + Q_ASSERT(tokens.index() > idx); // it always *should* advance (except on zero return) - const QChar *uc = s.data(); - auto length = s.size(); - decltype(length) idx = 0; - - int digitsInGroup = 0; - int group_cnt = 0; // counts number of group chars - int decpt_idx = -1; - int last_separator_idx = -1; - int start_of_digits_idx = -1; - int exponent_idx = -1; - - while (idx < length) { - const QStringView in = QStringView(uc + idx, uc[idx].isHighSurrogate() ? 2 : 1); - - char out = numericToCLocale(in); - if (out == 0) { - const QChar simple = in.size() == 1 ? 
in.front() : QChar::Null; - if (in == listSeparator()) - out = ';'; - else if (in == percentSign()) - out = '%'; - // for handling base-x numbers - else if (simple.toLatin1() >= 'A' && simple.toLatin1() <= 'Z') - out = simple.toLower().toLatin1(); - else if (simple.toLatin1() >= 'a' && simple.toLatin1() <= 'z') - out = simple.toLatin1(); - else - break; - } else if (out == '.') { + if (out == '.') { // Fail if more than one decimal point or point after e if (decpt_idx != -1 || exponent_idx != -1) return false; @@ -3745,26 +4287,26 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o exponent_idx = idx; } - if (number_options & QLocale::RejectLeadingZeroInExponent) { - if (exponent_idx != -1 && out == '0' && idx < length - 1) { - // After the exponent there can only be '+', '-' or digits. - // If we find a '0' directly after some non-digit, then that is a leading zero. - if (result->last() < '0' || result->last() > '9') - return false; - } + if (number_options.testFlag(QLocale::RejectLeadingZeroInExponent) + && exponent_idx != -1 && out == '0') { + // After the exponent there can only be '+', '-' or digits. + // If we find a '0' directly after some non-digit, then that is a + // leading zero, acceptable only if it is the whole exponent. + if (!tokens.done() && !isAsciiDigit(last)) + return false; } - if (number_options & QLocale::RejectTrailingZeroesAfterDot) { - // If we've seen a decimal point and the last character after the exponent is 0, then - // that is a trailing zero. 
- if (decpt_idx >= 0 && idx == exponent_idx && result->last() == '0') - return false; + if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot) && decpt_idx >= 0) { + // In a fractional part, a 0 just before the exponent is trailing: + if (idx == exponent_idx && last == '0') + return false; } - if (!(number_options & QLocale::RejectGroupSeparator)) { - if (start_of_digits_idx == -1 && out >= '0' && out <= '9') { - start_of_digits_idx = idx; - digitsInGroup++; + if (!number_options.testFlag(QLocale::RejectGroupSeparator)) { + if (isAsciiDigit(out)) { + if (start_of_digits_idx == -1) + start_of_digits_idx = idx; + ++digitsInGroup; } else if (out == ',') { // Don't allow group chars after the decimal point or exponent if (decpt_idx != -1 || exponent_idx != -1) @@ -3773,7 +4315,7 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o if (last_separator_idx == -1) { // Check distance from the beginning of the digits: if (start_of_digits_idx == -1 || m_grouping_top > digitsInGroup - || digitsInGroup >= m_grouping_higher + m_grouping_top) { + || digitsInGroup >= m_grouping_least + m_grouping_top) { return false; } } else { @@ -3783,262 +4325,208 @@ bool QLocaleData::numberToCLocale(QStringView s, QLocale::NumberOptions number_o } last_separator_idx = idx; - ++group_cnt; digitsInGroup = 0; - - // don't add the group separator - idx += in.size(); - continue; - } else if (out == '.' || idx == exponent_idx) { - // Were there enough digits since the last separator? - if (last_separator_idx != -1 && digitsInGroup != m_grouping_least) + } else if (mode != IntegerMode && (out == '.' || idx == exponent_idx) + && last_separator_idx != -1) { + // Were there enough digits since the last group separator? + if (digitsInGroup != m_grouping_least) return false; - // If we saw no separator, should we fail if - // digitsInGroup > m_grouping_top + m_grouping_least ? 
// stop processing separators last_separator_idx = -1; - } else if (out >= '0' && out <= '9') { - digitsInGroup++; } + } else if (out == ',') { + return false; } - result->append(out); - idx += in.size(); + last = out; + if (out != ',') // Leave group separators out of the result. + result->append(out); } - if (!(number_options & QLocale::RejectGroupSeparator)) { - // group separator post-processing - // did we end in a separator? - if (last_separator_idx + 1 == idx) - return false; - // Were there enough digits since the last separator? - if (last_separator_idx != -1 && digitsInGroup != m_grouping_least) + if (!number_options.testFlag(QLocale::RejectGroupSeparator) && last_separator_idx != -1) { + // Were there enough digits since the last group separator? + if (digitsInGroup != m_grouping_least) return false; - // If we saw no separator, and no decimal point, should we fail if - // digitsInGroup > m_grouping_top + m_grouping_least ? } - if (number_options & QLocale::RejectTrailingZeroesAfterDot) { - // In decimal form, the last character can be a trailing zero if we've seen a decpt. 
- if (decpt_idx != -1 && exponent_idx == -1 && result->last() == '0') + if (number_options.testFlag(QLocale::RejectTrailingZeroesAfterDot) + && decpt_idx != -1 && exponent_idx == -1) { + // In the fractional part, a final zero is trailing: + if (last == '0') return false; } result->append('\0'); - return idx == length; + return true; } -bool QLocaleData::validateChars(QStringView str, NumberMode numMode, QByteArray *buff, - int decDigits, QLocale::NumberOptions number_options) const +ParsingResult +QLocaleData::validateChars(QStringView str, NumberMode numMode, int decDigits, + QLocale::NumberOptions number_options) const { - buff->clear(); - buff->reserve(str.length()); + ParsingResult result; + result.buff.reserve(str.size()); + enum { Whole, Fractional, Exponent } state = Whole; const bool scientific = numMode == DoubleScientificMode; - bool lastWasE = false; - bool lastWasDigit = false; - int eCnt = 0; - int decPointCnt = 0; - bool dec = false; - int decDigitCnt = 0; - - for (qsizetype i = 0; i < str.size();) { - const QStringView in = str.mid(i, str.at(i).isHighSurrogate() ? 2 : 1); - char c = numericToCLocale(in); - - if (c >= '0' && c <= '9') { - if (numMode != IntegerMode) { - // If a double has too many digits after decpt, it shall be Invalid. - if (dec && decDigits != -1 && decDigits < ++decDigitCnt) - return false; - } + NumericTokenizer tokens(str, numericData(numMode), numMode); + char last = '\0'; - // The only non-digit character after the 'e' can be '+' or '-'. - // If a zero is directly after that, then the exponent is zero-padded. - if ((number_options & QLocale::RejectLeadingZeroInExponent) - && c == '0' && eCnt > 0 && !lastWasDigit) { - return false; + while (!tokens.done()) { + char c = tokens.nextToken(); + + if (isAsciiDigit(c)) { + switch (state) { + case Whole: + // Nothing special to do (unless we want to check grouping sizes). + break; + case Fractional: + // If a double has too many digits in its fractional part it is Invalid. 
+ if (decDigits-- == 0) + return {}; + break; + case Exponent: + if (!isAsciiDigit(last)) { + // This is the first digit in the exponent (there may have beena '+' + // or '-' in before). If it's a zero, the exponent is zero-padded. + if (c == '0' && (number_options & QLocale::RejectLeadingZeroInExponent)) + return {}; + } + break; } - lastWasDigit = true; } else { switch (c) { - case '.': - if (numMode == IntegerMode) { - // If an integer has a decimal point, it shall be Invalid. - return false; - } else { - // If a double has more than one decimal point, it shall be Invalid. - if (++decPointCnt > 1) - return false; -#if 0 - // If a double with no decimal digits has a decimal point, it shall be - // Invalid. - if (decDigits == 0) - return false; -#endif // On second thoughts, it shall be Valid. - - dec = true; - } - break; + case '.': + // If an integer has a decimal point, it is Invalid. + // A double can only have one, at the end of its whole-number part. + if (numMode == IntegerMode || state != Whole) + return {}; + // Even when decDigits is 0, we do allow the decimal point to be + // present - just as long as no digits follow it. + + state = Fractional; + break; - case '+': - case '-': - if (scientific) { - // If a scientific has a sign that's not at the beginning or after - // an 'e', it shall be Invalid. - if (i != 0 && !lastWasE) - return false; - } else { - // If a non-scientific has a sign that's not at the beginning, - // it shall be Invalid. 
- if (i != 0) - return false; - } - break; + case '+': + case '-': + // A sign can only appear at the start or after the e of scientific: + if (last != '\0' && !(scientific && last == 'e')) + return {}; + break; - case ',': - //it can only be placed after a digit which is before the decimal point - if ((number_options & QLocale::RejectGroupSeparator) || !lastWasDigit || - decPointCnt > 0) - return false; - break; + case ',': + // Grouping is only allowed after a digit in the whole-number portion: + if ((number_options & QLocale::RejectGroupSeparator) || state != Whole + || !isAsciiDigit(last)) { + return {}; + } + // We could check grouping sizes are correct, but fixup()s are + // probably better off correcting any misplacement instead. + break; - case 'e': - if (scientific) { - // If a scientific has more than one 'e', it shall be Invalid. - if (++eCnt > 1) - return false; - dec = false; - } else { - // If a non-scientific has an 'e', it shall be Invalid. - return false; - } - break; + case 'e': + // Only one e is allowed and only in scientific: + if (!scientific || state == Exponent) + return {}; + state = Exponent; + break; - default: - // If it's not a valid digit, it shall be Invalid. - return false; + default: + // Nothing else can validly appear in a number. + // NumericTokenizer allows letters of "inf" and "nan", but + // validators don't accept those values. + // For anything else, tokens.nextToken() must have returned 0. + Q_ASSERT(!c || c == 'a' || c == 'f' || c == 'i' || c == 'n'); + return {}; } - lastWasDigit = false; } - lastWasE = c == 'e'; - if (c != ',') - buff->append(c); - - i += in.size(); + last = c; + if (c != ',') // Skip grouping + result.buff.append(c); } - return true; + result.state = ParsingResult::Acceptable; + + // Intermediate if it ends with any character that requires a digit after + // it to be valid e.g. 
group separator, sign, or exponent + if (last == ',' || last == '-' || last == '+' || last == 'e') + result.state = ParsingResult::Intermediate; + + return result; } double QLocaleData::stringToDouble(QStringView str, bool *ok, QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { + if (!numberToCLocale(str, number_options, DoubleScientificMode, &buff)) { if (ok != nullptr) *ok = false; return 0.0; } - int processed = 0; - bool nonNullOk = false; - double d = qt_asciiToDouble(buff.constData(), buff.length() - 1, nonNullOk, processed); + auto r = qt_asciiToDouble(buff.constData(), buff.size() - 1); if (ok != nullptr) - *ok = nonNullOk; - return d; + *ok = r.ok(); + return r.result; } -qlonglong QLocaleData::stringToLongLong(QStringView str, int base, bool *ok, - QLocale::NumberOptions number_options) const +QSimpleParsedNumber<qint64> +QLocaleData::stringToLongLong(QStringView str, int base, + QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (!numberToCLocale(str, number_options, IntegerMode, &buff)) + return {}; - return bytearrayToLongLong(buff.constData(), base, ok); + return bytearrayToLongLong(QByteArrayView(buff), base); } -qulonglong QLocaleData::stringToUnsLongLong(QStringView str, int base, bool *ok, - QLocale::NumberOptions number_options) const +QSimpleParsedNumber<quint64> +QLocaleData::stringToUnsLongLong(QStringView str, int base, + QLocale::NumberOptions number_options) const { CharBuff buff; - if (!numberToCLocale(str, number_options, &buff)) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (!numberToCLocale(str, number_options, IntegerMode, &buff)) + return {}; - return bytearrayToUnsLongLong(buff.constData(), base, ok); + return bytearrayToUnsLongLong(QByteArrayView(buff), base); } -qlonglong QLocaleData::bytearrayToLongLong(const char *num, int base, bool 
*ok) +static bool checkParsed(QByteArrayView num, qsizetype used) { - bool _ok; - const char *endptr; - - if (*num == '\0') { - if (ok != nullptr) - *ok = false; - return 0; - } - - qlonglong l = qstrtoll(num, &endptr, base, &_ok); - - if (!_ok) { - if (ok != nullptr) - *ok = false; - return 0; - } + if (used <= 0) + return false; - if (*endptr != '\0') { - while (ascii_isspace(*endptr)) - ++endptr; + const qsizetype len = num.size(); + if (used < len && num[used] != '\0') { + while (used < len && ascii_isspace(num[used])) + ++used; } - if (*endptr != '\0') { + if (used < len && num[used] != '\0') // we stopped at a non-digit character after converting some digits - if (ok != nullptr) - *ok = false; - return 0; - } + return false; - if (ok != nullptr) - *ok = true; - return l; + return true; } -qulonglong QLocaleData::bytearrayToUnsLongLong(const char *num, int base, bool *ok) +QSimpleParsedNumber<qint64> QLocaleData::bytearrayToLongLong(QByteArrayView num, int base) { - bool _ok; - const char *endptr; - qulonglong l = qstrtoull(num, &endptr, base, &_ok); - - if (!_ok) { - if (ok != nullptr) - *ok = false; - return 0; - } - - if (*endptr != '\0') { - while (ascii_isspace(*endptr)) - ++endptr; - } - - if (*endptr != '\0') { - if (ok != nullptr) - *ok = false; - return 0; - } + auto r = qstrntoll(num.data(), num.size(), base); + if (!checkParsed(num, r.used)) + return {}; + return r; +} - if (ok != nullptr) - *ok = true; - return l; +QSimpleParsedNumber<quint64> QLocaleData::bytearrayToUnsLongLong(QByteArrayView num, int base) +{ + auto r = qstrntoull(num.data(), num.size(), base); + if (!checkParsed(num, r.used)) + return {}; + return r; } /*! @@ -4057,7 +4545,7 @@ qulonglong QLocaleData::bytearrayToUnsLongLong(const char *num, int base, bool * \since 4.8 Returns a currency symbol according to the \a format. 
*/ -QString QLocale::currencySymbol(QLocale::CurrencySymbolFormat format) const +QString QLocale::currencySymbol(CurrencySymbolFormat format) const { #ifndef QT_NO_SYSTEMLOCALE if (d->m_data == &systemLocaleData) { @@ -4074,7 +4562,7 @@ QString QLocale::currencySymbol(QLocale::CurrencySymbolFormat format) const case CurrencyIsoCode: { const char *code = d->m_data->m_currency_iso_code; if (auto len = qstrnlen(code, 3)) - return QString::fromLatin1(code, int(len)); + return QString::fromLatin1(code, qsizetype(len)); break; } } @@ -4108,8 +4596,8 @@ QString QLocale::toCurrencyString(qlonglong value, const QString &symbol) const QString str = toString(value); QString sym = symbol.isNull() ? currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); - return range.getData(currency_format_data).arg(str, sym); + sym = currencySymbol(CurrencyIsoCode); + return range.viewData(currency_format_data).arg(str, sym); } /*! @@ -4130,7 +4618,7 @@ QString QLocale::toCurrencyString(qulonglong value, const QString &symbol) const QString str = toString(value); QString sym = symbol.isNull() ? currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); + sym = currencySymbol(CurrencyIsoCode); return d->m_data->currencyFormat().getData(currency_format_data).arg(str, sym); } @@ -4163,8 +4651,8 @@ QString QLocale::toCurrencyString(double value, const QString &symbol, int preci QString str = toString(value, 'f', precision == -1 ? d->m_data->m_currency_digits : precision); QString sym = symbol.isNull() ? currencySymbol() : symbol; if (sym.isEmpty()) - sym = currencySymbol(QLocale::CurrencyIsoCode); - return range.getData(currency_format_data).arg(str, sym); + sym = currencySymbol(CurrencyIsoCode); + return range.viewData(currency_format_data).arg(str, sym); } /*! 
@@ -4223,59 +4711,89 @@ QString QLocale::formattedDataSize(qint64 bytes, int precision, DataSizeFormats // We don't support sizes in units larger than exbibytes because // the number of bytes would not fit into qint64. Q_ASSERT(power <= 6 && power >= 0); - QString unit; + QStringView unit; if (power > 0) { QLocaleData::DataRange range = (format & DataSizeSIQuantifiers) ? d->m_data->byteAmountSI() : d->m_data->byteAmountIEC(); - unit = range.getListEntry(byte_unit_data, power - 1); + unit = range.viewListEntry(byte_unit_data, power - 1); } else { - unit = d->m_data->byteCount().getData(byte_unit_data); + unit = d->m_data->byteCount().viewData(byte_unit_data); } - return number + QLatin1Char(' ') + unit; + return number + u' ' + unit; } /*! \since 4.8 - - Returns an ordered list of locale names for translation purposes in - preference order (like "en-Latn-US", "en-US", "en"). - - The return value represents locale names that the user expects to see the - UI translation in. - - Most like you do not need to use this function directly, but just pass the + \brief List of locale names for use in selecting translations + + Each entry in the returned list is the name of a locale suitable to the + user's preferences for what to translate the UI into. Where a name in the + list is composed of several tags, they are joined as indicated by \a + separator. Prior to Qt 6.7 a dash was used as separator. + + For example, using the default separator QLocale::TagSeparator::Dash, if the + user has configured their system to use English as used in the USA, the list + would be "en-Latn-US", "en-US", "en". The order of entries is the order in + which to check for translations; earlier items in the list are to be + preferred over later ones. If your translation files use underscores, rather + than dashes, to separate locale tags, pass QLocale::TagSeparator::Underscore + as \a separator. 
+ + Most likely you do not need to use this function directly, but just pass the QLocale object to the QTranslator::load() function. - The first item in the list is the most preferred one. - \sa QTranslator, bcp47Name() */ -QStringList QLocale::uiLanguages() const +QStringList QLocale::uiLanguages(TagSeparator separator) const { + const char sep = char(separator); QStringList uiLanguages; - QList<QLocale> locales; -#ifndef QT_NO_SYSTEMLOCALE - if (d->m_data == &systemLocaleData) { + if (uchar(sep) > 0x7f) { + badSeparatorWarning("uiLanguages", sep); + return uiLanguages; + } + QList<QLocaleId> localeIds; +#ifdef QT_NO_SYSTEMLOCALE + constexpr bool isSystem = false; +#else + const bool isSystem = d->m_data == &systemLocaleData; + if (isSystem) { uiLanguages = systemLocale()->query(QSystemLocale::UILanguages).toStringList(); - // ... but we need to include likely-adjusted forms of each of those, too: - for (const auto &entry : uiLanguages) - locales.append(QLocale(entry)); - if (locales.isEmpty()) - locales.append(systemLocale()->fallbackLocale()); + // ... but we need to include likely-adjusted forms of each of those, too. + // For now, collect up locale Ids representing the entries, for later processing: + for (const auto &entry : std::as_const(uiLanguages)) + localeIds.append(QLocaleId::fromName(entry)); + if (localeIds.isEmpty()) + localeIds.append(systemLocale()->fallbackLocale().d->m_data->id()); + // If the system locale (isn't C and) didn't include itself in the list, + // or as fallback, presume to know better than it and put its name + // first. (Known issue, QTBUG-104930, on some macOS versions when in + // locale en_DE.) Our translation system might have a translation for a + // locale the platform doesn't believe in. 
+ const QString name = bcp47Name(separator); + if (!name.isEmpty() && language() != C && !uiLanguages.contains(name)) { + // That uses contains(name) as a cheap pre-test, but there may be an + // entry that matches this on purging likely subtags. + const QLocaleId mine = d->m_data->id().withLikelySubtagsRemoved(); + const auto isMine = [mine](const QString &entry) { + return QLocaleId::fromName(entry).withLikelySubtagsRemoved() == mine; + }; + if (std::none_of(uiLanguages.constBegin(), uiLanguages.constEnd(), isMine)) { + localeIds.prepend(d->m_data->id()); + uiLanguages.prepend(name); + } + } } else #endif { - locales.append(*this); + localeIds.append(d->m_data->id()); } - for (int i = locales.size(); i-- > 0; ) { - const QLocale &locale = locales.at(i); - const auto data = locale.d->m_data; - QLocaleId id = data->id(); - - int j; + for (qsizetype i = localeIds.size(); i-- > 0; ) { + QLocaleId id = localeIds.at(i); + qsizetype j; QByteArray prior; - if (i < uiLanguages.size()) { + if (isSystem && i < uiLanguages.size()) { // Adding likely-adjusted forms to system locale's list. // Name the locale is derived from: prior = uiLanguages.at(i).toLatin1(); @@ -4283,33 +4801,49 @@ QStringList QLocale::uiLanguages() const j = i + 1; } else if (id.language_id == C) { // Attempt no likely sub-tag amendments to C: - uiLanguages.append(locale.name()); + uiLanguages.append(QString::fromLatin1(id.name(sep))); continue; } else { - // Plain locale, not system locale; just append. - const QString name = locale.bcp47Name(); - uiLanguages.append(name); - prior = name.toLatin1(); + // Plain locale or empty system uiLanguages; just append. + prior = id.name(sep); + uiLanguages.append(QString::fromLatin1(prior)); j = uiLanguages.size(); } const QLocaleId max = id.withLikelySubtagsAdded(); const QLocaleId min = max.withLikelySubtagsRemoved(); - id.script_id = 0; // For re-use as script-less variant. 
- // Include version with all likely sub-tags (last) if distinct from the rest: - if (max != min && max != id && max.name() != prior) - uiLanguages.insert(j, QString::fromLatin1(max.name())); + // Include minimal version (last) unless it's what our locale is derived from: + if (auto name = min.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + else if (!isSystem) + --j; // bcp47Name() matches min(): put more specific forms *before* it. + + if (id.script_id) { + // Include scriptless version if likely-equivalent and distinct: + id.script_id = 0; + if (id != min && id.withLikelySubtagsAdded() == max) { + if (auto name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } + } - // Include scriptless version if likely-equivalent and distinct: - if (data->m_script_id && id != min && id.name() != prior - && id.withLikelySubtagsAdded() == max) { - uiLanguages.insert(j, QString::fromLatin1(id.name())); + if (!id.territory_id) { + Q_ASSERT(!min.territory_id); + Q_ASSERT(!id.script_id); // because we just cleared it. + // Include version with territory if it likely-equivalent and distinct: + id.territory_id = max.territory_id; + if (id != max && id.withLikelySubtagsAdded() == max) { + if (auto name = id.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } } - // Include minimal version (first) unless it's what our locale is derived from: - if (min.name() != prior) - uiLanguages.insert(j, QString::fromLatin1(min.name())); + // Include version with all likely sub-tags (first) if distinct from the rest: + if (max != min && max != id) { + if (auto name = max.name(sep); name != prior) + uiLanguages.insert(j, QString::fromLatin1(name)); + } } return uiLanguages; } @@ -4341,7 +4875,7 @@ QLocale QLocale::collation() const \since 4.8 Returns a native name of the language for the locale. For example - "Schwiizertüütsch" for Swiss-German locale. 
+ "Schweizer Hochdeutsch" for the Swiss-German locale. \sa nativeTerritoryName(), languageToString() */ |