diff options
author | Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> | 2021-08-04 01:39:43 +0200 |
---|---|---|
committer | Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> | 2021-11-30 19:33:34 +0200 |
commit | 54536bb5aea2da08d97670810b3baf14f90ed4f5 (patch) | |
tree | 072830197dbbe92625bfa4d5072ac5f52ea855d8 | |
parent | 064c3d35e6809672323e8d912e9140ddd0ad48cd (diff) |
QString::arg: deprecate use of arbitrary Unicode digits as replacements
The only documented replacements for Q*String*::arg() are sequences like
%1, %2, %3 -- where the n-th number is expressed using a sequence of
ASCII digits [1].
The code parsing the replacements however used the QChar::digitValue()
function. That function simply checks if a QChar has a *Unicode digit
value* (no matter what its block/category is), and if so, returns the
corresponding digit value as an int (otherwise returns -1).
The result of this is that a sequence like "%¹" or "%१" actually
triggered substitutions (both count as "1"). Similarly, QChars with
a digit value would be parsed as part of longer sequences like "%1²"
(counting as "12" (!)).
This behavior is weird, undocumented, and extremely likely the usual
backstabbing by Unicode by using "convenience" QChar methods -- that is,
never *intended* by the implementation.
This commit deprecates (via warnings) such usages, which for the time
being are left working as before (in the name of backwards
compatibility). At the same time: given it's extremely unlikely that
someone would be deliberately relying on this behavior, it implements
the desired change of behavior (only accept sequences of ASCII digits)
starting from Qt 6.6, that is, after the next LTS.
Throughout Qt 6's lifetime users will still be able to control arg()'s
behavior by setting an env variable, but that variable (and the support
for Unicode digits) will disappear in Qt 7.
To summarize:
* Qt 6.3->6.5: default is Unicode digits, env var to control
* Qt 6.6->6.x: default is ASCII digits, env var to control
* Qt 7: only ASCII digits, no env var
[1] That's the name Unicode gives to them, cf. https://www.unicode.org/charts/PDF/U0000.pdf
[ChangeLog][QtCore][Deprecation Notices] The arg() functions
featured in Qt string classes have always been documented to require
replacements tokens to be sequences of ASCII digits (like %1, %2, %34,
and so on). A coding oversight made it accept sequences of arbitrary
characters with a Unicode digit value instead. For instance, "%2੩" is
interpreted as the 23rd substitution; and "%1²" is interpreted as the
12th substitution. This behavior is deprecated, and will result in
runtime warnings. Starting from Qt 6.6, arg()'s behavior will be changed
to accept only ASCII digits by default. That means that "%1²" is going
to be interpreted as substitution number 1 followed by the "²" character
(which does not get substituted, so it gets left as-is in the result).
Users can restore the previous semantics (accept Unicode digits) by
setting the QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG environment
variable to a non-zero value. In Qt 7, arg() will only support sequences
of ASCII digits. Note that from Qt 6.3 users can also set
QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG to zero; this will make arg()
use ASCII digits only, in preparation for the future change of defaults.
Change-Id: I8a044b629bcca6996e76018c9faf7c6748ae04e8
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r-- | src/corelib/text/qstring.cpp | 84 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstring/tst_qstring.cpp | 29 |
2 files changed, 109 insertions, 4 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 291e2c37ab..ca2ebe2b9e 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1485,6 +1485,40 @@ inline char qToLower(char ch) return ch; } +// ### Qt 7: do not allow anything but ASCII digits +// in arg()'s replacements. +#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) +static bool supportUnicodeDigitValuesInArg() +{ + static const bool result = []() { + static const char supportUnicodeDigitValuesEnvVar[] + = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG"; + + if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar)) + return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0; + +#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test + return true; +#else + return false; +#endif + }(); + + return result; +} +#endif + +static int qArgDigitValue(QChar ch) noexcept +{ +#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) + if (supportUnicodeDigitValuesInArg()) + return ch.digitValue(); +#endif + if (ch >= u'0' && ch <= u'9') + return int(ch.unicode() - u'0'); + return -1; +} + /*! \macro QT_RESTRICTED_CAST_FROM_ASCII @@ -7725,6 +7759,34 @@ QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersi return copy; } +#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0) +static void checkArgEscape(QStringView s) +{ + // If we're in here, it means that qArgDigitValue has accepted the + // digit. We can skip the check in case we already know it will + // succeed. + if (!supportUnicodeDigitValuesInArg()) + return; + + const auto isNonAsciiDigit = [](QChar c) { + return c.unicode() < u'0' || c.unicode() > u'9'; + }; + + if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) { + const auto accumulateDigit = [](int partial, QChar digit) { + return partial * 10 + digit.digitValue(); + }; + const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit); + + qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n" + " it is currently being interpreted as the %d-th substitution.\n" + " This is deprecated; support for non-ASCII digits will be dropped\n" + " in a future version of Qt.", + qUtf16Printable(s.toString()), + parsedNumber); + } +} +#endif struct ArgEscapeData { @@ -7765,20 +7827,34 @@ static ArgEscapeData findArgEscapes(QStringView s) break; } - int escape = c->digitValue(); + int escape = qArgDigitValue(*c); if (escape == -1) continue; + // ### Qt 7: do not allow anything but ASCII digits + // in arg()'s replacements. +#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) + const QChar *escapeBegin = c; + const QChar *escapeEnd = escapeBegin + 1; +#endif + ++c; if (c != uc_end) { - int next_escape = c->digitValue(); + const int next_escape = qArgDigitValue(*c); if (next_escape != -1) { escape = (10 * escape) + next_escape; ++c; +#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) + ++escapeEnd; +#endif } } +#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0) + checkArgEscape(QStringView(escapeBegin, escapeEnd)); +#endif + if (escape > d.min_escape) continue; @@ -7829,9 +7905,9 @@ static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetyp if (localize) ++c; - int escape = c->digitValue(); + int escape = qArgDigitValue(*c); if (escape != -1 && c + 1 != uc_end) { - int digit = c[1].digitValue(); + const int digit = qArgDigitValue(c[1]); if (digit != -1) { ++c; escape = 10 * escape + digit; diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index f6e7563774..07122f6235 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -5153,6 +5153,35 @@ void tst_QString::arg() .arg( firstName ).arg( lastName ); QCOMPARE( fullName, QLatin1String("My name is Bond, James Bond") ); + // ### Qt 7: clean this up, leave just the #else branch +#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) + static const QRegularExpression nonAsciiArgWarning("QString::arg\\(\\): the replacement \".*\" contains non-ASCII digits"); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%¹").arg("foo"), QString("foo") ); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%¹%1").arg("foo"), QString("foofoo") ); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%1²").arg("E=mc"), QString("E=mc") ); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%1²%2").arg("a").arg("b"), QString("ba") ); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%¹%1²%2").arg("a").arg("b"), QString("a%1²b") ); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning); + QCOMPARE( QString("%2²%1").arg("a").arg("b"), QString("ba") ); +#else + QTest::ignoreMessage(QtWarningMsg, "QString::arg: Argument missing: %¹, foo"); + QCOMPARE( QString("%¹").arg("foo"), QString("%¹") ); + QCOMPARE( QString("%¹%1").arg("foo"), QString("%¹foo") ); + QCOMPARE( QString("%1²").arg("E=mc"), QString("E=mc²") ); + QCOMPARE( QString("%1²%2").arg("a").arg("b"), QString("a²b") ); + QCOMPARE( QString("%¹%1²%2").arg("a").arg("b"), QString("%¹a²b") ); + QCOMPARE( QString("%2²%1").arg("a").arg("b"), QString("b²a") ); +#endif + // number overloads QCOMPARE( s4.arg(0), QLatin1String("[0]") ); QCOMPARE( s4.arg(-1), QLatin1String("[-1]") ); |