summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2021-08-04 01:39:43 +0200
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2021-11-30 19:33:34 +0200
commit54536bb5aea2da08d97670810b3baf14f90ed4f5 (patch)
tree072830197dbbe92625bfa4d5072ac5f52ea855d8
parent064c3d35e6809672323e8d912e9140ddd0ad48cd (diff)
QString::arg: deprecate use of arbitrary Unicode digits as replacements
The only documented replacements for Q*String*::arg() are sequences like %1, %2, %3 -- where the n-th number is expressed using a sequence of ASCII digits [1]. The code parsing the replacements however used the QChar::digitValue() function. That function simply checks if a QChar has a *Unicode digit value* (no matter what its block/category is), and if so, returns the corresponding digit value as an int (otherwise returns -1). The result of this is that a sequence like "%¹" or "%१" actually triggered substitutions (both count as "1"). Similarly, QChars with a digit value would be parsed as part of longer sequences like "%1²" (counting as "12" (!)). This behavior is weird, undocumented, and extremely likely the usual backstabbing by Unicode by using "convenience" QChar methods -- that is, never *intended* by the implementation. This commit deprecates (via warnings) such usages, which for the time being are left working as before (in the name of backwards compatibility). At the same time: given it's extremely unlikely that someone would be deliberately relying on this behavior, it implements the desired change of behavior (only accept sequences of ASCII digits) starting from Qt 6.6, that is, after the next LTS. Throughout Qt 6's lifetime users will still be able to control arg()'s behavior by setting an env variable, but that variable (and the support for Unicode digits) will disappear in Qt 7. To summarize: * Qt 6.3->6.5: default is Unicode digits, env var to control * Qt 6.6->6.x: default is ASCII digits, env var to control * Qt 7: only ASCII digits, no env var [1] That's the name Unicode gives to them, cf. https://www.unicode.org/charts/PDF/U0000.pdf [ChangeLog][QtCore][Deprecation Notices] The arg() functions featured in Qt string classes have always been documented to require replacements tokens to be sequences of ASCII digits (like %1, %2, %34, and so on). A coding oversight made it accept sequences of arbitrary characters with a Unicode digit value instead. For instance, "%2੩" is interpreted as the 23rd substitution; and "%1²" is interpreted as the 12th substitution. This behavior is deprecated, and will result in runtime warnings. Starting from Qt 6.6, arg()'s behavior will be changed to accept only ASCII digits by default. That means that "%1²" is going to be interpreted as substitution number 1 followed by the "²" character (which does not get substituted, so it gets left as-is in the result). Users can restore the previous semantics (accept Unicode digits) by setting the QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG environment variable to a non-zero value. In Qt 7, arg() will only support sequences of ASCII digits. Note that from Qt 6.3 users can also set QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG to zero; this will make arg() use ASCII digits only, in preparation for the future change of defaults. Change-Id: I8a044b629bcca6996e76018c9faf7c6748ae04e8 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r--src/corelib/text/qstring.cpp84
-rw-r--r--tests/auto/corelib/text/qstring/tst_qstring.cpp29
2 files changed, 109 insertions, 4 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 291e2c37ab..ca2ebe2b9e 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -1485,6 +1485,40 @@ inline char qToLower(char ch)
return ch;
}
+// ### Qt 7: do not allow anything but ASCII digits
+// in arg()'s replacements.
+#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
+static bool supportUnicodeDigitValuesInArg()
+{
+ static const bool result = []() {
+ static const char supportUnicodeDigitValuesEnvVar[]
+ = "QT_USE_UNICODE_DIGIT_VALUES_IN_STRING_ARG";
+
+ if (qEnvironmentVariableIsSet(supportUnicodeDigitValuesEnvVar))
+ return qEnvironmentVariableIntValue(supportUnicodeDigitValuesEnvVar) != 0;
+
+#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0) // keep it in sync with the test
+ return true;
+#else
+ return false;
+#endif
+ }();
+
+ return result;
+}
+#endif
+
+static int qArgDigitValue(QChar ch) noexcept
+{
+#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
+ if (supportUnicodeDigitValuesInArg())
+ return ch.digitValue();
+#endif
+ if (ch >= u'0' && ch <= u'9')
+ return int(ch.unicode() - u'0');
+ return -1;
+}
+
/*!
\macro QT_RESTRICTED_CAST_FROM_ASCII
@@ -7725,6 +7759,34 @@ QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersi
return copy;
}
+#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
+static void checkArgEscape(QStringView s)
+{
+ // If we're in here, it means that qArgDigitValue has accepted the
+ // digit. We can skip the check in case we already know it will
+ // succeed.
+ if (!supportUnicodeDigitValuesInArg())
+ return;
+
+ const auto isNonAsciiDigit = [](QChar c) {
+ return c.unicode() < u'0' || c.unicode() > u'9';
+ };
+
+ if (std::any_of(s.begin(), s.end(), isNonAsciiDigit)) {
+ const auto accumulateDigit = [](int partial, QChar digit) {
+ return partial * 10 + digit.digitValue();
+ };
+ const int parsedNumber = std::accumulate(s.begin(), s.end(), 0, accumulateDigit);
+
+ qWarning("QString::arg(): the replacement \"%%%ls\" contains non-ASCII digits;\n"
+ " it is currently being interpreted as the %d-th substitution.\n"
+ " This is deprecated; support for non-ASCII digits will be dropped\n"
+ " in a future version of Qt.",
+ qUtf16Printable(s.toString()),
+ parsedNumber);
+ }
+}
+#endif
struct ArgEscapeData
{
@@ -7765,20 +7827,34 @@ static ArgEscapeData findArgEscapes(QStringView s)
break;
}
- int escape = c->digitValue();
+ int escape = qArgDigitValue(*c);
if (escape == -1)
continue;
+ // ### Qt 7: do not allow anything but ASCII digits
+ // in arg()'s replacements.
+#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0)
+ const QChar *escapeBegin = c;
+ const QChar *escapeEnd = escapeBegin + 1;
+#endif
+
++c;
if (c != uc_end) {
- int next_escape = c->digitValue();
+ const int next_escape = qArgDigitValue(*c);
if (next_escape != -1) {
escape = (10 * escape) + next_escape;
++c;
+#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0)
+ ++escapeEnd;
+#endif
}
}
+#if QT_VERSION <= QT_VERSION_CHECK(7, 0, 0)
+ checkArgEscape(QStringView(escapeBegin, escapeEnd));
+#endif
+
if (escape > d.min_escape)
continue;
@@ -7829,9 +7905,9 @@ static QString replaceArgEscapes(QStringView s, const ArgEscapeData &d, qsizetyp
if (localize)
++c;
- int escape = c->digitValue();
+ int escape = qArgDigitValue(*c);
if (escape != -1 && c + 1 != uc_end) {
- int digit = c[1].digitValue();
+ const int digit = qArgDigitValue(c[1]);
if (digit != -1) {
++c;
escape = 10 * escape + digit;
diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp
index f6e7563774..07122f6235 100644
--- a/tests/auto/corelib/text/qstring/tst_qstring.cpp
+++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp
@@ -5153,6 +5153,35 @@ void tst_QString::arg()
.arg( firstName ).arg( lastName );
QCOMPARE( fullName, QLatin1String("My name is Bond, James Bond") );
+ // ### Qt 7: clean this up, leave just the #else branch
+#if QT_VERSION < QT_VERSION_CHECK(6, 6, 0)
+ static const QRegularExpression nonAsciiArgWarning("QString::arg\\(\\): the replacement \".*\" contains non-ASCII digits");
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%¹").arg("foo"), QString("foo") );
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%¹%1").arg("foo"), QString("foofoo") );
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%1²").arg("E=mc"), QString("E=mc") );
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%1²%2").arg("a").arg("b"), QString("ba") );
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%¹%1²%2").arg("a").arg("b"), QString("a%1²b") );
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QTest::ignoreMessage(QtWarningMsg, nonAsciiArgWarning);
+ QCOMPARE( QString("%2²%1").arg("a").arg("b"), QString("ba") );
+#else
+ QTest::ignoreMessage(QtWarningMsg, "QString::arg: Argument missing: %¹, foo");
+ QCOMPARE( QString("%¹").arg("foo"), QString("%¹") );
+ QCOMPARE( QString("%¹%1").arg("foo"), QString("%¹foo") );
+ QCOMPARE( QString("%1²").arg("E=mc"), QString("E=mc²") );
+ QCOMPARE( QString("%1²%2").arg("a").arg("b"), QString("a²b") );
+ QCOMPARE( QString("%¹%1²%2").arg("a").arg("b"), QString("%¹a²b") );
+ QCOMPARE( QString("%2²%1").arg("a").arg("b"), QString("b²a") );
+#endif
+
// number overloads
QCOMPARE( s4.arg(0), QLatin1String("[0]") );
QCOMPARE( s4.arg(-1), QLatin1String("[-1]") );