summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2021-08-26 13:04:58 +0200
committerIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2021-09-22 15:40:30 +0200
commitfe46cd59ce3c961d714b303d7d5484cca8864247 (patch)
tree640929d44eeadfa83a263bcf34024bbffda817ec
parent9e770e357256aff5ca267f82792128ef61d0b278 (diff)
Add isValidUtf8() methods to QUtf8StringView and QByteArray{,View}
The new methods return true if the string contains valid UTF-8 encoded data, or false otherwise. [ChangeLog][QtCore][QByteArray] Added isValidUtf8() method. [ChangeLog][QtCore][QByteArrayView] Added isValidUtf8() method. [ChangeLog][QtCore][QUtf8StringView] Added isValidUtf8() method. Task-number: QTBUG-92021 Change-Id: I5d0cb613265d98b1f189c5f5cc09c1f7db302272 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r--src/corelib/text/qbytearray.cpp18
-rw-r--r--src/corelib/text/qbytearray.h5
-rw-r--r--src/corelib/text/qbytearrayalgorithms.h2
-rw-r--r--src/corelib/text/qbytearrayview.h2
-rw-r--r--src/corelib/text/qbytearrayview.qdoc9
-rw-r--r--src/corelib/text/qutf8stringview.h5
-rw-r--r--src/corelib/text/qutf8stringview.qdoc9
-rw-r--r--tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp83
8 files changed, 133 insertions, 0 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp
index a1b0f30d01..8a7db6ebe4 100644
--- a/src/corelib/text/qbytearray.cpp
+++ b/src/corelib/text/qbytearray.cpp
@@ -51,6 +51,7 @@
#include "qstringalgorithms_p.h"
#include "qscopedpointer.h"
#include "qbytearray_p.h"
+#include "qstringconverter_p.h"
#include <qdatastream.h>
#include <qmath.h>
@@ -426,6 +427,14 @@ int QtPrivate::compareMemory(QByteArrayView lhs, QByteArrayView rhs)
return lhs.size() == rhs.size() ? 0 : lhs.size() > rhs.size() ? 1 : -1;
}
+/*!
+ \internal
+*/
+bool QtPrivate::isValidUtf8(QByteArrayView s) noexcept
+{
+ return QUtf8::isValidUtf8(s).isValidUtf8;
+}
+
// the CRC table below is created by the following piece of code
#if 0
static void createCRC16Table() // build CRC16 lookup table
@@ -2784,6 +2793,15 @@ bool QByteArray::isLower() const
}
/*!
+ \fn QByteArray::isValidUtf8() const
+
+ Returns \c true if this byte array contains valid UTF-8 encoded data,
+ or \c false otherwise.
+
+ \since 6.3
+*/
+
+/*!
Returns a byte array that contains the first \a len bytes of this byte
array.
diff --git a/src/corelib/text/qbytearray.h b/src/corelib/text/qbytearray.h
index 4f29018f32..0a604f1594 100644
--- a/src/corelib/text/qbytearray.h
+++ b/src/corelib/text/qbytearray.h
@@ -200,6 +200,11 @@ public:
bool isUpper() const;
bool isLower() const;
+ [[nodiscard]] bool isValidUtf8() const noexcept
+ {
+ return QtPrivate::isValidUtf8(qToByteArrayViewIgnoringNull(*this));
+ }
+
void truncate(qsizetype pos);
void chop(qsizetype n);
diff --git a/src/corelib/text/qbytearrayalgorithms.h b/src/corelib/text/qbytearrayalgorithms.h
index a78e6e1709..b669f065b9 100644
--- a/src/corelib/text/qbytearrayalgorithms.h
+++ b/src/corelib/text/qbytearrayalgorithms.h
@@ -74,6 +74,8 @@ qsizetype count(QByteArrayView haystack, QByteArrayView needle) noexcept;
[[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QByteArrayView trimmed(QByteArrayView s) noexcept;
+[[nodiscard]] Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool isValidUtf8(QByteArrayView s) noexcept;
+
} // namespace QtPrivate
/*****************************************************************************
diff --git a/src/corelib/text/qbytearrayview.h b/src/corelib/text/qbytearrayview.h
index 1cd5b0333e..11db03c62f 100644
--- a/src/corelib/text/qbytearrayview.h
+++ b/src/corelib/text/qbytearrayview.h
@@ -288,6 +288,8 @@ public:
inline int compare(QByteArrayView a, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
+ [[nodiscard]] inline bool isValidUtf8() const noexcept { return QtPrivate::isValidUtf8(*this); }
+
//
// STL compatibility API:
//
diff --git a/src/corelib/text/qbytearrayview.qdoc b/src/corelib/text/qbytearrayview.qdoc
index 624b056744..96bb1ccb56 100644
--- a/src/corelib/text/qbytearrayview.qdoc
+++ b/src/corelib/text/qbytearrayview.qdoc
@@ -377,6 +377,15 @@
*/
/*!
+ \fn QByteArrayView::isValidUtf8() const
+
+ Returns \c true if this byte array view contains valid UTF-8 encoded data,
+ or \c false otherwise.
+
+ \since 6.3
+*/
+
+/*!
\fn QByteArrayView::const_iterator QByteArrayView::begin() const
Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the
diff --git a/src/corelib/text/qutf8stringview.h b/src/corelib/text/qutf8stringview.h
index eeab604fa8..a6930c2e0f 100644
--- a/src/corelib/text/qutf8stringview.h
+++ b/src/corelib/text/qutf8stringview.h
@@ -285,6 +285,11 @@ public:
constexpr void chop(qsizetype n)
{ verify(n); m_size -= n; }
+ [[nodiscard]] inline bool isValidUtf8() const noexcept
+ {
+ return QByteArrayView(reinterpret_cast<const char *>(data()), size()).isValidUtf8();
+ }
+
//
// STL compatibility API:
//
diff --git a/src/corelib/text/qutf8stringview.qdoc b/src/corelib/text/qutf8stringview.qdoc
index 683af4e423..deac0882fe 100644
--- a/src/corelib/text/qutf8stringview.qdoc
+++ b/src/corelib/text/qutf8stringview.qdoc
@@ -679,6 +679,15 @@
*/
/*!
+ \fn QUtf8StringView::isValidUtf8() const
+
+ Returns \c true if this string contains valid UTF-8 encoded data,
+ or \c false otherwise.
+
+ \since 6.3
+*/
+
+/*!
\fn template <typename QStringLike> qToUtf8StringViewIgnoringNull(const QStringLike &s);
\relates QUtf8StringView
\internal
diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
index 2fdb3ad0b5..b42019bf04 100644
--- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
+++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
@@ -898,6 +898,19 @@ private Q_SLOTS:
void indexOf_regexp_QString() { indexOf_contains_lastIndexOf_count_regexp_impl<QString>(); }
void indexOf_regexp_QStringView_data() { indexOf_contains_lastIndexOf_count_regexp_data(); }
void indexOf_regexp_QStringView() { indexOf_contains_lastIndexOf_count_regexp_impl<QStringView>(); }
+
+private:
+ void isValidUtf8_data();
+ template<typename String>
+ void isValidUtf8_impl() const;
+
+private Q_SLOTS:
+ void isValidUtf8_QByteArray_data() { isValidUtf8_data(); }
+ void isValidUtf8_QByteArray() { isValidUtf8_impl<QByteArray>(); }
+ void isValidUtf8_QByteArrayView_data() { isValidUtf8_data(); }
+ void isValidUtf8_QByteArrayView() { isValidUtf8_impl<QByteArrayView>(); }
+ void isValidUtf8_QUtf8StringView_data() { isValidUtf8_data(); }
+ void isValidUtf8_QUtf8StringView() { isValidUtf8_impl<QUtf8StringView>(); }
};
namespace {
@@ -2855,6 +2868,76 @@ void tst_QStringApiSymmetry::indexOf_contains_lastIndexOf_count_regexp_impl() co
}
}
+void tst_QStringApiSymmetry::isValidUtf8_data()
+{
+ QTest::addColumn<QByteArray>("ba");
+ QTest::addColumn<bool>("valid");
+
+ int row = 0;
+ QTest::addRow("valid-%02d", row++) << QByteArray() << true;
+ QTest::addRow("valid-%02d", row++) << QByteArray("ascii") << true;
+ QTest::addRow("valid-%02d", row++)
+ << QByteArray("\xc2\xa2\xe0\xa4\xb9\xf0\x90\x8d\x88") << true; // U+00A2 U+0939 U+10348
+ QTest::addRow("valid-%02d", row++) << QByteArray("\xf4\x8f\xbf\xbf") << true; // U+10FFFF
+
+ row = 0;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xc0\x00") << false;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xc1\xff") << false;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\x00\x00") << false;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xe0\xa0\x7f") << false;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x00\x00\x00") << false;
+ QTest::addRow("overlong-%02d", row++) << QByteArray("\xf0\x90\x80\x7f") << false;
+
+ row = 0;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xc2") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xc2") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xc2y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xc2y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xe0\xa4y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xe0\xa4y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xe0") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xe0") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xe0y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xe0y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbf") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbf") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f\xbfy") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f\xbfy") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8f") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8f") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4\x8fy") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4\x8fy") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("x\xf4y") << false;
+ QTest::addRow("short-%02d", row++) << QByteArray("\xf4y") << false;
+
+ row = 0;
+ QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0\xee\x80\x7f") << false;
+ QTest::addRow("surrogates-%02d", row++) << QByteArray("\xed\x9f\xc0") << false;
+ QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f") << false;
+ QTest::addRow("surrogates-%02d", row++) << QByteArray("\xee\x80\x7f\xed\x9f\xc0") << false;
+
+ row = 0;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xf4\x8f\xbf\xc0") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xf7\x80\x80\x80") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xfd\xbf\xbf\xbf\xbf") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xfe\xbf\xbf\xbf\xbf\xbf") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xff\xbf\xbf\xbf\xbf\xbf\xbf") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\x80") << false;
+ QTest::addRow("other-%02d", row++) << QByteArray("\xbf") << false;
+}
+
+template<typename String>
+void tst_QStringApiSymmetry::isValidUtf8_impl() const
+{
+ QFETCH(QByteArray, ba);
+ const String string(ba);
+ QTEST(string.isValidUtf8(), "valid");
+}
+
QTEST_APPLESS_MAIN(tst_QStringApiSymmetry)
#include "tst_qstringapisymmetry.moc"