diff options
author | Marc Mutz <marc.mutz@kdab.com> | 2017-11-22 15:48:02 +0100 |
---|---|---|
committer | Marc Mutz <marc.mutz@kdab.com> | 2020-06-03 19:13:54 +0200 |
commit | 6a3c6f939f29c83d53d2da0c3f53b814bdd02358 (patch) | |
tree | b0734ab85ce0839a80e440b42da4216ff7291378 /tests/auto/corelib/text | |
parent | 1b33ee95e5c6e5e27f732fd273920861fdae486a (diff) |
Long live QStringTokenizer!
This class is designed as a C++20-style generator / lazy sequence, and
the new return value of QString{,View}::tokenize().
It is thus more similar to a hand-coded loop around indexOf() than to
QString::split(), which returns a container (the filling of which
allocates memory).
The template arguments of QStringTokenizer intricately depend on the
arguments with which it is constructed, so QStringTokenizer cannot be used
directly without C++17 CTAD. To work around this issue, add a factory
function, qTokenize().
LATER:
- ~Optimize QLatin1String needles (avoid repeated L1->UTF16 conversion)~
  (out of scope for QStringTokenizer, should be solved in the respective
  indexOf())
- Keep per-instantiation state:
  * Boyer-Moore table
[ChangeLog][QtCore][QStringTokenizer] New class.
[ChangeLog][QtCore][qTokenize] New function.
Change-Id: I7a7a02e9175cdd3887778f29f2f91933329be759
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'tests/auto/corelib/text')
7 files changed, 345 insertions, 1 deletions
diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt index 19cd71a987..1aada48d2a 100644 --- a/tests/auto/corelib/text/CMakeLists.txt +++ b/tests/auto/corelib/text/CMakeLists.txt @@ -19,5 +19,6 @@ add_subdirectory(qstringiterator) add_subdirectory(qstringlist) add_subdirectory(qstringmatcher) add_subdirectory(qstringref) +add_subdirectory(qstringtokenizer) add_subdirectory(qstringview) add_subdirectory(qtextboundaryfinder) diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp index a55f2f13eb..37cc7db841 100644 --- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp +++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> ** Copyright (C) 2019 Mail.ru Group. 
** Contact: https://www.qt.io/licensing/ ** @@ -33,6 +33,7 @@ #include <QString> #include <QStringView> +#include <QStringTokenizer> #include <QChar> #include <QScopedArrayPointer> #include <QStringRef> @@ -512,6 +513,116 @@ private Q_SLOTS: void split_QStringRef_char16_t() { split_impl<QStringRef, char16_t>(); } private: + void tok_data(bool rhsHasVariableLength = true); + template <typename Haystack, typename Needle> void tok_impl() const; + +private Q_SLOTS: + // let Splittable = {QString, QStringRef, QStringView, QLatin1String, const char16_t*, std::u16string} + // let Separators = Splittable ∪ {QChar, char16_t} + // test Splittable × Separators: + void tok_QString_QString_data() { tok_data(); } + void tok_QString_QString() { tok_impl<QString, QString>(); } + void tok_QString_QStringRef_data() { tok_data(); } + void tok_QString_QStringRef() { tok_impl<QString, QStringRef>(); } + void tok_QString_QStringView_data() { tok_data(); } + void tok_QString_QStringView() { tok_impl<QString, QStringView>(); } + void tok_QString_QLatin1String_data() { tok_data(); } + void tok_QString_QLatin1String() { tok_impl<QString, QLatin1String>(); } + void tok_QString_const_char16_t_star_data() { tok_data(); } + void tok_QString_const_char16_t_star() { tok_impl<QString, const char16_t*>(); } + void tok_QString_stdu16string_data() { tok_data(); } + void tok_QString_stdu16string() { tok_impl<QString, std::u16string>(); } + void tok_QString_QChar_data() { tok_data(false); } + void tok_QString_QChar() { tok_impl<QString, QChar>(); } + void tok_QString_char16_t_data() { tok_data(false); } + void tok_QString_char16_t() { tok_impl<QString, char16_t>(); } + + void tok_QStringRef_QString_data() { tok_data(); } + void tok_QStringRef_QString() { tok_impl<QStringRef, QString>(); } + void tok_QStringRef_QStringRef_data() { tok_data(); } + void tok_QStringRef_QStringRef() { tok_impl<QStringRef, QStringRef>(); } + void tok_QStringRef_QStringView_data() { tok_data(); } + void 
tok_QStringRef_QStringView() { tok_impl<QStringRef, QStringView>(); } + void tok_QStringRef_QLatin1String_data() { tok_data(); } + void tok_QStringRef_QLatin1String() { tok_impl<QStringRef, QLatin1String>(); } + void tok_QStringRef_const_char16_t_star_data() { tok_data(); } + void tok_QStringRef_const_char16_t_star() { tok_impl<QStringRef, const char16_t*>(); } + void tok_QStringRef_stdu16string_data() { tok_data(); } + void tok_QStringRef_stdu16string() { tok_impl<QStringRef, std::u16string>(); } + void tok_QStringRef_QChar_data() { tok_data(false); } + void tok_QStringRef_QChar() { tok_impl<QStringRef, QChar>(); } + void tok_QStringRef_char16_t_data() { tok_data(false); } + void tok_QStringRef_char16_t() { tok_impl<QStringRef, char16_t>(); } + + void tok_QStringView_QString_data() { tok_data(); } + void tok_QStringView_QString() { tok_impl<QStringView, QString>(); } + void tok_QStringView_QStringRef_data() { tok_data(); } + void tok_QStringView_QStringRef() { tok_impl<QStringView, QStringRef>(); } + void tok_QStringView_QStringView_data() { tok_data(); } + void tok_QStringView_QStringView() { tok_impl<QStringView, QStringView>(); } + void tok_QStringView_QLatin1String_data() { tok_data(); } + void tok_QStringView_QLatin1String() { tok_impl<QStringView, QLatin1String>(); } + void tok_QStringView_const_char16_t_star_data() { tok_data(); } + void tok_QStringView_const_char16_t_star() { tok_impl<QStringView, const char16_t*>(); } + void tok_QStringView_stdu16string_data() { tok_data(); } + void tok_QStringView_stdu16string() { tok_impl<QStringView, std::u16string>(); } + void tok_QStringView_QChar_data() { tok_data(false); } + void tok_QStringView_QChar() { tok_impl<QStringView, QChar>(); } + void tok_QStringView_char16_t_data() { tok_data(false); } + void tok_QStringView_char16_t() { tok_impl<QStringView, char16_t>(); } + + void tok_QLatin1String_QString_data() { tok_data(); } + void tok_QLatin1String_QString() { tok_impl<QLatin1String, QString>(); } + void 
tok_QLatin1String_QStringRef_data() { tok_data(); } + void tok_QLatin1String_QStringRef() { tok_impl<QLatin1String, QStringRef>(); } + void tok_QLatin1String_QStringView_data() { tok_data(); } + void tok_QLatin1String_QStringView() { tok_impl<QLatin1String, QStringView>(); } + void tok_QLatin1String_QLatin1String_data() { tok_data(); } + void tok_QLatin1String_QLatin1String() { tok_impl<QLatin1String, QLatin1String>(); } + void tok_QLatin1String_const_char16_t_star_data() { tok_data(); } + void tok_QLatin1String_const_char16_t_star() { tok_impl<QLatin1String, const char16_t*>(); } + void tok_QLatin1String_stdu16string_data() { tok_data(); } + void tok_QLatin1String_stdu16string() { tok_impl<QLatin1String, std::u16string>(); } + void tok_QLatin1String_QChar_data() { tok_data(false); } + void tok_QLatin1String_QChar() { tok_impl<QLatin1String, QChar>(); } + void tok_QLatin1String_char16_t_data() { tok_data(false); } + void tok_QLatin1String_char16_t() { tok_impl<QLatin1String, char16_t>(); } + + void tok_const_char16_t_star_QString_data() { tok_data(); } + void tok_const_char16_t_star_QString() { tok_impl<const char16_t*, QString>(); } + void tok_const_char16_t_star_QStringRef_data() { tok_data(); } + void tok_const_char16_t_star_QStringRef() { tok_impl<const char16_t*, QStringRef>(); } + void tok_const_char16_t_star_QStringView_data() { tok_data(); } + void tok_const_char16_t_star_QStringView() { tok_impl<const char16_t*, QStringView>(); } + void tok_const_char16_t_star_QLatin1String_data() { tok_data(); } + void tok_const_char16_t_star_QLatin1String() { tok_impl<const char16_t*, QLatin1String>(); } + void tok_const_char16_t_star_const_char16_t_star_data() { tok_data(); } + void tok_const_char16_t_star_const_char16_t_star() { tok_impl<const char16_t*, const char16_t*>(); } + void tok_const_char16_t_star_stdu16string_data() { tok_data(); } + void tok_const_char16_t_star_stdu16string() { tok_impl<const char16_t*, std::u16string>(); } + void 
tok_const_char16_t_star_QChar_data() { tok_data(false); } + void tok_const_char16_t_star_QChar() { tok_impl<const char16_t*, QChar>(); } + void tok_const_char16_t_star_char16_t_data() { tok_data(false); } + void tok_const_char16_t_star_char16_t() { tok_impl<const char16_t*, char16_t>(); } + + void tok_stdu16string_QString_data() { tok_data(); } + void tok_stdu16string_QString() { tok_impl<std::u16string, QString>(); } + void tok_stdu16string_QStringRef_data() { tok_data(); } + void tok_stdu16string_QStringRef() { tok_impl<std::u16string, QStringRef>(); } + void tok_stdu16string_QStringView_data() { tok_data(); } + void tok_stdu16string_QStringView() { tok_impl<std::u16string, QStringView>(); } + void tok_stdu16string_QLatin1String_data() { tok_data(); } + void tok_stdu16string_QLatin1String() { tok_impl<std::u16string, QLatin1String>(); } + void tok_stdu16string_const_char16_t_star_data() { tok_data(); } + void tok_stdu16string_const_char16_t_star() { tok_impl<std::u16string, const char16_t*>(); } + void tok_stdu16string_stdu16string_data() { tok_data(); } + void tok_stdu16string_stdu16string() { tok_impl<std::u16string, std::u16string>(); } + void tok_stdu16string_QChar_data() { tok_data(false); } + void tok_stdu16string_QChar() { tok_impl<std::u16string, QChar>(); } + void tok_stdu16string_char16_t_data() { tok_data(false); } + void tok_stdu16string_char16_t() { tok_impl<std::u16string, char16_t>(); } + +private: void mid_data(); template <typename String> void mid_impl(); @@ -901,6 +1012,8 @@ template <> QStringView make(const QStringRef &sf, QLatin1String, const QBy template <> QLatin1String make(const QStringRef &, QLatin1String l1, const QByteArray &) { return l1; } template <> QByteArray make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8; } template <> const char * make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8.data(); } +template <> const char16_t* make(const QStringRef &sf, QLatin1String, const QByteArray 
&) { return QStringView{sf}.utf16(); } // assumes `sf` doesn't represent a substring +template <> std::u16string make(const QStringRef &sf, QLatin1String, const QByteArray &) { return sf.toString().toStdU16String(); } template <typename> struct is_utf8_encoded : std::false_type {}; template <> struct is_utf8_encoded<const char*> : std::true_type {}; @@ -1278,6 +1391,10 @@ static QStringList skipped(const QStringList &sl) return result; } +template <typename T> T deepCopied(T s) { return s; } +template <> QString deepCopied(QString s) { return detached(s); } +template <> QByteArray deepCopied(QByteArray s) { return detached(s); } + template <typename Haystack, typename Needle> void tst_QStringApiSymmetry::split_impl() const { @@ -1304,6 +1421,59 @@ void tst_QStringApiSymmetry::split_impl() const QCOMPARE(toQStringList(haystack.split(needle, Qt::SkipEmptyParts, Qt::CaseInsensitive)), skippedResultCIS); } +void tst_QStringApiSymmetry::tok_data(bool rhsHasVariableLength) +{ + split_data(rhsHasVariableLength); +} + +template <typename Haystack, typename Needle> +void tst_QStringApiSymmetry::tok_impl() const +{ + QFETCH(const QStringRef, haystackU16); + QFETCH(const QLatin1String, haystackL1); + QFETCH(const QStringRef, needleU16); + QFETCH(const QLatin1String, needleL1); + QFETCH(const QStringList, resultCS); + QFETCH(const QStringList, resultCIS); + + const QStringList skippedResultCS = skipped(resultCS); + const QStringList skippedResultCIS = skipped(resultCIS); + + const auto haystackU8 = haystackU16.toUtf8(); + const auto needleU8 = needleU16.toUtf8(); + + const auto haystack = make<Haystack>(haystackU16, haystackL1, haystackU8); + const auto needle = make<Needle>(needleU16, needleL1, needleU8); + + QCOMPARE(toQStringList(qTokenize(haystack, needle)), resultCS); + QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive)), resultCS); + QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts)), 
resultCIS); + QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive)), skippedResultCS); + QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts)), skippedResultCIS); + + { + const auto tok = qTokenize(deepCopied(haystack), deepCopied(needle)); + // here, the temporaries returned from deepCopied() have already been destroyed, + // yet `tok` should have kept a copy alive as needed: + QCOMPARE(toQStringList(tok), resultCS); + } + +#ifdef __cpp_deduction_guides + QCOMPARE(toQStringList(QStringTokenizer{haystack, needle}), resultCS); + QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive}), resultCS); + QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts}), resultCIS); + QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive}), skippedResultCS); + QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts}), skippedResultCIS); + + { + const auto tok = QStringTokenizer{deepCopied(haystack), deepCopied(needle)}; + // here, the temporaries returned from deepCopied() have already been destroyed, + // yet `tok` should have kept a copy alive as needed: + QCOMPARE(toQStringList(tok), resultCS); + } +#endif // __cpp_deduction_guides +} + void tst_QStringApiSymmetry::mid_data() { QTest::addColumn<QStringRef>("unicode"); diff --git a/tests/auto/corelib/text/qstringtokenizer/.gitignore b/tests/auto/corelib/text/qstringtokenizer/.gitignore new file mode 100644 index 0000000000..5925520afd --- /dev/null +++ b/tests/auto/corelib/text/qstringtokenizer/.gitignore @@ -0,0 +1 @@ +tst_qstringtokenizer diff --git a/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt new file mode 100644 index 0000000000..5928e5b99a --- /dev/null +++ b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt 
@@ -0,0 +1,13 @@ +# Generated from qstringtokenizer.pro. + +##################################################################### +## tst_qstringtokenizer Test: +##################################################################### + +qt_add_test(tst_qstringtokenizer + SOURCES + tst_qstringtokenizer.cpp +) + +## Scopes: +##################################################################### diff --git a/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro new file mode 100644 index 0000000000..5ae27c6570 --- /dev/null +++ b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro @@ -0,0 +1,7 @@ +CONFIG += testcase +TARGET = tst_qstringtokenizer +QT = core testlib +contains(QT_CONFIG, c++14):CONFIG *= c++14 +contains(QT_CONFIG, c++1z):CONFIG *= c++1z +contains(QT_CONFIG, c++2a):CONFIG *= c++2a +SOURCES += tst_qstringtokenizer.cpp diff --git a/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp new file mode 100644 index 0000000000..0f50c389b4 --- /dev/null +++ b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp @@ -0,0 +1,151 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. 
For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include <QStringTokenizer> +#include <QStringBuilder> + +#include <QTest> + +#include <string> + +Q_DECLARE_METATYPE(Qt::SplitBehavior) + +class tst_QStringTokenizer : public QObject +{ + Q_OBJECT + +private Q_SLOTS: + void constExpr() const; + void basics_data() const; + void basics() const; + void toContainer() const; +}; + +static QStringList skipped(const QStringList &sl) +{ + QStringList result; + result.reserve(sl.size()); + for (const QString &s : sl) { + if (!s.isEmpty()) + result.push_back(s); + } + return result; +} + +QString toQString(QStringView str) +{ + return str.toString(); +} + +template <typename Container> +QStringList toQStringList(const Container &c) +{ + QStringList r; + for (auto &&e : c) + r.push_back(toQString(e)); + return r; +} + +void tst_QStringTokenizer::constExpr() const +{ + // compile-time checks + { + constexpr auto tok = qTokenize(u"a,b,c", u","); + Q_UNUSED(tok); + } + { + constexpr auto tok = qTokenize(u"a,b,c", u','); + Q_UNUSED(tok); + } +} + +void tst_QStringTokenizer::basics_data() const +{ + QTest::addColumn<Qt::SplitBehavior>("sb"); + QTest::addColumn<Qt::CaseSensitivity>("cs"); + +#define ROW(sb, cs) \ + do { QTest::addRow("%s/%s", #sb, #cs) << Qt::SplitBehavior{Qt::sb} << Qt::cs; } while (0) + + ROW(KeepEmptyParts, CaseSensitive); + ROW(KeepEmptyParts, CaseInsensitive); + 
ROW(SkipEmptyParts, CaseSensitive); + ROW(SkipEmptyParts, CaseInsensitive); + +#undef ROW +} + +void tst_QStringTokenizer::basics() const +{ + QFETCH(const Qt::SplitBehavior, sb); + QFETCH(const Qt::CaseSensitivity, cs); + + auto expected = QStringList{"", "a", "b", "c", "d", "e", ""}; + if (sb & Qt::SkipEmptyParts) + expected = skipped(expected); + QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', sb, cs)), expected); + QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', cs, sb)), expected); + + { + auto tok = qTokenize(expected.join(u'x'), u"X" % QString(), Qt::CaseInsensitive); + // the temporary QStrings returned from join() and the QStringBuilder expression + // are now destroyed, but 'tok' should keep both alive + QCOMPARE(toQStringList(tok), expected); + } + + using namespace std::string_literals; + + { + auto tok = qTokenize(expected.join(u'x'), u"X"s, Qt::CaseInsensitive); + QCOMPARE(toQStringList(tok), expected); + } + + { + auto tok = qTokenize(expected.join(u'x'), QLatin1Char('x'), cs, sb); + QCOMPARE(toQStringList(tok), expected); + } +} + +void tst_QStringTokenizer::toContainer() const +{ + // QStringView value_type: + { + auto tok = qTokenize(u"a,b,c", u','); + auto v = tok.toContainer(); + QVERIFY((std::is_same_v<decltype(v), QVector<QStringView>>)); + } + // QLatin1String value_type + { + auto tok = qTokenize(QLatin1String{"a,b,c"}, u','); + auto v = tok.toContainer(); + QVERIFY((std::is_same_v<decltype(v), QVector<QLatin1String>>)); + } +} + +QTEST_APPLESS_MAIN(tst_QStringTokenizer) +#include "tst_qstringtokenizer.moc" diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro index cb7de443bd..a8ad7bd2f8 100644 --- a/tests/auto/corelib/text/text.pro +++ b/tests/auto/corelib/text/text.pro @@ -20,5 +20,6 @@ SUBDIRS = \ qstringlist \ qstringmatcher \ qstringref \ + qstringtokenizer \ qstringview \ qtextboundaryfinder |