summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@kdab.com>2017-11-22 15:48:02 +0100
committerMarc Mutz <marc.mutz@kdab.com>2020-06-03 19:13:54 +0200
commit6a3c6f939f29c83d53d2da0c3f53b814bdd02358 (patch)
treeb0734ab85ce0839a80e440b42da4216ff7291378
parent1b33ee95e5c6e5e27f732fd273920861fdae486a (diff)
Long live QStringTokenizer!
This class is designed as C++20-style generator / lazy sequence, and the new return value of QString{,View}::tokenize(). It thus is more similar to a hand-coded loop around indexOf() than QString::split(), which returns a container (the filling of which allocates memory). The template arguments of QStringTokenizer intricately depend on the arguments with which it is constructed, so QStringTokenizer cannot be used directly without C++17 CTAD. To work around this issue, add a factory function, qTokenize(). LATER: - ~Optimize QLatin1String needles (avoid repeated L1->UTF16 conversion)~ (out of scope for QStringTokenizer, should be solved in the respective indexOf()) - Keep per-instantiation state: * Boyer-Moore table [ChangeLog][QtCore][QStringTokenizer] New class. [ChangeLog][QtCore][qTokenize] New function. Change-Id: I7a7a02e9175cdd3887778f29f2f91933329be759 Reviewed-by: Lars Knoll <lars.knoll@qt.io> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r--src/corelib/text/qstringtokenizer.cpp357
-rw-r--r--src/corelib/text/qstringtokenizer.h438
-rw-r--r--src/corelib/text/text.pri2
-rw-r--r--tests/auto/corelib/text/CMakeLists.txt1
-rw-r--r--tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp172
-rw-r--r--tests/auto/corelib/text/qstringtokenizer/.gitignore1
-rw-r--r--tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt13
-rw-r--r--tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro7
-rw-r--r--tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp151
-rw-r--r--tests/auto/corelib/text/text.pro1
10 files changed, 1142 insertions, 1 deletions
diff --git a/src/corelib/text/qstringtokenizer.cpp b/src/corelib/text/qstringtokenizer.cpp
new file mode 100644
index 0000000000..043269a3ac
--- /dev/null
+++ b/src/corelib/text/qstringtokenizer.cpp
@@ -0,0 +1,357 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qstringtokenizer.h"
+#include "qstringalgorithms.h"
+
+QT_BEGIN_NAMESPACE
+
+/*!
+ \class QStringTokenizer
+ \inmodule QtCore
+ \since 6.0
+ \brief The QStringTokenizer class splits strings into tokens along given separators
+ \reentrant
+ \ingroup tools
+ \ingroup string-processing
+
+ Splits a string into substrings wherever a given separator occurs,
+ and returns a (lazy) list of those strings. If the separator does
+ not match anywhere in the string, produces a single element
+ containing this string. If the separator is empty,
+ QStringTokenizer produces an empty string, followed by each of the
+ string's characters, followed by another empty string. The two
+ enumerations Qt::SplitBehavior and Qt::CaseSensitivity further
+ control the output.
+
+ QStringTokenizer drives QStringView::tokenize(), but, at least with a
+ recent compiler, you can use it directly, too:
+
+ \code
+ for (auto token : QStringTokenizer{string, separator})
+ use(token);
+ \endcode
+
+ \note You should never, ever, name the template arguments of a
+ QStringTokenizer explicitly. If you can use C++17 Class Template
+ Argument Deduction (CTAD), you may write
+ \c{QStringTokenizer{string, separator}} (without template
+ arguments). If you can't use C++17 CTAD, you must use the
+ QStringView::split() or QLatin1String::split() member functions
+ and store the return value only in \c{auto} variables:
+
+ \code
+ auto result = string.split(sep);
+ \endcode
+
+ This is because the template arguments of QStringTokenizer have a
+ very subtle dependency on the specific string and separator types
+ with which they are constructed, and they don't usually
+ correspond to the actual types passed.
+
+ \section Lazy Sequences
+
+ QStringTokenizer acts as a so-called lazy sequence, that is, each
+ next element is only computed once you ask for it. Lazy sequences
+ have the advantage that they only require O(1) memory. They have
+ the disadvantage that, at least for QStringTokenizer, they only
+ allow forward, not random-access, iteration.
+
+ The intended use-case is that you just plug it into a ranged for loop:
+
+ \code
+ for (auto token : QStringTokenizer{string, separator})
+ use(token);
+ \endcode
+
+ or a C++20 ranged algorithm:
+
+ \code
+ std::ranges::for_each(QStringTokenizer{string, separator},
+ [] (auto token) { use(token); });
+ \endcode
+
+ \section End Sentinel
+
+ The QStringTokenizer iterators cannot be used with classical STL
+ algorithms, because those require iterator/iterator pairs, while
+ QStringTokenizer uses sentinels, that is, it uses a different
+ type, QStringTokenizer::sentinel, to mark the end of the
+ range. This improves performance, because the sentinel is an empty
+ type. Sentinels are supported from C++17 (for ranged for)
+ and C++20 (for algorithms using the new ranges library).
+
+ \section Temporaries
+
+ QStringTokenizer is very carefully designed to avoid dangling
+ references. If you construct a tokenizer from a temporary string
+ (an rvalue), that argument is stored internally, so the referenced
+ data isn't deleted before it is tokenized:
+
+ \code
+ auto tok = QStringTokenizer{widget.text(), u','};
+ // return value of `widget.text()` is destroyed, but content was moved into `tok`
+ for (auto e : tok)
+ use(e);
+ \endcode
+
+ If you pass named objects (lvalues), then QStringTokenizer does
+ not store a copy. You are responsible for keeping the named object's
+ data around for longer than the tokenizer operates on it:
+
+ \code
+ auto text = widget.text();
+ auto tok = QStringTokenizer{text, u','};
+ text.clear(); // destroy content of `text`
+ for (auto e : tok) // ERROR: `tok` references deleted data!
+ use(e);
+ \endcode
+
+ \sa QStringView::split(), QLatin1String::split(), Qt::SplitBehavior, Qt::CaseSensitivity
+*/
+
+/*!
+ \typedef QStringTokenizer::value_type
+
+ Alias for \c{const QStringView} or \c{const QLatin1String},
+ depending on the tokenizer's \c Haystack template argument.
+*/
+
+/*!
+ \typedef QStringTokenizer::difference_type
+
+ Alias for qsizetype.
+*/
+
+/*!
+ \typedef QStringTokenizer::size_type
+
+ Alias for \c{std::size_t}.
+*/
+
+/*!
+ \typedef QStringTokenizer::reference
+
+ Alias for \c{value_type &}.
+
+ QStringTokenizer does not support mutable references, so this is
+ the same as const_reference.
+*/
+
+/*!
+ \typedef QStringTokenizer::const_reference
+
+ Alias for \c{value_type &}.
+*/
+
+/*!
+ \typedef QStringTokenizer::pointer
+
+ Alias for \c{value_type *}.
+
+ QStringTokenizer does not support mutable iterators, so this is
+ the same as const_pointer.
+*/
+
+/*!
+ \typedef QStringTokenizer::const_pointer
+
+ Alias for \c{value_type *}.
+*/
+
+/*!
+ \typedef QStringTokenizer::iterator
+
+ This typedef provides an STL-style const iterator for
+ QStringTokenizer.
+
+ QStringTokenizer does not support mutable iterators, so this is
+ the same as const_iterator.
+
+ \sa const_iterator
+*/
+
+/*!
+ \typedef QStringTokenizer::const_iterator
+
+ This typedef provides an STL-style const iterator for
+ QStringTokenizer.
+
+ \sa iterator
+*/
+
+/*!
+ \typedef QStringTokenizer::sentinel
+
+ This typedef provides an STL-style sentinel for
+ QStringTokenizer::iterator and QStringTokenizer::const_iterator.
+
+ \sa const_iterator
+*/
+
+/*!
+ \fn QStringTokenizer(Haystack haystack, String needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb)
+ \fn QStringTokenizer(Haystack haystack, String needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs)
+
+ Constructs a string tokenizer that splits the string \a haystack
+ into substrings wherever \a needle occurs, and allows iteration
+ over those strings as they are found. If \a needle does not match
+ anywhere in \a haystack, a single element containing \a haystack
+ is produced.
+
+ \a cs specifies whether \a needle should be matched case
+ sensitively or case insensitively.
+
+ If \a sb is Qt::SkipEmptyParts, empty entries don't
+ appear in the result. By default, empty entries are included.
+
+ \sa QStringView::split(), QLatin1String::split(), Qt::CaseSensitivity, Qt::SplitBehavior
+*/
+
+/*!
+ \fn QStringTokenizer::const_iterator QStringTokenizer::begin() const
+
+ Returns a const \l{STL-style iterators}{STL-style iterator}
+ pointing to the first token in the list.
+
+ \sa end(), cbegin()
+*/
+
+/*!
+ \fn QStringTokenizer::const_iterator QStringTokenizer::cbegin() const
+
+ Same as begin().
+
+ \sa cend(), begin()
+*/
+
+/*!
+ \fn QStringTokenizer::sentinel QStringTokenizer::end() const
+
+ Returns a const \l{STL-style iterators}{STL-style sentinel}
+ pointing to the imaginary token after the last token in the list.
+
+ \sa begin(), cend()
+*/
+
+/*!
+ \fn QStringTokenizer::sentinel QStringTokenizer::cend() const
+
+ Same as end().
+
+ \sa cbegin(), end()
+*/
+
+/*!
+ \fn QStringTokenizer::toContainer(Container &&c) const &
+
+ Convenience method to convert the lazy sequence into a
+ (typically) random-access container.
+
+ This function is only available if \c Container has a \c value_type
+ matching this tokenizer's value_type.
+
+ If you pass in a named container (an lvalue), then that container
+ is filled, and a reference to it is returned.
+
+ If you pass in a temporary container (an rvalue, incl. the default
+ argument), then that container is filled, and returned by value.
+
+ \code
+ // assuming tok's value_type is QStringView, then...
+ auto tok = QStringTokenizer{~~~};
+ // ... rac1 is a QVector:
+ auto rac1 = tok.toContainer();
+ // ... rac2 is std::pmr::vector<QStringView>:
+ auto rac2 = tok.toContainer<std::pmr::vector<QStringView>>();
+ auto rac3 = QVarLengthArray<QStringView, 12>{};
+ // appends the token sequence produced by tok to rac3
+ // and returns a reference to rac3 (which we ignore here):
+ tok.toContainer(rac3);
+ \endcode
+
+ This gives you maximum flexibility in how you want the sequence to
+ be stored.
+*/
+
+/*!
+ \fn QStringTokenizer::toContainer(Container &&c) const &&
+ \overload
+
+ In addition to the constraints on the lvalue-this overload, this
+ rvalue-this overload is only available when this QStringTokenizer
+ does not store the haystack internally, as this could create a
+ container full of dangling references:
+
+ \code
+ auto tokens = QStringTokenizer{widget.text(), u','}.toContainer();
+ // ERROR: cannot call toContainer() on rvalue
+ // 'tokens' references the data of the copy of widget.text()
+ // stored inside the QStringTokenizer, which has since been deleted
+ \endcode
+
+ To fix, store the QStringTokenizer in a named variable:
+
+ \code
+ auto tokenizer = QStringTokenizer{widget.text(), u','};
+ auto tokens = tokenizer.toContainer();
+ // OK: the copy of widget.text() stored in 'tokenizer' keeps the data
+ // referenced by 'tokens' alive.
+ \endcode
+
+ You can force this function into existence by passing a view instead:
+
+ \code
+ func(QStringTokenizer{QStringView{widget.text()}, u','}.toContainer());
+ // OK: compiler keeps widget.text() around until after func() has executed
+ \endcode
+*/
+
+/*!
+ \fn qTokenize(Haystack &&haystack, Needle &&needle, Flags...flags)
+ \relates QStringTokenizer
+ \since 6.0
+
+ Factory function for QStringTokenizer. You can use this function
+ if your compiler doesn't, yet, support C++17 Class Template
+ Argument Deduction (CTAD), but we recommend direct use of
+ QStringTokenizer with CTAD instead.
+*/
+
+QT_END_NAMESPACE
diff --git a/src/corelib/text/qstringtokenizer.h b/src/corelib/text/qstringtokenizer.h
new file mode 100644
index 0000000000..31bbbf01c8
--- /dev/null
+++ b/src/corelib/text/qstringtokenizer.h
@@ -0,0 +1,438 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef QSTRINGTOKENIZER_H
+#define QSTRINGTOKENIZER_H
+
+#include <QtCore/qnamespace.h>
+
+QT_BEGIN_NAMESPACE
+
+template <typename, typename> class QStringBuilder;
+template <typename> class QVector;
+
+#if defined(Q_QDOC) || 1 || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603)
+# define Q_STRINGTOKENIZER_USE_SENTINEL
+#endif
+
+// Non-template part of the tokenizer: stores the split behavior and case
+// sensitivity, and defines the state that is carried from one token to the
+// next. Construction and destruction are protected, so the class can only
+// be used as a base.
+class QStringTokenizerBaseBase
+{
+protected:
+    constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
+        : m_sb(sb), m_cs(cs)
+    {
+    }
+    ~QStringTokenizerBaseBase() = default;
+
+    // Position of the tokenization between two steps:
+    //  start - index at which the next token begins
+    //  end   - index of the separator found last; negative once no further
+    //          separator was found
+    //  extra - 1 after matching an empty separator, so that the following
+    //          search starts one character further on; 0 otherwise
+    struct tokenizer_state {
+        qsizetype start, end, extra;
+        friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept
+        {
+            return lhs.start == rhs.start
+                && lhs.end == rhs.end
+                && lhs.extra == rhs.extra;
+        }
+        friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept
+        {
+            return !(lhs == rhs);
+        }
+    };
+
+    Qt::SplitBehavior m_sb;
+    Qt::CaseSensitivity m_cs;
+};
+
+template <typename Haystack, typename Needle>
+class QStringTokenizerBase : protected QStringTokenizerBaseBase // tokenizer core: splits m_haystack at occurrences of m_needle
+{
+ struct next_result { // outcome of one tokenization step
+ Haystack value; // the token; default-constructed when ok is false
+ bool ok; // false once there are no more tokens
+ tokenizer_state state; // where the following step resumes
+ };
+ inline next_result next(tokenizer_state state) const noexcept; // defined at the end of this header
+ inline next_result toFront() const noexcept { return next({}); } // first token: start from an all-zero state
+public:
+ constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept
+ : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {}
+ // iterator walks the tokens; sentinel is the type returned by end()/cend()
+ class iterator;
+ friend class iterator;
+#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
+ class sentinel { // stateless end marker: any two sentinels compare equal
+ friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; }
+ friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; }
+ };
+#else
+ using sentinel = iterator; // without a separate sentinel type, end() returns a value-initialized iterator
+#endif
+ class iterator {
+ const QStringTokenizerBase *tokenizer; // the tokenizer this iterator was created from
+ next_result current; // current token together with the state needed to advance
+ friend class QStringTokenizerBase;
+ explicit iterator(const QStringTokenizerBase &t) noexcept
+ : tokenizer{&t}, current{t.toFront()} {} // starts out at the first token
+ public:
+ using difference_type = qsizetype;
+ using value_type = Haystack;
+ using pointer = const value_type*;
+ using reference = const value_type&;
+ using iterator_category = std::forward_iterator_tag;
+
+ iterator() noexcept = default;
+
+ // violates std::forward_iterator (returns a reference into the iterator)
+ Q_REQUIRED_RESULT constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), &current.value; } // asserts that the iterator is not at the end
+ Q_REQUIRED_RESULT constexpr const Haystack& operator*() const { return *operator->(); }
+
+ iterator& operator++() { advance(); return *this; }
+ iterator operator++(int) { auto tmp = *this; advance(); return tmp; }
+ // Two iterators are equal if both are at the end, or if neither is and their states match.
+ friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept
+ { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); }
+ friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept
+ { return !operator==(lhs, rhs); }
+#ifdef Q_STRINGTOKENIZER_USE_SENTINEL
+ friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept
+ { return !lhs.current.ok; } // equal to the sentinel exactly when there is no current token
+ friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept
+ { return !operator==(lhs, sentinel{}); }
+ friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept
+ { return !rhs.current.ok; }
+ friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept
+ { return !operator==(sentinel{}, rhs); }
+#endif
+ private:
+ void advance() {
+ Q_ASSERT(current.ok); // advancing an iterator that is already at the end is a programming error
+ current = tokenizer->next(current.state);
+ }
+ };
+ using const_iterator = iterator;
+
+ using size_type = std::size_t;
+ using difference_type = typename iterator::difference_type;
+ using value_type = typename iterator::value_type;
+ using pointer = typename iterator::pointer;
+ using const_pointer = pointer;
+ using reference = typename iterator::reference;
+ using const_reference = reference;
+
+ Q_REQUIRED_RESULT iterator begin() const noexcept { return iterator{*this}; }
+ Q_REQUIRED_RESULT iterator cbegin() const noexcept { return begin(); }
+ template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
+ Q_REQUIRED_RESULT constexpr sentinel end() const noexcept { return {}; } // value-initialized end marker
+ template <bool = std::is_same<iterator, sentinel>::value> // ODR protection
+ Q_REQUIRED_RESULT constexpr sentinel cend() const noexcept { return {}; }
+
+private:
+ Haystack m_haystack;
+ Needle m_needle;
+};
+
+QT_BEGIN_INCLUDE_NAMESPACE
+#include <QtCore/qstringview.h>
+QT_END_INCLUDE_NAMESPACE
+
+namespace QtPrivate {
+namespace Tok { // implementation helpers for QStringTokenizer
+
+ constexpr qsizetype size(QChar) noexcept { return 1; } // a single-character needle has length 1
+ template <typename String>
+ constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); }
+
+ template <typename String> struct ViewForImpl {}; // no nested ::type unless one of the specializations below applies
+ template <> struct ViewForImpl<QStringView> { using type = QStringView; };
+ template <> struct ViewForImpl<QLatin1String> { using type = QLatin1String; };
+ template <> struct ViewForImpl<QChar> { using type = QChar; };
+ template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {};
+ template <> struct ViewForImpl<QStringRef> : ViewForImpl<QStringView> {};
+ template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {};
+ template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {};
+ template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {};
+ template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {};
+ template <typename LHS, typename RHS>
+ struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {};
+ template <typename Char, typename...Args>
+ struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {}; // e.g. std::u16string maps like char16_t*, i.e. to QStringView
+#ifdef __cpp_lib_string_view
+ template <typename Char, typename...Args>
+ struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {};
+#endif
+
+ // This metafunction maps a StringLike to a View (currently, QChar,
+ // QStringView, QLatin1String). This is what QStringTokenizerBase
+ // operates on. QStringTokenizer adds pinning to keep rvalues alive
+ // for the duration of the algorithm.
+ template <typename String>
+ using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type;
+
+ // Pinning:
+ // rvalues of owning string types need to be moved into QStringTokenizer
+ // to keep them alive for the lifetime of the tokenizer. For lvalues, we
+ // assume the user takes care of that.
+
+ // default: don't pin anything (characters are pinned implicitly)
+ template <typename String>
+ struct PinForImpl { using type = ViewFor<String>; };
+
+ // rvalue QString -> QString
+ template <>
+ struct PinForImpl<QString> { using type = QString; };
+
+ // rvalue std::basic_string -> basic_string
+ template <typename Char, typename...Args>
+ struct PinForImpl<std::basic_string<Char, Args...>>
+ { using type = std::basic_string<Char, Args...>; };
+
+ // rvalue QStringBuilder -> pin as the nested ConvertTo type
+ template <typename LHS, typename RHS>
+ struct PinForImpl<QStringBuilder<LHS, RHS>>
+ : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {};
+
+ template <typename StringLike>
+ using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; // only cv is removed: reference types fall through to the default (view) case
+
+ template <typename T> struct is_owning_string_type : std::false_type {}; // selects the storing Pinning specialization below
+ template <> struct is_owning_string_type<QString> : std::true_type {};
+ template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {};
+
+ // unpinned
+ template <typename T, bool pinned = is_owning_string_type<T>::value>
+ struct Pinning
+ {
+ // this is the storage for non-pinned types - no storage
+ constexpr Pinning(const T&) noexcept {}
+ // Since we don't store something, the view() method needs to be
+ // given something it can return.
+ constexpr T view(T t) const noexcept { return t; }
+ };
+
+ // pinned
+ template <typename T>
+ struct Pinning<T, true>
+ {
+ T m_string;
+ // specialisation for owning string types (QString, std::u16string):
+ // stores the string:
+ constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {}
+ // ... and thus view() uses that instead of the argument passed in:
+ constexpr QStringView view(const T&) const noexcept { return m_string; }
+ };
+
+ // NeedlePinning and HaystackPinning are there to distinguish them as
+ // base classes of QStringTokenizer. We use inheritance to reap the
+ // empty base class optimization.
+ template <typename T>
+ struct NeedlePinning : Pinning<T>
+ {
+ using Pinning<T>::Pinning;
+ template <typename Arg>
+ constexpr auto needleView(Arg &&a) noexcept
+ -> decltype(this->view(std::forward<Arg>(a)))
+ { return this->view(std::forward<Arg>(a)); }
+ };
+
+ template <typename T>
+ struct HaystackPinning : Pinning<T>
+ {
+ using Pinning<T>::Pinning;
+ template <typename Arg>
+ constexpr auto haystackView(Arg &&a) noexcept
+ -> decltype(this->view(std::forward<Arg>(a)))
+ { return this->view(std::forward<Arg>(a)); }
+ };
+
+ // The Base of a QStringTokenizer is QStringTokenizerBase for the views
+ // corresponding to the Haystack and Needle template arguments
+ //
+ // ie. QStringTokenizer<QString, QString>
+ // : QStringTokenizerBase<QStringView, QStringView> (+ pinning)
+ template <typename Haystack, typename Needle>
+ using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>;
+} // namespace Tok
+} // namespace QtPrivate
+
+// Lazily splits a haystack into tokens at each occurrence of a needle.
+//
+// Haystack and Needle are the types to store: an owning string type given
+// here is kept ("pinned") in the HaystackPinning/NeedlePinning base, while
+// view and character types take up no storage there. The tokenization
+// itself, including begin()/end(), comes from the view-based TokenizerBase.
+template <typename Haystack, typename Needle>
+class QStringTokenizer
+    : private QtPrivate::Tok::HaystackPinning<Haystack>,
+      private QtPrivate::Tok::NeedlePinning<Needle>,
+      public QtPrivate::Tok::TokenizerBase<Haystack, Needle>
+{
+    using HPin = QtPrivate::Tok::HaystackPinning<Haystack>;
+    using NPin = QtPrivate::Tok::NeedlePinning<Needle>;
+    using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>;
+    // Enabled only if the haystack pinning base is empty, i.e. the haystack
+    // is not stored inside the tokenizer.
+    template <typename Container, typename HPin>
+    struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {};
+    template <typename Container>
+    using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type;
+    // Enabled only if the container's elements have the tokenizer's value_type.
+    template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))>
+    using if_compatible_container = typename std::enable_if<
+            std::is_same<
+                typename Base::value_type,
+                typename std::iterator_traits<Iterator>::value_type
+            >::value,
+            bool
+        >::type;
+public:
+    using value_type = typename Base::value_type;
+
+    constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
+                                        Qt::CaseSensitivity cs,
+                                        Qt::SplitBehavior sb = Qt::KeepEmptyParts)
+            noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
+          // here, we present the haystack to Pinning<>, for optional storing.
+          // If it did store, haystack is moved-from and mustn't be touched
+          // any longer, which is why view() for these Pinning<>s ignores the
+          // argument.
+        : HPin{std::forward<Haystack>(haystack)},
+          NPin{std::forward<Needle>(needle)},
+          // If Pinning<> didn't store, we pass the haystack (ditto needle)
+          // to view() again, so it can be copied from there.
+          Base{this->haystackView(haystack),
+               this->needleView(needle), sb, cs}
+    {}
+    // Same as above, with the two flag arguments in the opposite order and
+    // both defaulted.
+    constexpr explicit QStringTokenizer(Haystack haystack, Needle needle,
+                                        Qt::SplitBehavior sb = Qt::KeepEmptyParts,
+                                        Qt::CaseSensitivity cs = Qt::CaseSensitive)
+            noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value)
+        : HPin{std::forward<Haystack>(haystack)},
+          NPin{std::forward<Needle>(needle)},
+          Base{this->haystackView(haystack),
+               this->needleView(needle), sb, cs}
+    {}
+
+    // Appends every token to c and returns c.
+    //
+    // For a named container, Container is deduced as an lvalue reference and
+    // that reference is returned. For a temporary (including the default
+    // argument), Container is a value type and the result is returned by
+    // value; std::forward makes that a move. A plain `return c;` would copy
+    // here, because a named rvalue-reference parameter is not implicitly
+    // moved on return before C++20.
+    template <typename Container = QVector<value_type>,
+              if_compatible_container<Container> = true>
+    Container toContainer(Container &&c = {}) const &
+    {
+        for (auto e : *this)
+            c.emplace_back(e);
+        return std::forward<Container>(c);
+    }
+
+    // Overload for rvalue tokenizers. Only available while the haystack is
+    // not stored inside the tokenizer: otherwise the returned tokens would
+    // refer to data owned by the expiring tokenizer.
+    template <typename Container = QVector<value_type>,
+              if_compatible_container<Container> = true,
+              if_haystack_not_pinned<Container> = true>
+    Container toContainer(Container &&c = {}) const &&
+    {
+        for (auto e : *this)
+            c.emplace_back(e);
+        return std::forward<Container>(c);
+    }
+};
+
+namespace QtPrivate {
+namespace Tok {
+// This metafunction calculates the template arguments for the
+// QStringTokenizer (not -Base), based on the actual arguments passed
+// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue
+// QString and std::basic_string and otherwise decays the arguments to
+// the respective view type.
+//
+// #define works around a C++ restriction: [temp.deduct.guide]/3 seems
+// to ask for the simple-template-id following the `->` of a deduction
+// guide to be identical to the class name for which we guide deduction.
+// In particular, Clang rejects a template alias there, while GCC accepts
+// it.
+#define Q_TOK_RESULT \
+ QStringTokenizer< \
+ QtPrivate::Tok::PinFor<Haystack>, \
+ QtPrivate::Tok::PinFor<Needle> \
+ > \
+ /*end*/
+template <typename Haystack, typename Needle>
+using TokenizerResult = Q_TOK_RESULT; // alias form of the macro, for use outside deduction guides
+template <typename Haystack, typename Needle>
+using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>;
+} // namespace Tok
+} // namespace QtPrivate
+
+#ifdef __cpp_deduction_guides
+// these tell the compiler how to determine the QStringTokenizer
+// template arguments based on the constructor arguments (CTAD):
+template <typename Haystack, typename Needle>
+QStringTokenizer(Haystack&&, Needle&&)
+ -> Q_TOK_RESULT;
+template <typename Haystack, typename Needle>
+QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior)
+ -> Q_TOK_RESULT;
+template <typename Haystack, typename Needle>
+QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity)
+ -> Q_TOK_RESULT;
+template <typename Haystack, typename Needle>
+QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity)
+ -> Q_TOK_RESULT;
+template <typename Haystack, typename Needle>
+QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior)
+ -> Q_TOK_RESULT;
+#endif
+
+#undef Q_TOK_RESULT // the macro is not used past the deduction guides
+
+// Factory for QStringTokenizer: picks the tokenizer's template arguments
+// from the argument types (see TokenizerResult) and forwards the haystack,
+// the needle and any flags to its constructor.
+template <typename Haystack, typename Needle, typename...Flags>
+Q_REQUIRED_RESULT constexpr auto
+qTokenize(Haystack &&h, Needle &&n, Flags...flags)
+    noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value)
+    -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h),
+                                                                  std::forward<Needle>(n), flags...})
+{
+    using Tokenizer = QtPrivate::Tok::TokenizerResult<Haystack, Needle>;
+    return Tokenizer{std::forward<Haystack>(h), std::forward<Needle>(n), flags...};
+}
+
+// Computes the token that follows `state` and the state to continue from.
+// Returns ok == false, with a default-constructed value, once the sequence
+// is exhausted. With Qt::SkipEmptyParts set, empty tokens are skipped.
+template <typename Haystack, typename Needle>
+auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result
+{
+    // A negative end means a previous step found no further separator and
+    // has already handed out the final token.
+    while (state.end >= 0) {
+        const auto searchFrom = state.start + state.extra;
+        state.end = m_haystack.indexOf(m_needle, searchFrom, m_cs);
+        Haystack token;
+        if (state.end < 0) {
+            // no further separator: the remainder is the final token
+            token = m_haystack.mid(state.start);
+        } else {
+            // separator found: the token is everything in front of it
+            token = m_haystack.mid(state.start, state.end - state.start);
+            const auto needleSize = QtPrivate::Tok::size(m_needle);
+            state.start = state.end + needleSize;
+            // an empty separator matches in place, so the next search has to
+            // begin one character further on
+            state.extra = (needleSize == 0) ? 1 : 0;
+        }
+        const bool skipped = (m_sb & Qt::SkipEmptyParts) && token.isEmpty();
+        if (!skipped)
+            return {token, true, state};
+    }
+    return {{}, false, state};
+}
+
+QT_END_NAMESPACE
+
+#endif /* QSTRINGTOKENIZER_H */
diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri
index 1275c014a8..1d83bc151b 100644
--- a/src/corelib/text/text.pri
+++ b/src/corelib/text/text.pri
@@ -26,6 +26,7 @@ HEADERS += \
text/qstringliteral.h \
text/qstringmatcher.h \
text/qstringview.h \
+ text/qstringtokenizer.h \
text/qtextboundaryfinder.h \
text/qunicodetables_p.h \
text/qunicodetools_p.h
@@ -44,6 +45,7 @@ SOURCES += \
text/qstringconverter.cpp \
text/qstringlist.cpp \
text/qstringview.cpp \
+ text/qstringtokenizer.cpp \
text/qtextboundaryfinder.cpp \
text/qunicodetools.cpp \
text/qvsnprintf.cpp
diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt
index 19cd71a987..1aada48d2a 100644
--- a/tests/auto/corelib/text/CMakeLists.txt
+++ b/tests/auto/corelib/text/CMakeLists.txt
@@ -19,5 +19,6 @@ add_subdirectory(qstringiterator)
add_subdirectory(qstringlist)
add_subdirectory(qstringmatcher)
add_subdirectory(qstringref)
+add_subdirectory(qstringtokenizer)
add_subdirectory(qstringview)
add_subdirectory(qtextboundaryfinder)
diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
index a55f2f13eb..37cc7db841 100644
--- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
+++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
@@ -1,6 +1,6 @@
/****************************************************************************
**
-** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
** Copyright (C) 2019 Mail.ru Group.
** Contact: https://www.qt.io/licensing/
**
@@ -33,6 +33,7 @@
#include <QString>
#include <QStringView>
+#include <QStringTokenizer>
#include <QChar>
#include <QScopedArrayPointer>
#include <QStringRef>
@@ -512,6 +513,116 @@ private Q_SLOTS:
void split_QStringRef_char16_t() { split_impl<QStringRef, char16_t>(); }
private:
+ void tok_data(bool rhsHasVariableLength = true); // data rows for the tok_*() slots: delegates to split_data()
+ template <typename Haystack, typename Needle> void tok_impl() const; // tokenizes the fetched row as Haystack/Needle
+
+private Q_SLOTS:
+ // let Splittable = {QString, QStringRef, QStringView, QLatin1String, const char16_t*, std::u16string}
+ // let Separators = Splittable ∪ {QChar, char16_t} (the latter two use tok_data(false))
+ // test Splittable × Separators, one <name>_data()/<name>() slot pair per combination:
+ void tok_QString_QString_data() { tok_data(); }
+ void tok_QString_QString() { tok_impl<QString, QString>(); }
+ void tok_QString_QStringRef_data() { tok_data(); }
+ void tok_QString_QStringRef() { tok_impl<QString, QStringRef>(); }
+ void tok_QString_QStringView_data() { tok_data(); }
+ void tok_QString_QStringView() { tok_impl<QString, QStringView>(); }
+ void tok_QString_QLatin1String_data() { tok_data(); }
+ void tok_QString_QLatin1String() { tok_impl<QString, QLatin1String>(); }
+ void tok_QString_const_char16_t_star_data() { tok_data(); }
+ void tok_QString_const_char16_t_star() { tok_impl<QString, const char16_t*>(); }
+ void tok_QString_stdu16string_data() { tok_data(); }
+ void tok_QString_stdu16string() { tok_impl<QString, std::u16string>(); }
+ void tok_QString_QChar_data() { tok_data(false); }
+ void tok_QString_QChar() { tok_impl<QString, QChar>(); }
+ void tok_QString_char16_t_data() { tok_data(false); }
+ void tok_QString_char16_t() { tok_impl<QString, char16_t>(); }
+
+ void tok_QStringRef_QString_data() { tok_data(); }
+ void tok_QStringRef_QString() { tok_impl<QStringRef, QString>(); }
+ void tok_QStringRef_QStringRef_data() { tok_data(); }
+ void tok_QStringRef_QStringRef() { tok_impl<QStringRef, QStringRef>(); }
+ void tok_QStringRef_QStringView_data() { tok_data(); }
+ void tok_QStringRef_QStringView() { tok_impl<QStringRef, QStringView>(); }
+ void tok_QStringRef_QLatin1String_data() { tok_data(); }
+ void tok_QStringRef_QLatin1String() { tok_impl<QStringRef, QLatin1String>(); }
+ void tok_QStringRef_const_char16_t_star_data() { tok_data(); }
+ void tok_QStringRef_const_char16_t_star() { tok_impl<QStringRef, const char16_t*>(); }
+ void tok_QStringRef_stdu16string_data() { tok_data(); }
+ void tok_QStringRef_stdu16string() { tok_impl<QStringRef, std::u16string>(); }
+ void tok_QStringRef_QChar_data() { tok_data(false); }
+ void tok_QStringRef_QChar() { tok_impl<QStringRef, QChar>(); }
+ void tok_QStringRef_char16_t_data() { tok_data(false); }
+ void tok_QStringRef_char16_t() { tok_impl<QStringRef, char16_t>(); }
+
+ void tok_QStringView_QString_data() { tok_data(); }
+ void tok_QStringView_QString() { tok_impl<QStringView, QString>(); }
+ void tok_QStringView_QStringRef_data() { tok_data(); }
+ void tok_QStringView_QStringRef() { tok_impl<QStringView, QStringRef>(); }
+ void tok_QStringView_QStringView_data() { tok_data(); }
+ void tok_QStringView_QStringView() { tok_impl<QStringView, QStringView>(); }
+ void tok_QStringView_QLatin1String_data() { tok_data(); }
+ void tok_QStringView_QLatin1String() { tok_impl<QStringView, QLatin1String>(); }
+ void tok_QStringView_const_char16_t_star_data() { tok_data(); }
+ void tok_QStringView_const_char16_t_star() { tok_impl<QStringView, const char16_t*>(); }
+ void tok_QStringView_stdu16string_data() { tok_data(); }
+ void tok_QStringView_stdu16string() { tok_impl<QStringView, std::u16string>(); }
+ void tok_QStringView_QChar_data() { tok_data(false); }
+ void tok_QStringView_QChar() { tok_impl<QStringView, QChar>(); }
+ void tok_QStringView_char16_t_data() { tok_data(false); }
+ void tok_QStringView_char16_t() { tok_impl<QStringView, char16_t>(); }
+
+ void tok_QLatin1String_QString_data() { tok_data(); }
+ void tok_QLatin1String_QString() { tok_impl<QLatin1String, QString>(); }
+ void tok_QLatin1String_QStringRef_data() { tok_data(); }
+ void tok_QLatin1String_QStringRef() { tok_impl<QLatin1String, QStringRef>(); }
+ void tok_QLatin1String_QStringView_data() { tok_data(); }
+ void tok_QLatin1String_QStringView() { tok_impl<QLatin1String, QStringView>(); }
+ void tok_QLatin1String_QLatin1String_data() { tok_data(); }
+ void tok_QLatin1String_QLatin1String() { tok_impl<QLatin1String, QLatin1String>(); }
+ void tok_QLatin1String_const_char16_t_star_data() { tok_data(); }
+ void tok_QLatin1String_const_char16_t_star() { tok_impl<QLatin1String, const char16_t*>(); }
+ void tok_QLatin1String_stdu16string_data() { tok_data(); }
+ void tok_QLatin1String_stdu16string() { tok_impl<QLatin1String, std::u16string>(); }
+ void tok_QLatin1String_QChar_data() { tok_data(false); }
+ void tok_QLatin1String_QChar() { tok_impl<QLatin1String, QChar>(); }
+ void tok_QLatin1String_char16_t_data() { tok_data(false); }
+ void tok_QLatin1String_char16_t() { tok_impl<QLatin1String, char16_t>(); }
+
+ void tok_const_char16_t_star_QString_data() { tok_data(); }
+ void tok_const_char16_t_star_QString() { tok_impl<const char16_t*, QString>(); }
+ void tok_const_char16_t_star_QStringRef_data() { tok_data(); }
+ void tok_const_char16_t_star_QStringRef() { tok_impl<const char16_t*, QStringRef>(); }
+ void tok_const_char16_t_star_QStringView_data() { tok_data(); }
+ void tok_const_char16_t_star_QStringView() { tok_impl<const char16_t*, QStringView>(); }
+ void tok_const_char16_t_star_QLatin1String_data() { tok_data(); }
+ void tok_const_char16_t_star_QLatin1String() { tok_impl<const char16_t*, QLatin1String>(); }
+ void tok_const_char16_t_star_const_char16_t_star_data() { tok_data(); }
+ void tok_const_char16_t_star_const_char16_t_star() { tok_impl<const char16_t*, const char16_t*>(); }
+ void tok_const_char16_t_star_stdu16string_data() { tok_data(); }
+ void tok_const_char16_t_star_stdu16string() { tok_impl<const char16_t*, std::u16string>(); }
+ void tok_const_char16_t_star_QChar_data() { tok_data(false); }
+ void tok_const_char16_t_star_QChar() { tok_impl<const char16_t*, QChar>(); }
+ void tok_const_char16_t_star_char16_t_data() { tok_data(false); }
+ void tok_const_char16_t_star_char16_t() { tok_impl<const char16_t*, char16_t>(); }
+
+ void tok_stdu16string_QString_data() { tok_data(); }
+ void tok_stdu16string_QString() { tok_impl<std::u16string, QString>(); }
+ void tok_stdu16string_QStringRef_data() { tok_data(); }
+ void tok_stdu16string_QStringRef() { tok_impl<std::u16string, QStringRef>(); }
+ void tok_stdu16string_QStringView_data() { tok_data(); }
+ void tok_stdu16string_QStringView() { tok_impl<std::u16string, QStringView>(); }
+ void tok_stdu16string_QLatin1String_data() { tok_data(); }
+ void tok_stdu16string_QLatin1String() { tok_impl<std::u16string, QLatin1String>(); }
+ void tok_stdu16string_const_char16_t_star_data() { tok_data(); }
+ void tok_stdu16string_const_char16_t_star() { tok_impl<std::u16string, const char16_t*>(); }
+ void tok_stdu16string_stdu16string_data() { tok_data(); }
+ void tok_stdu16string_stdu16string() { tok_impl<std::u16string, std::u16string>(); }
+ void tok_stdu16string_QChar_data() { tok_data(false); }
+ void tok_stdu16string_QChar() { tok_impl<std::u16string, QChar>(); }
+ void tok_stdu16string_char16_t_data() { tok_data(false); }
+ void tok_stdu16string_char16_t() { tok_impl<std::u16string, char16_t>(); }
+
+private:
void mid_data();
template <typename String> void mid_impl();
@@ -901,6 +1012,8 @@ template <> QStringView make(const QStringRef &sf, QLatin1String, const QBy
template <> QLatin1String make(const QStringRef &, QLatin1String l1, const QByteArray &) { return l1; }
template <> QByteArray make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8; }
template <> const char * make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8.data(); }
+template <> const char16_t* make(const QStringRef &sf, QLatin1String, const QByteArray &) { return QStringView{sf}.utf16(); } // assumes `sf` doesn't represent a substring: the returned pointer carries no length of its own
+template <> std::u16string make(const QStringRef &sf, QLatin1String, const QByteArray &) { return sf.toString().toStdU16String(); } // returned by value, i.e. an owning copy of the characters
template <typename> struct is_utf8_encoded : std::false_type {};
template <> struct is_utf8_encoded<const char*> : std::true_type {};
@@ -1278,6 +1391,10 @@ static QStringList skipped(const QStringList &sl)
return result;
}
+template <typename T> T deepCopied(T value) { return value; } // generic case: the by-value parameter already is the copy
+template <> QString deepCopied(QString str) { return detached(str); } // presumably unshares the string data - see detached()
+template <> QByteArray deepCopied(QByteArray bytes) { return detached(bytes); } // same treatment for byte arrays
+
template <typename Haystack, typename Needle>
void tst_QStringApiSymmetry::split_impl() const
{
@@ -1304,6 +1421,59 @@ void tst_QStringApiSymmetry::split_impl() const
QCOMPARE(toQStringList(haystack.split(needle, Qt::SkipEmptyParts, Qt::CaseInsensitive)), skippedResultCIS);
}
+void tst_QStringApiSymmetry::tok_data(bool rhsHasVariableLength) // test data shared by all tok_*() slots
+{
+ split_data(rhsHasVariableLength); // reuses the rows set up by split_data(), forwarding the flag unchanged
+}
+
+template <typename Haystack, typename Needle>
+void tst_QStringApiSymmetry::tok_impl() const // checks qTokenize() and, where CTAD is available, QStringTokenizer{} on the fetched row
+{
+ QFETCH(const QStringRef, haystackU16);
+ QFETCH(const QLatin1String, haystackL1);
+ QFETCH(const QStringRef, needleU16);
+ QFETCH(const QLatin1String, needleL1);
+ QFETCH(const QStringList, resultCS);
+ QFETCH(const QStringList, resultCIS);
+
+ const QStringList skippedResultCS = skipped(resultCS); // expectations for the Qt::SkipEmptyParts runs
+ const QStringList skippedResultCIS = skipped(resultCIS);
+
+ const auto haystackU8 = haystackU16.toUtf8(); // third representation handed to make<>()
+ const auto needleU8 = needleU16.toUtf8();
+
+ const auto haystack = make<Haystack>(haystackU16, haystackL1, haystackU8); // make<>() returns the input as the requested type
+ const auto needle = make<Needle>(needleU16, needleL1, needleU8);
+
+ QCOMPARE(toQStringList(qTokenize(haystack, needle)), resultCS); // no flags given: must equal the explicit KeepEmptyParts/CaseSensitive run below
+ QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive)), resultCS);
+ QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts)), resultCIS); // flags are passed in either order
+ QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive)), skippedResultCS);
+ QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts)), skippedResultCIS);
+
+ {
+ const auto tok = qTokenize(deepCopied(haystack), deepCopied(needle));
+ // here, the temporaries returned from deepCopied() have already been destroyed,
+ // yet `tok` should have kept a copy alive as needed:
+ QCOMPARE(toQStringList(tok), resultCS);
+ }
+
+#ifdef __cpp_deduction_guides // the QStringTokenizer{...} expressions below name no template arguments
+ QCOMPARE(toQStringList(QStringTokenizer{haystack, needle}), resultCS); // same five checks as for qTokenize() above
+ QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive}), resultCS);
+ QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts}), resultCIS);
+ QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive}), skippedResultCS);
+ QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts}), skippedResultCIS);
+
+ {
+ const auto tok = QStringTokenizer{deepCopied(haystack), deepCopied(needle)};
+ // here, the temporaries returned from deepCopied() have already been destroyed,
+ // yet `tok` should have kept a copy alive as needed:
+ QCOMPARE(toQStringList(tok), resultCS);
+ }
+#endif // __cpp_deduction_guides
+}
+
void tst_QStringApiSymmetry::mid_data()
{
QTest::addColumn<QStringRef>("unicode");
diff --git a/tests/auto/corelib/text/qstringtokenizer/.gitignore b/tests/auto/corelib/text/qstringtokenizer/.gitignore
new file mode 100644
index 0000000000..5925520afd
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/.gitignore
@@ -0,0 +1 @@
+tst_qstringtokenizer
diff --git a/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
new file mode 100644
index 0000000000..5928e5b99a
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Generated from qstringtokenizer.pro.
+
+#####################################################################
+## tst_qstringtokenizer Test:
+#####################################################################
+
+qt_add_test(tst_qstringtokenizer
+ SOURCES
+ tst_qstringtokenizer.cpp
+)
+
+## Scopes:
+#####################################################################
diff --git a/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro
new file mode 100644
index 0000000000..5ae27c6570
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro
@@ -0,0 +1,7 @@
+CONFIG += testcase
+TARGET = tst_qstringtokenizer
+QT = core testlib
+contains(QT_CONFIG, c++14):CONFIG *= c++14 # request each C++ standard that QT_CONFIG lists ...
+contains(QT_CONFIG, c++1z):CONFIG *= c++1z # ... "*=" adds the value only if it is not present yet
+contains(QT_CONFIG, c++2a):CONFIG *= c++2a
+SOURCES += tst_qstringtokenizer.cpp
diff --git a/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp
new file mode 100644
index 0000000000..0f50c389b4
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp
@@ -0,0 +1,151 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QStringTokenizer>
+#include <QStringBuilder>
+
+#include <QTest>
+
+#include <string>
+
+Q_DECLARE_METATYPE(Qt::SplitBehavior)
+
+class tst_QStringTokenizer : public QObject // test cases for qTokenize() and the tokenizer it returns
+{
+ Q_OBJECT
+
+private Q_SLOTS:
+ void constExpr() const; // qTokenize() results stored in constexpr variables
+ void basics_data() const; // every SplitBehavior / CaseSensitivity combination
+ void basics() const; // flag order, temporary haystacks/needles, different needle types
+ void toContainer() const; // type of the container returned by toContainer()
+};
+
+static QStringList skipped(const QStringList &sl)
+{
+    QStringList nonEmpty; // the entries of `sl` that are not empty, in their original order
+    nonEmpty.reserve(sl.size()); // upper bound, reached when nothing is dropped
+    for (auto it = sl.cbegin(), end = sl.cend(); it != end; ++it) {
+        if (!it->isEmpty())
+            nonEmpty.push_back(*it);
+    }
+    return nonEmpty;
+}
+
+static QString toQString(QStringView str) // file-local helper, hence static like skipped() above
+{
+    return str.toString(); // returns an owning QString built from the viewed characters
+}
+
+template <typename Container>
+QStringList toQStringList(const Container &c)
+{
+    QStringList strings; // one QString per element of `c`, in iteration order
+    for (auto &&element : c)
+        strings.append(toQString(element));
+    return strings;
+}
+
+void tst_QStringTokenizer::constExpr() const
+{
+ // compile-time checks: each block passes by compiling, nothing is compared at run time
+ {
+ constexpr auto tok = qTokenize(u"a,b,c", u","); // separator given as a string literal
+ Q_UNUSED(tok);
+ }
+ {
+ constexpr auto tok = qTokenize(u"a,b,c", u','); // separator given as a single char16_t
+ Q_UNUSED(tok);
+ }
+}
+
+void tst_QStringTokenizer::basics_data() const // one row per SplitBehavior / CaseSensitivity combination
+{
+ QTest::addColumn<Qt::SplitBehavior>("sb");
+ QTest::addColumn<Qt::CaseSensitivity>("cs");
+
+#define ROW(sb, cs) \
+ do { QTest::addRow("%s/%s", #sb, #cs) << Qt::SplitBehavior{Qt::sb} << Qt::cs; } while (0)
+
+ ROW(KeepEmptyParts, CaseSensitive); // row name is built from the two stringized arguments
+ ROW(KeepEmptyParts, CaseInsensitive);
+ ROW(SkipEmptyParts, CaseSensitive);
+ ROW(SkipEmptyParts, CaseInsensitive);
+
+#undef ROW // the helper macro is only meant for the rows above
+}
+
+void tst_QStringTokenizer::basics() const // tokenizes comma- and 'x'-separated input with the fetched flags
+{
+ QFETCH(const Qt::SplitBehavior, sb);
+ QFETCH(const Qt::CaseSensitivity, cs);
+
+ auto expected = QStringList{"", "a", "b", "c", "d", "e", ""}; // includes the empty first and last element of ",a,b,c,d,e,"
+ if (sb & Qt::SkipEmptyParts)
+ expected = skipped(expected);
+ QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', sb, cs)), expected); // flags in one order ...
+ QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', cs, sb)), expected); // ... and in the other
+
+ {
+ auto tok = qTokenize(expected.join(u'x'), u"X" % QString(), Qt::CaseInsensitive);
+ // the temporary QStrings returned from join() and the QStringBuilder expression
+ // are now destroyed, but 'tok' should keep both alive
+ QCOMPARE(toQStringList(tok), expected);
+ }
+
+ using namespace std::string_literals;
+
+ {
+ auto tok = qTokenize(expected.join(u'x'), u"X"s, Qt::CaseInsensitive); // needle is a temporary std::u16string
+ QCOMPARE(toQStringList(tok), expected);
+ }
+
+ {
+ auto tok = qTokenize(expected.join(u'x'), QLatin1Char('x'), cs, sb); // needle is a single QLatin1Char
+ QCOMPARE(toQStringList(tok), expected);
+ }
+}
+
+void tst_QStringTokenizer::toContainer() const // toContainer() without arguments: only the returned type is checked
+{
+ // QStringView value_type:
+ {
+ auto tok = qTokenize(u"a,b,c", u','); // UTF-16 literal haystack
+ auto v = tok.toContainer();
+ QVERIFY((std::is_same_v<decltype(v), QVector<QStringView>>));
+ }
+ // QLatin1String value_type
+ {
+ auto tok = qTokenize(QLatin1String{"a,b,c"}, u','); // Latin-1 haystack
+ auto v = tok.toContainer();
+ QVERIFY((std::is_same_v<decltype(v), QVector<QLatin1String>>));
+ }
+}
+
+QTEST_APPLESS_MAIN(tst_QStringTokenizer)
+#include "tst_qstringtokenizer.moc"
diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro
index cb7de443bd..a8ad7bd2f8 100644
--- a/tests/auto/corelib/text/text.pro
+++ b/tests/auto/corelib/text/text.pro
@@ -20,5 +20,6 @@ SUBDIRS = \
qstringlist \
qstringmatcher \
qstringref \
+ qstringtokenizer \
qstringview \
qtextboundaryfinder