diff options
author | Marc Mutz <marc.mutz@kdab.com> | 2017-11-22 15:48:02 +0100 |
---|---|---|
committer | Marc Mutz <marc.mutz@kdab.com> | 2020-06-03 19:13:54 +0200 |
commit | 6a3c6f939f29c83d53d2da0c3f53b814bdd02358 (patch) | |
tree | b0734ab85ce0839a80e440b42da4216ff7291378 | |
parent | 1b33ee95e5c6e5e27f732fd273920861fdae486a (diff) |
Long live QStringTokenizer!
This class is designed as a C++20-style generator / lazy sequence, and
is the new return value of QString{,View}::tokenize().
It thus is more similar to a hand-coded loop around indexOf() than
QString::split(), which returns a container (the filling of which
allocates memory).
The template arguments of QStringTokenizer intricately depend on the
arguments with which it is constructed, so QStringTokenizer cannot be used
directly without C++17 CTAD. To work around this issue, add a factory
function, qTokenize().
LATER:
- ~Optimize QLatin1String needles (avoid repeated L1->UTF16 conversion)~
(out of scope for QStringTokenizer, should be solved in the respective
indexOf())
- Keep per-instantiation state:
* Boyer-Moore table
[ChangeLog][QtCore][QStringTokenizer] New class.
[ChangeLog][QtCore][qTokenize] New function.
Change-Id: I7a7a02e9175cdd3887778f29f2f91933329be759
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r-- | src/corelib/text/qstringtokenizer.cpp | 357 | ||||
-rw-r--r-- | src/corelib/text/qstringtokenizer.h | 438 | ||||
-rw-r--r-- | src/corelib/text/text.pri | 2 | ||||
-rw-r--r-- | tests/auto/corelib/text/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp | 172 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringtokenizer/.gitignore | 1 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt | 13 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro | 7 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp | 151 | ||||
-rw-r--r-- | tests/auto/corelib/text/text.pro | 1 |
10 files changed, 1142 insertions, 1 deletions
diff --git a/src/corelib/text/qstringtokenizer.cpp b/src/corelib/text/qstringtokenizer.cpp new file mode 100644 index 0000000000..043269a3ac --- /dev/null +++ b/src/corelib/text/qstringtokenizer.cpp @@ -0,0 +1,357 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. 
Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qstringtokenizer.h" +#include "qstringalgorithms.h" + +QT_BEGIN_NAMESPACE + +/*! + \class QStringTokenizer + \inmodule QtCore + \since 6.0 + \brief The QStringTokenizer class splits strings into tokens along given separators + \reentrant + \ingroup tools + \ingroup string-processing + + Splits a string into substrings wherever a given separator occurs, + and returns a (lazy) list of those strings. If the separator does + not match anywhere in the string, produces a single-element + sequence containing this string. If the separator is empty, + QStringTokenizer produces an empty string, followed by each of the + string's characters, followed by another empty string. The two + enumerations Qt::SplitBehavior and Qt::CaseSensitivity further + control the output. + + QStringTokenizer drives QStringView::tokenize(), but, at least with a + recent compiler, you can use it directly, too: + + \code + for (auto token : QStringTokenizer{string, separator}) + use(token); + \endcode + + \note You should never, ever, name the template arguments of a + QStringTokenizer explicitly. If you can use C++17 Class Template + Argument Deduction (CTAD), you may write + \c{QStringTokenizer{string, separator}} (without template + arguments). 
If you can't use C++17 CTAD, you must use the + qTokenize() factory function or the QStringView::tokenize() member function + and store the return value only in \c{auto} variables: + + \code + auto result = qTokenize(string, sep); + \endcode + + This is because the template arguments of QStringTokenizer have a + very subtle dependency on the specific string and separator types + with which they are constructed, and they don't usually + correspond to the actual types passed. + + \section1 Lazy Sequences + + QStringTokenizer acts as a so-called lazy sequence, that is, each + next element is only computed once you ask for it. Lazy sequences + have the advantage that they only require O(1) memory. They have + the disadvantage that, at least for QStringTokenizer, they only + allow forward, not random-access, iteration. + + The intended use-case is that you just plug it into a ranged for loop: + + \code + for (auto token : QStringTokenizer{string, separator}) + use(token); + \endcode + + or a C++20 ranged algorithm: + + \code + std::ranges::for_each(QStringTokenizer{string, separator}, + [] (auto token) { use(token); }); + \endcode + + \section1 End Sentinel + + The QStringTokenizer iterators cannot be used with classical STL + algorithms, because those require iterator/iterator pairs, while + QStringTokenizer uses sentinels, that is, it uses a different + type, QStringTokenizer::sentinel, to mark the end of the + range. This improves performance, because the sentinel is an empty + type. Sentinels are supported from C++17 (for ranged for) + and C++20 (for algorithms using the new ranges library). + + \section1 Temporaries + + QStringTokenizer is very carefully designed to avoid dangling + references. 
If you construct a tokenizer from a temporary string + (an rvalue), that argument is stored internally, so the referenced + data isn't deleted before it is tokenized: + + \code + auto tok = QStringTokenizer{widget.text(), u','}; + // return value of `widget.text()` is destroyed, but content was moved into `tok` + for (auto e : tok) + use(e); + \endcode + + If you pass named objects (lvalues), then QStringTokenizer does + not store a copy. You are responsible for keeping the named object's + data around for longer than the tokenizer operates on it: + + \code + auto text = widget.text(); + auto tok = QStringTokenizer{text, u','}; + text.clear(); // destroy content of `text` + for (auto e : tok) // ERROR: `tok` references deleted data! + use(e); + \endcode + + \sa QStringView::split(), QLatin1String::split(), Qt::SplitBehavior, Qt::CaseSensitivity +*/ + +/*! + \typedef QStringTokenizer::value_type + + Alias for \c{const QStringView} or \c{const QLatin1String}, + depending on the tokenizer's \c Haystack template argument. +*/ + +/*! + \typedef QStringTokenizer::difference_type + + Alias for qsizetype. +*/ + +/*! + \typedef QStringTokenizer::size_type + + Alias for std::size_t. +*/ + +/*! + \typedef QStringTokenizer::reference + + Alias for \c{value_type &}. + + QStringTokenizer does not support mutable references, so this is + the same as const_reference. +*/ + +/*! + \typedef QStringTokenizer::const_reference + + Alias for \c{value_type &}. +*/ + +/*! + \typedef QStringTokenizer::pointer + + Alias for \c{value_type *}. + + QStringTokenizer does not support mutable iterators, so this is + the same as const_pointer. +*/ + +/*! + \typedef QStringTokenizer::const_pointer + + Alias for \c{value_type *}. +*/ + +/*! + \typedef QStringTokenizer::iterator + + This typedef provides an STL-style const iterator for + QStringTokenizer. + + QStringTokenizer does not support mutable iterators, so this is + the same as const_iterator. + + \sa const_iterator +*/ + +/*! 
+ \typedef QStringTokenizer::const_iterator + + This typedef provides an STL-style const iterator for + QStringTokenizer. + + \sa iterator +*/ + +/*! + \typedef QStringTokenizer::sentinel + + This typedef provides an STL-style sentinel for + QStringTokenizer::iterator and QStringTokenizer::const_iterator. + + \sa const_iterator +*/ + +/*! + \fn QStringTokenizer(Haystack haystack, Needle needle, Qt::CaseSensitivity cs, Qt::SplitBehavior sb) + \fn QStringTokenizer(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) + + Constructs a string tokenizer that splits the string \a haystack + into substrings wherever \a needle occurs, and allows iteration + over those strings as they are found. If \a needle does not match + anywhere in \a haystack, a single element containing \a haystack + is produced. + + \a cs specifies whether \a needle should be matched case + sensitively or case insensitively. + + If \a sb is Qt::SkipEmptyParts, empty entries don't + appear in the result. By default, empty entries are included. + + \sa QStringView::split(), QLatin1String::split(), Qt::CaseSensitivity, Qt::SplitBehavior +*/ + +/*! + \fn QStringTokenizer::const_iterator QStringTokenizer::begin() const + + Returns a const \l{STL-style iterators}{STL-style iterator} + pointing to the first token in the list. + + \sa end(), cbegin() +*/ + +/*! + \fn QStringTokenizer::const_iterator QStringTokenizer::cbegin() const + + Same as begin(). + + \sa cend(), begin() +*/ + +/*! + \fn QStringTokenizer::sentinel QStringTokenizer::end() const + + Returns a const \l{STL-style iterators}{STL-style sentinel} + pointing to the imaginary token after the last token in the list. + + \sa begin(), cend() +*/ + +/*! + \fn QStringTokenizer::sentinel QStringTokenizer::cend() const + + Same as end(). + + \sa cbegin(), end() +*/ + +/*! 
+ \fn QStringTokenizer::toContainer(Container &&c) const & + + Convenience method to convert the lazy sequence into a + (typically) random-access container. + + This function is only available if \c Container has a \c value_type + matching this tokenizer's value_type. + + If you pass in a named container (an lvalue), then that container + is filled, and a reference to it is returned. + + If you pass in a temporary container (an rvalue, incl. the default + argument), then that container is filled, and returned by value. + + \code + // assuming tok's value_type is QStringView, then... + auto tok = QStringTokenizer{~~~}; + // ... rac1 is a QVector: + auto rac1 = tok.toContainer(); + // ... rac2 is std::pmr::vector<QStringView>: + auto rac2 = tok.toContainer<std::pmr::vector<QStringView>>(); + auto rac3 = QVarLengthArray<QStringView, 12>{}; + // appends the token sequence produced by tok to rac3 + // and returns a reference to rac3 (which we ignore here): + tok.toContainer(rac3); + \endcode + + This gives you maximum flexibility in how you want the sequence to + be stored. +*/ + +/*! + \fn QStringTokenizer::toContainer(Container &&c) const && + \overload + + In addition to the constraints on the lvalue-this overload, this + rvalue-this overload is only available when this QStringTokenizer + does not store the haystack internally, as this could create a + container full of dangling references: + + \code + auto tokens = QStringTokenizer{widget.text(), u','}.toContainer(); + // ERROR: cannot call toContainer() on rvalue + // 'tokens' references the data of the copy of widget.text() + // stored inside the QStringTokenizer, which has since been deleted + \endcode + + To fix, store the QStringTokenizer in a named variable: + + \code + auto tokenizer = QStringTokenizer{widget.text(), u','}; + auto tokens = tokenizer.toContainer(); + // OK: the copy of widget.text() stored in 'tokenizer' keeps the data + // referenced by 'tokens' alive. 
+ \endcode + + You can force this function into existence by passing a view instead: + + \code + func(QStringTokenizer{QStringView{widget.text()}, u','}.toContainer()); + // OK: compiler keeps widget.text() around until after func() has executed + \endcode +*/ + +/*! + \fn qTokenize(Haystack &&haystack, Needle &&needle, Flags...flags) + \relates QStringTokenizer + \since 6.0 + + Factory function for QStringTokenizer. You can use this function + if your compiler doesn't, yet, support C++17 Class Template + Argument Deduction (CTAD), but we recommend direct use of + QStringTokenizer with CTAD instead. +*/ + +QT_END_NAMESPACE diff --git a/src/corelib/text/qstringtokenizer.h b/src/corelib/text/qstringtokenizer.h new file mode 100644 index 0000000000..31bbbf01c8 --- /dev/null +++ b/src/corelib/text/qstringtokenizer.h @@ -0,0 +1,438 @@ +/**************************************************************************** +** +** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. 
Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#ifndef QSTRINGTOKENIZER_H +#define QSTRINGTOKENIZER_H + +#include <QtCore/qnamespace.h> + +QT_BEGIN_NAMESPACE + +template <typename, typename> class QStringBuilder; +template <typename> class QVector; + +#if defined(Q_QDOC) || 1 || (defined(__cpp_range_based_for) && __cpp_range_based_for >= 201603) +# define Q_STRINGTOKENIZER_USE_SENTINEL +#endif + +class QStringTokenizerBaseBase +{ +protected: + ~QStringTokenizerBaseBase() = default; + constexpr QStringTokenizerBaseBase(Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept + : m_sb{sb}, m_cs{cs} {} + + struct tokenizer_state { + qsizetype start, end, extra; + friend constexpr bool operator==(tokenizer_state lhs, tokenizer_state rhs) noexcept + { return lhs.start == rhs.start && lhs.end == rhs.end && lhs.extra == rhs.extra; } + friend constexpr bool operator!=(tokenizer_state lhs, tokenizer_state rhs) noexcept + { return !operator==(lhs, rhs); } + }; + + Qt::SplitBehavior m_sb; + Qt::CaseSensitivity m_cs; +}; + +template <typename Haystack, typename Needle> +class QStringTokenizerBase : 
protected QStringTokenizerBaseBase +{ + struct next_result { + Haystack value; + bool ok; + tokenizer_state state; + }; + inline next_result next(tokenizer_state state) const noexcept; + inline next_result toFront() const noexcept { return next({}); } +public: + constexpr explicit QStringTokenizerBase(Haystack haystack, Needle needle, Qt::SplitBehavior sb, Qt::CaseSensitivity cs) noexcept + : QStringTokenizerBaseBase{sb, cs}, m_haystack{haystack}, m_needle{needle} {} + + class iterator; + friend class iterator; +#ifdef Q_STRINGTOKENIZER_USE_SENTINEL + class sentinel { + friend constexpr bool operator==(sentinel, sentinel) noexcept { return true; } + friend constexpr bool operator!=(sentinel, sentinel) noexcept { return false; } + }; +#else + using sentinel = iterator; +#endif + class iterator { + const QStringTokenizerBase *tokenizer; + next_result current; + friend class QStringTokenizerBase; + explicit iterator(const QStringTokenizerBase &t) noexcept + : tokenizer{&t}, current{t.toFront()} {} + public: + using difference_type = qsizetype; + using value_type = Haystack; + using pointer = const value_type*; + using reference = const value_type&; + using iterator_category = std::forward_iterator_tag; + + iterator() noexcept = default; + + // violates std::forward_iterator (returns a reference into the iterator) + Q_REQUIRED_RESULT constexpr const Haystack* operator->() const { return Q_ASSERT(current.ok), &current.value; } + Q_REQUIRED_RESULT constexpr const Haystack& operator*() const { return *operator->(); } + + iterator& operator++() { advance(); return *this; } + iterator operator++(int) { auto tmp = *this; advance(); return tmp; } + + friend constexpr bool operator==(const iterator &lhs, const iterator &rhs) noexcept + { return lhs.current.ok == rhs.current.ok && (!lhs.current.ok || (Q_ASSERT(lhs.tokenizer == rhs.tokenizer), lhs.current.state == rhs.current.state)); } + friend constexpr bool operator!=(const iterator &lhs, const iterator &rhs) noexcept + { return 
!operator==(lhs, rhs); } +#ifdef Q_STRINGTOKENIZER_USE_SENTINEL + friend constexpr bool operator==(const iterator &lhs, sentinel) noexcept + { return !lhs.current.ok; } + friend constexpr bool operator!=(const iterator &lhs, sentinel) noexcept + { return !operator==(lhs, sentinel{}); } + friend constexpr bool operator==(sentinel, const iterator &rhs) noexcept + { return !rhs.current.ok; } + friend constexpr bool operator!=(sentinel, const iterator &rhs) noexcept + { return !operator==(sentinel{}, rhs); } +#endif + private: + void advance() { + Q_ASSERT(current.ok); + current = tokenizer->next(current.state); + } + }; + using const_iterator = iterator; + + using size_type = std::size_t; + using difference_type = typename iterator::difference_type; + using value_type = typename iterator::value_type; + using pointer = typename iterator::pointer; + using const_pointer = pointer; + using reference = typename iterator::reference; + using const_reference = reference; + + Q_REQUIRED_RESULT iterator begin() const noexcept { return iterator{*this}; } + Q_REQUIRED_RESULT iterator cbegin() const noexcept { return begin(); } + template <bool = std::is_same<iterator, sentinel>::value> // ODR protection + Q_REQUIRED_RESULT constexpr sentinel end() const noexcept { return {}; } + template <bool = std::is_same<iterator, sentinel>::value> // ODR protection + Q_REQUIRED_RESULT constexpr sentinel cend() const noexcept { return {}; } + +private: + Haystack m_haystack; + Needle m_needle; +}; + +QT_BEGIN_INCLUDE_NAMESPACE +#include <QtCore/qstringview.h> +QT_END_INCLUDE_NAMESPACE + +namespace QtPrivate { +namespace Tok { + + constexpr qsizetype size(QChar) noexcept { return 1; } + template <typename String> + constexpr qsizetype size(const String &s) noexcept { return static_cast<qsizetype>(s.size()); } + + template <typename String> struct ViewForImpl {}; + template <> struct ViewForImpl<QStringView> { using type = QStringView; }; + template <> struct ViewForImpl<QLatin1String> { using 
type = QLatin1String; }; + template <> struct ViewForImpl<QChar> { using type = QChar; }; + template <> struct ViewForImpl<QString> : ViewForImpl<QStringView> {}; + template <> struct ViewForImpl<QStringRef> : ViewForImpl<QStringView> {}; + template <> struct ViewForImpl<QLatin1Char> : ViewForImpl<QChar> {}; + template <> struct ViewForImpl<char16_t> : ViewForImpl<QChar> {}; + template <> struct ViewForImpl<char16_t*> : ViewForImpl<QStringView> {}; + template <> struct ViewForImpl<const char16_t*> : ViewForImpl<QStringView> {}; + template <typename LHS, typename RHS> + struct ViewForImpl<QStringBuilder<LHS, RHS>> : ViewForImpl<typename QStringBuilder<LHS,RHS>::ConvertTo> {}; + template <typename Char, typename...Args> + struct ViewForImpl<std::basic_string<Char, Args...>> : ViewForImpl<Char*> {}; +#ifdef __cpp_lib_string_view + template <typename Char, typename...Args> + struct ViewForImpl<std::basic_string_view<Char, Args...>> : ViewForImpl<Char*> {}; +#endif + + // This metafunction maps a StringLike to a View (currently, QChar, + // QStringView, QLatin1String). This is what QStringTokenizerBase + // operates on. QStringTokenizer adds pinning to keep rvalues alive + // for the duration of the algorithm. + template <typename String> + using ViewFor = typename ViewForImpl<typename std::decay<String>::type>::type; + + // Pinning: + // rvalues of owning string types need to be moved into QStringTokenizer + // to keep them alive for the lifetime of the tokenizer. For lvalues, we + // assume the user takes care of that. 
+ + // default: don't pin anything (characters are pinned implicitly) + template <typename String> + struct PinForImpl { using type = ViewFor<String>; }; + + // rvalue QString -> QString + template <> + struct PinForImpl<QString> { using type = QString; }; + + // rvalue std::basic_string -> basic_string + template <typename Char, typename...Args> + struct PinForImpl<std::basic_string<Char, Args...>> + { using type = std::basic_string<Char, Args...>; }; + + // rvalue QStringBuilder -> pin as the nested ConvertTo type + template <typename LHS, typename RHS> + struct PinForImpl<QStringBuilder<LHS, RHS>> + : PinForImpl<typename QStringBuilder<LHS, RHS>::ConvertTo> {}; + + template <typename StringLike> + using PinFor = typename PinForImpl<typename std::remove_cv<StringLike>::type>::type; + + template <typename T> struct is_owning_string_type : std::false_type {}; + template <> struct is_owning_string_type<QString> : std::true_type {}; + template <typename...Args> struct is_owning_string_type<std::basic_string<Args...>> : std::true_type {}; + + // unpinned + template <typename T, bool pinned = is_owning_string_type<T>::value> + struct Pinning + { + // this is the storage for non-pinned types - no storage + constexpr Pinning(const T&) noexcept {} + // Since we don't store something, the view() method needs to be + // given something it can return. + constexpr T view(T t) const noexcept { return t; } + }; + + // pinned + template <typename T> + struct Pinning<T, true> + { + T m_string; + // specialisation for owning string types (QString, std::u16string): + // stores the string: + constexpr Pinning(T &&s) noexcept : m_string{std::move(s)} {} + // ... and thus view() uses that instead of the argument passed in: + constexpr QStringView view(const T&) const noexcept { return m_string; } + }; + + // NeedlePinning and HaystackPinning are there to distinguish them as + // base classes of QStringTokenizer. We use inheritance to reap the + // empty base class optimization. 
+ template <typename T> + struct NeedlePinning : Pinning<T> + { + using Pinning<T>::Pinning; + template <typename Arg> + constexpr auto needleView(Arg &&a) noexcept + -> decltype(this->view(std::forward<Arg>(a))) + { return this->view(std::forward<Arg>(a)); } + }; + + template <typename T> + struct HaystackPinning : Pinning<T> + { + using Pinning<T>::Pinning; + template <typename Arg> + constexpr auto haystackView(Arg &&a) noexcept + -> decltype(this->view(std::forward<Arg>(a))) + { return this->view(std::forward<Arg>(a)); } + }; + + // The Base of a QStringTokenizer is QStringTokenizerBase for the views + // corresponding to the Haystack and Needle template arguments + // + // ie. QStringTokenizer<QString, QString> + // : QStringTokenizerBase<QStringView, QStringView> (+ pinning) + template <typename Haystack, typename Needle> + using TokenizerBase = QStringTokenizerBase<ViewFor<Haystack>, ViewFor<Needle>>; +} // namespace Tok +} // namespace QtPrivate + +template <typename Haystack, typename Needle> +class QStringTokenizer + : private QtPrivate::Tok::HaystackPinning<Haystack>, + private QtPrivate::Tok::NeedlePinning<Needle>, + public QtPrivate::Tok::TokenizerBase<Haystack, Needle> +{ + using HPin = QtPrivate::Tok::HaystackPinning<Haystack>; + using NPin = QtPrivate::Tok::NeedlePinning<Needle>; + using Base = QtPrivate::Tok::TokenizerBase<Haystack, Needle>; + template <typename Container, typename HPin> + struct if_haystack_not_pinned_impl : std::enable_if<std::is_empty<HPin>::value, bool> {}; + template <typename Container> + using if_haystack_not_pinned = typename if_haystack_not_pinned_impl<Container, HPin>::type; + template <typename Container, typename Iterator = decltype(std::begin(std::declval<Container>()))> + using if_compatible_container = typename std::enable_if< + std::is_same< + typename Base::value_type, + typename std::iterator_traits<Iterator>::value_type + >::value, + bool + >::type; +public: + using value_type = typename Base::value_type; + + 
constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, + Qt::CaseSensitivity cs, + Qt::SplitBehavior sb = Qt::KeepEmptyParts) + noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) + // here, we present the haystack to Pinning<>, for optional storing. + // If it did store, haystack is moved-from and mustn't be touched + // any longer, which is why view() for these Pinning<>s ignores the + // argument. + : HPin{std::forward<Haystack>(haystack)}, + NPin{std::forward<Needle>(needle)}, + // If Pinning<> didn't store, we pass the haystack (ditto needle) + // to view() again, so it can be copied from there. + Base{this->haystackView(haystack), + this->needleView(needle), sb, cs} + {} + constexpr explicit QStringTokenizer(Haystack haystack, Needle needle, + Qt::SplitBehavior sb = Qt::KeepEmptyParts, + Qt::CaseSensitivity cs = Qt::CaseSensitive) + noexcept(std::is_nothrow_copy_constructible<QStringTokenizer>::value) + : HPin{std::forward<Haystack>(haystack)}, + NPin{std::forward<Needle>(needle)}, + Base{this->haystackView(haystack), + this->needleView(needle), sb, cs} + {} + + template <typename Container = QVector<value_type>, + if_compatible_container<Container> = true> + Container toContainer(Container &&c = {}) const & + { + for (auto e : *this) + c.emplace_back(e); + return c; + } + + template <typename Container = QVector<value_type>, + if_compatible_container<Container> = true, + if_haystack_not_pinned<Container> = true> + Container toContainer(Container &&c = {}) const && + { + for (auto e : *this) + c.emplace_back(e); + return c; + } +}; + +namespace QtPrivate { +namespace Tok { +// This meta function just calculated the template arguments for the +// QStringTokenizer (not -Base), based on the actual arguments passed +// to qTokenize() (or the ctor, with CTAD). It basically detects rvalue +// QString and std::basic_string and otherwise decays the arguments to +// the respective view type. 
+// +// #define works around a C++ restriction: [temp.deduct.guide]/3 seems +// to ask for the simple-template-id following the `->` of a deduction +// guide to be identical to the class name for which we guide deduction. +// In particular, Clang rejects a template alias there, while GCC accepts +// it. +#define Q_TOK_RESULT \ + QStringTokenizer< \ + QtPrivate::Tok::PinFor<Haystack>, \ + QtPrivate::Tok::PinFor<Needle> \ + > \ + /*end*/ +template <typename Haystack, typename Needle> +using TokenizerResult = Q_TOK_RESULT; +template <typename Haystack, typename Needle> +using is_nothrow_constructible_from = std::is_nothrow_copy_constructible<TokenizerResult<Haystack, Needle>>; +} +} + +#ifdef __cpp_deduction_guides +// these tell the compiler how to determine the QStringTokenizer +// template arguments based on the constructor arguments (CTAD): +template <typename Haystack, typename Needle> +QStringTokenizer(Haystack&&, Needle&&) + -> Q_TOK_RESULT; +template <typename Haystack, typename Needle> +QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior) + -> Q_TOK_RESULT; +template <typename Haystack, typename Needle> +QStringTokenizer(Haystack&&, Needle&&, Qt::SplitBehavior, Qt::CaseSensitivity) + -> Q_TOK_RESULT; +template <typename Haystack, typename Needle> +QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity) + -> Q_TOK_RESULT; +template <typename Haystack, typename Needle> +QStringTokenizer(Haystack&&, Needle&&, Qt::CaseSensitivity, Qt::SplitBehavior) + -> Q_TOK_RESULT; +#endif + +#undef Q_TOK_RESULT + +template <typename Haystack, typename Needle, typename...Flags> +Q_REQUIRED_RESULT constexpr auto +qTokenize(Haystack &&h, Needle &&n, Flags...flags) + noexcept(QtPrivate::Tok::is_nothrow_constructible_from<Haystack, Needle>::value) + -> decltype(QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), + std::forward<Needle>(n), flags...}) +{ return QtPrivate::Tok::TokenizerResult<Haystack, Needle>{std::forward<Haystack>(h), + 
std::forward<Needle>(n), + flags...}; } + +template <typename Haystack, typename Needle> +auto QStringTokenizerBase<Haystack, Needle>::next(tokenizer_state state) const noexcept -> next_result +{ + while (true) { + if (state.end < 0) { + // already at end: + return {{}, false, state}; + } + state.end = m_haystack.indexOf(m_needle, state.start + state.extra, m_cs); + Haystack result; + if (state.end >= 0) { + // token separator found => return intermediate element: + result = m_haystack.mid(state.start, state.end - state.start); + const auto ns = QtPrivate::Tok::size(m_needle); + state.start = state.end + ns; + state.extra = (ns == 0 ? 1 : 0); + } else { + // token separator not found => return final element: + result = m_haystack.mid(state.start); + } + if ((m_sb & Qt::SkipEmptyParts) && result.isEmpty()) + continue; + return {result, true, state}; + } +} + +QT_END_NAMESPACE + +#endif /* QSTRINGTOKENIZER_H */ diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri index 1275c014a8..1d83bc151b 100644 --- a/src/corelib/text/text.pri +++ b/src/corelib/text/text.pri @@ -26,6 +26,7 @@ HEADERS += \ text/qstringliteral.h \ text/qstringmatcher.h \ text/qstringview.h \ + text/qstringtokenizer.h \ text/qtextboundaryfinder.h \ text/qunicodetables_p.h \ text/qunicodetools_p.h @@ -44,6 +45,7 @@ SOURCES += \ text/qstringconverter.cpp \ text/qstringlist.cpp \ text/qstringview.cpp \ + text/qstringtokenizer.cpp \ text/qtextboundaryfinder.cpp \ text/qunicodetools.cpp \ text/qvsnprintf.cpp diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt index 19cd71a987..1aada48d2a 100644 --- a/tests/auto/corelib/text/CMakeLists.txt +++ b/tests/auto/corelib/text/CMakeLists.txt @@ -19,5 +19,6 @@ add_subdirectory(qstringiterator) add_subdirectory(qstringlist) add_subdirectory(qstringmatcher) add_subdirectory(qstringref) +add_subdirectory(qstringtokenizer) add_subdirectory(qstringview) add_subdirectory(qtextboundaryfinder) diff --git 
a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
index a55f2f13eb..37cc7db841 100644
--- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
+++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp
@@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
 ** Copyright (C) 2019 Mail.ru Group.
 ** Contact: https://www.qt.io/licensing/
 **
@@ -33,6 +33,7 @@
 #include <QString>
 #include <QStringView>
+#include <QStringTokenizer>
 #include <QChar>
 #include <QScopedArrayPointer>
 #include <QStringRef>
@@ -512,6 +513,116 @@ private Q_SLOTS:
     void split_QStringRef_char16_t() { split_impl<QStringRef, char16_t>(); }
 private:
+    // Data provider and test body shared by every tok_* slot below. Haystack
+    // and Needle select the string types that tok_impl() hands to qTokenize().
+    void tok_data(bool rhsHasVariableLength = true);
+    template <typename Haystack, typename Needle> void tok_impl() const;
+
+private Q_SLOTS:
+    // let Splittable = {QString, QStringRef, QStringView, QLatin1String, const char16_t*, std::u16string}
+    // let Separators = Splittable ∪ {QChar, char16_t}
+    // test Splittable × Separators:
+    // (the single-character needles QChar and char16_t fetch their rows with
+    // tok_data(false), i.e. rhsHasVariableLength == false)
+
+    // QString haystack:
+    void tok_QString_QString_data() { tok_data(); }
+    void tok_QString_QString() { tok_impl<QString, QString>(); }
+    void tok_QString_QStringRef_data() { tok_data(); }
+    void tok_QString_QStringRef() { tok_impl<QString, QStringRef>(); }
+    void tok_QString_QStringView_data() { tok_data(); }
+    void tok_QString_QStringView() { tok_impl<QString, QStringView>(); }
+    void tok_QString_QLatin1String_data() { tok_data(); }
+    void tok_QString_QLatin1String() { tok_impl<QString, QLatin1String>(); }
+    void tok_QString_const_char16_t_star_data() { tok_data(); }
+    void tok_QString_const_char16_t_star() { tok_impl<QString, const char16_t*>(); }
+    void tok_QString_stdu16string_data() { tok_data(); }
+    void tok_QString_stdu16string() { tok_impl<QString, std::u16string>(); }
+    void tok_QString_QChar_data() { tok_data(false); }
+    void tok_QString_QChar() { tok_impl<QString, QChar>(); }
+    void tok_QString_char16_t_data() { tok_data(false); }
+    void tok_QString_char16_t() { tok_impl<QString, char16_t>(); }
+
+    // QStringRef haystack:
+    void tok_QStringRef_QString_data() { tok_data(); }
+    void tok_QStringRef_QString() { tok_impl<QStringRef, QString>(); }
+    void tok_QStringRef_QStringRef_data() { tok_data(); }
+    void tok_QStringRef_QStringRef() { tok_impl<QStringRef, QStringRef>(); }
+    void tok_QStringRef_QStringView_data() { tok_data(); }
+    void tok_QStringRef_QStringView() { tok_impl<QStringRef, QStringView>(); }
+    void tok_QStringRef_QLatin1String_data() { tok_data(); }
+    void tok_QStringRef_QLatin1String() { tok_impl<QStringRef, QLatin1String>(); }
+    void tok_QStringRef_const_char16_t_star_data() { tok_data(); }
+    void tok_QStringRef_const_char16_t_star() { tok_impl<QStringRef, const char16_t*>(); }
+    void tok_QStringRef_stdu16string_data() { tok_data(); }
+    void tok_QStringRef_stdu16string() { tok_impl<QStringRef, std::u16string>(); }
+    void tok_QStringRef_QChar_data() { tok_data(false); }
+    void tok_QStringRef_QChar() { tok_impl<QStringRef, QChar>(); }
+    void tok_QStringRef_char16_t_data() { tok_data(false); }
+    void tok_QStringRef_char16_t() { tok_impl<QStringRef, char16_t>(); }
+
+    // QStringView haystack:
+    void tok_QStringView_QString_data() { tok_data(); }
+    void tok_QStringView_QString() { tok_impl<QStringView, QString>(); }
+    void tok_QStringView_QStringRef_data() { tok_data(); }
+    void tok_QStringView_QStringRef() { tok_impl<QStringView, QStringRef>(); }
+    void tok_QStringView_QStringView_data() { tok_data(); }
+    void tok_QStringView_QStringView() { tok_impl<QStringView, QStringView>(); }
+    void tok_QStringView_QLatin1String_data() { tok_data(); }
+    void tok_QStringView_QLatin1String() { tok_impl<QStringView, QLatin1String>(); }
+    void tok_QStringView_const_char16_t_star_data() { tok_data(); }
+    void tok_QStringView_const_char16_t_star() { tok_impl<QStringView, const char16_t*>(); }
+    void tok_QStringView_stdu16string_data() { tok_data(); }
+    void tok_QStringView_stdu16string() { tok_impl<QStringView, std::u16string>(); }
+    void tok_QStringView_QChar_data() { tok_data(false); }
+    void tok_QStringView_QChar() { tok_impl<QStringView, QChar>(); }
+    void tok_QStringView_char16_t_data() { tok_data(false); }
+    void tok_QStringView_char16_t() { tok_impl<QStringView, char16_t>(); }
+
+    // QLatin1String haystack:
+    void tok_QLatin1String_QString_data() { tok_data(); }
+    void tok_QLatin1String_QString() { tok_impl<QLatin1String, QString>(); }
+    void tok_QLatin1String_QStringRef_data() { tok_data(); }
+    void tok_QLatin1String_QStringRef() { tok_impl<QLatin1String, QStringRef>(); }
+    void tok_QLatin1String_QStringView_data() { tok_data(); }
+    void tok_QLatin1String_QStringView() { tok_impl<QLatin1String, QStringView>(); }
+    void tok_QLatin1String_QLatin1String_data() { tok_data(); }
+    void tok_QLatin1String_QLatin1String() { tok_impl<QLatin1String, QLatin1String>(); }
+    void tok_QLatin1String_const_char16_t_star_data() { tok_data(); }
+    void tok_QLatin1String_const_char16_t_star() { tok_impl<QLatin1String, const char16_t*>(); }
+    void tok_QLatin1String_stdu16string_data() { tok_data(); }
+    void tok_QLatin1String_stdu16string() { tok_impl<QLatin1String, std::u16string>(); }
+    void tok_QLatin1String_QChar_data() { tok_data(false); }
+    void tok_QLatin1String_QChar() { tok_impl<QLatin1String, QChar>(); }
+    void tok_QLatin1String_char16_t_data() { tok_data(false); }
+    void tok_QLatin1String_char16_t() { tok_impl<QLatin1String, char16_t>(); }
+
+    // const char16_t* haystack:
+    void tok_const_char16_t_star_QString_data() { tok_data(); }
+    void tok_const_char16_t_star_QString() { tok_impl<const char16_t*, QString>(); }
+    void tok_const_char16_t_star_QStringRef_data() { tok_data(); }
+    void tok_const_char16_t_star_QStringRef() { tok_impl<const char16_t*, QStringRef>(); }
+    void tok_const_char16_t_star_QStringView_data() { tok_data(); }
+    void tok_const_char16_t_star_QStringView() { tok_impl<const char16_t*, QStringView>(); }
+    void tok_const_char16_t_star_QLatin1String_data() { tok_data(); }
+    void tok_const_char16_t_star_QLatin1String() { tok_impl<const char16_t*, QLatin1String>(); }
+    void tok_const_char16_t_star_const_char16_t_star_data() { tok_data(); }
+    void tok_const_char16_t_star_const_char16_t_star() { tok_impl<const char16_t*, const char16_t*>(); }
+    void tok_const_char16_t_star_stdu16string_data() { tok_data(); }
+    void tok_const_char16_t_star_stdu16string() { tok_impl<const char16_t*, std::u16string>(); }
+    void tok_const_char16_t_star_QChar_data() { tok_data(false); }
+    void tok_const_char16_t_star_QChar() { tok_impl<const char16_t*, QChar>(); }
+    void tok_const_char16_t_star_char16_t_data() { tok_data(false); }
+    void tok_const_char16_t_star_char16_t() { tok_impl<const char16_t*, char16_t>(); }
+
+    // std::u16string haystack:
+    void tok_stdu16string_QString_data() { tok_data(); }
+    void tok_stdu16string_QString() { tok_impl<std::u16string, QString>(); }
+    void tok_stdu16string_QStringRef_data() { tok_data(); }
+    void tok_stdu16string_QStringRef() { tok_impl<std::u16string, QStringRef>(); }
+    void tok_stdu16string_QStringView_data() { tok_data(); }
+    void tok_stdu16string_QStringView() { tok_impl<std::u16string, QStringView>(); }
+    void tok_stdu16string_QLatin1String_data() { tok_data(); }
+    void tok_stdu16string_QLatin1String() { tok_impl<std::u16string, QLatin1String>(); }
+    void tok_stdu16string_const_char16_t_star_data() { tok_data(); }
+    void tok_stdu16string_const_char16_t_star() { tok_impl<std::u16string, const char16_t*>(); }
+    void tok_stdu16string_stdu16string_data() { tok_data(); }
+    void tok_stdu16string_stdu16string() { tok_impl<std::u16string, std::u16string>(); }
+    void tok_stdu16string_QChar_data() { tok_data(false); }
+    void tok_stdu16string_QChar() { tok_impl<std::u16string, QChar>(); }
+    void tok_stdu16string_char16_t_data() { tok_data(false); }
+    void tok_stdu16string_char16_t() { tok_impl<std::u16string, char16_t>(); }
+
+private:
     void mid_data();
     template <typename String> void mid_impl();
@@ -901,6 +1012,8 @@ template <> QStringView make(const QStringRef &sf, QLatin1String, const QBy
 template <> QLatin1String make(const QStringRef &, QLatin1String l1, const QByteArray &) { return l1; }
 template <> QByteArray make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8; }
 template <> const char * make(const QStringRef &, QLatin1String, const QByteArray &u8) { return u8.data(); }
+// The two UTF-16 flavours below are built from the QStringRef argument only;
+// the Latin-1 and UTF-8 arguments are ignored.
+template <> const char16_t* make(const QStringRef &sf, QLatin1String, const QByteArray &) { return QStringView{sf}.utf16(); } // assumes `sf` doesn't represent a substring
+template <> std::u16string make(const QStringRef &sf, QLatin1String, const QByteArray &) { return sf.toString().toStdU16String(); }
 template <typename> struct is_utf8_encoded : std::false_type {};
 template <> struct is_utf8_encoded<const char*> : std::true_type {};
@@ -1278,6 +1391,10 @@ static QStringList skipped(const QStringList &sl)
     return result;
 }
+// Returns a by-value copy of `s`. For QString and QByteArray the copy is
+// additionally passed through detached(); every other type is returned as-is.
+// tok_impl() uses the results as temporaries handed to the tokenizer.
+template <typename T> T deepCopied(T s) { return s; }
+template <> QString deepCopied(QString s) { return detached(s); }
+template <> QByteArray deepCopied(QByteArray s) { return detached(s); }
+
 template <typename Haystack, typename Needle>
 void tst_QStringApiSymmetry::split_impl() const
 {
@@ -1304,6 +1421,59 @@ void tst_QStringApiSymmetry::split_impl() const
     QCOMPARE(toQStringList(haystack.split(needle, Qt::SkipEmptyParts, Qt::CaseInsensitive)), skippedResultCIS);
 }
+// The tokenizer tests reuse the rows of the split() tests unchanged.
+void tst_QStringApiSymmetry::tok_data(bool rhsHasVariableLength)
+{
+    split_data(rhsHasVariableLength);
+}
+
+// Test body for one Haystack/Needle type combination.
+//
+// Fetches the haystack, the needle and the expected case-sensitive and
+// case-insensitive results, converts haystack and needle to the requested
+// types via make<>(), and checks that qTokenize() yields the expected tokens.
+// The Qt::SplitBehavior and Qt::CaseSensitivity flags are passed in both
+// orders. When deduction guides are available, the same checks are repeated
+// with a directly constructed QStringTokenizer.
+template <typename Haystack, typename Needle>
+void tst_QStringApiSymmetry::tok_impl() const
+{
+    QFETCH(const QStringRef, haystackU16);
+    QFETCH(const QLatin1String, haystackL1);
+    QFETCH(const QStringRef, needleU16);
+    QFETCH(const QLatin1String, needleL1);
+    QFETCH(const QStringList, resultCS);
+    QFETCH(const QStringList, resultCIS);
+
+    // expectations for Qt::SkipEmptyParts: the same lists without empty entries
+    const QStringList skippedResultCS = skipped(resultCS);
+    const QStringList skippedResultCIS = skipped(resultCIS);
+
+    const auto haystackU8 = haystackU16.toUtf8();
+    const auto needleU8 = needleU16.toUtf8();
+
+    const auto haystack = make<Haystack>(haystackU16, haystackL1, haystackU8);
+    const auto needle = make<Needle>(needleU16, needleL1, needleU8);
+
+    QCOMPARE(toQStringList(qTokenize(haystack, needle)), resultCS);
+    QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive)), resultCS);
+    QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts)), resultCIS);
+    QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive)), skippedResultCS);
+    QCOMPARE(toQStringList(qTokenize(haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts)), skippedResultCIS);
+
+    {
+        const auto tok = qTokenize(deepCopied(haystack), deepCopied(needle));
+        // here, the temporaries returned from deepCopied() have already been destroyed,
+        // yet `tok` should have kept a copy alive as needed:
+        QCOMPARE(toQStringList(tok), resultCS);
+    }
+
+#ifdef __cpp_deduction_guides
+    QCOMPARE(toQStringList(QStringTokenizer{haystack, needle}), resultCS);
+    QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::KeepEmptyParts, Qt::CaseSensitive}), resultCS);
+    QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::KeepEmptyParts}), resultCIS);
+    QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::SkipEmptyParts, Qt::CaseSensitive}), skippedResultCS);
+    QCOMPARE(toQStringList(QStringTokenizer{haystack, needle, Qt::CaseInsensitive, Qt::SkipEmptyParts}), skippedResultCIS);
+
+    {
+        const auto tok = QStringTokenizer{deepCopied(haystack), deepCopied(needle)};
+        // here, the temporaries returned from deepCopied() have already been destroyed,
+        // yet `tok` should have kept a copy alive as needed:
+        QCOMPARE(toQStringList(tok), resultCS);
+    }
+#endif // __cpp_deduction_guides
+}
+
 void tst_QStringApiSymmetry::mid_data()
 {
     QTest::addColumn<QStringRef>("unicode");
diff --git a/tests/auto/corelib/text/qstringtokenizer/.gitignore b/tests/auto/corelib/text/qstringtokenizer/.gitignore
new file mode 100644
index 0000000000..5925520afd
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/.gitignore
@@ -0,0 +1 @@
+tst_qstringtokenizer
diff --git a/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
new file mode 100644
index 0000000000..5928e5b99a
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Generated from qstringtokenizer.pro.
+
+#####################################################################
+## tst_qstringtokenizer Test:
+#####################################################################
+
+qt_add_test(tst_qstringtokenizer
+    SOURCES
+        tst_qstringtokenizer.cpp
+)
+
+## Scopes:
+#####################################################################
diff --git a/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro
new file mode 100644
index 0000000000..5ae27c6570
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/qstringtokenizer.pro
@@ -0,0 +1,7 @@
+CONFIG += testcase
+TARGET = tst_qstringtokenizer
+QT = core testlib
+contains(QT_CONFIG, c++14):CONFIG *= c++14
+contains(QT_CONFIG, c++1z):CONFIG *= c++1z
+contains(QT_CONFIG, c++2a):CONFIG *= c++2a
+SOURCES += tst_qstringtokenizer.cpp
diff --git a/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp
new file mode 100644
index 0000000000..0f50c389b4
--- /dev/null
+++ b/tests/auto/corelib/text/qstringtokenizer/tst_qstringtokenizer.cpp
@@ -0,0 +1,151 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 
Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com> +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. 
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QStringTokenizer>
+#include <QStringBuilder>
+
+#include <QTest>
+
+#include <string>
+
+// Qt::SplitBehavior is used as a test-data column type in basics_data().
+Q_DECLARE_METATYPE(Qt::SplitBehavior)
+
+class tst_QStringTokenizer : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    void constExpr() const;
+    void basics_data() const;
+    void basics() const;
+    void toContainer() const;
+};
+
+// Returns a copy of `sl` in which every empty string has been left out; the
+// order of the remaining entries is unchanged.
+static QStringList skipped(const QStringList &sl)
+{
+    QStringList result;
+    result.reserve(sl.size());
+    for (const QString &s : sl) {
+        if (!s.isEmpty())
+            result.push_back(s);
+    }
+    return result;
+}
+
+// Converts one token to an owning QString so tokens can be collected in a
+// QStringList.
+QString toQString(QStringView str)
+{
+    return str.toString();
+}
+
+// Iterates over `c` (for example a tokenizer) and collects each element,
+// converted with toQString(), into a QStringList.
+template <typename Container>
+QStringList toQStringList(const Container &c)
+{
+    QStringList r;
+    for (auto &&e : c)
+        r.push_back(toQString(e));
+    return r;
+}
+
+// Both blocks only have to compile: each initializes a constexpr variable
+// from qTokenize() applied to a char16_t string literal, once with a string
+// literal needle and once with a single char16_t needle.
+void tst_QStringTokenizer::constExpr() const
+{
+    // compile-time checks
+    {
+        constexpr auto tok = qTokenize(u"a,b,c", u",");
+        Q_UNUSED(tok);
+    }
+    {
+        constexpr auto tok = qTokenize(u"a,b,c", u',');
+        Q_UNUSED(tok);
+    }
+}
+
+// Supplies one row per combination of {KeepEmptyParts, SkipEmptyParts} and
+// {CaseSensitive, CaseInsensitive}; the row name is "<sb>/<cs>".
+void tst_QStringTokenizer::basics_data() const
+{
+    QTest::addColumn<Qt::SplitBehavior>("sb");
+    QTest::addColumn<Qt::CaseSensitivity>("cs");
+
+#define ROW(sb, cs) \
+    do { QTest::addRow("%s/%s", #sb, #cs) << Qt::SplitBehavior{Qt::sb} << Qt::cs; } while (0)
+
+    ROW(KeepEmptyParts, CaseSensitive);
+    ROW(KeepEmptyParts, CaseInsensitive);
+    ROW(SkipEmptyParts, CaseSensitive);
+    ROW(SkipEmptyParts, CaseInsensitive);
+
+#undef ROW
+}
+
+// Tokenizes ",a,b,c,d,e," at ',' with the fetched flags, passed in both
+// orders. The input starts and ends with a separator, hence the empty first
+// and last expected tokens, which are dropped for Qt::SkipEmptyParts. The
+// remaining blocks pass temporaries (a joined QString, a QStringBuilder
+// expression, a std::u16string, a QLatin1Char) as haystack and needle.
+void tst_QStringTokenizer::basics() const
+{
+    QFETCH(const Qt::SplitBehavior, sb);
+    QFETCH(const Qt::CaseSensitivity, cs);
+
+    auto expected = QStringList{"", "a", "b", "c", "d", "e", ""};
+    if (sb & Qt::SkipEmptyParts)
+        expected = skipped(expected);
+    QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', sb, cs)), expected);
+    QCOMPARE(toQStringList(qTokenize(u",a,b,c,d,e,", u',', cs, sb)), expected);
+
+    {
+        auto tok = qTokenize(expected.join(u'x'), u"X" % QString(), Qt::CaseInsensitive);
+        // the temporary QStrings returned from join() and the QStringBuilder expression
+        // are now destroyed, but 'tok' should keep both alive
+        QCOMPARE(toQStringList(tok), expected);
+    }
+
+    using namespace std::string_literals;
+
+    {
+        auto tok = qTokenize(expected.join(u'x'), u"X"s, Qt::CaseInsensitive);
+        QCOMPARE(toQStringList(tok), expected);
+    }
+
+    {
+        auto tok = qTokenize(expected.join(u'x'), QLatin1Char('x'), cs, sb);
+        QCOMPARE(toQStringList(tok), expected);
+    }
+}
+
+// Checks the type returned by toContainer() called without arguments: a
+// QVector whose element type is QStringView for a UTF-16 literal haystack and
+// QLatin1String for a QLatin1String haystack.
+void tst_QStringTokenizer::toContainer() const
+{
+    // QStringView value_type:
+    {
+        auto tok = qTokenize(u"a,b,c", u',');
+        auto v = tok.toContainer();
+        QVERIFY((std::is_same_v<decltype(v), QVector<QStringView>>));
+    }
+    // QLatin1String value_type
+    {
+        auto tok = qTokenize(QLatin1String{"a,b,c"}, u',');
+        auto v = tok.toContainer();
+        QVERIFY((std::is_same_v<decltype(v), QVector<QLatin1String>>));
+    }
+}
+
+QTEST_APPLESS_MAIN(tst_QStringTokenizer)
+#include "tst_qstringtokenizer.moc"
diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro
index cb7de443bd..a8ad7bd2f8 100644
--- a/tests/auto/corelib/text/text.pro
+++ b/tests/auto/corelib/text/text.pro
@@ -20,5 +20,6 @@ SUBDIRS = \
     qstringlist \
     qstringmatcher \
     qstringref \
+    qstringtokenizer \
     qstringview \
     qtextboundaryfinder
|