summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qanystringview.cpp
blob: 4129257c02dcead52d271a6ebd8f6cd996c530ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

#include "qanystringview.h"
#include "qdebug.h"
#include "qttypetraits.h"

QT_BEGIN_NAMESPACE

/*!
    \class QAnyStringView
    \inmodule QtCore
    \since 6.0
    \brief The QAnyStringView class provides a unified view on Latin-1, UTF-8,
           or UTF-16 strings with a read-only subset of the QString API.
    \reentrant
    \ingroup tools
    \ingroup string-processing

    \compares strong
    \compareswith strong char16_t QChar {const char16_t *} {const char *} \
                  QByteArray QByteArrayView QString QStringView QUtf8StringView \
                  QLatin1StringView
    \endcompareswith

    A QAnyStringView references a contiguous portion of a string it does
    not own. It acts as an interface type to all kinds of strings,
    without the need to construct a QString first.

    Unlike QStringView and QUtf8StringView, QAnyStringView can hold
    strings of any of the following encodings: UTF-8, UTF-16, and
    Latin-1. The latter is supported because Latin-1, unlike UTF-8,
    can be efficiently compared to UTF-16 data: a length mismatch
    already means the strings cannot be equal. This is not true for
    UTF-8/UTF-16 comparisons, because UTF-8 is a variable-length
    encoding.

    The string may be represented as an array (or an array-compatible
    data-structure such as QString, std::basic_string, etc.) of \c
    char, \c char8_t, QChar, \c ushort, \c char16_t or (on platforms,
    such as Windows, where it is a 16-bit type) \c wchar_t.

    QAnyStringView is designed as an interface type; its main use-case
    is as a function parameter type. When QAnyStringViews are used as
    automatic variables or data members, care must be taken to ensure
    that the referenced string data (for example, owned by a QString)
    outlives the QAnyStringView on all code paths, lest the string
    view ends up referencing deleted data.

    When used as an interface type, QAnyStringView allows a single
    function to accept a wide variety of string data sources. One
    function accepting QAnyStringView thus replaces five function
    overloads (taking QString, \c{(const QChar*, qsizetype)},
    QUtf8StringView, QLatin1StringView (but see above), and QChar), while
    at the same time enabling even more string data sources to be
    passed to the function, such as \c{u8"Hello World"}, a \c char8_t
    string literal.

    Like elsewhere in Qt, QAnyStringView assumes \c char data is encoded
    in UTF-8, unless it is presented as a QLatin1StringView.

    Since Qt 6.4, however, UTF-8 string literals that are pure US-ASCII are
    automatically stored as Latin-1. This is a compile-time check with no
    runtime overhead. The feature requires compiling in C++20, or with a recent
    GCC.

    QAnyStringViews should be passed by value, not by reference-to-const:
    \snippet code/src_corelib_text_qanystringview.cpp 0

    QAnyStringView can also be used as the return value of a function,
    but this is not recommended. QUtf8StringView or QStringView are
    better suited as function return values.  If you call a function
    returning QAnyStringView, take extra care to not keep the
    QAnyStringView around longer than the function promises to keep
    the referenced string data alive. If in doubt, obtain a strong
    reference to the data by calling toString() to convert the
    QAnyStringView into a QString.

    QAnyStringView is a \e{Literal Type}.

    \section2 Compatible Character Types

    QAnyStringView accepts strings over a variety of character types:

    \list
    \li \c char (both signed and unsigned)
    \li \c char8_t (C++20 only)
    \li \c char16_t
    \li \c wchar_t (where it's a 16-bit type, e.g. Windows)
    \li \c ushort
    \li \c QChar
    \endlist

    The 8-bit character types are interpreted as UTF-8 data (except when
    presented as a QLatin1StringView) while the 16-bit character types are
    interpreted as UTF-16 data in host byte order (the same as QString).

    \section2 Sizes and Sub-Strings

    All sizes and positions in QAnyStringView functions are in the
    encoding's code units (that is, UTF-16 surrogate pairs count as
    two for the purposes of these functions, the same as in QString,
    and UTF-8 multibyte sequences count as two, three or four,
    depending on their length).

    \sa QUtf8StringView, QStringView
*/

/*!
    \typedef QAnyStringView::difference_type

    Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL.
*/

/*!
    \typedef QAnyStringView::size_type

    Alias for qsizetype. Provided for compatibility with the STL.
*/

/*!
    \fn QAnyStringView::QAnyStringView()

    Constructs a null string view.

    \sa isNull()
*/

/*!
    \fn QAnyStringView::QAnyStringView(std::nullptr_t)

    Constructs a null string view.

    \sa isNull()
*/

/*!
    \fn template <typename Char, QAnyStringView::if_compatible_char<Char> = true> QAnyStringView::QAnyStringView(const Char *str, qsizetype len)

    Constructs a string view on \a str with length \a len.

    The range \c{[str, str + len)} must remain valid for the lifetime of this string view object.

    Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view.

    The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr.

    This constructor only participates in overload resolution if \c Char is a compatible
    character type.

    \sa isNull(), {Compatible Character Types}
*/

/*!
    \fn template <typename Char, QAnyStringView::if_compatible_char<Char> = true> QAnyStringView::QAnyStringView(const Char *first, const Char *last)

    Constructs a string view on \a first with length (\a last - \a first).

    The range \c{[first,last)} must remain valid for the lifetime of
    this string view object.

    Passing \nullptr as \a first is safe if \a last is \nullptr, too,
    and results in a null string view.

    The behavior is undefined if \a last precedes \a first, or \a first
    is \nullptr and \a last is not.

    This constructor only participates in overload resolution if \c Char
    is a compatible character type.

    \sa isNull(), {Compatible Character Types}
*/

/*!
    \fn template <typename Char> QAnyStringView::QAnyStringView(const Char *str)

    Constructs a string view on \a str. The length is determined
    by scanning for the first \c{Char(0)}.

    \a str must remain valid for the lifetime of this string view object.

    Passing \nullptr as \a str is safe and results in a null string view.

    This constructor only participates in overload resolution if \a
    str is not an array and if \c Char is a compatible character
    type.

    \sa isNull(), {Compatible Character Types}
*/

/*!
    \fn template <typename Char, size_t N> QAnyStringView::QAnyStringView(const Char (&string)[N])

    Constructs a string view on the character string literal \a string.
    The view covers the array until the first \c{Char(0)} is encountered,
    or \c N, whichever comes first.
    If you need the full array, use fromArray() instead.

    \a string must remain valid for the lifetime of this string view
    object.

    This constructor only participates in overload resolution if \a
    string is an actual array and \c Char is a compatible character
    type.

    \sa {Compatible Character Types}
*/

/*!
    \fn QAnyStringView::QAnyStringView(const QString &str)

    Constructs a string view on \a str.

    \c{str.data()} must remain valid for the lifetime of this string view object.

    The string view will be null if and only if \c{str.isNull()}.
*/

/*!
    \fn QAnyStringView::QAnyStringView(const QByteArray &str)

    Constructs a string view on \a str. The data in \a str is interpreted as UTF-8.

    \c{str.data()} must remain valid for the lifetime of this string view object.

    The string view will be null if and only if \c{str.isNull()}.
*/

/*!
    \fn template <typename Container, QAnyStringView::if_compatible_container<Container>> QAnyStringView::QAnyStringView(const Container &str)

    Constructs a string view on \a str. The length is taken from \c{std::size(str)}.

    \c{std::data(str)} must remain valid for the lifetime of this string view object.

    This constructor only participates in overload resolution if \c Container is a
    container with a compatible character type as \c{value_type}.

    The string view will be empty if and only if \c{std::size(str) == 0}. It is unspecified
    whether this constructor can result in a null string view (\c{std::data(str)} would
    have to return \nullptr for this).

    \sa isNull(), isEmpty()
*/

/*!
    \fn template <typename Char, size_t Size> static QAnyStringView fromArray(const Char (&string)[Size]) noexcept

    Constructs a string view on the full character string literal \a string,
    including any trailing \c{Char(0)}. If you don't want the
    null-terminator included in the view then you can use the constructor
    overload taking a pointer and a size:

    \snippet code/src_corelib_text_qanystringview.cpp 2

    Alternatively you can use the constructor overload taking an
    array literal which will create a view up to, but not including,
    the first null-terminator in the data.

    \a string must remain valid for the lifetime of this string view
    object.

    This function will work with any array literal if \c Char is a
    compatible character type.
*/

/*!
    \fn QString QAnyStringView::toString() const

    Returns a deep copy of this string view's data as a QString.

    The return value will be a null QString if and only if this string view is null.
*/

/*!
    \fn const void *QAnyStringView::data() const

    Returns a const pointer to the first character in the string view.

    \note The character array represented by the return value is \e not null-terminated.

    \sa size_bytes()
*/

/*!
    \fn bool QAnyStringView::empty() const

    Returns whether this string view is empty - that is, whether \c{size() == 0}.

    This function is provided for STL compatibility.

    \sa isEmpty(), isNull(), size()
*/

/*!
    \fn bool QAnyStringView::isEmpty() const

    Returns whether this string view is empty - that is, whether \c{size() == 0}.

    This function is provided for compatibility with other Qt containers.

    \sa empty(), isNull(), size()
*/

/*!
    \fn bool QAnyStringView::isNull() const

    Returns whether this string view is null - that is, whether \c{data() == nullptr}.

    This function is provided for compatibility with other Qt containers.

    \sa empty(), isEmpty(), size()
*/

/*!
    \fn qsizetype QAnyStringView::size() const

    Returns the size of this string view, in the encoding's code units.

    \sa empty(), isEmpty(), isNull(), size_bytes(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::size_bytes() const

    Returns the size of this string view, but in bytes, not code units.

    You can use this function together with data() for hashing or serialization.

    This function is provided for STL compatibility.

    \sa size(), data()
*/

/*!
    \fn QAnyStringView::length() const

    Same as size().

    This function is provided for compatibility with other Qt containers.

    \sa size()
*/

/*!
    \fn QChar QAnyStringView::front() const

    Returns the first character in the string view.

    This function is provided for STL compatibility.

    \warning Calling this function on an empty string view constitutes
    undefined behavior.

    \sa back(), {Sizes and Sub-Strings}
*/

/*!
    \fn QChar QAnyStringView::back() const

    Returns the last character in the string view.

    This function is provided for STL compatibility.

    \warning Calling this function on an empty string view constitutes
    undefined behavior.

    \sa front(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::mid(qsizetype pos, qsizetype n) const
    \since 6.5

    Returns the substring of length \a n starting at position
    \a pos in this object.

    \deprecated Use sliced() instead in new code.

    Returns an empty string view if \a pos exceeds the
    length of the string view. If there are fewer than \a n code units
    available in the string view starting at \a pos, or if
    \a n is negative (default), the function returns all code units that
    are available from \a pos.

    \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::left(qsizetype n) const
    \since 6.5

    \deprecated Use first() instead in new code.

    Returns the substring of length \a n starting at position
    0 in this object.

    The entire string view is returned if \a n is greater than or equal
    to size(), or less than zero.

    \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::right(qsizetype n) const
    \since 6.5

    \deprecated Use last() instead in new code.

    Returns the substring of length \a n starting at position
    size() - \a n in this object.

    The entire string view is returned if \a n is greater than or equal
    to size(), or less than zero.

    \sa first(), last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::first(qsizetype n) const
    \since 6.5

    Returns a string view that contains the first \a n code units
    of this string view.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa last(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::last(qsizetype n) const
    \since 6.5

    Returns a string view that contains the last \a n code units of this string view.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa first(), sliced(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::sliced(qsizetype pos, qsizetype n) const
    \since 6.5

    Returns a string view containing \a n code units of this string view,
    starting at position \a pos.

//! [UB-sliced-index-length]
    \note The behavior is undefined when \a pos < 0, \a n < 0,
    or \a pos + \a n > size().
//! [UB-sliced-index-length]

    \sa first(), last(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::sliced(qsizetype pos) const
    \since 6.5

    Returns a string view starting at position \a pos in this object,
    and extending to its end.

//! [UB-sliced-index-only]
    \note The behavior is undefined when \a pos < 0 or \a pos > size().
//! [UB-sliced-index-only]

    \sa first(), last(), chopped(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::chopped(qsizetype n) const
    \since 6.5

    Returns the substring of length size() - \a n starting at the
    beginning of this object.

    Same as \c{first(size() - n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chop(), truncate(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::truncate(qsizetype n)
    \since 6.5

    Truncates this string view to \a n code units.

    Same as \c{*this = first(n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chopped(), chop(), {Sizes and Sub-Strings}
*/

/*!
    \fn QAnyStringView::chop(qsizetype n)
    \since 6.5

    Truncates this string view by \a n code units.

    Same as \c{*this = first(size() - n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chopped(), truncate(), {Sizes and Sub-Strings}
*/

/*! \fn template <typename Visitor> decltype(auto) QAnyStringView::visit(Visitor &&v) const

    Calls \a v with either a QUtf8StringView, QLatin1String, or QStringView, depending
    on the encoding of the string data this string-view references.

    This is how most functions taking QAnyStringView fork off into per-encoding
    functions:

    \code
    void processImpl(QLatin1String s) { ~~~ }
    void processImpl(QUtf8StringView s) { ~~~ }
    void processImpl(QStringView s) { ~~~ }

    void process(QAnyStringView s)
    {
        s.visit([](auto s) { processImpl(s); });
    }
    \endcode

    Here, we're reusing the same name, \c s, for both the QAnyStringView
    object, as well as the lambda's parameter. This is idiomatic code and helps
    track the identity of the objects through visit() calls, for example in more
    complex situations such as

    \code
    bool equal(QAnyStringView lhs, QAnyStringView rhs)
    {
        // assuming operator==(QAnyStringView, QAnyStringView) didn't, yet, exist:
        return lhs.visit([rhs](auto lhs) {
            return rhs.visit([lhs](auto rhs) {
                return lhs == rhs;
            });
        });
    }
    \endcode

    visit() requires that all lambda instantiations have the same return type.
    If they differ, you get a compile error, even if there is a common type. To
    fix, you can use explicit return types on the lambda, or cast in the return
    statements:

    \code
    // wrong:
    QAnyStringView firstHalf(QAnyStringView input)
    {
        return input.visit([](auto input) {   // ERROR: lambdas return different types
            return input.sliced(0, input.size() / 2);
        });
    }
    // correct:
    QAnyStringView firstHalf(QAnyStringView input)
    {
        return input.visit([](auto input) -> QAnyStringView { // OK, explicit return type
            return input.sliced(0, input.size() / 2);
        });
    }
    // also correct:
    QAnyStringView firstHalf(QAnyStringView input)
    {
        return input.visit([](auto input) {
            return QAnyStringView(input.sliced(0, input.size() / 2)); // OK, cast to common type
        });
    }
    \endcode
*/

/*!
    \fn QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs)

    Compares the string view \a lhs with the string view \a rhs and returns a
    negative integer if \a lhs is less than \a rhs, a positive integer if it is
    greater than \a rhs, and zero if they are equal.

    If \a cs is Qt::CaseSensitive (the default), the comparison is case sensitive;
    otherwise the comparison is case-insensitive.

    \sa operator==(), operator<(), operator>()
*/

/*!
    \fn bool QAnyStringView::operator==(const QAnyStringView &lhs, const QAnyStringView & rhs)
    \fn bool QAnyStringView::operator!=(const QAnyStringView & lhs, const QAnyStringView & rhs)
    \fn bool QAnyStringView::operator<=(const QAnyStringView & lhs, const QAnyStringView & rhs)
    \fn bool QAnyStringView::operator>=(const QAnyStringView & lhs, const QAnyStringView & rhs)
    \fn bool QAnyStringView::operator<(const QAnyStringView & lhs, const QAnyStringView & rhs)
    \fn bool QAnyStringView::operator>(const QAnyStringView & lhs, const QAnyStringView & rhs)

    Operators that compare \a lhs to \a rhs.

    \sa compare()
*/

/*!
    \fn template <typename QStringLike> qToAnyStringViewIgnoringNull(const QStringLike &s);
    \since 6.0
    \internal

    Convert \a s to a QAnyStringView ignoring \c{s.isNull()}.

    Returns a string view that references \a{s}'s data, but is never null.

    This is a faster way to convert a QString or QByteArray to a QAnyStringView,
    if null QStrings or QByteArrays can legitimately be treated as empty ones.

    \sa QString::isNull(), QAnyStringView
*/

/*!
    \fn QAnyStringView::operator<<(QDebug d, QAnyStringView s)
    \since 6.7
    \relates QDebug

    Outputs \a s to debug stream \a d.

    If \c{d.quoteStrings()} is \c true, indicates which encoding the string is
    in. If you just want the string data, use visit() like this:

    \code
    s.visit([&d](auto s) { d << s; });
    \endcode

    \sa QAnyStringView::visit()
*/
QDebug operator<<(QDebug d, QAnyStringView s)
{
    // Encoding markers written before and after the string data when the
    // stream has string quoting enabled.
    struct Affixes { const char *before; const char *after; };

    // visit() hands the lambda the concrete view type; pick the markers from
    // it at compile time. Any other type is rejected by the static_assert.
    const Affixes marks = s.visit([](auto view) {
            using ViewType = decltype(view);
            if constexpr (std::is_same_v<ViewType, QStringView>) {
                return Affixes{"u", ""};
            } else if constexpr (std::is_same_v<ViewType, QUtf8StringView>) {
                return Affixes{"u8", ""};
            } else if constexpr (std::is_same_v<ViewType, QLatin1StringView>) {
                return Affixes{"", "_L1"};
            } else {
                static_assert(QtPrivate::type_dependent_false<ViewType>());
            }
        });

    // The state saver is created before nospace() is applied, so the spacing
    // change is made under its guard (see QDebugStateSaver).
    const QDebugStateSaver stateGuard(d);
    d.nospace();

    // Streaming the markers and the string does not touch the quoting flag,
    // so it is read once and used for both the prefix and the suffix.
    const bool showEncoding = d.quoteStrings();
    if (showEncoding)
        d << marks.before;
    s.visit([&d](auto view) { d << view; });
    if (showEncoding)
        d << marks.after;
    return d;
}


QT_END_NAMESPACE