summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qutf8stringview.qdoc
blob: 9c65799ca4d0a1bc60969ab3137f0cf1d654b47b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
/****************************************************************************
**
** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Marc Mutz <marc.mutz@kdab.com>
** Contact: https://www.qt.io/licensing/
**
** This file is part of the documentation of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:FDL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Free Documentation License Usage
** Alternatively, this file may be used under the terms of the GNU Free
** Documentation License version 1.3 as published by the Free Software
** Foundation and appearing in the file included in the packaging of
** this file. Please review the following information to ensure
** the GNU Free Documentation License version 1.3 requirements
** will be met: https://www.gnu.org/licenses/fdl-1.3.html.
** $QT_END_LICENSE$
**
****************************************************************************/

/*!
    \class QUtf8StringView
    \inmodule QtCore
    \since 6.0
    \brief The QUtf8StringView class provides a unified view on UTF-8 strings
           with a read-only subset of the QString API.
    \reentrant
    \ingroup tools
    \ingroup string-processing

    A QUtf8StringView references a contiguous portion of a UTF-8
    string it does not own. It acts as an interface type to all kinds
    of UTF-8 string, without the need to construct a QString or
    QByteArray first.

    The UTF-8 string may be represented as an array (or an
    array-compatible data-structure such as std::basic_string, etc.)
    of \c char8_t, \c char, \c{signed char} or \c{unsigned char}.

    QUtf8StringView is designed as an interface type; its main
    use-case is as a function parameter type. When QUtf8StringViews
    are used as automatic variables or data members, care must be
    taken to ensure that the referenced string data (for example,
    owned by a std::u8string) outlives the QUtf8StringView on all code
    paths, lest the string view ends up referencing deleted data.

    When used as an interface type, QUtf8StringView allows a single
    function to accept a wide variety of UTF-8 string data
    sources. One function accepting QUtf8StringView thus replaces
    several function overloads (taking e.g. QByteArray), while at the
    same time enabling even more string data sources to be passed to
    the function, such as \c{u8"Hello World"}, a \c char8_t (C++20) or
    \c char (C++17) string literal. The \c char8_t incompatibility
    between C++17 and C++20 goes away when using QUtf8StringView.

    Like all views, QUtf8StringViews should be passed by value, not by
    reference-to-const:
    \snippet code/src_corelib_text_qutf8stringview.cpp 0

    If you want to give your users maximum freedom in what strings
    they can pass to your function, consider using QAnyStringView
    instead.

    QUtf8StringView can also be used as the return value of a
    function. If you call a function returning QUtf8StringView, take
    extra care to not keep the QUtf8StringView around longer than the
    function promises to keep the referenced string data alive.  If in
    doubt, obtain a strong reference to the data by calling toString()
    to convert the QUtf8StringView into a QString.

    QUtf8StringView is a \e{Literal Type}.

    \section2 Compatible Character Types

    QUtf8StringView accepts strings over a variety of character types:

    \list
    \li \c char (both signed and unsigned)
    \li \c char8_t (C++20 only)
    \endlist

    \section2 Sizes and Sub-Strings

    All sizes and positions in QUtf8StringView functions are in
    UTF-8 code units (that is, UTF-8 multibyte sequences count as
    two, three or four, depending on their length). QUtf8StringView
    does not attempt to detect or prevent slicing right through
    UTF-8 multibyte sequences. This is similar to the situation with
    QStringView and surrogate pairs.

    \section2 C++20, char8_t, and QUtf8StringView

    In C++20, \c{u8""} string literals changed their type from
    \c{const char[]} to \c{const char8_t[]}. If Qt 6 could have depended
    on C++20, QUtf8StringView would store \c char8_t natively, and the
    following functions and aliases would use (pointers to) \c char8_t:

    \list
    \li storage_type, value_type, etc
    \li begin(), end(), data(), etc
    \li front(), back(), at(), operator[]()
    \endlist

    This is what QUtf8StringView is expected to look like in Qt 7, but for
    Qt 6, this was not possible. Instead of locking users into a C++17-era
    interface for the next decade, Qt provides two QUtf8StringView classes,
    in different (inline) namespaces. The first, in namespace \c{q_no_char8_t},
    has a value_type of \c{const char} and is universally available.
    The second, in namespace \c{q_has_char8_t}, has a value_type of
    \c{const char8_t} and is only available when compiling in C++20 mode.

    \c{q_no_char8_t} is an inline namespace regardless of C++ edition, to avoid
    accidental binary incompatibilities. To use the \c{char8_t} version, you
    need to name it explicitly with \c{q_has_char8_t::QUtf8StringView}.

    Internally, both are instantiations of the same template class,
    QBasicUtf8StringView. Please do not use the template class's name in your
    source code.

    \sa QAnyStringView, QStringView, QString
*/

/*!
    \typedef QUtf8StringView::storage_type

    Alias for \c{char}.
*/

/*!
    \typedef QUtf8StringView::value_type

    Alias for \c{const char}. Provided for compatibility with the STL.
*/

/*!
    \typedef QUtf8StringView::difference_type

    Alias for \c{std::ptrdiff_t}. Provided for compatibility with the STL.
*/

/*!
    \typedef QUtf8StringView::size_type

    Alias for qsizetype. Provided for compatibility with the STL.
*/

/*!
    \typedef QUtf8StringView::reference

    Alias for \c{value_type &}. Provided for compatibility with the STL.

    QUtf8StringView does not support mutable references, so this is the same
    as const_reference.
*/

/*!
    \typedef QUtf8StringView::const_reference

    Alias for \c{value_type &}. Provided for compatibility with the STL.
*/

/*!
    \typedef QUtf8StringView::pointer

    Alias for \c{value_type *}. Provided for compatibility with the STL.

    QUtf8StringView does not support mutable pointers, so this is the same
    as const_pointer.
*/

/*!
    \typedef QUtf8StringView::const_pointer

    Alias for \c{value_type *}. Provided for compatibility with the STL.
*/

/*!
    \typedef QUtf8StringView::iterator

    This typedef provides an STL-style const iterator for QUtf8StringView.

    QUtf8StringView does not support mutable iterators, so this is the same
    as const_iterator.

    \sa const_iterator, reverse_iterator
*/

/*!
    \typedef QUtf8StringView::const_iterator

    This typedef provides an STL-style const iterator for QUtf8StringView.

    \sa iterator, const_reverse_iterator
*/

/*!
    \typedef QUtf8StringView::reverse_iterator

    This typedef provides an STL-style const reverse iterator for QUtf8StringView.

    QUtf8StringView does not support mutable reverse iterators, so this is the
    same as const_reverse_iterator.

    \sa const_reverse_iterator, iterator
*/

/*!
    \typedef QUtf8StringView::const_reverse_iterator

    This typedef provides an STL-style const reverse iterator for QUtf8StringView.

    \sa reverse_iterator, const_iterator
*/

/*!
    \fn QUtf8StringView::QUtf8StringView()

    Constructs a null string view.

    \sa isNull()
*/

/*!
    \fn QUtf8StringView::QUtf8StringView(std::nullptr_t)

    Constructs a null string view.

    \sa isNull()
*/

/*!
    \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str, qsizetype len)

    Constructs a string view on \a str with length \a len.

    The range \c{[str, str + len)} must remain valid for the lifetime of this string view object.

    Passing \nullptr as \a str is safe if \a len is 0, too, and results in a null string view.

    The behavior is undefined if \a len is negative or, when positive, if \a str is \nullptr.

    This constructor only participates in overload resolution if \c Char is a compatible
    character type. The compatible character types are: \c char8_t, \c char, \c{signed char} and
    \c{unsigned char}.
*/

/*!
    \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *first, const Char *last)

    Constructs a string view on \a first with length (\a last - \a first).

    The range \c{[first,last)} must remain valid for the lifetime of
    this string view object.

    Passing \nullptr as \a first is safe if \a last is \nullptr, too,
    and results in a null string view.

    The behavior is undefined if \a last precedes \a first, or \a first
    is \nullptr and \a last is not.

    This constructor only participates in overload resolution if \c Char is a compatible
    character type. The compatible character types are: \c char8_t, \c char, \c{signed char} and
    \c{unsigned char}.
*/

/*!
    \fn template <typename Char> QUtf8StringView::QUtf8StringView(const Char *str)

    Constructs a string view on \a str. The length is determined
    by scanning for the first \c{Char(0)}.

    \a str must remain valid for the lifetime of this string view object.

    Passing \nullptr as \a str is safe and results in a null string view.

    This constructor only participates in overload resolution if \a str
    is not an array and if \c Char is a compatible character type. The
    compatible character types are: \c char8_t, \c char, \c{signed char} and
    \c{unsigned char}.
*/

/*!
    \fn template <typename Char, size_t N> QUtf8StringView::QUtf8StringView(const Char (&string)[N])

    Constructs a string view on the character string literal \a string.
    The view covers the array until the first \c{Char(0)} is encountered,
    or \c N, whichever comes first.
    If you need the full array, use fromArray() instead.

    \a string must remain valid for the lifetime of this string view
    object.

    This constructor only participates in overload resolution if \a string
    is an actual array and if \c Char is a compatible character type. The
    compatible character types are: \c char8_t, \c char, \c{signed char} and
    \c{unsigned char}.

    \sa fromArray()
*/

/*!
    \fn template <typename Container, if_compatible_container<Container>> QUtf8StringView::QUtf8StringView(const Container &str)

    Constructs a string view on \a str. The length is taken from \c{str.size()}.

    \c{str.data()} must remain valid for the lifetime of this string view object.

    This constructor only participates in overload resolution if \c Container is an
    instantiation of \c std::basic_string with a compatible character type. The
    compatible character types are: \c char8_t, \c char, \c{signed char} and
    \c{unsigned char}.

    The string view will be empty if and only if \c{str.empty()}. It is unspecified
    whether this constructor can result in a null string view (\c{str.data()} would
    have to return \nullptr for this).

    \sa isNull(), isEmpty()
*/

/*!
    \fn template <typename Char, size_t Size, if_compatible_char<Char>> QUtf8StringView::fromArray(const Char (&string)[Size])

    Constructs a string view on the full character string literal \a string,
    including any trailing \c{Char(0)}. If you don't want the
    null-terminator included in the view then you can chop() it off
    when you are certain it is at the end. Alternatively you can use
    the constructor overload taking an array literal which will create
    a view up to, but not including, the first null-terminator in the data.

    \a string must remain valid for the lifetime of this string view
    object.

    This function will work with any array literal if \c Char is a
    compatible character type. The compatible character types
    are: \c char8_t, \c char, \c{signed char} and \c{unsigned char}.
*/

/*!
    \fn QString QUtf8StringView::toString() const

    Returns a deep copy of this string view's data as a QString.

    The return value will be a null QString if and only if this string view is null.
*/

/*!
    \fn QUtf8StringView::data() const

    Returns a const pointer to the first code point in the string.

    \note The character array represented by the return value is \e not null-terminated.

    \sa begin(), end(), utf8()
*/

/*!
    \fn QUtf8StringView::utf8() const

    Returns a const pointer to the first code point in the string.

    The result is returned as a \c{const char8_t*}, so this function is only available when
    compiling in C++20 mode.

    \note The character array represented by the return value is \e not null-terminated.

    \sa begin(), end(), data()
*/

/*!
    \fn QUtf8StringView::const_iterator QUtf8StringView::begin() const

    Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the first code point in
    the string.

    This function is provided for STL compatibility.

    \sa end(), cbegin(), rbegin(), data()
*/

/*!
    \fn QUtf8StringView::const_iterator QUtf8StringView::cbegin() const

    Same as begin().

    This function is provided for STL compatibility.

    \sa cend(), begin(), crbegin(), data()
*/

/*!
    \fn QUtf8StringView::const_iterator QUtf8StringView::end() const

    Returns a const \l{STL-style iterators}{STL-style iterator} pointing to the imaginary
    code point after the last code point in the string.

    This function is provided for STL compatibility.

    \sa begin(), cend(), rend()
*/

/*! \fn QUtf8StringView::const_iterator QUtf8StringView::cend() const

    Same as end().

    This function is provided for STL compatibility.

    \sa cbegin(), end(), crend()
*/

/*!
    \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rbegin() const

    Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to the first
    code point in the string, in reverse order.

    This function is provided for STL compatibility.

    \sa rend(), crbegin(), begin()
*/

/*!
    \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crbegin() const

    Same as rbegin().

    This function is provided for STL compatibility.

    \sa crend(), rbegin(), cbegin()
*/

/*!
    \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::rend() const

    Returns a const \l{STL-style iterators}{STL-style} reverse iterator pointing to one past
    the last code point in the string, in reverse order.

    This function is provided for STL compatibility.

    \sa rbegin(), crend(), end()
*/

/*!
    \fn QUtf8StringView::const_reverse_iterator QUtf8StringView::crend() const

    Same as rend().

    This function is provided for STL compatibility.

    \sa crbegin(), rend(), cend()
*/

/*!
    \fn bool QUtf8StringView::empty() const

    Returns whether this string view is empty - that is, whether \c{size() == 0}.

    This function is provided for STL compatibility.

    \sa isEmpty(), isNull(), size(), length()
*/

/*!
    \fn bool QUtf8StringView::isEmpty() const

    Returns whether this string view is empty - that is, whether \c{size() == 0}.

    This function is provided for compatibility with other Qt containers.

    \sa empty(), isNull(), size(), length()
*/

/*!
    \fn bool QUtf8StringView::isNull() const

    Returns whether this string view is null - that is, whether \c{data() == nullptr}.

    This function is provided for compatibility with other Qt containers.

    \sa empty(), isEmpty(), size(), length()
*/

/*!
    \fn qsizetype QUtf8StringView::size() const

    Returns the size of this string view, in UTF-8 code units (that is,
    multi-byte sequences count as more than one for the purposes of this function, the same
    as surrogate pairs in QString and QStringView).

    \sa empty(), isEmpty(), isNull(), length()
*/

/*!
    \fn int QUtf8StringView::length() const
    \obsolete
    Use size() and port callers to qsizetype.

    Same as size(), except returns the result as an \c int.

    This function is provided for compatibility with other Qt containers.

    \warning QUtf8StringView can represent strings with more than 2\sup{31} code points.
    Calling this function on a string view for which size() returns a value greater
    than \c{INT_MAX} constitutes undefined behavior.

    \sa empty(), isEmpty(), isNull(), size()
*/

/*!
    \fn QUtf8StringView::operator[](qsizetype n) const

    Returns the code point at position \a n in this string view.

    The behavior is undefined if \a n is negative or not less than size().

    \sa at(), front(), back()
*/

/*!
    \fn QUtf8StringView::at(qsizetype n) const

    Returns the code point at position \a n in this string view.

    The behavior is undefined if \a n is negative or not less than size().

    \sa operator[](), front(), back()
*/

/*!
    \fn QUtf8StringView::front() const

    Returns the first code point in the string. Same as \c{at(0)}.

    This function is provided for STL compatibility.

    \warning Calling this function on an empty string view constitutes
    undefined behavior.

    \sa back()
*/

/*!
    \fn QUtf8StringView::back() const

    Returns the last code point in the string. Same as \c{at(size() - 1)}.

    This function is provided for STL compatibility.

    \warning Calling this function on an empty string view constitutes
    undefined behavior.

    \sa front()
*/

/*!
    \fn QUtf8StringView::mid(qsizetype pos, qsizetype n) const

    Returns the substring of length \a n starting at position
    \a pos in this object.

    \obsolete Use sliced() instead in new code.

    Returns an empty string view if \a pos exceeds the
    length of the string. If there are fewer than \a n code points
    available in the string starting at \a pos, or if
    \a n is negative (default), the function returns all code points that
    are available from \a pos.

    \sa first(), last(), sliced(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::left(qsizetype n) const

    \obsolete Use first() instead in new code.

    Returns the substring of length \a n starting at position
    0 in this object.

    The entire string is returned if \a n is greater than or equal
    to size(), or less than zero.

    \sa first(), last(), sliced(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::right(qsizetype n) const

    \obsolete Use last() instead in new code.

    Returns the substring of length \a n starting at position
    size() - \a n in this object.

    The entire string is returned if \a n is greater than or equal
    to size(), or less than zero.

    \sa first(), last(), sliced(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::first(qsizetype n) const

    Returns a string view that contains the first \a n code points
    of this string.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa last(), sliced(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::last(qsizetype n) const

    Returns a string view that contains the last \a n code points of this string.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa first(), sliced(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::sliced(qsizetype pos, qsizetype n) const

    Returns a string view containing \a n code points of this string view,
    starting at position \a pos.

    \note The behavior is undefined when \a pos < 0, \a n < 0,
    or \a pos + \a n > size().

    \sa first(), last(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::sliced(qsizetype pos) const

    Returns a string view starting at position \a pos in this object,
    and extending to its end.

    \note The behavior is undefined when \a pos < 0 or \a pos > size().

    \sa first(), last(), chopped(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::chopped(qsizetype n) const

    Returns the substring of length size() - \a n starting at the
    beginning of this object.

    Same as \c{first(size() - n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chop(), truncate()
*/

/*!
    \fn QUtf8StringView::truncate(qsizetype n)

    Truncates this string view to \a n code points.

    Same as \c{*this = first(n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chopped(), chop()
*/

/*!
    \fn QUtf8StringView::chop(qsizetype n)

    Truncates this string view by \a n code points.

    Same as \c{*this = first(size() - n)}.

    \note The behavior is undefined when \a n < 0 or \a n > size().

    \sa sliced(), first(), last(), chopped(), truncate()
*/

/*!
    \fn template <typename QStringLike> qToUtf8StringViewIgnoringNull(const QStringLike &s);
    \relates QUtf8StringView
    \internal

    Convert \a s to a QUtf8StringView ignoring \c{s.isNull()}.

    Returns a string-view that references \a{s}'s data, but is never null.

    This is a faster way to convert a QByteArray to a QUtf8StringView,
    if null QByteArrays can legitimately be treated as empty ones.

    \sa QByteArray::isNull(), QUtf8StringView
*/