diff options
Diffstat (limited to 'src/corelib/text/qstringconverter_p.h')
-rw-r--r-- | src/corelib/text/qstringconverter_p.h | 203 |
1 files changed, 111 insertions, 92 deletions
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index 242f3f0303..e68ffb2bb0 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -1,42 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2020 The Qt Company Ltd. -** Copyright (C) 2020 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 The Qt Company Ltd. +// Copyright (C) 2020 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #ifndef QSTRINGCONVERTER_P_H #define QSTRINGCONVERTER_P_H @@ -55,13 +19,42 @@ #include <QtCore/qstring.h> #include <QtCore/qendian.h> #include <QtCore/qstringconverter.h> +#include <QtCore/private/qglobal_p.h> QT_BEGIN_NAMESPACE #ifndef __cpp_char8_t -enum char8_t : uchar {}; +enum qchar8_t : uchar {}; +#else +using qchar8_t = char8_t; #endif +struct QLatin1 +{ + // Defined in qstring.cpp + static char16_t *convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept; + + static QChar *convertToUnicode(QChar *buffer, QLatin1StringView in) noexcept + { + char16_t *dst = reinterpret_cast<char16_t *>(buffer); + dst = convertToUnicode(dst, in); + return reinterpret_cast<QChar *>(dst); + } + + static QChar *convertToUnicode(QChar *dst, QByteArrayView in, + [[maybe_unused]] QStringConverterBase::State *state) noexcept + { + Q_ASSERT(state); + + return convertToUnicode(dst, QLatin1StringView(in.data(), in.size())); + } + + static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept; + + // Defined in qstring.cpp + static char *convertFromUnicode(char *out, QStringView in) noexcept; +}; + struct QUtf8BaseTraits { static const bool isTrusted = false; @@ -70,81 +63,60 @@ struct QUtf8BaseTraits static const int Error = -1; static const int EndOfString = -2; - static bool isValidCharacter(uint u) - { return int(u) >= 0; } - static void appendByte(uchar *&ptr, uchar b) { *ptr++ = b; } - static void appendByte(char8_t *&ptr, char8_t b) + static void appendByte(qchar8_t *&ptr, qchar8_t b) { *ptr++ = b; } + static uchar peekByte(const char *ptr, qsizetype n = 0) + { return ptr[n]; } + static uchar peekByte(const uchar *ptr, qsizetype n = 0) { return ptr[n]; } - static uchar peekByte(const char8_t *ptr, int n = 0) + static uchar peekByte(const qchar8_t *ptr, qsizetype n = 0) { return ptr[n]; } + static qptrdiff availableBytes(const char *ptr, const char *end) + { return end - ptr; } + static qptrdiff availableBytes(const uchar *ptr, const uchar *end) { return end - ptr; } - static qptrdiff availableBytes(const char8_t *ptr, const char8_t *end) + static qptrdiff availableBytes(const qchar8_t *ptr, const qchar8_t *end) { return end - ptr; } - static void advanceByte(const uchar *&ptr, qsizetype n = 1) + static void advanceByte(const char *&ptr, qsizetype n = 1) { ptr += n; } - static void advanceByte(const char8_t *&ptr, int n = 1) + static void advanceByte(const uchar *&ptr, qsizetype n = 1) { ptr += n; } - static void appendUtf16(ushort *&ptr, ushort uc) - { *ptr++ = uc; } + static void advanceByte(const qchar8_t *&ptr, qsizetype n = 1) + { ptr += n; } - static void appendUtf16(char16_t *&ptr, ushort uc) + static void appendUtf16(char16_t *&ptr, char16_t uc) { *ptr++ = char16_t(uc); } - static void appendUcs4(ushort *&ptr, uint uc) - { - appendUtf16(ptr, QChar::highSurrogate(uc)); - appendUtf16(ptr, QChar::lowSurrogate(uc)); - } - static void appendUcs4(char16_t *&ptr, char32_t uc) { appendUtf16(ptr, QChar::highSurrogate(uc)); appendUtf16(ptr, QChar::lowSurrogate(uc)); } - static ushort peekUtf16(const ushort *ptr, qsizetype n = 0) - { return ptr[n]; } - - static ushort peekUtf16(const char16_t *ptr, int n = 0) - { return ptr[n]; } - - static qptrdiff availableUtf16(const ushort *ptr, const ushort *end) - { return end - ptr; } + static char16_t peekUtf16(const char16_t *ptr, qsizetype n = 0) { return ptr[n]; } static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end) { return end - ptr; } - static void advanceUtf16(const ushort *&ptr, qsizetype n = 1) - { ptr += n; } - - static void advanceUtf16(const char16_t *&ptr, int n = 1) - { ptr += n; } - - // it's possible to output to UCS-4 too - static void appendUtf16(uint *&ptr, ushort uc) - { *ptr++ = uc; } + static void advanceUtf16(const char16_t *&ptr, qsizetype n = 1) { ptr += n; } - static void appendUtf16(char32_t *&ptr, ushort uc) + static void appendUtf16(char32_t *&ptr, char16_t uc) { *ptr++ = char32_t(uc); } - static void appendUcs4(uint *&ptr, uint uc) + static void appendUcs4(char32_t *&ptr, char32_t uc) { *ptr++ = uc; } - - static void appendUcs4(char32_t *&ptr, uint uc) - { *ptr++ = char32_t(uc); } }; struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits @@ -159,7 +131,7 @@ namespace QUtf8Functions /// if \a u is a high surrogate, Error if the next isn't a low one, /// EndOfString if we run into the end of the string. template <typename Traits, typename OutputPtr, typename InputPtr> inline - int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end) + int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end) { if (!Traits::skipAsciiHandling && u < 0x80) { // U+0000 to U+007F (US-ASCII) - one byte @@ -183,14 +155,14 @@ namespace QUtf8Functions if (Traits::availableUtf16(src, end) == 0) return Traits::EndOfString; - ushort low = Traits::peekUtf16(src); + char16_t low = Traits::peekUtf16(src); if (!QChar::isHighSurrogate(u)) return Traits::Error; if (!QChar::isLowSurrogate(low)) return Traits::Error; Traits::advanceUtf16(src); - uint ucs4 = QChar::surrogateToUcs4(u, low); + char32_t ucs4 = QChar::surrogateToUcs4(u, low); if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4)) return Traits::Error; @@ -202,7 +174,7 @@ namespace QUtf8Functions Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f)); // for the rest of the bytes - u = ushort(ucs4); + u = char16_t(ucs4); } // second to last byte @@ -225,8 +197,8 @@ namespace QUtf8Functions qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end) { qsizetype charsNeeded; - uint min_uc; - uint uc; + char32_t min_uc; + char32_t uc; if (!Traits::skipAsciiHandling && b < 0x80) { // US-ASCII @@ -306,7 +278,7 @@ namespace QUtf8Functions if (!QChar::requiresSurrogates(uc)) { // UTF-8 decoded and no surrogates are required // detach if necessary - Traits::appendUtf16(dst, ushort(uc)); + Traits::appendUtf16(dst, char16_t(uc)); } else { // UTF-8 decoded to something that requires a surrogate pair Traits::appendUcs4(dst, uc); @@ -326,20 +298,41 @@ enum DataEndianness struct QUtf8 { - Q_CORE_EXPORT static QChar *convertToUnicode(QChar *buffer, QByteArrayView in) noexcept; + static QChar *convertToUnicode(QChar *buffer, QByteArrayView in) noexcept + { + char16_t *dst = reinterpret_cast<char16_t *>(buffer); + dst = QUtf8::convertToUnicode(dst, in); + return reinterpret_cast<QChar *>(dst); + } + + Q_CORE_EXPORT static char16_t* convertToUnicode(char16_t *dst, QByteArrayView in) noexcept; static QString convertToUnicode(QByteArrayView in); Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state); - static QChar *convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state); + + static QChar *convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state) + { + char16_t *buffer = reinterpret_cast<char16_t *>(out); + buffer = convertToUnicode(buffer, in, state); + return reinterpret_cast<QChar *>(buffer); + } + + static char16_t *convertToUnicode(char16_t *dst, QByteArrayView in, QStringConverter::State *state); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in); Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state); + Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in); struct ValidUtf8Result { bool isValidUtf8; bool isValidAscii; }; static ValidUtf8Result isValidUtf8(QByteArrayView in); - static int compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept; - static int compareUtf8(QByteArrayView utf8, QLatin1String s); + static int compareUtf8(QByteArrayView utf8, QStringView utf16, + Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; + static int compareUtf8(QByteArrayView utf8, QLatin1StringView s, + Qt::CaseSensitivity cs = Qt::CaseSensitive); + static int compareUtf8(QByteArrayView lhs, QByteArrayView rhs, + Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; }; struct QUtf16 @@ -366,8 +359,34 @@ struct Q_CORE_EXPORT QLocal8Bit static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) { return QUtf8::convertFromUnicode(in, state); } #else - static QString convertToUnicode(QByteArrayView, QStringConverter::State *); - static QByteArray convertFromUnicode(QStringView, QStringConverter::State *); + static int checkUtf8(); + static bool isUtf8() + { + Q_CONSTINIT + static QBasicAtomicInteger<qint8> result = { 0 }; + int r = result.loadRelaxed(); + if (r == 0) { + r = checkUtf8(); + result.storeRelaxed(r); + } + return r > 0; + } + static QString convertToUnicode_sys(QByteArrayView, quint32, QStringConverter::State *); + static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *); + static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertToUnicode(in, state); + return convertToUnicode_sys(in, state); + } + static QByteArray convertFromUnicode_sys(QStringView, quint32, QStringConverter::State *); + static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *); + static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertFromUnicode(in, state); + return convertFromUnicode_sys(in, state); + } #endif }; |