diff options
Diffstat (limited to 'src/corelib/text/qstringconverter_p.h')
-rw-r--r-- | src/corelib/text/qstringconverter_p.h | 223 |
1 files changed, 141 insertions, 82 deletions
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index 2e897c47eb..e68ffb2bb0 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -1,42 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2018 The Qt Company Ltd. -** Copyright (C) 2018 Intel Corporation. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 The Qt Company Ltd. +// Copyright (C) 2020 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #ifndef QSTRINGCONVERTER_P_H #define QSTRINGCONVERTER_P_H @@ -55,9 +19,42 @@ #include <QtCore/qstring.h> #include <QtCore/qendian.h> #include <QtCore/qstringconverter.h> +#include <QtCore/private/qglobal_p.h> QT_BEGIN_NAMESPACE +#ifndef __cpp_char8_t +enum qchar8_t : uchar {}; +#else +using qchar8_t = char8_t; +#endif + +struct QLatin1 +{ + // Defined in qstring.cpp + static char16_t *convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept; + + static QChar *convertToUnicode(QChar *buffer, QLatin1StringView in) noexcept + { + char16_t *dst = reinterpret_cast<char16_t *>(buffer); + dst = convertToUnicode(dst, in); + return reinterpret_cast<QChar *>(dst); + } + + static QChar *convertToUnicode(QChar *dst, QByteArrayView in, + [[maybe_unused]] QStringConverterBase::State *state) noexcept + { + Q_ASSERT(state); + + return convertToUnicode(dst, QLatin1StringView(in.data(), in.size())); + } + + static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept; + + // Defined in qstring.cpp + static char *convertFromUnicode(char *out, QStringView in) noexcept; +}; + struct QUtf8BaseTraits { static const bool isTrusted = false; @@ -66,44 +63,59 @@ struct QUtf8BaseTraits static const int Error = -1; static const int EndOfString = -2; - static bool isValidCharacter(uint u) - { return int(u) >= 0; } - static void appendByte(uchar *&ptr, uchar b) { *ptr++ = b; } + static void appendByte(qchar8_t *&ptr, qchar8_t b) + { *ptr++ = b; } + + static uchar peekByte(const char *ptr, qsizetype n = 0) + { return ptr[n]; } + static uchar peekByte(const uchar *ptr, qsizetype n = 0) { return ptr[n]; } + static uchar peekByte(const qchar8_t *ptr, qsizetype n = 0) + { return ptr[n]; } + + static qptrdiff availableBytes(const char *ptr, const char *end) + { return end - ptr; } + static qptrdiff availableBytes(const uchar *ptr, const uchar *end) { return end - ptr; } + static qptrdiff availableBytes(const qchar8_t *ptr, const qchar8_t *end) + { return end - ptr; } + + static void advanceByte(const char *&ptr, qsizetype n = 1) + { ptr += n; } + static void advanceByte(const uchar *&ptr, qsizetype n = 1) { ptr += n; } - static void appendUtf16(ushort *&ptr, ushort uc) - { *ptr++ = uc; } + static void advanceByte(const qchar8_t *&ptr, qsizetype n = 1) + { ptr += n; } + + static void appendUtf16(char16_t *&ptr, char16_t uc) + { *ptr++ = char16_t(uc); } - static void appendUcs4(ushort *&ptr, uint uc) + static void appendUcs4(char16_t *&ptr, char32_t uc) { appendUtf16(ptr, QChar::highSurrogate(uc)); appendUtf16(ptr, QChar::lowSurrogate(uc)); } - static ushort peekUtf16(const ushort *ptr, qsizetype n = 0) - { return ptr[n]; } + static char16_t peekUtf16(const char16_t *ptr, qsizetype n = 0) { return ptr[n]; } - static qptrdiff availableUtf16(const ushort *ptr, const ushort *end) + static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end) { return end - ptr; } - static void advanceUtf16(const ushort *&ptr, qsizetype n = 1) - { ptr += n; } + static void advanceUtf16(const char16_t *&ptr, qsizetype n = 1) { ptr += n; } - // it's possible to output to UCS-4 too - static void appendUtf16(uint *&ptr, ushort uc) - { *ptr++ = uc; } + static void appendUtf16(char32_t *&ptr, char16_t uc) + { *ptr++ = char32_t(uc); } - static void appendUcs4(uint *&ptr, uint uc) + static void appendUcs4(char32_t *&ptr, char32_t uc) { *ptr++ = uc; } }; @@ -119,7 +131,7 @@ namespace QUtf8Functions /// if \a u is a high surrogate, Error if the next isn't a low one, /// EndOfString if we run into the end of the string. template <typename Traits, typename OutputPtr, typename InputPtr> inline - int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end) + int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end) { if (!Traits::skipAsciiHandling && u < 0x80) { // U+0000 to U+007F (US-ASCII) - one byte @@ -143,14 +155,14 @@ namespace QUtf8Functions if (Traits::availableUtf16(src, end) == 0) return Traits::EndOfString; - ushort low = Traits::peekUtf16(src); + char16_t low = Traits::peekUtf16(src); if (!QChar::isHighSurrogate(u)) return Traits::Error; if (!QChar::isLowSurrogate(low)) return Traits::Error; Traits::advanceUtf16(src); - uint ucs4 = QChar::surrogateToUcs4(u, low); + char32_t ucs4 = QChar::surrogateToUcs4(u, low); if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4)) return Traits::Error; @@ -162,7 +174,7 @@ namespace QUtf8Functions Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f)); // for the rest of the bytes - u = ushort(ucs4); + u = char16_t(ucs4); } // second to last byte @@ -185,8 +197,8 @@ namespace QUtf8Functions qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end) { qsizetype charsNeeded; - uint min_uc; - uint uc; + char32_t min_uc; + char32_t uc; if (!Traits::skipAsciiHandling && b < 0x80) { // US-ASCII @@ -266,7 +278,7 @@ namespace QUtf8Functions if (!QChar::requiresSurrogates(uc)) { // UTF-8 decoded and no surrogates are required // detach if necessary - Traits::appendUtf16(dst, ushort(uc)); + Traits::appendUtf16(dst, char16_t(uc)); } else { // UTF-8 decoded to something that requires a surrogate pair Traits::appendUcs4(dst, uc); @@ -286,48 +298,95 @@ enum DataEndianness struct QUtf8 { - static QChar *convertToUnicode(QChar *, const char *, qsizetype) noexcept; - static QString convertToUnicode(const char *, qsizetype); - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); - static QChar *convertToUnicode(QChar *out, const char *in, qsizetype length, QStringConverter::State *state); - static QByteArray convertFromUnicode(const QChar *, qsizetype); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); + static QChar *convertToUnicode(QChar *buffer, QByteArrayView in) noexcept + { + char16_t *dst = reinterpret_cast<char16_t *>(buffer); + dst = QUtf8::convertToUnicode(dst, in); + return reinterpret_cast<QChar *>(dst); + } + + Q_CORE_EXPORT static char16_t* convertToUnicode(char16_t *dst, QByteArrayView in) noexcept; + static QString convertToUnicode(QByteArrayView in); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state); + + static QChar *convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state) + { + char16_t *buffer = reinterpret_cast<char16_t *>(out); + buffer = convertToUnicode(buffer, in, state); + return reinterpret_cast<QChar *>(buffer); + } + + static char16_t *convertToUnicode(char16_t *dst, QByteArrayView in, QStringConverter::State *state); + + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in, QStringConverterBase::State *state); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state); + Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in); struct ValidUtf8Result { bool isValidUtf8; bool isValidAscii; }; - static ValidUtf8Result isValidUtf8(const char *, qsizetype); - static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype); - static int compareUtf8(const char *, qsizetype, QLatin1String s); + static ValidUtf8Result isValidUtf8(QByteArrayView in); + static int compareUtf8(QByteArrayView utf8, QStringView utf16, + Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; + static int compareUtf8(QByteArrayView utf8, QLatin1StringView s, + Qt::CaseSensitivity cs = Qt::CaseSensitive); + static int compareUtf8(QByteArrayView lhs, QByteArrayView rhs, + Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; }; struct QUtf16 { - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); - static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness); + static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); }; struct QUtf32 { - static QChar *convertToUnicode(QChar *out, const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness endian); - Q_CORE_EXPORT static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); - Q_CORE_EXPORT static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness); + static QChar *convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian); + Q_CORE_EXPORT static QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness = DetectEndianness); + Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness = DetectEndianness); static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state, DataEndianness endian); }; struct Q_CORE_EXPORT QLocal8Bit { #if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED) - static QString convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state) - { return QUtf8::convertToUnicode(chars, len, state); } - static QByteArray convertFromUnicode(const QChar *chars, qsizetype len, QStringConverter::State *state) - { return QUtf8::convertFromUnicode(chars, len, state); } + static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state) + { return QUtf8::convertToUnicode(in, state); } + static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) + { return QUtf8::convertFromUnicode(in, state); } #else - static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *); - static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *); + static int checkUtf8(); + static bool isUtf8() + { + Q_CONSTINIT + static QBasicAtomicInteger<qint8> result = { 0 }; + int r = result.loadRelaxed(); + if (r == 0) { + r = checkUtf8(); + result.storeRelaxed(r); + } + return r > 0; + } + static QString convertToUnicode_sys(QByteArrayView, quint32, QStringConverter::State *); + static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *); + static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertToUnicode(in, state); + return convertToUnicode_sys(in, state); + } + static QByteArray convertFromUnicode_sys(QStringView, quint32, QStringConverter::State *); + static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *); + static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertFromUnicode(in, state); + return convertFromUnicode_sys(in, state); + } #endif }; |