/**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). ** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. ** ** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and Digia. For licensing terms and ** conditions see http://qt.digia.com/licensing. For further information ** use the contact form at http://qt.digia.com/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser ** General Public License version 2.1 as published by the Free Software ** Foundation and appearing in the file LICENSE.LGPL included in the ** packaging of this file. Please review the following information to ** ensure the GNU Lesser General Public License version 2.1 requirements ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. ** ** In addition, as a special exception, Digia gives you certain additional ** rights. These rights are described in the Digia Qt LGPL Exception ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. ** ** GNU General Public License Usage ** Alternatively, this file may be used under the terms of the GNU ** General Public License version 3.0 as published by the Free Software ** Foundation and appearing in the file LICENSE.GPL included in the ** packaging of this file. Please review the following information to ** ensure the GNU General Public License version 3.0 requirements will be ** met: http://www.gnu.org/copyleft/gpl.html. ** ** ** $QT_END_LICENSE$ ** ****************************************************************************/ #ifndef QUTFCODEC_P_H #define QUTFCODEC_P_H // // W A R N I N G // ------------- // // This file is not part of the Qt API. It exists purely as an // implementation detail. This header file may change from version to // version without notice, or even be removed. // // We mean it. // #include "QtCore/qtextcodec.h" #include "private/qtextcodec_p.h" QT_BEGIN_NAMESPACE struct QUtf8BaseTraits { static const bool isTrusted = false; static const bool allowNonCharacters = true; static const bool skipAsciiHandling = false; static const int Error = -1; static const int EndOfString = -2; static bool isValidCharacter(uint u) { return int(u) >= 0; } static void appendByte(uchar *&ptr, uchar b) { *ptr++ = b; } static uchar peekByte(const uchar *ptr, int n = 0) { return ptr[n]; } static qptrdiff availableBytes(const uchar *ptr, const uchar *end) { return end - ptr; } static void advanceByte(const uchar *&ptr, int n = 1) { ptr += n; } static void appendUtf16(ushort *&ptr, ushort uc) { *ptr++ = uc; } static void appendUcs4(ushort *&ptr, uint uc) { appendUtf16(ptr, QChar::highSurrogate(uc)); appendUtf16(ptr, QChar::lowSurrogate(uc)); } static ushort peekUtf16(const ushort *ptr, int n = 0) { return ptr[n]; } static qptrdiff availableUtf16(const ushort *ptr, const ushort *end) { return end - ptr; } static void advanceUtf16(const ushort *&ptr, int n = 1) { ptr += n; } // it's possible to output to UCS-4 too static void appendUtf16(uint *&ptr, ushort uc) { *ptr++ = uc; } static void appendUcs4(uint *&ptr, uint uc) { *ptr++ = uc; } }; struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits { static const bool skipAsciiHandling = true; }; namespace QUtf8Functions { /// returns 0 on success; errors can only happen if \a u is a surrogate: /// Error if \a u is a low surrogate; /// if \a u is a high surrogate, Error if the next isn't a low one, /// EndOfString if we run into the end of the string. template inline int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end) { if (!Traits::skipAsciiHandling && u < 0x80) { // U+0000 to U+007F (US-ASCII) - one byte Traits::appendByte(dst, uchar(u)); return 0; } else if (u < 0x0800) { // U+0080 to U+07FF - two bytes // first of two bytes Traits::appendByte(dst, 0xc0 | uchar(u >> 6)); } else { if (!QChar::isSurrogate(u)) { // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes if (!Traits::allowNonCharacters && QChar::isNonCharacter(u)) return Traits::Error; // first of three bytes Traits::appendByte(dst, 0xe0 | uchar(u >> 12)); } else { // U+10000 to U+10FFFF - four bytes // need to get one extra codepoint if (Traits::availableUtf16(src, end) == 0) return Traits::EndOfString; ushort low = Traits::peekUtf16(src); if (!QChar::isHighSurrogate(u)) return Traits::Error; if (!QChar::isLowSurrogate(low)) return Traits::Error; Traits::advanceUtf16(src); uint ucs4 = QChar::surrogateToUcs4(u, low); if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4)) return Traits::Error; // first byte Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf)); // second of four bytes Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f)); // for the rest of the bytes u = ushort(ucs4); } // second to last byte Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f)); } // last byte Traits::appendByte(dst, 0x80 | (u & 0x3f)); return 0; } inline bool isContinuationByte(uchar b) { return (b & 0xc0) == 0x80; } /// returns the number of characters consumed (including \a b) in case of success; /// returns negative in case of error: Traits::Error or Traits::EndOfString template inline int fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end) { int charsNeeded; uint min_uc; uint uc; if (!Traits::skipAsciiHandling && b < 0x80) { // US-ASCII Traits::appendUtf16(dst, b); return 1; } if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) { // an UTF-8 first character must be at least 0xC0 // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences return Traits::Error; } else if (b < 0xe0) { charsNeeded = 2; min_uc = 0x80; uc = b & 0x1f; } else if (b < 0xf0) { charsNeeded = 3; min_uc = 0x800; uc = b & 0x0f; } else if (b < 0xf5) { charsNeeded = 4; min_uc = 0x10000; uc = b & 0x07; } else { // the last Unicode character is U+10FFFF // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF" // therefore, a byte higher than 0xF4 is not the UTF-8 first byte return Traits::Error; } int bytesAvailable = Traits::availableBytes(src, end); if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) { // it's possible that we have an error instead of just unfinished bytes if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0))) return Traits::Error; if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1))) return Traits::Error; if (bytesAvailable > 2 && !isContinuationByte(Traits::peekByte(src, 2))) return Traits::Error; return Traits::EndOfString; } // first continuation character b = Traits::peekByte(src, 0); if (!isContinuationByte(b)) return Traits::Error; uc <<= 6; uc |= b & 0x3f; if (charsNeeded > 2) { // second continuation character b = Traits::peekByte(src, 1); if (!isContinuationByte(b)) return Traits::Error; uc <<= 6; uc |= b & 0x3f; if (charsNeeded > 3) { // third continuation character b = Traits::peekByte(src, 2); if (!isContinuationByte(b)) return Traits::Error; uc <<= 6; uc |= b & 0x3f; } } // we've decoded something; safety-check it if (!Traits::isTrusted) { if (uc < min_uc) return Traits::Error; if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) return Traits::Error; if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc)) return Traits::Error; } // write the UTF-16 sequence if (!QChar::requiresSurrogates(uc)) { // UTF-8 decoded and no surrogates are required // detach if necessary Traits::appendUtf16(dst, ushort(uc)); } else { // UTF-8 decoded to something that requires a surrogate pair Traits::appendUcs4(dst, uc); } Traits::advanceByte(src, charsNeeded - 1); return charsNeeded; } } enum DataEndianness { DetectEndianness, BigEndianness, LittleEndianness }; struct QUtf8 { static QString convertToUnicode(const char *, int); static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *); static QByteArray convertFromUnicode(const QChar *, int); static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *); }; struct QUtf16 { static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); }; struct QUtf32 { static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); }; #ifndef QT_NO_TEXTCODEC class QUtf8Codec : public QTextCodec { public: ~QUtf8Codec(); QByteArray name() const; int mibEnum() const; QString convertToUnicode(const char *, int, ConverterState *) const; QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; void convertToUnicode(QString *target, const char *, int, ConverterState *) const; }; class QUtf16Codec : public QTextCodec { protected: public: QUtf16Codec() { e = DetectEndianness; } ~QUtf16Codec(); QByteArray name() const; QList aliases() const; int mibEnum() const; QString convertToUnicode(const char *, int, ConverterState *) const; QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; protected: DataEndianness e; }; class QUtf16BECodec : public QUtf16Codec { public: QUtf16BECodec() : QUtf16Codec() { e = BigEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; }; class QUtf16LECodec : public QUtf16Codec { public: QUtf16LECodec() : QUtf16Codec() { e = LittleEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; }; class QUtf32Codec : public QTextCodec { public: QUtf32Codec() { e = DetectEndianness; } ~QUtf32Codec(); QByteArray name() const; QList aliases() const; int mibEnum() const; QString convertToUnicode(const char *, int, ConverterState *) const; QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; protected: DataEndianness e; }; class QUtf32BECodec : public QUtf32Codec { public: QUtf32BECodec() : QUtf32Codec() { e = BigEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; }; class QUtf32LECodec : public QUtf32Codec { public: QUtf32LECodec() : QUtf32Codec() { e = LittleEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; }; #endif // QT_NO_TEXTCODEC QT_END_NAMESPACE #endif // QUTFCODEC_P_H