summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs/qtextcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/codecs/qtextcodec.cpp')
-rw-r--r--src/corelib/codecs/qtextcodec.cpp1299
1 files changed, 0 insertions, 1299 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
deleted file mode 100644
index be585d0407..0000000000
--- a/src/corelib/codecs/qtextcodec.cpp
+++ /dev/null
@@ -1,1299 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2018 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include "qplatformdefs.h"
-
-#include "qtextcodec.h"
-#include "qtextcodec_p.h"
-
-#include "qbytearraymatcher.h"
-#include "qendian.h"
-#include "qfile.h"
-#include "qlist.h"
-#include <private/qlocking_p.h>
-#include "qstringlist.h"
-#include "qvarlengtharray.h"
-#if !defined(QT_BOOTSTRAPPED)
-#include <private/qcoreapplication_p.h>
-#endif
-
-#include "qutfcodec_p.h"
-#include "qlatincodec_p.h"
-
-#if !defined(QT_BOOTSTRAPPED)
-#if QT_CONFIG(codecs)
-# include "qtsciicodec_p.h"
-# include "qisciicodec_p.h"
-#endif
-#if QT_CONFIG(icu)
-#include "qicucodec_p.h"
-#else
-#if QT_CONFIG(iconv)
-# include "qiconvcodec_p.h"
-#endif
-#ifdef Q_OS_WIN
-# include "qwindowscodec_p.h"
-#endif
-# include "qsimplecodec_p.h"
-#if QT_CONFIG(big_codecs)
-# ifndef Q_OS_INTEGRITY
-# include "qgb18030codec_p.h"
-# include "qeucjpcodec_p.h"
-# include "qjiscodec_p.h"
-# include "qsjiscodec_p.h"
-# include "qeuckrcodec_p.h"
-# include "qbig5codec_p.h"
-# endif // !Q_OS_INTEGRITY
-#endif // big_codecs
-
-#endif // icu
-#endif // QT_BOOTSTRAPPED
-
-#include <mutex>
-
-#include <stdlib.h>
-#include <ctype.h>
-#include <locale.h>
-#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_ANDROID)
-# include <langinfo.h>
-#endif
-
-QT_BEGIN_NAMESPACE
-
-typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt;
-typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt;
-
-Q_GLOBAL_STATIC(QRecursiveMutex, textCodecsMutex);
-
-Q_GLOBAL_STATIC(QTextCodecData, textCodecData)
-
-QTextCodecData::QTextCodecData()
- : codecForLocale(nullptr)
-{
-}
-
-QTextCodecData::~QTextCodecData()
-{
- codecForLocale = nullptr;
- QList<QTextCodec *> tmp = allCodecs;
- allCodecs.clear();
- codecCache.clear();
- for (QList<QTextCodec *>::const_iterator it = tmp.constBegin(); it != tmp.constEnd(); ++it)
- delete *it;
-}
-
-QTextCodecData *QTextCodecData::instance()
-{
- return textCodecData();
-}
-
-class TextCodecsMutexLocker
-{
- using Lock = decltype(qt_unique_lock(std::declval<QRecursiveMutex&>()));
- // ### FIXME: this is used when textCodecsMutex already == nullptr
- const Lock lock = qt_unique_lock(textCodecsMutex());
-public:
- TextCodecsMutexLocker() {} // required d/t an ICC 19 bug
-};
-
-#if !QT_CONFIG(icu)
-static char qtolower(char c)
-{ if (c >= 'A' && c <= 'Z') return c + 0x20; return c; }
-static bool qisalnum(char c)
-{ return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
-
-bool qTextCodecNameMatch(const char *n, const char *h)
-{
- if (qstricmp(n, h) == 0)
- return true;
-
- // if the letters and numbers are the same, we have a match
- while (*n != '\0') {
- if (qisalnum(*n)) {
- for (;;) {
- if (*h == '\0')
- return false;
- if (qisalnum(*h))
- break;
- ++h;
- }
- if (qtolower(*n) != qtolower(*h))
- return false;
- ++h;
- }
- ++n;
- }
- while (*h && !qisalnum(*h))
- ++h;
- return (*h == '\0');
-}
-
-
-#if !defined(Q_OS_WIN32) && !defined(QT_LOCALE_IS_UTF8)
-static QTextCodec *checkForCodec(const QByteArray &name) {
- QTextCodec *c = QTextCodec::codecForName(name);
- if (!c) {
- const int index = name.indexOf('@');
- if (index != -1) {
- c = QTextCodec::codecForName(name.left(index));
- }
- }
- return c;
-}
-#endif
-
-static void setup();
-
-// \threadsafe
-// this returns the codec the method sets up as locale codec to
-// avoid a race condition in codecForLocale() when
-// setCodecForLocale(0) is called at the same time.
-static QTextCodec *setupLocaleMapper()
-{
- QTextCodecData *globalData = QTextCodecData::instance();
-
- QTextCodec *locale = nullptr;
-
- {
- const TextCodecsMutexLocker locker;
- if (globalData->allCodecs.isEmpty())
- setup();
- }
-
-#if !defined(QT_BOOTSTRAPPED)
- QCoreApplicationPrivate::initLocale();
-#endif
-
-#if defined(QT_LOCALE_IS_UTF8)
- locale = QTextCodec::codecForName("UTF-8");
-#elif defined(Q_OS_WIN)
- locale = QTextCodec::codecForName("System");
-#else
-
- // First try getting the codecs name from nl_langinfo and see
- // if we have a builtin codec for it.
- // Only fall back to using iconv if we can't find a builtin codec
- // This is because the builtin utf8 codec is around 5 times faster
- // then the using QIconvCodec
-
-#if defined (_XOPEN_UNIX)
- char *charset = nl_langinfo(CODESET);
- if (charset)
- locale = QTextCodec::codecForName(charset);
-#endif
-#if QT_CONFIG(iconv)
- if (!locale) {
- // no builtin codec for the locale found, let's try using iconv
- (void) new QIconvCodec();
- locale = QTextCodec::codecForName("System");
- }
-#endif
-
- if (!locale) {
- // Very poorly defined and followed standards causes lots of
- // code to try to get all the cases... This logic is
- // duplicated in QIconvCodec, so if you change it here, change
- // it there too.
-
- // Try to determine locale codeset from locale name assigned to
- // LC_CTYPE category.
-
- // First part is getting that locale name. First try setlocale() which
- // definitely knows it, but since we cannot fully trust it, get ready
- // to fall back to environment variables.
- const QByteArray ctype = setlocale(LC_CTYPE, nullptr);
-
- // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
- // environment variables.
- QByteArray lang = qgetenv("LC_ALL");
- if (lang.isEmpty() || lang == "C") {
- lang = qgetenv("LC_CTYPE");
- }
- if (lang.isEmpty() || lang == "C") {
- lang = qgetenv("LANG");
- }
-
- // Now try these in order:
- // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
- // 2. CODESET from lang if it contains a .CODESET part
- // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
- // 4. locale (ditto)
- // 5. check for "@euro"
- // 6. guess locale from ctype unless ctype is "C"
- // 7. guess locale from lang
-
- // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
- int indexOfDot = ctype.indexOf('.');
- if (indexOfDot != -1)
- locale = checkForCodec( ctype.mid(indexOfDot + 1) );
-
- // 2. CODESET from lang if it contains a .CODESET part
- if (!locale) {
- indexOfDot = lang.indexOf('.');
- if (indexOfDot != -1)
- locale = checkForCodec( lang.mid(indexOfDot + 1) );
- }
-
- // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
- if (!locale && !ctype.isEmpty() && ctype != "C")
- locale = checkForCodec(ctype);
-
- // 4. locale (ditto)
- if (!locale && !lang.isEmpty())
- locale = checkForCodec(lang);
-
- // 5. "@euro"
- if ((!locale && ctype.contains("@euro")) || lang.contains("@euro"))
- locale = checkForCodec("ISO 8859-15");
- }
-
-#endif
- // If everything failed, we default to 8859-1
- if (!locale)
- locale = QTextCodec::codecForName("ISO 8859-1");
- globalData->codecForLocale.storeRelease(locale);
- return locale;
-}
-
-
-// textCodecsMutex need to be locked to enter this function
-static void setup()
-{
- static bool initialized = false;
- if (initialized)
- return;
- initialized = true;
-
-#if QT_CONFIG(codecs) && !defined(QT_BOOTSTRAPPED)
- (void)new QTsciiCodec;
- for (int i = 0; i < 9; ++i)
- (void)new QIsciiCodec(i);
- for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
- (void)new QSimpleTextCodec(i);
-
-# if QT_CONFIG(big_codecs) && !defined(Q_OS_INTEGRITY)
- (void)new QGb18030Codec;
- (void)new QGbkCodec;
- (void)new QGb2312Codec;
- (void)new QEucJpCodec;
- (void)new QJisCodec;
- (void)new QSjisCodec;
- (void)new QEucKrCodec;
- (void)new QCP949Codec;
- (void)new QBig5Codec;
- (void)new QBig5hkscsCodec;
-# endif // big_codecs && !Q_OS_INTEGRITY
-#if QT_CONFIG(iconv)
- (void) new QIconvCodec;
-#endif
-#if defined(Q_OS_WIN32)
- (void) new QWindowsLocalCodec;
-#endif // Q_OS_WIN32
-#endif // codecs && !QT_BOOTSTRAPPED
-
- (void)new QUtf16Codec;
- (void)new QUtf16BECodec;
- (void)new QUtf16LECodec;
- (void)new QUtf32Codec;
- (void)new QUtf32BECodec;
- (void)new QUtf32LECodec;
- (void)new QLatin15Codec;
- (void)new QLatin1Codec;
- (void)new QUtf8Codec;
-}
-#else
-static void setup() {}
-#endif // icu
-
-/*!
- \enum QTextCodec::ConversionFlag
-
- \value DefaultConversion No flag is set.
- \value ConvertInvalidToNull If this flag is set, each invalid input
- character is output as a null character.
- \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
-
- \omitvalue FreeFunction
-*/
-
-/*!
- \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
-
- Constructs a ConverterState object initialized with the given \a flags.
-*/
-
-/*!
- \class QTextCodec
- \inmodule QtCore
- \brief The QTextCodec class provides conversions between text encodings.
- \reentrant
- \ingroup i18n
-
- Qt uses Unicode to store, draw and manipulate strings. In many
- situations you may wish to deal with data that uses a different
- encoding. For example, most Japanese documents are still stored
- in Shift-JIS or ISO 2022-JP, while Russian users often have their
- documents in KOI8-R or Windows-1251.
-
- Qt provides a set of QTextCodec classes to help with converting
- non-Unicode formats to and from Unicode. You can also create your
- own codec classes.
-
- The supported encodings are:
-
- \list
- \li \l{Big5 Text Codec}{Big5}
- \li \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
- \li CP949
- \li \l{EUC-JP Text Codec}{EUC-JP}
- \li \l{EUC-KR Text Codec}{EUC-KR}
- \li \l{GBK Text Codec}{GB18030}
- \li HP-ROMAN8
- \li IBM 850
- \li IBM 866
- \li IBM 874
- \li \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
- \li ISO 8859-1 to 10
- \li ISO 8859-13 to 16
- \li Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
- \li KOI8-R
- \li KOI8-U
- \li Macintosh
- \li \l{Shift-JIS Text Codec}{Shift-JIS}
- \li TIS-620
- \li \l{TSCII Text Codec}{TSCII}
- \li UTF-8
- \li UTF-16
- \li UTF-16BE
- \li UTF-16LE
- \li UTF-32
- \li UTF-32BE
- \li UTF-32LE
- \li Windows-1250 to 1258
- \endlist
-
- If Qt is compiled with ICU support enabled, most codecs supported by
- ICU will also be available to the application.
-
- \l {QTextCodec}s can be used as follows to convert some locally encoded
- string to Unicode. Suppose you have some string encoded in Russian
- KOI8-R encoding, and want to convert it to Unicode. The simple way
- to do it is like this:
-
- \snippet code/src_corelib_codecs_qtextcodec.cpp 0
-
- After this, \c string holds the text converted to Unicode.
- Converting a string from Unicode to the local encoding is just as
- easy:
-
- \snippet code/src_corelib_codecs_qtextcodec.cpp 1
-
- To read or write files in various encodings, use QTextStream and
- its \l{QTextStream::setCodec()}{setCodec()} function. See the
- \l{tools/codecs}{Codecs} example for an application of QTextCodec
- to file I/O.
-
- Some care must be taken when trying to convert the data in chunks,
- for example, when receiving it over a network. In such cases it is
- possible that a multi-byte character will be split over two
- chunks. At best this might result in the loss of a character and
- at worst cause the entire conversion to fail.
-
- The approach to use in these situations is to create a QTextDecoder
- object for the codec and use this QTextDecoder for the whole
- decoding process, as shown below:
-
- \snippet code/src_corelib_codecs_qtextcodec.cpp 2
-
- The QTextDecoder object maintains state between chunks and therefore
- works correctly even if a multi-byte character is split between
- chunks.
-
- \section1 Creating Your Own Codec Class
-
- Support for new text encodings can be added to Qt by creating
- QTextCodec subclasses.
-
- The pure virtual functions describe the encoder to the system and
- the coder is used as required in the different text file formats
- supported by QTextStream, and under X11, for the locale-specific
- character input and output.
-
- To add support for another encoding to Qt, make a subclass of
- QTextCodec and implement the functions listed in the table below.
-
- \table
- \header \li Function \li Description
-
- \row \li name()
- \li Returns the official name for the encoding. If the
- encoding is listed in the
- \l{IANA character-sets encoding file}, the name
- should be the preferred MIME name for the encoding.
-
- \row \li aliases()
- \li Returns a list of alternative names for the encoding.
- QTextCodec provides a default implementation that returns
- an empty list. For example, "ISO-8859-1" has "latin1",
- "CP819", "IBM819", and "iso-ir-100" as aliases.
-
- \row \li \l{QTextCodec::mibEnum()}{mibEnum()}
- \li Return the MIB enum for the encoding if it is listed in
- the \l{IANA character-sets encoding file}.
-
- \row \li convertToUnicode()
- \li Converts an 8-bit character string to Unicode.
-
- \row \li convertFromUnicode()
- \li Converts a Unicode string to an 8-bit character string.
- \endtable
-
- \sa QTextStream, QTextDecoder, QTextEncoder, {Text Codecs Example}
-*/
-
-/*!
- Constructs a QTextCodec, and gives it the highest precedence. The
- QTextCodec should always be constructed on the heap (i.e. with \c
- new). Qt takes ownership and will delete it when the application
- terminates.
-*/
-QTextCodec::QTextCodec()
-{
- const TextCodecsMutexLocker locker;
-
- QTextCodecData *globalInstance = QTextCodecData::instance();
- if (globalInstance->allCodecs.isEmpty())
- setup();
-
- globalInstance->allCodecs.prepend(this);
-}
-
-
-/*!
- \nonreentrant
-
- Destroys the QTextCodec. Note that you should not delete codecs
- yourself: once created they become Qt's responsibility.
-*/
-QTextCodec::~QTextCodec()
-{
- QTextCodecData *globalData = QTextCodecData::instance();
- if (!globalData)
- return;
-
- globalData->codecForLocale.testAndSetRelaxed(this, nullptr);
-
- const TextCodecsMutexLocker locker;
-
- globalData->allCodecs.removeOne(this);
-
- auto it = globalData->codecCache.begin();
-
- while (it != globalData->codecCache.end()) {
- if (it.value() == this)
- it = globalData->codecCache.erase(it);
- else
- ++it;
- }
-}
-
-/*!
- \fn QTextCodec *QTextCodec::codecForName(const char *name)
-
- Searches all installed QTextCodec objects and returns the one
- which best matches \a name; the match is case-insensitive. Returns
- 0 if no codec matching the name \a name could be found.
-*/
-
-/*!
- \threadsafe
- Searches all installed QTextCodec objects and returns the one
- which best matches \a name; the match is case-insensitive. Returns
- 0 if no codec matching the name \a name could be found.
-*/
-QTextCodec *QTextCodec::codecForName(const QByteArray &name)
-{
- if (name.isEmpty())
- return nullptr;
-
- const TextCodecsMutexLocker locker;
-
- QTextCodecData *globalData = QTextCodecData::instance();
- if (!globalData)
- return nullptr;
- setup();
-
-#if !QT_CONFIG(icu)
- QTextCodecCache *cache = &globalData->codecCache;
- QTextCodec *codec;
- codec = cache->value(name);
- if (codec)
- return codec;
-
- for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
- QTextCodec *cursor = *it;
- if (qTextCodecNameMatch(cursor->name(), name)) {
- if (cache)
- cache->insert(name, cursor);
- return cursor;
- }
- QList<QByteArray> aliases = cursor->aliases();
- for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) {
- if (qTextCodecNameMatch(*ait, name)) {
- cache->insert(name, cursor);
- return cursor;
- }
- }
- }
-
- return nullptr;
-#else
- return QIcuCodec::codecForNameUnlocked(name);
-#endif
-}
-
-
-/*!
- \threadsafe
- Returns the QTextCodec which matches the
- \l{QTextCodec::mibEnum()}{MIBenum} \a mib.
-*/
-QTextCodec* QTextCodec::codecForMib(int mib)
-{
- const TextCodecsMutexLocker locker;
-
- QTextCodecData *globalData = QTextCodecData::instance();
- if (!globalData)
- return nullptr;
- if (globalData->allCodecs.isEmpty())
- setup();
-
- QByteArray key = "MIB: " + QByteArray::number(mib);
-
- QTextCodecCache *cache = &globalData->codecCache;
- QTextCodec *codec;
- if (cache) {
- codec = cache->value(key);
- if (codec)
- return codec;
- }
-
- for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
- QTextCodec *cursor = *it;
- if (cursor->mibEnum() == mib) {
- if (cache)
- cache->insert(key, cursor);
- return cursor;
- }
- }
-
-#if QT_CONFIG(icu)
- return QIcuCodec::codecForMibUnlocked(mib);
-#else
- return nullptr;
-#endif
-}
-
-/*!
- \threadsafe
- Returns the list of all available codecs, by name. Call
- QTextCodec::codecForName() to obtain the QTextCodec for the name.
-
- The list may contain many mentions of the same codec
- if the codec has aliases.
-
- \sa availableMibs(), name(), aliases()
-*/
-QList<QByteArray> QTextCodec::availableCodecs()
-{
- const TextCodecsMutexLocker locker;
-
- QTextCodecData *globalData = QTextCodecData::instance();
- if (globalData->allCodecs.isEmpty())
- setup();
-
- QList<QByteArray> codecs;
-
- for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
- codecs += (*it)->name();
- codecs += (*it)->aliases();
- }
-
-#if QT_CONFIG(icu)
- codecs += QIcuCodec::availableCodecs();
-#endif
-
- return codecs;
-}
-
-/*!
- \threadsafe
- Returns the list of MIBs for all available codecs. Call
- QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
-
- \sa availableCodecs(), mibEnum()
-*/
-QList<int> QTextCodec::availableMibs()
-{
-#if QT_CONFIG(icu)
- return QIcuCodec::availableMibs();
-#else
- const TextCodecsMutexLocker locker;
-
- QTextCodecData *globalData = QTextCodecData::instance();
- if (globalData->allCodecs.isEmpty())
- setup();
-
- QList<int> codecs;
-
- for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it)
- codecs += (*it)->mibEnum();
-
- return codecs;
-#endif
-}
-
-/*!
- \nonreentrant
-
- Set the codec to \a c; this will be returned by
- codecForLocale(). If \a c is \nullptr, the codec is reset to
- the default.
-
- This might be needed for some applications that want to use their
- own mechanism for setting the locale.
-
- \sa codecForLocale()
-*/
-void QTextCodec::setCodecForLocale(QTextCodec *c)
-{
- QTextCodecData::instance()->codecForLocale.storeRelease(c);
-}
-
-/*!
- \threadsafe
- Returns a pointer to the codec most suitable for this locale.
-
- The codec will be retrieved from ICU where that backend is in use, otherwise
- it may be obtained from an OS-specific API. In the latter case, the codec's
- name may be "System".
-*/
-
-QTextCodec* QTextCodec::codecForLocale()
-{
- QTextCodecData *globalData = QTextCodecData::instance();
- if (!globalData)
- return nullptr;
-
- QTextCodec *codec = globalData->codecForLocale.loadAcquire();
- if (!codec) {
-#if QT_CONFIG(icu)
- const TextCodecsMutexLocker locker;
- codec = QIcuCodec::defaultCodecUnlocked();
-#else
- // setupLocaleMapper locks as necessary
- codec = setupLocaleMapper();
-#endif
- }
-
- return codec;
-}
-
-
-/*!
- \fn QByteArray QTextCodec::name() const
-
- QTextCodec subclasses must reimplement this function. It returns
- the name of the encoding supported by the subclass.
-
- If the codec is registered as a character set in the
- \l{IANA character-sets encoding file} this method should
- return the preferred mime name for the codec if defined,
- otherwise its name.
-*/
-
-/*!
- \fn int QTextCodec::mibEnum() const
-
- Subclasses of QTextCodec must reimplement this function. It
- returns the \l{QTextCodec::mibEnum()}{MIBenum} (see \l{IANA character-sets encoding file}
- for more information). It is important that each QTextCodec
- subclass returns the correct unique value for this function.
-*/
-
-/*!
- Subclasses can return a number of aliases for the codec in question.
-
- Standard aliases for codecs can be found in the
- \l{IANA character-sets encoding file}.
-*/
-QList<QByteArray> QTextCodec::aliases() const
-{
- return QList<QByteArray>();
-}
-
-/*!
- \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
- ConverterState *state) const
-
- QTextCodec subclasses must reimplement this function.
-
- Converts the first \a len characters of \a chars from the
- encoding of the subclass to Unicode, and returns the result in a
- QString.
-
- \a state can be \nullptr, in which case the conversion is stateless and
- default conversion rules should be used. If state is not 0, the
- codec should save the state after the conversion in \a state, and
- adjust the \c remainingChars and \c invalidChars members of the struct.
-*/
-
-/*!
- \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
- ConverterState *state) const
-
- QTextCodec subclasses must reimplement this function.
-
- Converts the first \a number of characters from the \a input array
- from Unicode to the encoding of the subclass, and returns the result
- in a QByteArray.
-
- \a state can be \nullptr in which case the conversion is stateless and
- default conversion rules should be used. If state is not 0, the
- codec should save the state after the conversion in \a state, and
- adjust the \c remainingChars and \c invalidChars members of the struct.
-*/
-
-/*!
- Creates a QTextDecoder with a specified \a flags to decode chunks
- of \c{char *} data to create chunks of Unicode data.
-
- The caller is responsible for deleting the returned object.
-
- \since 4.7
-*/
-QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
-{
- return new QTextDecoder(this, flags);
-}
-
-/*!
- Creates a QTextEncoder with a specified \a flags to encode chunks
- of Unicode data as \c{char *} data.
-
- The caller is responsible for deleting the returned object.
-
- \since 4.7
-*/
-QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
-{
- return new QTextEncoder(this, flags);
-}
-
-/*!
- \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
- ConverterState *state) const
-
- Converts the first \a number of characters from the \a input array
- from Unicode to the encoding of this codec, and returns the result
- in a QByteArray.
-
- The \a state of the convertor used is updated.
-*/
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
- Converts \a str from Unicode to the encoding of this codec, and
- returns the result in a QByteArray.
-*/
-QByteArray QTextCodec::fromUnicode(const QString& str) const
-{
- return convertFromUnicode(str.constData(), str.length(), nullptr);
-}
-#endif
-
-/*!
- \overload
- \since 5.10
-
- Converts \a str from Unicode to the encoding of this codec, and
- returns the result in a QByteArray.
-*/
-QByteArray QTextCodec::fromUnicode(QStringView str) const
-{
- return convertFromUnicode(str.data(), str.length(), nullptr);
-}
-
-/*!
- \fn QString QTextCodec::toUnicode(const char *input, int size,
- ConverterState *state) const
-
- Converts the first \a size characters from the \a input from the
- encoding of this codec to Unicode, and returns the result in a
- QString.
-
- The \a state of the convertor used is updated.
-*/
-
-/*!
- Converts \a a from the encoding of this codec to Unicode, and
- returns the result in a QString.
-*/
-QString QTextCodec::toUnicode(const QByteArray& a) const
-{
- return convertToUnicode(a.constData(), a.length(), nullptr);
-}
-
-/*!
- Returns \c true if the Unicode character \a ch can be fully encoded
- with this codec; otherwise returns \c false.
-*/
-bool QTextCodec::canEncode(QChar ch) const
-{
- ConverterState state;
- state.flags = ConvertInvalidToNull;
- convertFromUnicode(&ch, 1, &state);
- return (state.invalidChars == 0);
-}
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
- \overload
-
- \a s contains the string being tested for encode-ability.
-*/
-bool QTextCodec::canEncode(const QString& s) const
-{
- ConverterState state;
- state.flags = ConvertInvalidToNull;
- convertFromUnicode(s.constData(), s.length(), &state);
- return (state.invalidChars == 0);
-}
-#endif
-
-/*!
- \overload
- \since 5.10
-
- Returns \c true if the Unicode string \a s can be fully encoded
- with this codec; otherwise returns \c false.
-*/
-bool QTextCodec::canEncode(QStringView s) const
-{
- ConverterState state;
- state.flags = ConvertInvalidToNull;
- convertFromUnicode(s.data(), s.length(), &state);
- return !state.invalidChars;
-}
-/*!
- \overload
-
- \a chars contains the source characters.
-*/
-QString QTextCodec::toUnicode(const char *chars) const
-{
- int len = qstrlen(chars);
- return convertToUnicode(chars, len, nullptr);
-}
-
-
-/*!
- \class QTextEncoder
- \inmodule QtCore
- \brief The QTextEncoder class provides a state-based encoder.
- \reentrant
- \ingroup i18n
-
- A text encoder converts text from Unicode into an encoded text format
- using a specific codec.
-
- The encoder converts Unicode into another format, remembering any
- state that is required between calls.
-
- \sa QTextCodec::makeEncoder(), QTextDecoder
-*/
-
-/*!
- \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
-
- Constructs a text encoder for the given \a codec.
-*/
-
-/*!
- Constructs a text encoder for the given \a codec and conversion \a flags.
-
- \since 4.7
-*/
-QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
- : c(codec), state()
-{
- state.flags = flags;
-}
-
-/*!
- Destroys the encoder.
-*/
-QTextEncoder::~QTextEncoder()
-{
-}
-
-/*!
- \internal
- \since 4.5
- Determines whether the eecoder encountered a failure while decoding the input. If
- an error was encountered, the produced result is undefined, and gets converted as according
- to the conversion flags.
- */
-bool QTextEncoder::hasFailure() const
-{
- return state.invalidChars != 0;
-}
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
- Converts the Unicode string \a str into an encoded QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(const QString& str)
-{
- QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
- return result;
-}
-#endif
-
-/*!
- \overload
- \since 5.10
- Converts the Unicode string \a str into an encoded QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(QStringView str)
-{
- return c->fromUnicode(str.data(), str.length(), &state);
-}
-
-/*!
- \overload
-
- Converts \a len characters (not bytes) from \a uc, and returns the
- result in a QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
-{
- QByteArray result = c->fromUnicode(uc, len, &state);
- return result;
-}
-
-/*!
- \class QTextDecoder
- \inmodule QtCore
- \brief The QTextDecoder class provides a state-based decoder.
- \reentrant
- \ingroup i18n
-
- A text decoder converts text from an encoded text format into Unicode
- using a specific codec.
-
- The decoder converts text in this format into Unicode, remembering any
- state that is required between calls.
-
- \sa QTextCodec::makeDecoder(), QTextEncoder
-*/
-
-/*!
- \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
-
- Constructs a text decoder for the given \a codec.
-*/
-
-/*!
- Constructs a text decoder for the given \a codec and conversion \a flags.
-
- \since 4.7
-*/
-
-QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
- : c(codec), state()
-{
- state.flags = flags;
-}
-
-/*!
- Destroys the decoder.
-*/
-QTextDecoder::~QTextDecoder()
-{
-}
-
-/*!
- \fn QString QTextDecoder::toUnicode(const char *chars, int len)
-
- Converts the first \a len bytes in \a chars to Unicode, returning
- the result.
-
- If not all characters are used (e.g. if only part of a multi-byte
- encoding is at the end of the characters), the decoder remembers
- enough state to continue with the next call to this function.
-*/
-QString QTextDecoder::toUnicode(const char *chars, int len)
-{
- return c->toUnicode(chars, len, &state);
-}
-
-// in qstring.cpp:
-void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
-
-/*! \overload
-
- The converted string is returned in \a target.
- */
-void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
-{
- Q_ASSERT(target);
- switch (c->mibEnum()) {
- case 106: // utf8
- static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
- break;
- case 4: // latin1
- target->resize(len);
- qt_from_latin1((char16_t*)target->data(), chars, len);
- break;
- default:
- *target = c->toUnicode(chars, len, &state);
- }
-}
-
-
-/*!
- \overload
-
- Converts the bytes in the byte array specified by \a ba to Unicode
- and returns the result.
-*/
-QString QTextDecoder::toUnicode(const QByteArray &ba)
-{
- return c->toUnicode(ba.constData(), ba.length(), &state);
-}
-
-/*!
- \since 4.4
-
- Tries to detect the encoding of the provided snippet of HTML in
- the given byte array, \a ba, by checking the BOM (Byte Order Mark)
- and the content-type meta header and returns a QTextCodec instance
- that is capable of decoding the html to unicode. If the codec
- cannot be detected from the content provided, \a defaultCodec is
- returned.
-
- \sa codecForUtfText()
-*/
-QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
-{
- // determine charset
- QTextCodec *c = QTextCodec::codecForUtfText(ba, nullptr);
- if (!c) {
- static Q_RELAXED_CONSTEXPR auto matcher = qMakeStaticByteArrayMatcher("meta ");
- QByteArray header = ba.left(1024).toLower();
- int pos = matcher.indexIn(header);
- if (pos != -1) {
- static Q_RELAXED_CONSTEXPR auto matcher = qMakeStaticByteArrayMatcher("charset=");
- pos = matcher.indexIn(header, pos);
- if (pos != -1) {
- pos += qstrlen("charset=");
-
- int pos2 = pos;
- // The attribute can be closed with either """, "'", ">" or "/",
- // none of which are valid charset characters.
- while (++pos2 < header.size()) {
- char ch = header.at(pos2);
- if (ch == '\"' || ch == '\'' || ch == '>') {
- QByteArray name = header.mid(pos, pos2 - pos);
- if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
- name = QByteArrayLiteral("UTF-8");
- c = QTextCodec::codecForName(name);
- return c ? c : defaultCodec;
- }
- }
- }
- }
- }
- if (!c)
- c = defaultCodec;
-
- return c;
-}
-
-/*!
- \overload
-
- Tries to detect the encoding of the provided snippet of HTML in
- the given byte array, \a ba, by checking the BOM (Byte Order Mark)
- and the content-type meta header and returns a QTextCodec instance
- that is capable of decoding the html to unicode. If the codec cannot
- be detected, this overload returns a Latin-1 QTextCodec.
-*/
-QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
-{
- return codecForHtml(ba, QTextCodec::codecForName("ISO-8859-1"));
-}
-
-/*!
- \since 4.6
-
- Tries to detect the encoding of the provided snippet \a ba by
- using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. This function can
- detect one of the following codecs:
-
- \list
- \li UTF-32 Little Endian
- \li UTF-32 Big Endian
- \li UTF-16 Little Endian
- \li UTF-16 Big Endian
- \li UTF-8
- \endlist
-
- If the codec cannot be detected from the content provided, \a defaultCodec
- is returned.
-
- \sa codecForHtml()
-*/
-QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
-{
- const int arraySize = ba.size();
- const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
- const uint bom = 0xfeff;
-
- if (arraySize > 3) {
- uint uc = qFromUnaligned<uint>(buf);
- if (uc == qToBigEndian(bom))
- return QTextCodec::codecForMib(1018); // utf-32 be
- else if (uc == qToLittleEndian(bom))
- return QTextCodec::codecForMib(1019); // utf-32 le
- }
-
- if (arraySize < 2)
- return defaultCodec;
-
- ushort uc = qFromUnaligned<ushort>(buf);
- if (uc == qToBigEndian(ushort(bom)))
- return QTextCodec::codecForMib(1013); // utf16 be
- else if (uc == qToLittleEndian(ushort(bom)))
- return QTextCodec::codecForMib(1014); // utf16 le
-
- if (arraySize < 3)
- return defaultCodec;
-
- static const char utf8bom[] = "\xef\xbb\xbf";
- if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0)
- return QTextCodec::codecForMib(106); // utf-8
-
- return defaultCodec;
-}
-
-/*!
- \overload
-
- Tries to detect the encoding of the provided snippet \a ba by
- using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. This function can
- detect one of the following codecs:
-
- \list
- \li UTF-32 Little Endian
- \li UTF-32 Big Endian
- \li UTF-16 Little Endian
- \li UTF-16 Big Endian
- \li UTF-8
- \endlist
-
- If the codec cannot be detected from the content provided, this overload
- returns a Latin-1 QTextCodec.
-
- \sa codecForHtml()
-*/
-QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
-{
- return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
-}
-
-/*!
- \fn QTextCodec * QTextCodec::codecForTr ()
- \obsolete
-
- Returns the codec used by QObject::tr() on its argument. If this
- function returns \nullptr (the default), tr() assumes Latin-1.
-*/
-
-/*!
- \internal
- \since 4.3
- Determines whether the decoder encountered a failure while decoding the
- input. If an error was encountered, the produced result is undefined, and
- gets converted as according to the conversion flags.
- */
-bool QTextDecoder::hasFailure() const
-{
- return state.invalidChars != 0;
-}
-
-/*!
- \internal
- \since 5.12
-
- Determines whether the decoder needs more bytes to continue decoding. That
- is, this signifies that the input string ended in the middle of a
- multi-byte sequence. Note that it's possible some codecs do not report this.
- */
-bool QTextDecoder::needsMoreData() const
-{
- return state.remainingChars;
-}
-
-/*!
- \fn QTextCodec *Qt::codecForHtml(const QByteArray &ba)
- \internal
-
- This function is defined in the \c <QTextCodec> header file.
-*/
-QTextCodec *Qt::codecForHtml(const QByteArray &ba)
-{
- return QTextCodec::codecForHtml(ba);
-}
-
-QT_END_NAMESPACE