Move QTextCodec support out of QtCore

* Assume UTF-8 on all Unix like systems
* Export some functions to be able to compile QTextCodec once moved
  to Qt5Compat.

Task-number: QTBUG-75665
Change-Id: I52ec47a848bc0ba72e9c7689668b1bcc5d736c29
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
author: Karsten Heimrich <karsten.heimrich@qt.io> 2020-06-17 12:33:40 +0200
committer: Karsten Heimrich <karsten.heimrich@qt.io> 2020-06-20 02:04:38 +0200
commit: 18ec53156ee704fdb4977436fccfdc85333e614b (patch)
tree: df734ce7893d570f4a90f923fcfe9c80ddc3de05 /src/corelib/codecs/qtextcodec.cpp
parent: 4455de24d453138411c57bae55af18f3fba58d4b (diff)
1 files changed, 0 insertions, 1299 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
deleted file mode 100644
index be585d0407..0000000000
--- a/src/corelib/codecs/qtextcodec.cpp
+++ /dev/null
@@ -1,1299 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2018 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include "qplatformdefs.h"
-
-#include "qtextcodec.h"
-#include "qtextcodec_p.h"
-
-#include "qbytearraymatcher.h"
-#include "qendian.h"
-#include "qfile.h"
-#include "qlist.h"
-#include <private/qlocking_p.h>
-#include "qstringlist.h"
-#include "qvarlengtharray.h"
-#if !defined(QT_BOOTSTRAPPED)
-#include <private/qcoreapplication_p.h>
-#endif
-
-#include "qutfcodec_p.h"
-#include "qlatincodec_p.h"
-
-#if !defined(QT_BOOTSTRAPPED)
-#if QT_CONFIG(codecs)
-#  include "qtsciicodec_p.h"
-#  include "qisciicodec_p.h"
-#endif
-#if QT_CONFIG(icu)
-#include "qicucodec_p.h"
-#else
-#if QT_CONFIG(iconv)
-#  include "qiconvcodec_p.h"
-#endif
-#ifdef Q_OS_WIN
-#  include "qwindowscodec_p.h"
-#endif
-#  include "qsimplecodec_p.h"
-#if QT_CONFIG(big_codecs)
-#  ifndef Q_OS_INTEGRITY
-#    include "qgb18030codec_p.h"
-#    include "qeucjpcodec_p.h"
-#    include "qjiscodec_p.h"
-#    include "qsjiscodec_p.h"
-#    include "qeuckrcodec_p.h"
-#    include "qbig5codec_p.h"
-#  endif // !Q_OS_INTEGRITY
-#endif // big_codecs
-
-#endif // icu
-#endif // QT_BOOTSTRAPPED
-
-#include <mutex>
-
-#include <stdlib.h>
-#include <ctype.h>
-#include <locale.h>
-#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_ANDROID)
-# include <langinfo.h>
-#endif
-
-QT_BEGIN_NAMESPACE
-
-typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt;
-typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt;
-
-Q_GLOBAL_STATIC(QRecursiveMutex, textCodecsMutex);
-
-Q_GLOBAL_STATIC(QTextCodecData, textCodecData)
-
-QTextCodecData::QTextCodecData()
-    : codecForLocale(nullptr)
-{
-}
-
-QTextCodecData::~QTextCodecData()
-{
-    codecForLocale = nullptr;
-    QList<QTextCodec *> tmp = allCodecs;
-    allCodecs.clear();
-    codecCache.clear();
-    for (QList<QTextCodec *>::const_iterator it = tmp.constBegin(); it != tmp.constEnd(); ++it)
-        delete *it;
-}
-
-QTextCodecData *QTextCodecData::instance()
-{
-    return textCodecData();
-}
-
-class TextCodecsMutexLocker
-{
-    using Lock = decltype(qt_unique_lock(std::declval<QRecursiveMutex&>()));
-    // ### FIXME: this is used when textCodecsMutex already == nullptr
-    const Lock lock = qt_unique_lock(textCodecsMutex());
-public:
-    TextCodecsMutexLocker() {} // required d/t an ICC 19 bug
-};
-
-#if !QT_CONFIG(icu)
-static char qtolower(char c)
-{ if (c >= 'A' && c <= 'Z') return c + 0x20; return c; }
-static bool qisalnum(char c)
-{ return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
-
-bool qTextCodecNameMatch(const char *n, const char *h)
-{
-    if (qstricmp(n, h) == 0)
-        return true;
-
-    // if the letters and numbers are the same, we have a match
-    while (*n != '\0') {
-        if (qisalnum(*n)) {
-            for (;;) {
-                if (*h == '\0')
-                    return false;
-                if (qisalnum(*h))
-                    break;
-                ++h;
-            }
-            if (qtolower(*n) != qtolower(*h))
-                return false;
-            ++h;
-        }
-        ++n;
-    }
-    while (*h && !qisalnum(*h))
-           ++h;
-    return (*h == '\0');
-}
-
-
-#if !defined(Q_OS_WIN32) && !defined(QT_LOCALE_IS_UTF8)
-static QTextCodec *checkForCodec(const QByteArray &name) {
-    QTextCodec *c = QTextCodec::codecForName(name);
-    if (!c) {
-        const int index = name.indexOf('@');
-        if (index != -1) {
-            c = QTextCodec::codecForName(name.left(index));
-        }
-    }
-    return c;
-}
-#endif
-
-static void setup();
-
-// \threadsafe
-// this returns the codec the method sets up as locale codec to
-// avoid a race condition in codecForLocale() when
-// setCodecForLocale(0) is called at the same time.
-static QTextCodec *setupLocaleMapper()
-{
-    QTextCodecData *globalData = QTextCodecData::instance();
-
-    QTextCodec *locale = nullptr;
-
-    {
-        const TextCodecsMutexLocker locker;
-        if (globalData->allCodecs.isEmpty())
-            setup();
-    }
-
-#if !defined(QT_BOOTSTRAPPED)
-    QCoreApplicationPrivate::initLocale();
-#endif
-
-#if defined(QT_LOCALE_IS_UTF8)
-    locale = QTextCodec::codecForName("UTF-8");
-#elif defined(Q_OS_WIN)
-    locale = QTextCodec::codecForName("System");
-#else
-
-    // First try getting the codecs name from nl_langinfo and see
-    // if we have a builtin codec for it.
-    // Only fall back to using iconv if we can't find a builtin codec
-    // This is because the builtin utf8 codec is around 5 times faster
-    // then the using QIconvCodec
-
-#if defined (_XOPEN_UNIX)
-    char *charset = nl_langinfo(CODESET);
-    if (charset)
-        locale = QTextCodec::codecForName(charset);
-#endif
-#if QT_CONFIG(iconv)
-    if (!locale) {
-        // no builtin codec for the locale found, let's try using iconv
-        (void) new QIconvCodec();
-        locale = QTextCodec::codecForName("System");
-    }
-#endif
-
-    if (!locale) {
-        // Very poorly defined and followed standards causes lots of
-        // code to try to get all the cases... This logic is
-        // duplicated in QIconvCodec, so if you change it here, change
-        // it there too.
-
-        // Try to determine locale codeset from locale name assigned to
-        // LC_CTYPE category.
-
-        // First part is getting that locale name.  First try setlocale() which
-        // definitely knows it, but since we cannot fully trust it, get ready
-        // to fall back to environment variables.
-        const QByteArray ctype = setlocale(LC_CTYPE, nullptr);
-
-        // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
-        // environment variables.
-        QByteArray lang = qgetenv("LC_ALL");
-        if (lang.isEmpty() || lang == "C") {
-            lang = qgetenv("LC_CTYPE");
-        }
-        if (lang.isEmpty() || lang == "C") {
-            lang = qgetenv("LANG");
-        }
-
-        // Now try these in order:
-        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
-        // 2. CODESET from lang if it contains a .CODESET part
-        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
-        // 4. locale (ditto)
-        // 5. check for "@euro"
-        // 6. guess locale from ctype unless ctype is "C"
-        // 7. guess locale from lang
-
-        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
-        int indexOfDot = ctype.indexOf('.');
-        if (indexOfDot != -1)
-            locale = checkForCodec( ctype.mid(indexOfDot + 1) );
-
-        // 2. CODESET from lang if it contains a .CODESET part
-        if (!locale) {
-            indexOfDot = lang.indexOf('.');
-            if (indexOfDot != -1)
-                locale = checkForCodec( lang.mid(indexOfDot + 1) );
-        }
-
-        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
-        if (!locale && !ctype.isEmpty() && ctype != "C")
-            locale = checkForCodec(ctype);
-
-        // 4. locale (ditto)
-        if (!locale && !lang.isEmpty())
-            locale = checkForCodec(lang);
-
-        // 5. "@euro"
-        if ((!locale && ctype.contains("@euro")) || lang.contains("@euro"))
-            locale = checkForCodec("ISO 8859-15");
-    }
-
-#endif
-    // If everything failed, we default to 8859-1
-    if (!locale)
-        locale = QTextCodec::codecForName("ISO 8859-1");
-    globalData->codecForLocale.storeRelease(locale);
-    return locale;
-}
-
-
-// textCodecsMutex need to be locked to enter this function
-static void setup()
-{
-    static bool initialized = false;
-    if (initialized)
-        return;
-    initialized = true;
-
-#if QT_CONFIG(codecs) && !defined(QT_BOOTSTRAPPED)
-    (void)new QTsciiCodec;
-    for (int i = 0; i < 9; ++i)
-        (void)new QIsciiCodec(i);
-    for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
-        (void)new QSimpleTextCodec(i);
-
-#  if QT_CONFIG(big_codecs) && !defined(Q_OS_INTEGRITY)
-    (void)new QGb18030Codec;
-    (void)new QGbkCodec;
-    (void)new QGb2312Codec;
-    (void)new QEucJpCodec;
-    (void)new QJisCodec;
-    (void)new QSjisCodec;
-    (void)new QEucKrCodec;
-    (void)new QCP949Codec;
-    (void)new QBig5Codec;
-    (void)new QBig5hkscsCodec;
-#  endif // big_codecs && !Q_OS_INTEGRITY
-#if QT_CONFIG(iconv)
-    (void) new QIconvCodec;
-#endif
-#if defined(Q_OS_WIN32)
-    (void) new QWindowsLocalCodec;
-#endif // Q_OS_WIN32
-#endif // codecs && !QT_BOOTSTRAPPED
-
-    (void)new QUtf16Codec;
-    (void)new QUtf16BECodec;
-    (void)new QUtf16LECodec;
-    (void)new QUtf32Codec;
-    (void)new QUtf32BECodec;
-    (void)new QUtf32LECodec;
-    (void)new QLatin15Codec;
-    (void)new QLatin1Codec;
-    (void)new QUtf8Codec;
-}
-#else
-static void setup() {}
-#endif // icu
-
-/*!
-    \enum QTextCodec::ConversionFlag
-
-    \value DefaultConversion  No flag is set.
-    \value ConvertInvalidToNull  If this flag is set, each invalid input
-                                 character is output as a null character.
-    \value IgnoreHeader  Ignore any Unicode byte-order mark and don't generate any.
-
-    \omitvalue FreeFunction
-*/
-
-/*!
-    \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
-
-    Constructs a ConverterState object initialized with the given \a flags.
-*/
-
-/*!
-    \class QTextCodec
-    \inmodule QtCore
-    \brief The QTextCodec class provides conversions between text encodings.
-    \reentrant
-    \ingroup i18n
-
-    Qt uses Unicode to store, draw and manipulate strings. In many
-    situations you may wish to deal with data that uses a different
-    encoding. For example, most Japanese documents are still stored
-    in Shift-JIS or ISO 2022-JP, while Russian users often have their
-    documents in KOI8-R or Windows-1251.
-
-    Qt provides a set of QTextCodec classes to help with converting
-    non-Unicode formats to and from Unicode. You can also create your
-    own codec classes.
-
-    The supported encodings are:
-
-    \list
-    \li \l{Big5 Text Codec}{Big5}
-    \li \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
-    \li CP949
-    \li \l{EUC-JP Text Codec}{EUC-JP}
-    \li \l{EUC-KR Text Codec}{EUC-KR}
-    \li \l{GBK Text Codec}{GB18030}
-    \li HP-ROMAN8
-    \li IBM 850
-    \li IBM 866
-    \li IBM 874
-    \li \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
-    \li ISO 8859-1 to 10
-    \li ISO 8859-13 to 16
-    \li Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
-    \li KOI8-R
-    \li KOI8-U
-    \li Macintosh
-    \li \l{Shift-JIS Text Codec}{Shift-JIS}
-    \li TIS-620
-    \li \l{TSCII Text Codec}{TSCII}
-    \li UTF-8
-    \li UTF-16
-    \li UTF-16BE
-    \li UTF-16LE
-    \li UTF-32
-    \li UTF-32BE
-    \li UTF-32LE
-    \li Windows-1250 to 1258
-    \endlist
-
-    If Qt is compiled with ICU support enabled, most codecs supported by
-    ICU will also be available to the application.
-
-    \l {QTextCodec}s can be used as follows to convert some locally encoded
-    string to Unicode. Suppose you have some string encoded in Russian
-    KOI8-R encoding, and want to convert it to Unicode. The simple way
-    to do it is like this:
-
-    \snippet code/src_corelib_codecs_qtextcodec.cpp 0
-
-    After this, \c string holds the text converted to Unicode.
-    Converting a string from Unicode to the local encoding is just as
-    easy:
-
-    \snippet code/src_corelib_codecs_qtextcodec.cpp 1
-
-    To read or write files in various encodings, use QTextStream and
-    its \l{QTextStream::setCodec()}{setCodec()} function. See the
-    \l{tools/codecs}{Codecs} example for an application of QTextCodec
-    to file I/O.
-
-    Some care must be taken when trying to convert the data in chunks,
-    for example, when receiving it over a network. In such cases it is
-    possible that a multi-byte character will be split over two
-    chunks. At best this might result in the loss of a character and
-    at worst cause the entire conversion to fail.
-
-    The approach to use in these situations is to create a QTextDecoder
-    object for the codec and use this QTextDecoder for the whole
-    decoding process, as shown below:
-
-    \snippet code/src_corelib_codecs_qtextcodec.cpp 2
-
-    The QTextDecoder object maintains state between chunks and therefore
-    works correctly even if a multi-byte character is split between
-    chunks.
-
-    \section1 Creating Your Own Codec Class
-
-    Support for new text encodings can be added to Qt by creating
-    QTextCodec subclasses.
-
-    The pure virtual functions describe the encoder to the system and
-    the coder is used as required in the different text file formats
-    supported by QTextStream, and under X11, for the locale-specific
-    character input and output.
-
-    To add support for another encoding to Qt, make a subclass of
-    QTextCodec and implement the functions listed in the table below.
-
-    \table
-    \header \li Function \li Description
-
-    \row \li name()
-         \li Returns the official name for the encoding. If the
-            encoding is listed in the
-            \l{IANA character-sets encoding file}, the name
-            should be the preferred MIME name for the encoding.
-
-    \row \li aliases()
-         \li Returns a list of alternative names for the encoding.
-            QTextCodec provides a default implementation that returns
-            an empty list. For example, "ISO-8859-1" has "latin1",
-            "CP819", "IBM819", and "iso-ir-100" as aliases.
-
-    \row \li \l{QTextCodec::mibEnum()}{mibEnum()}
-         \li Return the MIB enum for the encoding if it is listed in
-            the \l{IANA character-sets encoding file}.
-
-    \row \li convertToUnicode()
-         \li Converts an 8-bit character string to Unicode.
-
-    \row \li convertFromUnicode()
-         \li Converts a Unicode string to an 8-bit character string.
-    \endtable
-
-    \sa QTextStream, QTextDecoder, QTextEncoder, {Text Codecs Example}
-*/
-
-/*!
-    Constructs a QTextCodec, and gives it the highest precedence. The
-    QTextCodec should always be constructed on the heap (i.e. with \c
-    new). Qt takes ownership and will delete it when the application
-    terminates.
-*/
-QTextCodec::QTextCodec()
-{
-    const TextCodecsMutexLocker locker;
-
-    QTextCodecData *globalInstance = QTextCodecData::instance();
-    if (globalInstance->allCodecs.isEmpty())
-        setup();
-
-    globalInstance->allCodecs.prepend(this);
-}
-
-
-/*!
-    \nonreentrant
-
-    Destroys the QTextCodec. Note that you should not delete codecs
-    yourself: once created they become Qt's responsibility.
-*/
-QTextCodec::~QTextCodec()
-{
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (!globalData)
-        return;
-
-    globalData->codecForLocale.testAndSetRelaxed(this, nullptr);
-
-    const TextCodecsMutexLocker locker;
-
-    globalData->allCodecs.removeOne(this);
-
-    auto it = globalData->codecCache.begin();
-
-    while (it != globalData->codecCache.end()) {
-        if (it.value() == this)
-            it = globalData->codecCache.erase(it);
-        else
-            ++it;
-    }
-}
-
-/*!
-    \fn QTextCodec *QTextCodec::codecForName(const char *name)
-
-    Searches all installed QTextCodec objects and returns the one
-    which best matches \a name; the match is case-insensitive. Returns
-    0 if no codec matching the name \a name could be found.
-*/
-
-/*!
-    \threadsafe
-    Searches all installed QTextCodec objects and returns the one
-    which best matches \a name; the match is case-insensitive. Returns
-    0 if no codec matching the name \a name could be found.
-*/
-QTextCodec *QTextCodec::codecForName(const QByteArray &name)
-{
-    if (name.isEmpty())
-        return nullptr;
-
-    const TextCodecsMutexLocker locker;
-
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (!globalData)
-        return nullptr;
-    setup();
-
-#if !QT_CONFIG(icu)
-    QTextCodecCache *cache = &globalData->codecCache;
-    QTextCodec *codec;
-    codec = cache->value(name);
-    if (codec)
-        return codec;
-
-    for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
-        QTextCodec *cursor = *it;
-        if (qTextCodecNameMatch(cursor->name(), name)) {
-            if (cache)
-                cache->insert(name, cursor);
-            return cursor;
-        }
-        QList<QByteArray> aliases = cursor->aliases();
-        for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) {
-            if (qTextCodecNameMatch(*ait, name)) {
-                cache->insert(name, cursor);
-                return cursor;
-            }
-        }
-    }
-
-    return nullptr;
-#else
-    return QIcuCodec::codecForNameUnlocked(name);
-#endif
-}
-
-
-/*!
-    \threadsafe
-    Returns the QTextCodec which matches the
-    \l{QTextCodec::mibEnum()}{MIBenum} \a mib.
-*/
-QTextCodec* QTextCodec::codecForMib(int mib)
-{
-    const TextCodecsMutexLocker locker;
-
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (!globalData)
-        return nullptr;
-    if (globalData->allCodecs.isEmpty())
-        setup();
-
-    QByteArray key = "MIB: " + QByteArray::number(mib);
-
-    QTextCodecCache *cache = &globalData->codecCache;
-    QTextCodec *codec;
-    if (cache) {
-        codec = cache->value(key);
-        if (codec)
-            return codec;
-    }
-
-    for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
-        QTextCodec *cursor = *it;
-        if (cursor->mibEnum() == mib) {
-            if (cache)
-                cache->insert(key, cursor);
-            return cursor;
-        }
-    }
-
-#if QT_CONFIG(icu)
-    return QIcuCodec::codecForMibUnlocked(mib);
-#else
-    return nullptr;
-#endif
-}
-
-/*!
-    \threadsafe
-    Returns the list of all available codecs, by name. Call
-    QTextCodec::codecForName() to obtain the QTextCodec for the name.
-
-    The list may contain many mentions of the same codec
-    if the codec has aliases.
-
-    \sa availableMibs(), name(), aliases()
-*/
-QList<QByteArray> QTextCodec::availableCodecs()
-{
-    const TextCodecsMutexLocker locker;
-
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (globalData->allCodecs.isEmpty())
-        setup();
-
-    QList<QByteArray> codecs;
-
-    for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) {
-        codecs += (*it)->name();
-        codecs += (*it)->aliases();
-    }
-
-#if QT_CONFIG(icu)
-    codecs += QIcuCodec::availableCodecs();
-#endif
-
-    return codecs;
-}
-
-/*!
-    \threadsafe
-    Returns the list of MIBs for all available codecs. Call
-    QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
-
-    \sa availableCodecs(), mibEnum()
-*/
-QList<int> QTextCodec::availableMibs()
-{
-#if QT_CONFIG(icu)
-    return QIcuCodec::availableMibs();
-#else
-    const TextCodecsMutexLocker locker;
-
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (globalData->allCodecs.isEmpty())
-        setup();
-
-    QList<int> codecs;
-
-    for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it)
-        codecs += (*it)->mibEnum();
-
-    return codecs;
-#endif
-}
-
-/*!
-    \nonreentrant
-
-    Set the codec to \a c; this will be returned by
-    codecForLocale(). If \a c is \nullptr, the codec is reset to
-    the default.
-
-    This might be needed for some applications that want to use their
-    own mechanism for setting the locale.
-
-    \sa codecForLocale()
-*/
-void QTextCodec::setCodecForLocale(QTextCodec *c)
-{
-    QTextCodecData::instance()->codecForLocale.storeRelease(c);
-}
-
-/*!
-    \threadsafe
-    Returns a pointer to the codec most suitable for this locale.
-
-    The codec will be retrieved from ICU where that backend is in use, otherwise
-    it may be obtained from an OS-specific API.  In the latter case, the codec's
-    name may be "System".
-*/
-
-QTextCodec* QTextCodec::codecForLocale()
-{
-    QTextCodecData *globalData = QTextCodecData::instance();
-    if (!globalData)
-        return nullptr;
-
-    QTextCodec *codec = globalData->codecForLocale.loadAcquire();
-    if (!codec) {
-#if QT_CONFIG(icu)
-        const TextCodecsMutexLocker locker;
-        codec = QIcuCodec::defaultCodecUnlocked();
-#else
-        // setupLocaleMapper locks as necessary
-        codec = setupLocaleMapper();
-#endif
-    }
-
-    return codec;
-}
-
-
-/*!
-    \fn QByteArray QTextCodec::name() const
-
-    QTextCodec subclasses must reimplement this function. It returns
-    the name of the encoding supported by the subclass.
-
-    If the codec is registered as a character set in the
-    \l{IANA character-sets encoding file} this method should
-    return the preferred mime name for the codec if defined,
-    otherwise its name.
-*/
-
-/*!
-    \fn int QTextCodec::mibEnum() const
-
-    Subclasses of QTextCodec must reimplement this function. It
-    returns the \l{QTextCodec::mibEnum()}{MIBenum} (see \l{IANA character-sets encoding file}
-    for more information). It is important that each QTextCodec
-    subclass returns the correct unique value for this function.
-*/
-
-/*!
-  Subclasses can return a number of aliases for the codec in question.
-
-  Standard aliases for codecs can be found in the
-  \l{IANA character-sets encoding file}.
-*/
-QList<QByteArray> QTextCodec::aliases() const
-{
-    return QList<QByteArray>();
-}
-
-/*!
-    \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
-                                             ConverterState *state) const
-
-    QTextCodec subclasses must reimplement this function.
-
-    Converts the first \a len characters of \a chars from the
-    encoding of the subclass to Unicode, and returns the result in a
-    QString.
-
-    \a state can be \nullptr, in which case the conversion is stateless and
-    default conversion rules should be used. If state is not 0, the
-    codec should save the state after the conversion in \a state, and
-    adjust the \c remainingChars and \c invalidChars members of the struct.
-*/
-
-/*!
-    \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
-                                                  ConverterState *state) const
-
-    QTextCodec subclasses must reimplement this function.
-
-    Converts the first \a number of characters from the \a input array
-    from Unicode to the encoding of the subclass, and returns the result
-    in a QByteArray.
-
-    \a state can be \nullptr in which case the conversion is stateless and
-    default conversion rules should be used. If state is not 0, the
-    codec should save the state after the conversion in \a state, and
-    adjust the \c remainingChars and \c invalidChars members of the struct.
-*/
-
-/*!
-    Creates a QTextDecoder with a specified \a flags to decode chunks
-    of \c{char *} data to create chunks of Unicode data.
-
-    The caller is responsible for deleting the returned object.
-
-    \since 4.7
-*/
-QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
-{
-    return new QTextDecoder(this, flags);
-}
-
-/*!
-    Creates a QTextEncoder with a specified \a flags to encode chunks
-    of Unicode data as \c{char *} data.
-
-    The caller is responsible for deleting the returned object.
-
-    \since 4.7
-*/
-QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
-{
-    return new QTextEncoder(this, flags);
-}
-
-/*!
-    \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
-                                           ConverterState *state) const
-
-    Converts the first \a number of characters from the \a input array
-    from Unicode to the encoding of this codec, and returns the result
-    in a QByteArray.
-
-    The \a state of the convertor used is updated.
-*/
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
-    Converts \a str from Unicode to the encoding of this codec, and
-    returns the result in a QByteArray.
-*/
-QByteArray QTextCodec::fromUnicode(const QString& str) const
-{
-    return convertFromUnicode(str.constData(), str.length(), nullptr);
-}
-#endif
-
-/*!
-    \overload
-    \since 5.10
-
-    Converts \a str from Unicode to the encoding of this codec, and
-    returns the result in a QByteArray.
-*/
-QByteArray QTextCodec::fromUnicode(QStringView str) const
-{
-    return convertFromUnicode(str.data(), str.length(), nullptr);
-}
-
-/*!
-    \fn QString QTextCodec::toUnicode(const char *input, int size,
-                                      ConverterState *state) const
-
-    Converts the first \a size characters from the \a input from the
-    encoding of this codec to Unicode, and returns the result in a
-    QString.
-
-    The \a state of the convertor used is updated.
-*/
-
-/*!
-    Converts \a a from the encoding of this codec to Unicode, and
-    returns the result in a QString.
-*/
-QString QTextCodec::toUnicode(const QByteArray& a) const
-{
-    return convertToUnicode(a.constData(), a.length(), nullptr);
-}
-
-/*!
-    Returns \c true if the Unicode character \a ch can be fully encoded
-    with this codec; otherwise returns \c false.
-*/
-bool QTextCodec::canEncode(QChar ch) const
-{
-    ConverterState state;
-    state.flags = ConvertInvalidToNull;
-    convertFromUnicode(&ch, 1, &state);
-    return (state.invalidChars == 0);
-}
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
-    \overload
-
-    \a s contains the string being tested for encode-ability.
-*/
-bool QTextCodec::canEncode(const QString& s) const
-{
-    ConverterState state;
-    state.flags = ConvertInvalidToNull;
-    convertFromUnicode(s.constData(), s.length(), &state);
-    return (state.invalidChars == 0);
-}
-#endif
-
-/*!
-    \overload
-    \since 5.10
-
-    Returns \c true if the Unicode string \a s can be fully encoded
-    with this codec; otherwise returns \c false.
-*/
-bool QTextCodec::canEncode(QStringView s) const
-{
-    ConverterState state;
-    state.flags = ConvertInvalidToNull;
-    convertFromUnicode(s.data(), s.length(), &state);
-    return !state.invalidChars;
-}
-/*!
-    \overload
-
-    \a chars contains the source characters.
-*/
-QString QTextCodec::toUnicode(const char *chars) const
-{
-    int len = qstrlen(chars);
-    return convertToUnicode(chars, len, nullptr);
-}
-
-
-/*!
-    \class QTextEncoder
-    \inmodule QtCore
-    \brief The QTextEncoder class provides a state-based encoder.
-    \reentrant
-    \ingroup i18n
-
-    A text encoder converts text from Unicode into an encoded text format
-    using a specific codec.
-
-    The encoder converts Unicode into another format, remembering any
-    state that is required between calls.
-
-    \sa QTextCodec::makeEncoder(), QTextDecoder
-*/
-
-/*!
-    \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
-
-    Constructs a text encoder for the given \a codec.
-*/
-
-/*!
-    Constructs a text encoder for the given \a codec and conversion \a flags.
-
-    \since 4.7
-*/
-QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
-    : c(codec), state()
-{
-    state.flags = flags;
-}
-
-/*!
-    Destroys the encoder.
-*/
-QTextEncoder::~QTextEncoder()
-{
-}
-
-/*!
-    \internal
-    \since 4.5
-    Determines whether the eecoder encountered a failure while decoding the input. If
-    an error was encountered, the produced result is undefined, and gets converted as according
-    to the conversion flags.
- */
-bool QTextEncoder::hasFailure() const
-{
-    return state.invalidChars != 0;
-}
-
-#if QT_STRINGVIEW_LEVEL < 2
-/*!
-    Converts the Unicode string \a str into an encoded QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(const QString& str)
-{
-    QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
-    return result;
-}
-#endif
-
-/*!
-    \overload
-    \since 5.10
-    Converts the Unicode string \a str into an encoded QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(QStringView str)
-{
-    return c->fromUnicode(str.data(), str.length(), &state);
-}
-
-/*!
-    \overload
-
-    Converts \a len characters (not bytes) from \a uc, and returns the
-    result in a QByteArray.
-*/
-QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
-{
-    QByteArray result = c->fromUnicode(uc, len, &state);
-    return result;
-}
-
-/*!
-    \class QTextDecoder
-    \inmodule QtCore
-    \brief The QTextDecoder class provides a state-based decoder.
-    \reentrant
-    \ingroup i18n
-
-    A text decoder converts text from an encoded text format into Unicode
-    using a specific codec.
-
-    The decoder converts text in this format into Unicode, remembering any
-    state that is required between calls.
-
-    \sa QTextCodec::makeDecoder(), QTextEncoder
-*/
-
-/*!
-    \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
-
-    Constructs a text decoder for the given \a codec.
-*/
-
-/*!
-    Constructs a text decoder for the given \a codec and conversion \a flags.
-
-    \since 4.7
-*/
-
-QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
-    : c(codec), state()
-{
-    state.flags = flags;
-}
-
-/*!
-    Destroys the decoder. No explicit cleanup is performed here.
-*/
-QTextDecoder::~QTextDecoder() = default;
-
-/*!
-    \fn QString QTextDecoder::toUnicode(const char *chars, int len)
-
-    Decodes the first \a len bytes of \a chars into Unicode and returns
-    the resulting string.
-
-    When the input ends before all of it could be consumed (for example
-    in the middle of a multi-byte sequence), the decoder keeps enough
-    state to carry on with the next call to this function.
-*/
-QString QTextDecoder::toUnicode(const char *chars, int len)
-{
-    QString decoded = c->toUnicode(chars, len, &state);
-    return decoded;
-}
-
-// Implemented in qstring.cpp:
-void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept;
-
-/*! \overload
-
-    Decodes the first \a len bytes of \a chars and stores the resulting
-    string in \a target, which must not be null.
- */
-void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
-{
-    Q_ASSERT(target);
-    const int mib = c->mibEnum();
-    if (mib == 106) {
-        // utf8: hand the target string to the UTF-8 codec's convertToUnicode().
-        static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
-    } else if (mib == 4) {
-        // latin1: the target is sized to one character per input byte.
-        target->resize(len);
-        qt_from_latin1(reinterpret_cast<char16_t *>(target->data()), chars, len);
-    } else {
-        // Any other codec goes through the generic conversion.
-        *target = c->toUnicode(chars, len, &state);
-    }
-}
-
-
-/*!
-    \overload
-
-    Decodes all bytes held by the byte array \a ba into Unicode and
-    returns the resulting string.
-*/
-QString QTextDecoder::toUnicode(const QByteArray &ba)
-{
-    QString decoded = c->toUnicode(ba.constData(), ba.length(), &state);
-    return decoded;
-}
-
-/*!
-    \since 4.4
-
-    Tries to detect the encoding of the HTML snippet in the byte array
-    \a ba and returns a QTextCodec instance that is capable of decoding
-    the html to unicode. The BOM (Byte Order Mark) is checked first, using
-    codecForUtfText(). If that yields nothing, the first 1024 bytes are
-    searched for a \c{charset=} value that follows a \c{meta} tag; the
-    value may be unquoted or enclosed in single or double quotes.
-
-    If the codec cannot be detected from the content provided,
-    \a defaultCodec is returned.
-
-    \sa codecForUtfText()
-*/
-QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
-{
-    // A BOM, when present, is trusted before anything declared in the markup.
-    QTextCodec *c = QTextCodec::codecForUtfText(ba, nullptr);
-    if (!c) {
-        static Q_RELAXED_CONSTEXPR auto metaMatcher = qMakeStaticByteArrayMatcher("meta ");
-        static Q_RELAXED_CONSTEXPR auto charsetMatcher = qMakeStaticByteArrayMatcher("charset=");
-
-        // Only the first 1024 bytes are inspected, lower-cased so that the
-        // search is case-insensitive.
-        const QByteArray header = ba.left(1024).toLower();
-        int pos = metaMatcher.indexIn(header);
-        if (pos != -1)
-            pos = charsetMatcher.indexIn(header, pos);
-        if (pos != -1) {
-            pos += qstrlen("charset=");
-            // <meta charset="utf-8"> quotes the value itself; skip the opening
-            // quote so that it does not become part of the charset name.
-            if (pos < header.size() && (header.at(pos) == '\"' || header.at(pos) == '\''))
-                ++pos;
-
-            int pos2 = pos;
-            // The attribute can be closed with either """, "'", ">" or "/",
-            // none of which are valid charset characters.
-            while (++pos2 < header.size()) {
-                const char ch = header.at(pos2);
-                if (ch == '\"' || ch == '\'' || ch == '>' || ch == '/') {
-                    QByteArray name = header.mid(pos, pos2 - pos);
-                    if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
-                        name = QByteArrayLiteral("UTF-8");
-                    c = QTextCodec::codecForName(name);
-                    return c ? c : defaultCodec;
-                }
-            }
-        }
-    }
-    if (!c)
-        c = defaultCodec;
-
-    return c;
-}
-
-/*!
-    \overload
-
-    Tries to detect the encoding of the HTML snippet in the byte array
-    \a ba from its BOM (Byte Order Mark) and its content-type meta header,
-    and returns a QTextCodec instance that is capable of decoding the html
-    to unicode. When nothing can be detected, the codec looked up under the
-    name "ISO-8859-1" (Latin-1) is returned instead.
-*/
-QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
-{
-    QTextCodec *latin1Fallback = QTextCodec::codecForName("ISO-8859-1");
-    return codecForHtml(ba, latin1Fallback);
-}
-
-/*!
-    \since 4.6
-
-    Inspects the start of the snippet \a ba for a BOM (Byte Order Mark)
-    and returns a QTextCodec instance that is capable of decoding the
-    text to unicode. The following codecs can be detected this way:
-
-    \list
-      \li UTF-32 Little Endian
-      \li UTF-32 Big Endian
-      \li UTF-16 Little Endian
-      \li UTF-16 Big Endian
-      \li UTF-8
-    \endlist
-
-    When no BOM is recognized, or \a ba is too short to hold one,
-    \a defaultCodec is returned.
-
-    \sa codecForHtml()
-*/
-QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
-{
-    const uchar *bytes = reinterpret_cast<const uchar *>(ba.constData());
-    const int byteCount = ba.size();
-    const uint bom = 0xfeff;
-
-    // UTF-32 is tested first: its little-endian BOM begins with the same
-    // two bytes as the UTF-16 little-endian BOM.
-    if (byteCount >= 4) {
-        const uint first4 = qFromUnaligned<uint>(bytes);
-        if (first4 == qToBigEndian(bom))
-            return QTextCodec::codecForMib(1018); // utf-32 be
-        if (first4 == qToLittleEndian(bom))
-            return QTextCodec::codecForMib(1019); // utf-32 le
-    }
-
-    if (byteCount >= 2) {
-        const ushort first2 = qFromUnaligned<ushort>(bytes);
-        if (first2 == qToBigEndian(ushort(bom)))
-            return QTextCodec::codecForMib(1013); // utf16 be
-        if (first2 == qToLittleEndian(ushort(bom)))
-            return QTextCodec::codecForMib(1014); // utf16 le
-
-        // The UTF-8 BOM is three bytes long.
-        if (byteCount >= 3) {
-            static const char utf8bom[] = "\xef\xbb\xbf";
-            if (memcmp(bytes, utf8bom, sizeof(utf8bom) - 1) == 0)
-                return QTextCodec::codecForMib(106); // utf-8
-        }
-    }
-
-    return defaultCodec;
-}
-
-/*!
-    \overload
-
-    Inspects the start of the snippet \a ba for a BOM (Byte Order Mark)
-    and returns a QTextCodec instance that is capable of decoding the
-    text to unicode. The following codecs can be detected this way:
-
-    \list
-      \li UTF-32 Little Endian
-      \li UTF-32 Big Endian
-      \li UTF-16 Little Endian
-      \li UTF-16 Big Endian
-      \li UTF-8
-    \endlist
-
-    When no BOM is recognized, this overload falls back to the codec
-    registered under MIB 4 (Latin-1).
-
-    \sa codecForHtml()
-*/
-QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
-{
-    QTextCodec *latin1Fallback = QTextCodec::codecForMib(/*Latin 1*/ 4);
-    return codecForUtfText(ba, latin1Fallback);
-}
-
-/*!
-    \fn QTextCodec * QTextCodec::codecForTr ()
-    \obsolete
-
-    Returns the codec used by QObject::tr() on its argument. If this
-    function returns \nullptr (the default), tr() assumes Latin-1.
-*/
-
-/*!
-    \internal
-    \since 4.3
-    Reports whether the decoder encountered a failure while decoding its
-    input, that is, whether the conversion state has recorded any invalid
-    characters. If it has, the produced result is undefined; how the invalid
-    input was converted depends on the conversion flags.
- */
-bool QTextDecoder::hasFailure() const
-{
-    const bool sawInvalidChars = (state.invalidChars != 0);
-    return sawInvalidChars;
-}
-
-/*!
-    \internal
-    \since 5.12
-
-    Reports whether the decoder needs more bytes before it can continue,
-    which is the case when the input so far ended in the middle of a
-    multi-byte sequence. The answer is taken from the conversion state's
-    count of remaining characters. Note that it's possible some codecs do
-    not report this.
- */
-bool QTextDecoder::needsMoreData() const
-{
-    return state.remainingChars != 0;
-}
-
-/*!
-    \fn QTextCodec *Qt::codecForHtml(const QByteArray &ba)
-    \internal
-
-    Forwards \a ba to QTextCodec::codecForHtml() and returns its result.
-    This function is defined in the \c <QTextCodec> header file.
-*/
-QTextCodec *Qt::codecForHtml(const QByteArray &ba)
-{
-    QTextCodec *detected = QTextCodec::codecForHtml(ba);
-    return detected;
-}
-
-QT_END_NAMESPACE
author	Karsten Heimrich <karsten.heimrich@qt.io>	2020-06-17 12:33:40 +0200
committer	Karsten Heimrich <karsten.heimrich@qt.io>	2020-06-20 02:04:38 +0200
commit	18ec53156ee704fdb4977436fccfdc85333e614b (patch)
tree	df734ce7893d570f4a90f923fcfe9c80ddc3de05 /src/corelib/codecs/qtextcodec.cpp
parent	4455de24d453138411c57bae55af18f3fba58d4b (diff)