diff options
Diffstat (limited to 'src/corelib/codecs/qiconvcodec.cpp')
-rw-r--r-- | src/corelib/codecs/qiconvcodec.cpp | 522 |
1 files changed, 0 insertions, 522 deletions
diff --git a/src/corelib/codecs/qiconvcodec.cpp b/src/corelib/codecs/qiconvcodec.cpp deleted file mode 100644 index 0fa18eadba..0000000000 --- a/src/corelib/codecs/qiconvcodec.cpp +++ /dev/null @@ -1,522 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include <QtCore/private/qglobal_p.h> - -#include "qiconvcodec_p.h" -#include "qtextcodec_p.h" -#include <qdebug.h> -#include <qthreadstorage.h> - -#include <errno.h> -#include <locale.h> -#include <stdio.h> -#include <dlfcn.h> - -// unistd.h is needed for the _XOPEN_UNIX macro -#include <unistd.h> -#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX) -# include <langinfo.h> -#endif - -#if defined(Q_OS_HPUX) -# define NO_BOM -# define UTF16 "ucs2" -#elif defined(Q_OS_AIX) -# define NO_BOM -# define UTF16 "UCS-2" -#elif defined(Q_OS_FREEBSD) -# define NO_BOM -# if Q_BYTE_ORDER == Q_BIG_ENDIAN -# define UTF16 "UTF-16BE" -# else -# define UTF16 "UTF-16LE" -# endif -#else -# define UTF16 "UTF-16" -#endif - -QT_BEGIN_NAMESPACE - -QIconvCodec::QIconvCodec() - : utf16Codec(0) -{ -} - -void QIconvCodec::init() const -{ - utf16Codec = QTextCodec::codecForMib(1015); - Q_ASSERT_X(utf16Codec != 0, - "QIconvCodec::convertToUnicode", - "internal error, UTF-16 codec not found"); - if (!utf16Codec) { - fprintf(stderr, "QIconvCodec::convertToUnicode: internal error, UTF-16 codec not found\n"); - utf16Codec = reinterpret_cast<QTextCodec *>(~0); - } -} - -QIconvCodec::~QIconvCodec() -{ -} - -QIconvCodec::IconvState::IconvState(iconv_t x) - : buffer(array), bufferLen(sizeof array), cd(x) -{ -} - -QIconvCodec::IconvState::~IconvState() -{ - if (cd != reinterpret_cast<iconv_t>(-1)) - iconv_close(cd); - if (buffer != array) - delete[] buffer; -} - -void QIconvCodec::IconvState::saveChars(const char *c, int count) -{ - if (count > bufferLen) { - if (buffer != array) - delete[] buffer; - buffer = new char[bufferLen = count]; - } - - memcpy(buffer, c, count); -} - -static void qIconvCodecStateFree(QTextCodec::ConverterState *state) -{ - delete reinterpret_cast<QIconvCodec::IconvState *>(state->d[0]); -} - -Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, toUnicodeState) - -QString QIconvCodec::convertToUnicode(const char* chars, int len, ConverterState *convState) const -{ - if (utf16Codec == reinterpret_cast<QTextCodec *>(~0)) - return QString::fromLatin1(chars, len); - - int invalidCount = 0; - int remainingCount = 0; - char *remainingBuffer = 0; - IconvState *temporaryState = 0; - IconvState **pstate; - - if (convState) { - // stateful conversion - pstate = reinterpret_cast<IconvState **>(&convState->d[0]); - if (convState->d[0]) { - // restore state - remainingCount = convState->remainingChars; - remainingBuffer = (*pstate)->buffer; - } else { - // first time - convState->clearFn = qIconvCodecStateFree; - } - } else { - QThreadStorage<QIconvCodec::IconvState *> *ts = toUnicodeState(); - if (!ts) { - // we're running after the Q_GLOBAL_STATIC has been deleted - // or before the QCoreApplication initialization - // bad programmer, no cookie for you - pstate = &temporaryState; - } else { - // stateless conversion -- use thread-local data - pstate = &toUnicodeState()->localData(); - } - } - - if (!*pstate) { - // first time, create the state - iconv_t cd = createIconv_t(UTF16, 0); - if (cd == reinterpret_cast<iconv_t>(-1)) { - static int reported = 0; - if (!reported++) { - fprintf(stderr, - "QIconvCodec::convertToUnicode: using Latin-1 for conversion, iconv_open failed\n"); - } - return QString::fromLatin1(chars, len); - } - - *pstate = new IconvState(cd); - } - - IconvState *state = *pstate; - size_t inBytesLeft = len; - // best case assumption, each byte is converted into one UTF-16 character, plus 2 bytes for the BOM -#if !QT_CONFIG(posix_libiconv) - // GNU doesn't disagree with POSIX :/ - const char *inBytes = chars; -#else - char *inBytes = const_cast<char *>(chars); -#endif - - QByteArray in; - if (remainingCount) { - // we have to prepend the remaining bytes from the previous conversion - inBytesLeft += remainingCount; - in.resize(inBytesLeft); - inBytes = in.data(); - - memcpy(in.data(), remainingBuffer, remainingCount); - memcpy(in.data() + remainingCount, chars, len); - - remainingCount = 0; - } - - size_t outBytesLeft = len * 2 + 2; - QByteArray ba(outBytesLeft, Qt::Uninitialized); - char *outBytes = ba.data(); - do { - size_t ret = iconv(state->cd, &inBytes, &inBytesLeft, &outBytes, &outBytesLeft); - if (ret == (size_t) -1) { - if (errno == E2BIG) { - int offset = ba.size() - outBytesLeft; - ba.resize(ba.size() * 2); - outBytes = ba.data() + offset; - outBytesLeft = ba.size() - offset; - - continue; - } - - if (errno == EILSEQ) { - // conversion stopped because of an invalid character in the sequence - ++invalidCount; - } else if (errno == EINVAL && convState) { - // conversion stopped because the remaining inBytesLeft make up - // an incomplete multi-byte sequence; save them for later - state->saveChars(inBytes, inBytesLeft); - remainingCount = inBytesLeft; - break; - } - - if (errno == EILSEQ || errno == EINVAL) { - // skip the next character - ++inBytes; - --inBytesLeft; - continue; - } - - // some other error - // note, cannot use qWarning() since we are implementing the codecForLocale :) - perror("QIconvCodec::convertToUnicode: using Latin-1 for conversion, iconv failed"); - - if (!convState) { - // reset state - iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft); - } - - delete temporaryState; - return QString::fromLatin1(chars, len); - } - } while (inBytesLeft != 0); - - QString s; - - if (convState) { - s = utf16Codec->toUnicode(ba.constData(), ba.size() - outBytesLeft, &state->internalState); - - convState->invalidChars = invalidCount; - convState->remainingChars = remainingCount; - } else { - s = utf16Codec->toUnicode(ba.constData(), ba.size() - outBytesLeft); - - // reset state - iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft); - } - - delete temporaryState; - return s; -} - -Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, fromUnicodeState) - -static bool setByteOrder(iconv_t cd) -{ -#if !defined(NO_BOM) - // give iconv() a BOM - char buf[4]; - ushort bom[] = { QChar::ByteOrderMark }; - - char *outBytes = buf; - char *inBytes = reinterpret_cast<char *>(bom); - size_t outBytesLeft = sizeof buf; - size_t inBytesLeft = sizeof bom; - -#if !QT_CONFIG(posix_libiconv) - const char **inBytesPtr = const_cast<const char **>(&inBytes); -#else - char **inBytesPtr = &inBytes; -#endif - - if (iconv(cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) { - return false; - } -#else - Q_UNUSED(cd); -#endif // NO_BOM - - return true; -} - -QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *convState) const -{ - char *inBytes; - char *outBytes; - size_t inBytesLeft; - -#if !QT_CONFIG(posix_libiconv) - const char **inBytesPtr = const_cast<const char **>(&inBytes); -#else - char **inBytesPtr = &inBytes; -#endif - - IconvState *temporaryState = 0; - QThreadStorage<QIconvCodec::IconvState *> *ts = fromUnicodeState(); - IconvState *&state = ts ? ts->localData() : temporaryState; - if (!state) { - iconv_t cd = createIconv_t(0, UTF16); - if (cd != reinterpret_cast<iconv_t>(-1)) { - if (!setByteOrder(cd)) { - perror("QIconvCodec::convertFromUnicode: using Latin-1 for conversion, iconv failed for BOM"); - - iconv_close(cd); - cd = reinterpret_cast<iconv_t>(-1); - - return QString(uc, len).toLatin1(); - } - } - state = new IconvState(cd); - } - if (state->cd == reinterpret_cast<iconv_t>(-1)) { - static int reported = 0; - if (!reported++) { - fprintf(stderr, - "QIconvCodec::convertFromUnicode: using Latin-1 for conversion, iconv_open failed\n"); - } - delete temporaryState; - return QString(uc, len).toLatin1(); - } - - size_t outBytesLeft = len; - QByteArray ba(outBytesLeft, Qt::Uninitialized); - outBytes = ba.data(); - - // now feed iconv() the real data - inBytes = const_cast<char *>(reinterpret_cast<const char *>(uc)); - inBytesLeft = len * sizeof(QChar); - - QByteArray in; - if (convState && convState->remainingChars) { - // we have one surrogate char to be prepended - in.resize(sizeof(QChar) + len); - inBytes = in.data(); - - QChar remaining = convState->state_data[0]; - memcpy(in.data(), &remaining, sizeof(QChar)); - memcpy(in.data() + sizeof(QChar), uc, inBytesLeft); - - inBytesLeft += sizeof(QChar); - convState->remainingChars = 0; - } - - int invalidCount = 0; - while (inBytesLeft != 0) { - if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) { - if (errno == EINVAL && convState) { - // buffer ends in a surrogate - Q_ASSERT(inBytesLeft == 2); - convState->remainingChars = 1; - convState->state_data[0] = uc[len - 1].unicode(); - break; - } - - switch (errno) { - case EILSEQ: - ++invalidCount; - Q_FALLTHROUGH(); - case EINVAL: - { - inBytes += sizeof(QChar); - inBytesLeft -= sizeof(QChar); - break; - } - case E2BIG: - { - int offset = ba.size() - outBytesLeft; - ba.resize(ba.size() * 2); - outBytes = ba.data() + offset; - outBytesLeft = ba.size() - offset; - break; - } - default: - { - // note, cannot use qWarning() since we are implementing the codecForLocale :) - perror("QIconvCodec::convertFromUnicode: using Latin-1 for conversion, iconv failed"); - - // reset to initial state - iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft); - - delete temporaryState; - return QString(uc, len).toLatin1(); - } - } - } - } - - // reset to initial state - iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft); - setByteOrder(state->cd); - - ba.resize(ba.size() - outBytesLeft); - - if (convState) - convState->invalidChars = invalidCount; - - delete temporaryState; - return ba; -} - -QByteArray QIconvCodec::name() const -{ - return "System"; -} - -int QIconvCodec::mibEnum() const -{ - return 0; -} - -iconv_t QIconvCodec::createIconv_t(const char *to, const char *from) const -{ - Q_ASSERT((to == 0 && from != 0) || (to != 0 && from == 0)); - - if (!utf16Codec) - init(); - - iconv_t cd = (iconv_t) -1; -#if defined(__GLIBC__) || !QT_CONFIG(posix_libiconv) || defined(Q_OS_QNX) -#if defined(Q_OS_QNX) - // on QNX the default locale is UTF-8, and an empty string will cause iconv_open to fail - static const char empty_codeset[] = "UTF-8"; -#else - // both GLIBC and libgnuiconv will use the locale's encoding if from or to is an empty string - static const char empty_codeset[] = ""; -#endif - const char *codeset = empty_codeset; - cd = iconv_open(to ? to : codeset, from ? from : codeset); -#else - char *codeset = 0; -#endif - -#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX) - if (cd == (iconv_t) -1) { - codeset = nl_langinfo(CODESET); - if (codeset) - cd = iconv_open(to ? to : codeset, from ? from : codeset); - } -#endif - - if (cd == (iconv_t) -1) { - // Very poorly defined and followed standards causes lots of - // code to try to get all the cases... This logic is - // duplicated in QTextCodec, so if you change it here, change - // it there too. - - // Try to determine locale codeset from locale name assigned to - // LC_CTYPE category. - - // First part is getting that locale name. First try setlocale() which - // definitely knows it, but since we cannot fully trust it, get ready - // to fall back to environment variables. - char * ctype = qstrdup(setlocale(LC_CTYPE, 0)); - - // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG - // environment variables. - char * lang = qstrdup(qgetenv("LC_ALL").constData()); - if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { - if (lang) delete [] lang; - lang = qstrdup(qgetenv("LC_CTYPE").constData()); - } - if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { - if (lang) delete [] lang; - lang = qstrdup(qgetenv("LANG").constData()); - } - - // Now try these in order: - // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) - // 2. CODESET from lang if it contains a .CODESET part - // 3. ctype (maybe the locale is named "ISO-8859-1" or something) - // 4. locale (ditto) - // 5. check for "@euro" - - // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) - codeset = ctype ? strchr(ctype, '.') : 0; - if (codeset && *codeset == '.') { - ++codeset; - cd = iconv_open(to ? to : codeset, from ? from : codeset); - } - - // 2. CODESET from lang if it contains a .CODESET part - codeset = lang ? strchr(lang, '.') : 0; - if (cd == (iconv_t) -1 && codeset && *codeset == '.') { - ++codeset; - cd = iconv_open(to ? to : codeset, from ? from : codeset); - } - - // 3. ctype (maybe the locale is named "ISO-8859-1" or something) - if (cd == (iconv_t) -1 && ctype && *ctype != 0 && strcmp (ctype, "C") != 0) - cd = iconv_open(to ? to : ctype, from ? from : ctype); - - - // 4. locale (ditto) - if (cd == (iconv_t) -1 && lang && *lang != 0) - cd = iconv_open(to ? to : lang, from ? from : lang); - - // 5. "@euro" - if ((cd == (iconv_t) -1 && ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))) - cd = iconv_open(to ? to : "ISO8859-15", from ? from : "ISO8859-15"); - - delete [] ctype; - delete [] lang; - } - - return cd; -} - -QT_END_NAMESPACE |