summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs/qtextcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/codecs/qtextcodec.cpp')
-rw-r--r--src/corelib/codecs/qtextcodec.cpp1878
1 files changed, 1878 insertions, 0 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
new file mode 100644
index 0000000000..3c3d39ed99
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -0,0 +1,1878 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qplatformdefs.h"
+#include "qtextcodec.h"
+#include "qtextcodec_p.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+#include "qlist.h"
+#include "qfile.h"
+#ifndef QT_NO_LIBRARY
+# include "qcoreapplication.h"
+# include "qtextcodecplugin.h"
+# include "private/qfactoryloader_p.h"
+#endif
+#include "qstringlist.h"
+
+#ifdef Q_OS_UNIX
+# include "qiconvcodec_p.h"
+#endif
+
+#include "qutfcodec_p.h"
+#include "qsimplecodec_p.h"
+#include "qlatincodec_p.h"
+#ifndef QT_NO_CODECS
+# include "qtsciicodec_p.h"
+# include "qisciicodec_p.h"
+#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
+# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+// no iconv(3) support, must build all codecs into the library
+# include "../../plugins/codecs/cn/qgb18030codec.h"
+# include "../../plugins/codecs/jp/qeucjpcodec.h"
+# include "../../plugins/codecs/jp/qjiscodec.h"
+# include "../../plugins/codecs/jp/qsjiscodec.h"
+# include "../../plugins/codecs/kr/qeuckrcodec.h"
+# include "../../plugins/codecs/tw/qbig5codec.h"
+# endif // QT_NO_ICONV
+# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
+# include "qfontlaocodec_p.h"
+# include "../../plugins/codecs/jp/qfontjpcodec.h"
+# endif
+#endif // QT_NO_SYMBIAN
+#endif // QT_NO_CODECS
+#include "qlocale.h"
+#include "qmutex.h"
+#include "qhash.h"
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <locale.h>
+#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
+#include <langinfo.h>
+#endif
+
+#if defined(Q_OS_WINCE)
+# define QT_NO_SETLOCALE
+#endif
+
+#ifdef Q_OS_SYMBIAN
+#include "qtextcodec_symbian.cpp"
+#endif
+
+
+// enabling this is not exception safe!
+// #define Q_DEBUG_TEXTCODEC
+
+QT_BEGIN_NAMESPACE
+
+#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
+Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
+ (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
+#endif
+
+//Cache for QTextCodec::codecForName and codecForMib.
+typedef QHash<QByteArray, QTextCodec *> QTextCodecCache;
+Q_GLOBAL_STATIC(QTextCodecCache, qTextCodecCache)
+
+
+static char qtolower(register char c)
+{ if (c >= 'A' && c <= 'Z') return c + 0x20; return c; }
+static bool qisalnum(register char c)
+{ return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
+
+static bool nameMatch(const QByteArray &name, const QByteArray &test)
+{
+ // if they're the same, return a perfect score
+ if (qstricmp(name, test) == 0)
+ return true;
+
+ const char *n = name.constData();
+ const char *h = test.constData();
+
+ // if the letters and numbers are the same, we have a match
+ while (*n != '\0') {
+ if (qisalnum(*n)) {
+ for (;;) {
+ if (*h == '\0')
+ return false;
+ if (qisalnum(*h))
+ break;
+ ++h;
+ }
+ if (qtolower(*n) != qtolower(*h))
+ return false;
+ ++h;
+ }
+ ++n;
+ }
+ while (*h && !qisalnum(*h))
+ ++h;
+ return (*h == '\0');
+}
+
+
+static QTextCodec *createForName(const QByteArray &name)
+{
+#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
+ QFactoryLoader *l = loader();
+ QStringList keys = l->keys();
+ for (int i = 0; i < keys.size(); ++i) {
+ if (nameMatch(name, keys.at(i).toLatin1())) {
+ QString realName = keys.at(i);
+ if (QTextCodecFactoryInterface *factory
+ = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
+ return factory->create(realName);
+ }
+ }
+ }
+#else
+ Q_UNUSED(name);
+#endif
+ return 0;
+}
+
+static QTextCodec *createForMib(int mib)
+{
+#ifndef QT_NO_TEXTCODECPLUGIN
+ QString name = QLatin1String("MIB: ") + QString::number(mib);
+ if (QTextCodecFactoryInterface *factory
+ = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
+ return factory->create(name);
+#else
+ Q_UNUSED(mib);
+#endif
+ return 0;
+}
+
+static QList<QTextCodec*> *all = 0;
+#ifdef Q_DEBUG_TEXTCODEC
+static bool destroying_is_ok = false;
+#endif
+
+static QTextCodec *localeMapper = 0;
+QTextCodec *QTextCodec::cftr = 0;
+
+
+class QTextCodecCleanup
+{
+public:
+ ~QTextCodecCleanup();
+};
+
+/*
+ Deletes all the created codecs. This destructor is called just
+ before exiting to delete any QTextCodec objects that may be lying
+ around.
+*/
+QTextCodecCleanup::~QTextCodecCleanup()
+{
+ if (!all)
+ return;
+
+#ifdef Q_DEBUG_TEXTCODEC
+ destroying_is_ok = true;
+#endif
+
+ for (QList<QTextCodec *>::const_iterator it = all->constBegin()
+ ; it != all->constEnd(); ++it) {
+ delete *it;
+ }
+ delete all;
+ all = 0;
+ localeMapper = 0;
+
+#ifdef Q_DEBUG_TEXTCODEC
+ destroying_is_ok = false;
+#endif
+}
+
+Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
+
+bool QTextCodec::validCodecs()
+{
+#ifdef Q_OS_SYMBIAN
+ // If we don't have a trap handler, we're outside of the main() function,
+ // ie. in global constructors or destructors. Don't use codecs in this
+ // case as it would lead to crashes because we don't have a cleanup stack on Symbian
+ return (User::TrapHandler() != NULL);
+#else
+ return true;
+#endif
+}
+
+
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+class QWindowsLocalCodec: public QTextCodec
+{
+public:
+ QWindowsLocalCodec();
+ ~QWindowsLocalCodec();
+
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
+
+ QByteArray name() const;
+ int mibEnum() const;
+
+};
+
+QWindowsLocalCodec::QWindowsLocalCodec()
+{
+}
+
+QWindowsLocalCodec::~QWindowsLocalCodec()
+{
+}
+
+QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
+{
+ const char *mb = chars;
+ int mblen = length;
+
+ if (!mb || !mblen)
+ return QString();
+
+ const int wclen_auto = 4096;
+ wchar_t wc_auto[wclen_auto];
+ int wclen = wclen_auto;
+ wchar_t *wc = wc_auto;
+ int len;
+ QString sp;
+ bool prepend = false;
+ char state_data = 0;
+ int remainingChars = 0;
+
+ //save the current state information
+ if (state) {
+ state_data = (char)state->state_data[0];
+ remainingChars = state->remainingChars;
+ }
+
+ //convert the pending charcter (if available)
+ if (state && remainingChars) {
+ char prev[3] = {0};
+ prev[0] = state_data;
+ prev[1] = mb[0];
+ remainingChars = 0;
+ len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ prev, 2, wc, wclen);
+ if (len) {
+ prepend = true;
+ sp.append(QChar(wc[0]));
+ mb++;
+ mblen--;
+ wc[0] = 0;
+ }
+ }
+
+ while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+ mb, mblen, wc, wclen))) {
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ if (wc != wc_auto) {
+ qWarning("MultiByteToWideChar: Size changed");
+ break;
+ } else {
+ wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+ mb, mblen, 0, 0);
+ wc = new wchar_t[wclen];
+ // and try again...
+ }
+ } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+ //find the last non NULL character
+ while (mblen > 1 && !(mb[mblen-1]))
+ mblen--;
+ //check whether, we hit an invalid character in the middle
+ if ((mblen <= 1) || (remainingChars && state_data))
+ return convertToUnicodeCharByChar(chars, length, state);
+ //Remove the last character and try again...
+ state_data = mb[mblen-1];
+ remainingChars = 1;
+ mblen--;
+ } else {
+ // Fail.
+ qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+ break;
+ }
+ }
+ if (len <= 0)
+ return QString();
+ if (wc[len-1] == 0) // len - 1: we don't want terminator
+ --len;
+
+ //save the new state information
+ if (state) {
+ state->state_data[0] = (char)state_data;
+ state->remainingChars = remainingChars;
+ }
+ QString s((QChar*)wc, len);
+ if (wc != wc_auto)
+ delete [] wc;
+ if (prepend) {
+ return sp+s;
+ }
+ return s;
+}
+
+QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
+{
+ if (!chars || !length)
+ return QString();
+
+ int copyLocation = 0;
+ int extra = 2;
+ if (state && state->remainingChars) {
+ copyLocation = state->remainingChars;
+ extra += copyLocation;
+ }
+ int newLength = length + extra;
+ char *mbcs = new char[newLength];
+ //ensure that we have a NULL terminated string
+ mbcs[newLength-1] = 0;
+ mbcs[newLength-2] = 0;
+ memcpy(&(mbcs[copyLocation]), chars, length);
+ if (copyLocation) {
+ //copy the last character from the state
+ mbcs[0] = (char)state->state_data[0];
+ state->remainingChars = 0;
+ }
+ const char *mb = mbcs;
+#ifndef Q_OS_WINCE
+ const char *next = 0;
+ QString s;
+ while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
+ wchar_t wc[2] ={0};
+ int charlength = next - mb;
+ int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
+ if (len>0) {
+ s.append(QChar(wc[0]));
+ } else {
+ int r = GetLastError();
+ //check if the character being dropped is the last character
+ if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
+ state->remainingChars = 1;
+ state->state_data[0] = (char)*mb;
+ }
+ }
+ mb = next;
+ }
+#else
+ QString s;
+ int size = mbstowcs(NULL, mb, length);
+ if (size < 0) {
+ Q_ASSERT("Error in CE TextCodec");
+ return QString();
+ }
+ wchar_t* ws = new wchar_t[size + 2];
+ ws[size +1] = 0;
+ ws[size] = 0;
+ size = mbstowcs(ws, mb, length);
+ for (int i=0; i< size; i++)
+ s.append(QChar(ws[i]));
+ delete [] ws;
+#endif
+ delete mbcs;
+ return s;
+}
+
+QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const
+{
+ if (!ch)
+ return QByteArray();
+ if (uclen == 0)
+ return QByteArray("");
+ BOOL used_def;
+ QByteArray mb(4096, 0);
+ int len;
+ while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
+ mb.data(), mb.size()-1, 0, &used_def)))
+ {
+ int r = GetLastError();
+ if (r == ERROR_INSUFFICIENT_BUFFER) {
+ mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
+ (const wchar_t*)ch, uclen,
+ 0, 0, 0, &used_def));
+ // and try again...
+ } else {
+#ifndef QT_NO_DEBUG
+ // Fail.
+ qWarning("WideCharToMultiByte: Cannot convert multibyte text (error %d): %s (UTF-8)",
+ r, QString(ch, uclen).toLocal8Bit().data());
+#endif
+ break;
+ }
+ }
+ mb.resize(len);
+ return mb;
+}
+
+
+QByteArray QWindowsLocalCodec::name() const
+{
+ return "System";
+}
+
+int QWindowsLocalCodec::mibEnum() const
+{
+ return 0;
+}
+
+#else
+
+/* locale names mostly copied from XFree86 */
+static const char * const iso8859_2locales[] = {
+ "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
+ "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
+ "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
+ "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };
+
+static const char * const iso8859_3locales[] = {
+ "eo", 0 };
+
+static const char * const iso8859_4locales[] = {
+ "ee", "ee_EE", 0 };
+
+static const char * const iso8859_5locales[] = {
+ "mk", "mk_MK", "sp", "sp_YU", 0 };
+
+static const char * const cp_1251locales[] = {
+ "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };
+
+static const char * const pt_154locales[] = {
+ "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };
+
+static const char * const iso8859_6locales[] = {
+ "ar_AA", "ar_SA", "arabic", 0 };
+
+static const char * const iso8859_7locales[] = {
+ "el", "el_GR", "greek", 0 };
+
+static const char * const iso8859_8locales[] = {
+ "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };
+
+static const char * const iso8859_9locales[] = {
+ "tr", "tr_TR", "turkish", 0 };
+
+static const char * const iso8859_13locales[] = {
+ "lt", "lt_LT", "lv", "lv_LV", 0 };
+
+static const char * const iso8859_15locales[] = {
+ "et", "et_EE",
+ // Euro countries
+ "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE",
+ "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR",
+ "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
+ "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
+ 0 };
+
+static const char * const koi8_ulocales[] = {
+ "uk", "uk_UA", "ru_UA", "ukrainian", 0 };
+
+static const char * const tis_620locales[] = {
+ "th", "th_TH", "thai", 0 };
+
+// static const char * const tcvnlocales[] = {
+// "vi", "vi_VN", 0 };
+
+static bool try_locale_list(const char * const locale[], const QByteArray &lang)
+{
+ int i;
+ for(i=0; locale[i] && lang != locale[i]; i++)
+ ;
+ return locale[i] != 0;
+}
+
+// For the probably_koi8_locales we have to look. the standard says
+// these are 8859-5, but almost all Russian users use KOI8-R and
+// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
+// it thinks ru_RU means.
+
+// If you read the history, it seems that many Russians blame ISO and
+// Perestroika for the confusion.
+//
+// The real bug is that some programs break if the user specifies
+// ru_RU.KOI8-R.
+
+static const char * const probably_koi8_rlocales[] = {
+ "ru", "ru_SU", "ru_RU", "russian", 0 };
+
+static QTextCodec * ru_RU_hack(const char * i) {
+ QTextCodec * ru_RU_codec = 0;
+
+#if !defined(QT_NO_SETLOCALE)
+ QByteArray origlocale(setlocale(LC_CTYPE, i));
+#else
+ QByteArray origlocale(i);
+#endif
+ // unicode koi8r latin5 name
+ // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
+ // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
+ int latin5 = tolower(0xCE);
+ int koi8r = tolower(0xE0);
+ if (koi8r == 0xC0 && latin5 != 0xEE) {
+ ru_RU_codec = QTextCodec::codecForName("KOI8-R");
+ } else if (koi8r != 0xC0 && latin5 == 0xEE) {
+ ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
+ } else {
+ // something else again... let's assume... *throws dice*
+ ru_RU_codec = QTextCodec::codecForName("KOI8-R");
+ qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
+ koi8r, latin5, i);
+ }
+#if !defined(QT_NO_SETLOCALE)
+ setlocale(LC_CTYPE, origlocale);
+#endif
+
+ return ru_RU_codec;
+}
+
+#endif
+
+#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
+static QTextCodec *checkForCodec(const QByteArray &name) {
+ QTextCodec *c = QTextCodec::codecForName(name);
+ if (!c) {
+ const int index = name.indexOf('@');
+ if (index != -1) {
+ c = QTextCodec::codecForName(name.left(index));
+ }
+ }
+ return c;
+}
+#endif
+
+/* the next two functions are implicitely thread safe,
+ as they are only called by setup() which uses a mutex.
+*/
+static void setupLocaleMapper()
+{
+#ifdef Q_OS_SYMBIAN
+ localeMapper = QSymbianTextCodec::localeMapper;
+ if (localeMapper)
+ return;
+#endif
+
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+ localeMapper = QTextCodec::codecForName("System");
+#else
+
+#ifndef QT_NO_ICONV
+ localeMapper = QTextCodec::codecForName("System");
+#endif
+
+#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
+ if (!localeMapper) {
+ char *charset = nl_langinfo (CODESET);
+ if (charset)
+ localeMapper = QTextCodec::codecForName(charset);
+ }
+#endif
+
+ if (!localeMapper) {
+ // Very poorly defined and followed standards causes lots of
+ // code to try to get all the cases... This logic is
+ // duplicated in QIconvCodec, so if you change it here, change
+ // it there too.
+
+ // Try to determine locale codeset from locale name assigned to
+ // LC_CTYPE category.
+
+ // First part is getting that locale name. First try setlocale() which
+ // definitely knows it, but since we cannot fully trust it, get ready
+ // to fall back to environment variables.
+#if !defined(QT_NO_SETLOCALE)
+ const QByteArray ctype = setlocale(LC_CTYPE, 0);
+#else
+ const QByteArray ctype;
+#endif
+
+ // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
+ // environment variables.
+ QByteArray lang = qgetenv("LC_ALL");
+ if (lang.isEmpty() || lang == "C") {
+ lang = qgetenv("LC_CTYPE");
+ }
+ if (lang.isEmpty() || lang == "C") {
+ lang = qgetenv("LANG");
+ }
+
+ // Now try these in order:
+ // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+ // 2. CODESET from lang if it contains a .CODESET part
+ // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+ // 4. locale (ditto)
+ // 5. check for "@euro"
+ // 6. guess locale from ctype unless ctype is "C"
+ // 7. guess locale from lang
+
+ // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+ int indexOfDot = ctype.indexOf('.');
+ if (indexOfDot != -1)
+ localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) );
+
+ // 2. CODESET from lang if it contains a .CODESET part
+ if (!localeMapper) {
+ indexOfDot = lang.indexOf('.');
+ if (indexOfDot != -1)
+ localeMapper = checkForCodec( lang.mid(indexOfDot + 1) );
+ }
+
+ // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+ if (!localeMapper && !ctype.isEmpty() && ctype != "C")
+ localeMapper = checkForCodec(ctype);
+
+ // 4. locale (ditto)
+ if (!localeMapper && !lang.isEmpty())
+ localeMapper = checkForCodec(lang);
+
+ // 5. "@euro"
+ if ((!localeMapper && ctype.contains("@euro")) || lang.contains("@euro"))
+ localeMapper = checkForCodec("ISO 8859-15");
+
+ // 6. guess locale from ctype unless ctype is "C"
+ // 7. guess locale from lang
+ const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype;
+
+ // Now do the guessing.
+ if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) {
+ if (try_locale_list(iso8859_15locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-15");
+ else if (try_locale_list(iso8859_2locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-2");
+ else if (try_locale_list(iso8859_3locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-3");
+ else if (try_locale_list(iso8859_4locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-4");
+ else if (try_locale_list(iso8859_5locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-5");
+ else if (try_locale_list(iso8859_6locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-6");
+ else if (try_locale_list(iso8859_7locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-7");
+ else if (try_locale_list(iso8859_8locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
+ else if (try_locale_list(iso8859_9locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-9");
+ else if (try_locale_list(iso8859_13locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-13");
+ else if (try_locale_list(tis_620locales, lang))
+ localeMapper = QTextCodec::codecForName("ISO 8859-11");
+ else if (try_locale_list(koi8_ulocales, lang))
+ localeMapper = QTextCodec::codecForName("KOI8-U");
+ else if (try_locale_list(cp_1251locales, lang))
+ localeMapper = QTextCodec::codecForName("CP 1251");
+ else if (try_locale_list(pt_154locales, lang))
+ localeMapper = QTextCodec::codecForName("PT 154");
+ else if (try_locale_list(probably_koi8_rlocales, lang))
+ localeMapper = ru_RU_hack(lang);
+ }
+
+ }
+
+ // If everything failed, we default to 8859-1
+ // We could perhaps default to 8859-15.
+ if (!localeMapper)
+ localeMapper = QTextCodec::codecForName("ISO 8859-1");
+#endif
+}
+
+#ifndef QT_NO_THREAD
+Q_GLOBAL_STATIC_WITH_ARGS(QMutex, textCodecsMutex, (QMutex::Recursive));
+#endif
+
+// textCodecsMutex need to be locked to enter this function
+static void setup()
+{
+ if (all)
+ return;
+
+#ifdef Q_OS_SYMBIAN
+ // If we don't have a trap handler, we're outside of the main() function,
+ // ie. in global constructors or destructors. Don't create codecs in this
+ // case as it would lead to crashes because of a missing cleanup stack on Symbian
+ if (User::TrapHandler() == NULL)
+ return;
+#endif
+
+#ifdef Q_DEBUG_TEXTCODEC
+ if (destroying_is_ok)
+ qWarning("QTextCodec: Creating new codec during codec cleanup");
+#endif
+ all = new QList<QTextCodec*>;
+ // create the cleanup object to cleanup all codecs on exit
+ (void) createQTextCodecCleanup();
+
+#ifndef QT_NO_CODECS
+ (void)new QTsciiCodec;
+ for (int i = 0; i < 9; ++i)
+ (void)new QIsciiCodec(i);
+
+ for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
+ (void)new QSimpleTextCodec(i);
+
+#ifdef Q_OS_SYMBIAN
+ localeMapper = QSymbianTextCodec::init();
+#endif
+
+# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
+ // no font codecs when bootstrapping
+ (void)new QFontLaoCodec;
+# if defined(QT_NO_ICONV)
+ // no iconv(3) support, must build all codecs into the library
+ (void)new QFontGb2312Codec;
+ (void)new QFontGbkCodec;
+ (void)new QFontGb18030_0Codec;
+ (void)new QFontJis0208Codec;
+ (void)new QFontJis0201Codec;
+ (void)new QFontKsc5601Codec;
+ (void)new QFontBig5hkscsCodec;
+ (void)new QFontBig5Codec;
+# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
+# endif // Q_WS_X11
+
+
+#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
+# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+ // no asian codecs when bootstrapping, sorry
+ (void)new QGb18030Codec;
+ (void)new QGbkCodec;
+ (void)new QGb2312Codec;
+ (void)new QEucJpCodec;
+ (void)new QJisCodec;
+ (void)new QSjisCodec;
+ (void)new QEucKrCodec;
+ (void)new QCP949Codec;
+ (void)new QBig5Codec;
+ (void)new QBig5hkscsCodec;
+# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
+#endif //Q_OS_SYMBIAN
+#endif // QT_NO_CODECS
+
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+ (void) new QWindowsLocalCodec;
+#endif // Q_OS_WIN32
+
+ (void)new QUtf16Codec;
+ (void)new QUtf16BECodec;
+ (void)new QUtf16LECodec;
+ (void)new QUtf32Codec;
+ (void)new QUtf32BECodec;
+ (void)new QUtf32LECodec;
+#ifndef Q_OS_SYMBIAN
+ (void)new QLatin15Codec;
+#endif
+ (void)new QLatin1Codec;
+ (void)new QUtf8Codec;
+
+#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
+#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+ // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
+ (void) new QIconvCodec();
+#endif
+#endif
+
+ if (!localeMapper)
+ setupLocaleMapper();
+}
+
+/*!
+ \enum QTextCodec::ConversionFlag
+
+ \value DefaultConversion No flag is set.
+ \value ConvertInvalidToNull If this flag is set, each invalid input
+ character is output as a null character.
+ \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
+
+ \omitvalue FreeFunction
+*/
+
+/*!
+ \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
+
+ Constructs a ConverterState object initialized with the given \a flags.
+*/
+
+/*!
+ Destroys the ConverterState object.
+*/
+QTextCodec::ConverterState::~ConverterState()
+{
+ if (flags & FreeFunction)
+ (QTextCodecUnalignedPointer::decode(state_data))(this);
+ else if (d)
+ qFree(d);
+}
+
+/*!
+ \class QTextCodec
+ \brief The QTextCodec class provides conversions between text encodings.
+ \reentrant
+ \ingroup i18n
+
+ Qt uses Unicode to store, draw and manipulate strings. In many
+ situations you may wish to deal with data that uses a different
+ encoding. For example, most Japanese documents are still stored
+ in Shift-JIS or ISO 2022-JP, while Russian users often have their
+ documents in KOI8-R or Windows-1251.
+
+ Qt provides a set of QTextCodec classes to help with converting
+ non-Unicode formats to and from Unicode. You can also create your
+ own codec classes.
+
+ The supported encodings are:
+
+ \list
+ \o Apple Roman
+ \o \l{Big5 Text Codec}{Big5}
+ \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
+ \o CP949
+ \o \l{EUC-JP Text Codec}{EUC-JP}
+ \o \l{EUC-KR Text Codec}{EUC-KR}
+ \o \l{GBK Text Codec}{GB18030-0}
+ \o IBM 850
+ \o IBM 866
+ \o IBM 874
+ \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
+ \o ISO 8859-1 to 10
+ \o ISO 8859-13 to 16
+ \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
+ \o JIS X 0201
+ \o JIS X 0208
+ \o KOI8-R
+ \o KOI8-U
+ \o MuleLao-1
+ \o ROMAN8
+ \o \l{Shift-JIS Text Codec}{Shift-JIS}
+ \o TIS-620
+ \o \l{TSCII Text Codec}{TSCII}
+ \o UTF-8
+ \o UTF-16
+ \o UTF-16BE
+ \o UTF-16LE
+ \o UTF-32
+ \o UTF-32BE
+ \o UTF-32LE
+ \o Windows-1250 to 1258
+ \o WINSAMI2
+ \endlist
+
+ QTextCodecs can be used as follows to convert some locally encoded
+ string to Unicode. Suppose you have some string encoded in Russian
+ KOI8-R encoding, and want to convert it to Unicode. The simple way
+ to do it is like this:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
+
+ After this, \c string holds the text converted to Unicode.
+ Converting a string from Unicode to the local encoding is just as
+ easy:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
+
+ To read or write files in various encodings, use QTextStream and
+ its \l{QTextStream::setCodec()}{setCodec()} function. See the
+ \l{tools/codecs}{Codecs} example for an application of QTextCodec
+ to file I/O.
+
+ Some care must be taken when trying to convert the data in chunks,
+ for example, when receiving it over a network. In such cases it is
+ possible that a multi-byte character will be split over two
+ chunks. At best this might result in the loss of a character and
+ at worst cause the entire conversion to fail.
+
+ The approach to use in these situations is to create a QTextDecoder
+ object for the codec and use this QTextDecoder for the whole
+ decoding process, as shown below:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
+
+ The QTextDecoder object maintains state between chunks and therefore
+ works correctly even if a multi-byte character is split between
+ chunks.
+
+ \section1 Creating Your Own Codec Class
+
+ Support for new text encodings can be added to Qt by creating
+ QTextCodec subclasses.
+
+ The pure virtual functions describe the encoder to the system and
+ the coder is used as required in the different text file formats
+ supported by QTextStream, and under X11, for the locale-specific
+ character input and output.
+
+ To add support for another encoding to Qt, make a subclass of
+ QTextCodec and implement the functions listed in the table below.
+
+ \table
+ \header \o Function \o Description
+
+ \row \o name()
+ \o Returns the official name for the encoding. If the
+ encoding is listed in the
+ \l{IANA character-sets encoding file}, the name
+ should be the preferred MIME name for the encoding.
+
+ \row \o aliases()
+ \o Returns a list of alternative names for the encoding.
+ QTextCodec provides a default implementation that returns
+ an empty list. For example, "ISO-8859-1" has "latin1",
+ "CP819", "IBM819", and "iso-ir-100" as aliases.
+
+ \row \o mibEnum()
+ \o Return the MIB enum for the encoding if it is listed in
+ the \l{IANA character-sets encoding file}.
+
+ \row \o convertToUnicode()
+ \o Converts an 8-bit character string to Unicode.
+
+ \row \o convertFromUnicode()
+ \o Converts a Unicode string to an 8-bit character string.
+ \endtable
+
+ You may find it more convenient to make your codec class
+ available as a plugin; see \l{How to Create Qt Plugins} for
+ details.
+
+ \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
+*/
+
+/*!
+ Constructs a QTextCodec, and gives it the highest precedence. The
+ QTextCodec should always be constructed on the heap (i.e. with \c
+ new). Qt takes ownership and will delete it when the application
+ terminates.
+*/
+QTextCodec::QTextCodec()
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+ all->prepend(this);
+}
+
+
+/*!
+ \nonreentrant
+
+ Destroys the QTextCodec. Note that you should not delete codecs
+ yourself: once created they become Qt's responsibility.
+*/
+QTextCodec::~QTextCodec()
+{
+#ifdef Q_DEBUG_TEXTCODEC
+ if (!destroying_is_ok)
+ qWarning("QTextCodec::~QTextCodec: Called by application");
+#endif
+ if (all) {
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ all->removeAll(this);
+ QTextCodecCache *cache = qTextCodecCache();
+ if (cache)
+ cache->clear();
+ }
+}
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForName(const char *name)
+
+ Searches all installed QTextCodec objects and returns the one
+ which best matches \a name; the match is case-insensitive. Returns
+ 0 if no codec matching the name \a name could be found.
+*/
+
+/*!
+ Searches all installed QTextCodec objects and returns the one
+ which best matches \a name; the match is case-insensitive. Returns
+ 0 if no codec matching the name \a name could be found.
+*/
+QTextCodec *QTextCodec::codecForName(const QByteArray &name)
+{
+ if (name.isEmpty())
+ return 0;
+
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+
+ if (!validCodecs())
+ return 0;
+
+ QTextCodecCache *cache = qTextCodecCache();
+ QTextCodec *codec;
+ if (cache) {
+ codec = cache->value(name);
+ if (codec)
+ return codec;
+ }
+
+ for (int i = 0; i < all->size(); ++i) {
+ QTextCodec *cursor = all->at(i);
+ if (nameMatch(cursor->name(), name)) {
+ if (cache)
+ cache->insert(name, cursor);
+ return cursor;
+ }
+ QList<QByteArray> aliases = cursor->aliases();
+ for (int y = 0; y < aliases.size(); ++y)
+ if (nameMatch(aliases.at(y), name)) {
+ if (cache)
+ cache->insert(name, cursor);
+ return cursor;
+ }
+ }
+
+ codec = createForName(name);
+ if (codec && cache)
+ cache->insert(name, codec);
+ return codec;
+}
+
+
+/*!
+ Returns the QTextCodec which matches the \link
+ QTextCodec::mibEnum() MIBenum\endlink \a mib.
+*/
+QTextCodec* QTextCodec::codecForMib(int mib)
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+
+ if (!validCodecs())
+ return 0;
+
+ QByteArray key = "MIB: " + QByteArray::number(mib);
+ QTextCodecCache *cache = qTextCodecCache();
+ QTextCodec *codec;
+ if (cache) {
+ codec = cache->value(key);
+ if (codec)
+ return codec;
+ }
+
+ QList<QTextCodec*>::ConstIterator i;
+ for (int i = 0; i < all->size(); ++i) {
+ QTextCodec *cursor = all->at(i);
+ if (cursor->mibEnum() == mib) {
+ if (cache)
+ cache->insert(key, cursor);
+ return cursor;
+ }
+ }
+
+ codec = createForMib(mib);
+
+ // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
+ // this correctly for compatibility.
+ if (!codec && mib == 1000)
+ return codecForMib(1015);
+
+ if (codec && cache)
+ cache->insert(key, codec);
+ return codec;
+}
+
+/*!
+ Returns the list of all available codecs, by name. Call
+ QTextCodec::codecForName() to obtain the QTextCodec for the name.
+
+ The list may contain many mentions of the same codec
+ if the codec has aliases.
+
+ \sa availableMibs(), name(), aliases()
+*/
+QList<QByteArray> QTextCodec::availableCodecs()
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+
+ QList<QByteArray> codecs;
+
+ if (!validCodecs())
+ return codecs;
+
+ for (int i = 0; i < all->size(); ++i) {
+ codecs += all->at(i)->name();
+ codecs += all->at(i)->aliases();
+ }
+
+#ifndef QT_NO_THREAD
+ locker.unlock();
+#endif
+
+#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
+ QFactoryLoader *l = loader();
+ QStringList keys = l->keys();
+ for (int i = 0; i < keys.size(); ++i) {
+ if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
+ QByteArray name = keys.at(i).toLatin1();
+ if (!codecs.contains(name))
+ codecs += name;
+ }
+ }
+#endif
+
+ return codecs;
+}
+
+/*!
+ Returns the list of MIBs for all available codecs. Call
+ QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
+
+ \sa availableCodecs(), mibEnum()
+*/
+QList<int> QTextCodec::availableMibs()
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+
+ QList<int> codecs;
+
+ if (!validCodecs())
+ return codecs;
+
+ for (int i = 0; i < all->size(); ++i)
+ codecs += all->at(i)->mibEnum();
+
+#ifndef QT_NO_THREAD
+ locker.unlock();
+#endif
+
+#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
+ QFactoryLoader *l = loader();
+ QStringList keys = l->keys();
+ for (int i = 0; i < keys.size(); ++i) {
+ if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
+ int mib = keys.at(i).mid(5).toInt();
+ if (!codecs.contains(mib))
+ codecs += mib;
+ }
+ }
+#endif
+
+ return codecs;
+}
+
+/*!
+ Set the codec to \a c; this will be returned by
+ codecForLocale(). If \a c is a null pointer, the codec is reset to
+ the default.
+
+ This might be needed for some applications that want to use their
+ own mechanism for setting the locale.
+
+ \sa codecForLocale()
+*/
+void QTextCodec::setCodecForLocale(QTextCodec *c)
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ localeMapper = c;
+ if (!localeMapper)
+ setupLocaleMapper();
+}
+
+/*!
+ Returns a pointer to the codec most suitable for this locale.
+
+ On Windows, the codec will be based on a system locale. On Unix
+ systems, starting with Qt 4.2, the codec will be using the \e
+ iconv library. Note that in both cases the codec's name will be
+ "System".
+*/
+
+QTextCodec* QTextCodec::codecForLocale()
+{
+ if (!validCodecs())
+ return 0;
+
+ if (localeMapper)
+ return localeMapper;
+
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
+ setup();
+
+ return localeMapper;
+}
+
+
+/*!
+ \fn QByteArray QTextCodec::name() const
+
+ QTextCodec subclasses must reimplement this function. It returns
+ the name of the encoding supported by the subclass.
+
+ If the codec is registered as a character set in the
+ \l{IANA character-sets encoding file} this method should
+ return the preferred mime name for the codec if defined,
+ otherwise its name.
+*/
+
+/*!
+ \fn int QTextCodec::mibEnum() const
+
+ Subclasses of QTextCodec must reimplement this function. It
+ returns the MIBenum (see \l{IANA character-sets encoding file}
+ for more information). It is important that each QTextCodec
+ subclass returns the correct unique value for this function.
+*/
+
+/*!
+ Subclasses can return a number of aliases for the codec in question.
+
+ Standard aliases for codecs can be found in the
+ \l{IANA character-sets encoding file}.
+*/
+QList<QByteArray> QTextCodec::aliases() const
+{
+ return QList<QByteArray>();
+}
+
+/*!
+ \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
+ ConverterState *state) const
+
+ QTextCodec subclasses must reimplement this function.
+
+ Converts the first \a len characters of \a chars from the
+ encoding of the subclass to Unicode, and returns the result in a
+ QString.
+
+ \a state can be 0, in which case the conversion is stateless and
+ default conversion rules should be used. If state is not 0, the
+ codec should save the state after the conversion in \a state, and
+ adjust the remainingChars and invalidChars members of the struct.
+*/
+
+/*!
+ \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
+ ConverterState *state) const
+
+ QTextCodec subclasses must reimplement this function.
+
+ Converts the first \a number of characters from the \a input array
+ from Unicode to the encoding of the subclass, and returns the result
+ in a QByteArray.
+
+ \a state can be 0 in which case the conversion is stateless and
+ default conversion rules should be used. If state is not 0, the
+ codec should save the state after the conversion in \a state, and
+ adjust the remainingChars and invalidChars members of the struct.
+*/
+
+/*!
+ Creates a QTextDecoder which stores enough state to decode chunks
+ of \c{char *} data to create chunks of Unicode data.
+
+ The caller is responsible for deleting the returned object.
+*/
+QTextDecoder* QTextCodec::makeDecoder() const
+{
+ return new QTextDecoder(this);
+}
+
+/*!
+ Creates a QTextDecoder with a specified \a flags to decode chunks
+ of \c{char *} data to create chunks of Unicode data.
+
+ The caller is responsible for deleting the returned object.
+
+ \since 4.7
+*/
+QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
+{
+ return new QTextDecoder(this, flags);
+}
+
+
+/*!
+ Creates a QTextEncoder which stores enough state to encode chunks
+ of Unicode data as \c{char *} data.
+
+ The caller is responsible for deleting the returned object.
+*/
+QTextEncoder* QTextCodec::makeEncoder() const
+{
+ return new QTextEncoder(this);
+}
+
+/*!
+ Creates a QTextEncoder with a specified \a flags to encode chunks
+ of Unicode data as \c{char *} data.
+
+ The caller is responsible for deleting the returned object.
+
+ \since 4.7
+*/
+QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
+{
+ return new QTextEncoder(this, flags);
+}
+
+/*!
+ \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
+ ConverterState *state) const
+
+ Converts the first \a number of characters from the \a input array
+ from Unicode to the encoding of this codec, and returns the result
+ in a QByteArray.
+
+ The \a state of the convertor used is updated.
+*/
+
+/*!
+ Converts \a str from Unicode to the encoding of this codec, and
+ returns the result in a QByteArray.
+*/
+QByteArray QTextCodec::fromUnicode(const QString& str) const
+{
+ return convertFromUnicode(str.constData(), str.length(), 0);
+}
+
+/*!
+ \fn QString QTextCodec::toUnicode(const char *input, int size,
+ ConverterState *state) const
+
+ Converts the first \a size characters from the \a input from the
+ encoding of this codec to Unicode, and returns the result in a
+ QString.
+
+ The \a state of the convertor used is updated.
+*/
+
+/*!
+ Converts \a a from the encoding of this codec to Unicode, and
+ returns the result in a QString.
+*/
+QString QTextCodec::toUnicode(const QByteArray& a) const
+{
+ return convertToUnicode(a.constData(), a.length(), 0);
+}
+
+/*!
+ Returns true if the Unicode character \a ch can be fully encoded
+ with this codec; otherwise returns false.
+*/
+bool QTextCodec::canEncode(QChar ch) const
+{
+ ConverterState state;
+ state.flags = ConvertInvalidToNull;
+ convertFromUnicode(&ch, 1, &state);
+ return (state.invalidChars == 0);
+}
+
+/*!
+ \overload
+
+ \a s contains the string being tested for encode-ability.
+*/
+bool QTextCodec::canEncode(const QString& s) const
+{
+ ConverterState state;
+ state.flags = ConvertInvalidToNull;
+ convertFromUnicode(s.constData(), s.length(), &state);
+ return (state.invalidChars == 0);
+}
+
+#ifdef QT3_SUPPORT
+/*!
+ Returns a string representing the current language and
+ sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
+
+ \sa QLocale
+*/
+const char *QTextCodec::locale()
+{
+ static char locale[6];
+ QByteArray l = QLocale::system().name().toLatin1();
+ int len = qMin(l.length(), 5);
+ memcpy(locale, l.constData(), len);
+ locale[len] = '\0';
+
+ return locale;
+}
+
+/*!
+ \overload
+*/
+
+QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
+{
+ QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
+ lenInOut = result.length();
+ return result;
+}
+
+/*!
+ \overload
+
+ \a a contains the source characters; \a len contains the number of
+ characters in \a a to use.
+*/
+QString QTextCodec::toUnicode(const QByteArray& a, int len) const
+{
+ len = qMin(a.size(), len);
+ return convertToUnicode(a.constData(), len, 0);
+}
+#endif
+
+/*!
+ \overload
+
+ \a chars contains the source characters.
+*/
+QString QTextCodec::toUnicode(const char *chars) const
+{
+ int len = qstrlen(chars);
+ return convertToUnicode(chars, len, 0);
+}
+
+
+/*!
+ \class QTextEncoder
+ \brief The QTextEncoder class provides a state-based encoder.
+ \reentrant
+ \ingroup i18n
+
+ A text encoder converts text from Unicode into an encoded text format
+ using a specific codec.
+
+ The encoder converts Unicode into another format, remembering any
+ state that is required between calls.
+
+ \sa QTextCodec::makeEncoder(), QTextDecoder
+*/
+
+/*!
+ \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
+
+ Constructs a text encoder for the given \a codec.
+*/
+
+/*!
+ Constructs a text encoder for the given \a codec and conversion \a flags.
+
+ \since 4.7
+*/
+QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
+ : c(codec), state()
+{
+ state.flags = flags;
+}
+
+/*!
+ Destroys the encoder.
+*/
+QTextEncoder::~QTextEncoder()
+{
+}
+
+/*! \internal
+ \since 4.5
+ Determines whether the eecoder encountered a failure while decoding the input. If
+ an error was encountered, the produced result is undefined, and gets converted as according
+ to the conversion flags.
+ */
+bool QTextEncoder::hasFailure() const
+{
+ return state.invalidChars != 0;
+}
+
+/*!
+ Converts the Unicode string \a str into an encoded QByteArray.
+*/
+QByteArray QTextEncoder::fromUnicode(const QString& str)
+{
+ QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
+ return result;
+}
+
+/*!
+ \overload
+
+ Converts \a len characters (not bytes) from \a uc, and returns the
+ result in a QByteArray.
+*/
+QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
+{
+ QByteArray result = c->fromUnicode(uc, len, &state);
+ return result;
+}
+
+#ifdef QT3_SUPPORT
+/*!
+ \overload
+
+ Converts \a lenInOut characters (not bytes) from \a uc, and returns the
+ result in a QByteArray. The number of characters read is returned in
+ the \a lenInOut parameter.
+*/
+QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
+{
+ QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
+ lenInOut = result.length();
+ return result;
+}
+#endif
+
+/*!
+ \class QTextDecoder
+ \brief The QTextDecoder class provides a state-based decoder.
+ \reentrant
+ \ingroup i18n
+
+ A text decoder converts text from an encoded text format into Unicode
+ using a specific codec.
+
+ The decoder converts text in this format into Unicode, remembering any
+ state that is required between calls.
+
+ \sa QTextCodec::makeDecoder(), QTextEncoder
+*/
+
+/*!
+ \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
+
+ Constructs a text decoder for the given \a codec.
+*/
+
+/*!
+ Constructs a text decoder for the given \a codec and conversion \a flags.
+
+ \since 4.7
+*/
+
+QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
+ : c(codec), state()
+{
+ state.flags = flags;
+}
+
+/*!
+ Destroys the decoder.
+*/
+QTextDecoder::~QTextDecoder()
+{
+}
+
+/*!
+ \fn QString QTextDecoder::toUnicode(const char *chars, int len)
+
+ Converts the first \a len bytes in \a chars to Unicode, returning
+ the result.
+
+ If not all characters are used (e.g. if only part of a multi-byte
+ encoding is at the end of the characters), the decoder remembers
+ enough state to continue with the next call to this function.
+*/
+QString QTextDecoder::toUnicode(const char *chars, int len)
+{
+ return c->toUnicode(chars, len, &state);
+}
+
+
+/*! \overload
+
+ The converted string is returned in \a target.
+ */
+void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
+{
+ Q_ASSERT(target);
+ switch (c->mibEnum()) {
+ case 106: // utf8
+ static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
+ break;
+ case 4: { // latin1
+ target->resize(len);
+ ushort *data = (ushort*)target->data();
+ for (int i = len; i >=0; --i)
+ data[i] = (uchar) chars[i];
+ } break;
+ default:
+ *target = c->toUnicode(chars, len, &state);
+ }
+}
+
+
+/*!
+ \overload
+
+ Converts the bytes in the byte array specified by \a ba to Unicode
+ and returns the result.
+*/
+QString QTextDecoder::toUnicode(const QByteArray &ba)
+{
+ return c->toUnicode(ba.constData(), ba.length(), &state);
+}
+
+
+/*!
+ \fn QTextCodec* QTextCodec::codecForTr()
+
+ Returns the codec used by QObject::tr() on its argument. If this
+ function returns 0 (the default), tr() assumes Latin-1.
+
+ \sa setCodecForTr()
+*/
+
+/*!
+ \fn void QTextCodec::setCodecForTr(QTextCodec *c)
+ \nonreentrant
+
+ Sets the codec used by QObject::tr() on its argument to \a c. If
+ \a c is 0 (the default), tr() assumes Latin-1.
+
+ If the literal quoted text in the program is not in the Latin-1
+ encoding, this function can be used to set the appropriate
+ encoding. For example, software developed by Korean programmers
+ might use eucKR for all the text in the program, in which case the
+ main() function might look like this:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
+
+ Note that this is not the way to select the encoding that the \e
+ user has chosen. For example, to convert an application containing
+ literal English strings to Korean, all that is needed is for the
+ English strings to be passed through tr() and for translation
+ files to be loaded. For details of internationalization, see
+ \l{Internationalization with Qt}.
+
+ \sa codecForTr(), setCodecForCStrings()
+*/
+
+
+/*!
+ \fn QTextCodec* QTextCodec::codecForCStrings()
+
+ Returns the codec used by QString to convert to and from \c{const
+ char *} and QByteArrays. If this function returns 0 (the default),
+ QString assumes Latin-1.
+
+ \sa setCodecForCStrings()
+*/
+
+/*!
+ \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
+ \nonreentrant
+
+ Sets the codec used by QString to convert to and from \c{const
+ char *} and QByteArrays. If the \a codec is 0 (the default),
+ QString assumes Latin-1.
+
+ \warning Some codecs do not preserve the characters in the ASCII
+ range (0x00 to 0x7F). For example, the Japanese Shift-JIS
+ encoding maps the backslash character (0x5A) to the Yen
+ character. To avoid undesirable side-effects, we recommend
+ avoiding such codecs with setCodecsForCString().
+
+ \sa codecForCStrings(), setCodecForTr()
+*/
+
+/*!
+ \since 4.4
+
+ Tries to detect the encoding of the provided snippet of HTML in
+ the given byte array, \a ba, by checking the BOM (Byte Order Mark)
+ and the content-type meta header and returns a QTextCodec instance
+ that is capable of decoding the html to unicode. If the codec
+ cannot be detected from the content provided, \a defaultCodec is
+ returned.
+
+ \sa codecForUtfText()
+*/
+QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
+{
+ // determine charset
+ int pos;
+ QTextCodec *c = 0;
+
+ c = QTextCodec::codecForUtfText(ba, c);
+ if (!c) {
+ QByteArray header = ba.left(512).toLower();
+ if ((pos = header.indexOf("http-equiv=")) != -1) {
+ if ((pos = header.lastIndexOf("meta ", pos)) != -1) {
+ pos = header.indexOf("charset=", pos) + int(strlen("charset="));
+ if (pos != -1) {
+ int pos2 = header.indexOf('\"', pos+1);
+ QByteArray cs = header.mid(pos, pos2-pos);
+ // qDebug("found charset: %s", cs.data());
+ c = QTextCodec::codecForName(cs);
+ }
+ }
+ }
+ }
+ if (!c)
+ c = defaultCodec;
+
+ return c;
+}
+
+/*!
+ \overload
+
+ Tries to detect the encoding of the provided snippet of HTML in
+ the given byte array, \a ba, by checking the BOM (Byte Order Mark)
+ and the content-type meta header and returns a QTextCodec instance
+ that is capable of decoding the html to unicode. If the codec cannot
+ be detected, this overload returns a Latin-1 QTextCodec.
+*/
+QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
+{
+ return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
+}
+
+/*!
+ \since 4.6
+
+ Tries to detect the encoding of the provided snippet \a ba by
+ using the BOM (Byte Order Mark) and returns a QTextCodec instance
+ that is capable of decoding the text to unicode. If the codec
+ cannot be detected from the content provided, \a defaultCodec is
+ returned.
+
+ \sa codecForHtml()
+*/
+QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
+{
+ const int arraySize = ba.size();
+
+ if (arraySize > 3) {
+ if ((uchar)ba[0] == 0x00
+ && (uchar)ba[1] == 0x00
+ && (uchar)ba[2] == 0xFE
+ && (uchar)ba[3] == 0xFF)
+ return QTextCodec::codecForMib(1018); // utf-32 be
+ else if ((uchar)ba[0] == 0xFF
+ && (uchar)ba[1] == 0xFE
+ && (uchar)ba[2] == 0x00
+ && (uchar)ba[3] == 0x00)
+ return QTextCodec::codecForMib(1019); // utf-32 le
+ }
+
+ if (arraySize < 2)
+ return defaultCodec;
+ if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
+ return QTextCodec::codecForMib(1013); // utf16 be
+ else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
+ return QTextCodec::codecForMib(1014); // utf16 le
+
+ if (arraySize < 3)
+ return defaultCodec;
+ if ((uchar)ba[0] == 0xef
+ && (uchar)ba[1] == 0xbb
+ && (uchar)ba[2] == 0xbf)
+ return QTextCodec::codecForMib(106); // utf-8
+
+ return defaultCodec;
+}
+
+/*!
+ \overload
+
+ Tries to detect the encoding of the provided snippet \a ba by
+ using the BOM (Byte Order Mark) and returns a QTextCodec instance
+ that is capable of decoding the text to unicode. If the codec
+ cannot be detected, this overload returns a Latin-1 QTextCodec.
+
+ \sa codecForHtml()
+*/
+QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
+{
+ return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
+}
+
+
+/*! \internal
+ \since 4.3
+ Determines whether the decoder encountered a failure while decoding the input. If
+ an error was encountered, the produced result is undefined, and gets converted as according
+ to the conversion flags.
+ */
+bool QTextDecoder::hasFailure() const
+{
+ return state.invalidChars != 0;
+}
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
+
+ This functionality is no longer provided by Qt. This
+ compatibility function always returns a null pointer.
+*/
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
+
+ Use the codecForName(const QByteArray &) overload instead.
+*/
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForIndex(int i)
+
+ Use availableCodecs() or availableMibs() instead and iterate
+ through the resulting list.
+*/
+
+
+/*!
+ \fn QByteArray QTextCodec::mimeName() const
+
+ Use name() instead.
+*/
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_TEXTCODEC