summary refs log tree commit diff stats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
author Lars Knoll <lars.knoll@nokia.com> 2009-03-23 10:18:55 +0100
committer Simon Hausmann <simon.hausmann@nokia.com> 2009-03-23 10:18:55 +0100
commit e5fcad302d86d316390c6b0f62759a067313e8a9 (patch)
tree c2afbf6f1066b6ce261f14341cf6d310e5595bc1 /src/corelib/codecs
Long live Qt 4.5!
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- src/corelib/codecs/codecs.pri 53
-rw-r--r-- src/corelib/codecs/qfontlaocodec.cpp 124
-rw-r--r-- src/corelib/codecs/qfontlaocodec_p.h 78
-rw-r--r-- src/corelib/codecs/qiconvcodec.cpp 536
-rw-r--r-- src/corelib/codecs/qiconvcodec_p.h 104
-rw-r--r-- src/corelib/codecs/qisciicodec.cpp 288
-rw-r--r-- src/corelib/codecs/qisciicodec_p.h 81
-rw-r--r-- src/corelib/codecs/qlatincodec.cpp 246
-rw-r--r-- src/corelib/codecs/qlatincodec_p.h 94
-rw-r--r-- src/corelib/codecs/qsimplecodec.cpp 733
-rw-r--r-- src/corelib/codecs/qsimplecodec_p.h 87
-rw-r--r-- src/corelib/codecs/qtextcodec.cpp 1598
-rw-r--r-- src/corelib/codecs/qtextcodec.h 189
-rw-r--r-- src/corelib/codecs/qtextcodec_p.h 84
-rw-r--r-- src/corelib/codecs/qtextcodecplugin.cpp 161
-rw-r--r-- src/corelib/codecs/qtextcodecplugin.h 96
-rw-r--r-- src/corelib/codecs/qtsciicodec.cpp 500
-rw-r--r-- src/corelib/codecs/qtsciicodec_p.h 106
-rw-r--r-- src/corelib/codecs/qutfcodec.cpp 634
-rw-r--r-- src/corelib/codecs/qutfcodec_p.h 155
20 files changed, 5947 insertions, 0 deletions
diff --git a/src/corelib/codecs/codecs.pri b/src/corelib/codecs/codecs.pri
new file mode 100644
index 0000000000..2e247e5c5a
--- /dev/null
+++ b/src/corelib/codecs/codecs.pri
@@ -0,0 +1,53 @@
+# Qt core library codecs module
+
+# Headers of the codecs that are built unconditionally
+HEADERS += \
+ codecs/qisciicodec_p.h \
+ codecs/qlatincodec_p.h \
+ codecs/qsimplecodec_p.h \
+ codecs/qtextcodec.h \
+ codecs/qtsciicodec_p.h \
+ codecs/qutfcodec_p.h \
+ codecs/qtextcodecplugin.h
+
+# Sources of the codecs that are built unconditionally
+SOURCES += \
+ codecs/qisciicodec.cpp \
+ codecs/qlatincodec.cpp \
+ codecs/qsimplecodec.cpp \
+ codecs/qtextcodec.cpp \
+ codecs/qtsciicodec.cpp \
+ codecs/qutfcodec.cpp \
+ codecs/qtextcodecplugin.cpp
+
+unix {
+ # the Lao font codec is only compiled on unix
+ SOURCES += codecs/qfontlaocodec.cpp
+
+ contains(QT_CONFIG,iconv) {
+ # iconv is configured: build the iconv-based codec
+ HEADERS += codecs/qiconvcodec_p.h
+ SOURCES += codecs/qiconvcodec.cpp
+ } else:contains(QT_CONFIG,gnu-libiconv) {
+ # GNU libiconv is configured: build the same codec, define GNU_LIBICONV
+ # for it and link against libiconv on non-mac platforms
+ HEADERS += codecs/qiconvcodec_p.h
+ SOURCES += codecs/qiconvcodec.cpp
+
+ DEFINES += GNU_LIBICONV
+ !mac:LIBS *= -liconv
+ } else {
+ # no iconv, so we put all plugins in the library
+ HEADERS += \
+ ../plugins/codecs/cn/qgb18030codec.h \
+ ../plugins/codecs/jp/qeucjpcodec.h \
+ ../plugins/codecs/jp/qjiscodec.h \
+ ../plugins/codecs/jp/qsjiscodec.h \
+ ../plugins/codecs/kr/qeuckrcodec.h \
+ ../plugins/codecs/tw/qbig5codec.h \
+ ../plugins/codecs/jp/qfontjpcodec.h
+ SOURCES += \
+ ../plugins/codecs/cn/qgb18030codec.cpp \
+ ../plugins/codecs/jp/qjpunicode.cpp \
+ ../plugins/codecs/jp/qeucjpcodec.cpp \
+ ../plugins/codecs/jp/qjiscodec.cpp \
+ ../plugins/codecs/jp/qsjiscodec.cpp \
+ ../plugins/codecs/kr/qeuckrcodec.cpp \
+ ../plugins/codecs/tw/qbig5codec.cpp \
+ ../plugins/codecs/jp/qfontjpcodec.cpp
+ }
+}
diff --git a/src/corelib/codecs/qfontlaocodec.cpp b/src/corelib/codecs/qfontlaocodec.cpp
new file mode 100644
index 0000000000..496ac025fa
--- /dev/null
+++ b/src/corelib/codecs/qfontlaocodec.cpp
@@ -0,0 +1,124 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qfontlaocodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_CODECS
+#ifndef QT_NO_BIG_CODECS
+
+QT_BEGIN_NAMESPACE
+
+// Lookup table used by QFontLaoCodec::convertFromUnicode() to translate the
+// code points U+0E80..U+0EFF into single mulelao-1 bytes. Entry i belongs to
+// code point U+0E80 + i; a value of 0x00 marks a code point without a
+// mapping. Only the first 128 entries are listed explicitly; the remaining
+// entries of the 256-element array are zero-initialized and are never
+// indexed, because the caller only uses offsets 0x00..0x7f.
+static unsigned char const unicode_to_mulelao[256] =
+ {
+ // U+0E80
+ 0x00, 0xa1, 0xa2, 0x00, 0xa4, 0x00, 0x00, 0xa7,
+ 0xa8, 0x00, 0xaa, 0x00, 0x00, 0xad, 0x00, 0x00,
+ // U+0E90
+ 0x00, 0x00, 0x00, 0x00, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0x00, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ // U+0EA0
+ 0x00, 0xc1, 0xc2, 0xc3, 0x00, 0xc5, 0x00, 0xc7,
+ 0x00, 0x00, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
+ // U+0EB0
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0x00, 0xdb, 0xdc, 0xdd, 0x00, 0x00,
+ // U+0EC0
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0x00, 0xe6, 0x00,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0x00, 0x00,
+ // U+0ED0
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0x00, 0x00, 0xfc, 0xfd, 0x00, 0x00,
+ // U+0EE0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ // U+0EF0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ };
+
+
+// Destructor with an empty body; this class adds nothing that needs releasing.
+QFontLaoCodec::~QFontLaoCodec()
+{
+}
+
+// Returns the codec's name, "mulelao-1".
+QByteArray QFontLaoCodec::name() const
+{
+    return QByteArray("mulelao-1");
+}
+
+// Returns the fixed MIB enum value reported for this codec.
+// NOTE(review): the negative value presumably marks a Qt-private codec that
+// has no officially assigned MIB number -- confirm against QTextCodec docs.
+int QFontLaoCodec::mibEnum() const
+{
+ return -4242;
+}
+
+// Decoding is not implemented for this codec: all arguments are ignored and a
+// default-constructed QString is returned for every input.
+QString QFontLaoCodec::convertToUnicode(const char *, int, ConverterState *) const
+{
+ return QString();
+}
+
+// Encodes len UTF-16 code units starting at uc into mulelao-1, producing
+// exactly one output byte per input code unit:
+//  - code units below 0x80 are copied unchanged,
+//  - code units in U+0E80..U+0EFF are translated through unicode_to_mulelao,
+//  - everything else, including Lao code points without a table entry,
+//    becomes a 0 byte.
+// The ConverterState argument is ignored.
+QByteArray QFontLaoCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *) const
+{
+    QByteArray rstring;
+    rstring.resize(len);
+    uchar *out = reinterpret_cast<uchar *>(rstring.data());
+
+    for (int pos = 0; pos < len; ++pos) {
+        const ushort code = uc[pos].unicode();
+        uchar byte = 0; // value used for every unmappable code unit
+        if (code < 0x80)
+            byte = static_cast<uchar>(code);
+        else if (code >= 0x0e80 && code <= 0x0eff)
+            byte = unicode_to_mulelao[code - 0x0e80]; // table already holds 0 for gaps
+        out[pos] = byte;
+    }
+    return rstring;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_BIG_CODECS
+#endif // QT_NO_CODECS
diff --git a/src/corelib/codecs/qfontlaocodec_p.h b/src/corelib/codecs/qfontlaocodec_p.h
new file mode 100644
index 0000000000..a8e142da84
--- /dev/null
+++ b/src/corelib/codecs/qfontlaocodec_p.h
@@ -0,0 +1,78 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QFONTLAOCODEC_P_H
+#define QFONTLAOCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of qfontencodings_x11.cpp and qfont_x11.cpp. This header file may
+// change from version to version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_CODECS
+
+// Text codec named "mulelao-1". Only the Unicode -> byte direction is
+// implemented; convertToUnicode() returns an empty result for any input.
+class Q_CORE_EXPORT QFontLaoCodec : public QTextCodec
+{
+public:
+ ~QFontLaoCodec();
+
+ // Codec identification
+ QByteArray name() const;
+ int mibEnum() const;
+
+ // Conversion entry points; the ConverterState argument is ignored by both
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+};
+
+#endif // QT_NO_CODECS
+
+QT_END_NAMESPACE
+
+#endif // QFONTLAOCODEC_P_H
diff --git a/src/corelib/codecs/qiconvcodec.cpp b/src/corelib/codecs/qiconvcodec.cpp
new file mode 100644
index 0000000000..c8f28d9c76
--- /dev/null
+++ b/src/corelib/codecs/qiconvcodec.cpp
@@ -0,0 +1,536 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qiconvcodec_p.h"
+#include "qtextcodec_p.h"
+#include <qlibrary.h>
+#include <qdebug.h>
+#include <qthreadstorage.h>
+
+#include <errno.h>
+#include <locale.h>
+#include <stdio.h>
+#include <dlfcn.h>
+
+// unistd.h is needed for the _XOPEN_UNIX macro
+#include <unistd.h>
+#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+# include <langinfo.h>
+#endif
+
+// Per-platform iconv settings:
+//  UTF16  - the encoding name handed to iconv_open() for UTF-16 data
+//  NO_BOM - when defined, convertFromUnicode() does not feed a byte order
+//           mark to a freshly created converter
+#if defined(Q_OS_HPUX)
+# define NO_BOM
+# define UTF16 "ucs2"
+#elif defined(Q_OS_AIX)
+# define NO_BOM
+# define UTF16 "UCS-2"
+#elif defined(Q_OS_MAC)
+# define NO_BOM
+// no BOM is sent on Mac, so the byte order is spelled out in the encoding name
+# if Q_BYTE_ORDER == Q_BIG_ENDIAN
+# define UTF16 "UTF-16BE"
+# else
+# define UTF16 "UTF-16LE"
+# endif
+#else
+# define UTF16 "UTF-16"
+#endif
+
+// On Mac the iconv entry points are not linked directly: the QIconvCodec
+// constructor resolves them from /usr/lib/libiconv at run time and stores them
+// in the function pointers below. GNU_LIBICONV is forced on so that the rest
+// of this file uses the const char ** flavour of the iconv() input argument.
+#if defined(Q_OS_MAC)
+#ifndef GNU_LIBICONV
+#define GNU_LIBICONV
+#endif
+typedef iconv_t (*Ptr_iconv_open) (const char*, const char*);
+typedef size_t (*Ptr_iconv) (iconv_t, const char **, size_t *, char **, size_t *);
+typedef int (*Ptr_iconv_close) (iconv_t);
+
+static Ptr_iconv_open ptr_iconv_open = 0;
+static Ptr_iconv ptr_iconv = 0;
+static Ptr_iconv_close ptr_iconv_close = 0;
+#endif
+
+QT_BEGIN_NAMESPACE
+
+extern bool qt_locale_initialized;
+
+// Looks up the UTF-16 codec (MIB 1015) used to turn iconv's UTF-16 output into
+// QStrings and, on Mac, resolves the iconv functions from libiconv at run time.
+QIconvCodec::QIconvCodec()
+ : utf16Codec(0)
+{
+ utf16Codec = QTextCodec::codecForMib(1015);
+ Q_ASSERT_X(utf16Codec != 0,
+ "QIconvCodec::convertToUnicode",
+ "internal error, UTF-16 codec not found");
+ if (!utf16Codec) {
+ fprintf(stderr, "QIconvCodec::convertToUnicode: internal error, UTF-16 codec not found\n");
+ // sentinel value: convertToUnicode() checks for ~0 and falls back to
+ // QString::fromAscii() instead of dereferencing the pointer
+ utf16Codec = reinterpret_cast<QTextCodec *>(~0);
+ }
+#if defined(Q_OS_MAC)
+ // resolve the functions only once; the pointers are file-level statics
+ if (ptr_iconv_open == 0) {
+ QLibrary libiconv(QLatin1String("/usr/lib/libiconv"));
+ libiconv.setLoadHints(QLibrary::ExportExternalSymbolsHint);
+
+ // each function is looked up under its "lib"-prefixed name first and
+ // under the plain name as a fallback
+ ptr_iconv_open = reinterpret_cast<Ptr_iconv_open>(libiconv.resolve("libiconv_open"));
+ if (!ptr_iconv_open)
+ ptr_iconv_open = reinterpret_cast<Ptr_iconv_open>(libiconv.resolve("iconv_open"));
+ ptr_iconv = reinterpret_cast<Ptr_iconv>(libiconv.resolve("libiconv"));
+ if (!ptr_iconv)
+ ptr_iconv = reinterpret_cast<Ptr_iconv>(libiconv.resolve("iconv"));
+ ptr_iconv_close = reinterpret_cast<Ptr_iconv_close>(libiconv.resolve("libiconv_close"));
+ if (!ptr_iconv_close)
+ ptr_iconv_close = reinterpret_cast<Ptr_iconv_close>(libiconv.resolve("iconv_close"));
+
+ Q_ASSERT_X(ptr_iconv_open && ptr_iconv && ptr_iconv_close,
+ "QIconvCodec::QIconvCodec()",
+ "internal error, could not resolve the iconv functions");
+
+ // preprocessor macros are not scoped to this function: from here to the
+ // end of the file every iconv_open/iconv/iconv_close call goes through
+ // the resolved pointers
+# undef iconv_open
+# define iconv_open ptr_iconv_open
+# undef iconv
+# define iconv ptr_iconv
+# undef iconv_close
+# define iconv_close ptr_iconv_close
+ }
+#endif
+}
+
+// Empty destructor; utf16Codec is not deleted here.
+QIconvCodec::~QIconvCodec()
+{
+}
+
+// Takes over the conversion descriptor x. The save buffer initially points at
+// the inline member array, so no heap allocation happens until saveChars()
+// needs more room than that array provides.
+QIconvCodec::IconvState::IconvState(iconv_t x)
+ : buffer(array), bufferLen(sizeof array), cd(x)
+{
+}
+
+// Closes the conversion descriptor if it is valid and frees the save buffer
+// if it was replaced by a heap allocation.
+QIconvCodec::IconvState::~IconvState()
+{
+    const iconv_t invalidDescriptor = reinterpret_cast<iconv_t>(-1);
+    if (cd != invalidDescriptor)
+        iconv_close(cd);
+
+    const bool heapAllocated = (buffer != array);
+    if (heapAllocated)
+        delete[] buffer;
+}
+
+// Copies count bytes from c into the save buffer, replacing the buffer with a
+// larger heap allocation first when the current one is too small. Previous
+// buffer contents are discarded.
+void QIconvCodec::IconvState::saveChars(const char *c, int count)
+{
+    const bool tooSmall = count > bufferLen;
+    if (tooSmall) {
+        // the inline array must never be passed to delete[]
+        if (buffer != array)
+            delete[] buffer;
+        bufferLen = count;
+        buffer = new char[bufferLen];
+    }
+
+    memcpy(buffer, c, count);
+}
+
+// Free function stored in a ConverterState by convertToUnicode(); deletes the
+// IconvState that the state's d pointer refers to.
+static void qIconvCodecStateFree(QTextCodec::ConverterState *state)
+{
+    QIconvCodec::IconvState *iconvState =
+        reinterpret_cast<QIconvCodec::IconvState *>(state->d);
+    delete iconvState;
+}
+
+Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, toUnicodeState)
+
+// Decodes len bytes at chars from the locale encoding into a QString by
+// running them through iconv to UTF-16 and then through utf16Codec.
+//
+// With a convState the conversion is stateful: the IconvState is kept in
+// convState->d, and a trailing incomplete multi-byte sequence is saved and
+// prepended to the input of the next call. Without a convState a per-thread
+// IconvState is used and the descriptor is reset after every call.
+//
+// Falls back to QString::fromAscii()/fromLatin1() when no converter can be
+// used or when iconv fails with an unexpected error.
+QString QIconvCodec::convertToUnicode(const char* chars, int len, ConverterState *convState) const
+{
+ // the constructor stores ~0 when no UTF-16 codec could be found
+ if (utf16Codec == reinterpret_cast<QTextCodec *>(~0))
+ return QString::fromAscii(chars, len);
+
+ int invalidCount = 0;
+ int remainingCount = 0;
+ char *remainingBuffer = 0;
+ IconvState **pstate;
+
+ if (convState) {
+ // stateful conversion
+ pstate = reinterpret_cast<IconvState **>(&convState->d);
+ if (convState->d) {
+ // restore state
+ remainingCount = convState->remainingChars;
+ remainingBuffer = (*pstate)->buffer;
+ } else {
+ // first time: register the function that frees the IconvState
+ // created further down
+ convState->flags |= FreeFunction;
+ QTextCodecUnalignedPointer::encode(convState->state_data, qIconvCodecStateFree);
+ }
+ } else {
+ QThreadStorage<QIconvCodec::IconvState *> *ts = toUnicodeState();
+ if (!qt_locale_initialized || !ts) {
+ // we're running after the Q_GLOBAL_STATIC has been deleted
+ // or before the QCoreApplication initialization
+ // bad programmer, no cookie for you
+ return QString::fromLatin1(chars, len);
+ }
+
+ // stateless conversion -- use thread-local data
+ pstate = &toUnicodeState()->localData();
+ }
+
+ if (!*pstate) {
+ // first time, create the state
+ iconv_t cd = QIconvCodec::createIconv_t(UTF16, 0);
+ if (cd == reinterpret_cast<iconv_t>(-1)) {
+ // report the failure only once per process
+ static int reported = 0;
+ if (!reported++) {
+ fprintf(stderr,
+ "QIconvCodec::convertToUnicode: using ASCII for conversion, iconv_open failed\n");
+ }
+ return QString::fromAscii(chars, len);
+ }
+
+ *pstate = new IconvState(cd);
+ }
+
+ IconvState *state = *pstate;
+ size_t inBytesLeft = len;
+ // the type of the input pointer depends on the iconv flavour in use
+#ifdef GNU_LIBICONV
+ // with GNU_LIBICONV defined, iconv() is called with a const char ** input pointer
+ const char *inBytes = chars;
+#else
+ char *inBytes = const_cast<char *>(chars);
+#endif
+
+ QByteArray in;
+ if (remainingCount) {
+ // we have to prepend the remaining bytes from the previous conversion
+ inBytesLeft += remainingCount;
+ in.resize(inBytesLeft);
+ inBytes = in.data();
+
+ memcpy(in.data(), remainingBuffer, remainingCount);
+ memcpy(in.data() + remainingCount, chars, len);
+
+ remainingCount = 0;
+ }
+
+ QByteArray ba;
+ // best case assumption, each byte is converted into one UTF-16 character, plus 2 bytes for the BOM;
+ // the E2BIG branch below grows the buffer when this is not enough
+ size_t outBytesLeft = len * 2 + 2;
+ ba.resize(outBytesLeft);
+ char *outBytes = ba.data();
+ do {
+ size_t ret = iconv(state->cd, &inBytes, &inBytesLeft, &outBytes, &outBytesLeft);
+ if (ret == (size_t) -1) {
+ if (errno == E2BIG) {
+ // output buffer full: double it and continue where iconv stopped
+ int offset = ba.size() - outBytesLeft;
+ ba.resize(ba.size() * 2);
+ outBytes = ba.data() + offset;
+ outBytesLeft = ba.size() - offset;
+
+ continue;
+ }
+
+ if (errno == EILSEQ) {
+ // conversion stopped because of an invalid character in the sequence
+ ++invalidCount;
+ } else if (errno == EINVAL && convState) {
+ // conversion stopped because the remaining inBytesLeft make up
+ // an incomplete multi-byte sequence; save them for later
+ state->saveChars(inBytes, inBytesLeft);
+ remainingCount = inBytesLeft;
+ break;
+ }
+
+ // reached for EILSEQ, and for EINVAL in stateless mode
+ if (errno == EILSEQ || errno == EINVAL) {
+ // skip the next character
+ ++inBytes;
+ --inBytesLeft;
+ continue;
+ }
+
+ // some other error
+ // note, cannot use qWarning() since we are implementing the codecForLocale :)
+ perror("QIconvCodec::convertToUnicode: using ASCII for conversion, iconv failed");
+
+ if (!convState) {
+ // reset state
+ iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+ }
+
+ return QString::fromAscii(chars, len);
+ }
+ } while (inBytesLeft != 0);
+
+ // only the part of ba that iconv actually filled is decoded
+ QString s = utf16Codec->toUnicode(ba.constData(), ba.size() - outBytesLeft);
+
+ if (convState) {
+ convState->invalidChars = invalidCount;
+ convState->remainingChars = remainingCount;
+ } else {
+ // reset state
+ iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+ }
+
+ return s;
+}
+
+Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, fromUnicodeState)
+
+// Encodes len UTF-16 code units at uc into the locale encoding using iconv.
+//
+// A per-thread IconvState holds the conversion descriptor. When it is created
+// and NO_BOM is not defined, a byte order mark is fed to the converter first;
+// the output of that step is discarded.
+//
+// If convState is given, a pending code unit left over from the previous call
+// (convState->remainingChars != 0) is prepended to the input, and an input
+// that ends in an incomplete sequence stores its last code unit in
+// convState->state_data[0] for the next call. convState->invalidChars receives
+// the number of code units iconv rejected with EILSEQ.
+//
+// Falls back to toLatin1()/toAscii() when the thread storage or the iconv
+// descriptor is unavailable, or when iconv fails with an unexpected error.
+QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *convState) const
+{
+    char *inBytes;
+    char *outBytes;
+    size_t inBytesLeft;
+
+    // the type of iconv()'s input argument depends on the iconv flavour in use
+#if defined(GNU_LIBICONV)
+    const char **inBytesPtr = const_cast<const char **>(&inBytes);
+#else
+    char **inBytesPtr = &inBytes;
+#endif
+
+    QThreadStorage<QIconvCodec::IconvState *> *ts = fromUnicodeState();
+    if (!qt_locale_initialized || !ts) {
+        // we're running after the Q_GLOBAL_STATIC has been deleted
+        // or before the QCoreApplication initialization
+        // bad programmer, no cookie for you
+        if (!len)
+            // this is a special case - zero-sized string should be
+            // translated to empty but not-null QByteArray.
+            return QByteArray("");
+        return QString::fromRawData(uc, len).toLatin1();
+    }
+    IconvState *&state = ts->localData();
+    if (!state) {
+        state = new IconvState(QIconvCodec::createIconv_t(0, UTF16));
+        if (state->cd != reinterpret_cast<iconv_t>(-1)) {
+            size_t outBytesLeft = len + 3; // +3 for the BOM
+            QByteArray ba;
+            ba.resize(outBytesLeft);
+            outBytes = ba.data();
+
+#if !defined(NO_BOM)
+            // give iconv() a BOM
+            QChar bom[] = { QChar(QChar::ByteOrderMark) };
+            inBytes = reinterpret_cast<char *>(bom);
+            inBytesLeft = sizeof(bom);
+            if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+                perror("QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv failed for BOM");
+
+                // mark the descriptor invalid so later calls take the
+                // ASCII fallback below
+                iconv_close(state->cd);
+                state->cd = reinterpret_cast<iconv_t>(-1);
+
+                return QString(uc, len).toAscii();
+            }
+#endif // NO_BOM
+        }
+    }
+    if (state->cd == reinterpret_cast<iconv_t>(-1)) {
+        // report the failure only once per process
+        static int reported = 0;
+        if (!reported++) {
+            fprintf(stderr,
+                    "QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv_open failed\n");
+        }
+        return QString(uc, len).toAscii();
+    }
+
+    // first guess: one output byte per code unit; grown on E2BIG below
+    size_t outBytesLeft = len;
+    QByteArray ba;
+    ba.resize(outBytesLeft);
+    outBytes = ba.data();
+
+    // now feed iconv() the real data
+    inBytes = const_cast<char *>(reinterpret_cast<const char *>(uc));
+    inBytesLeft = len * sizeof(QChar);
+
+    QByteArray in;
+    if (convState && convState->remainingChars) {
+        // we have one surrogate char to be prepended; the buffer must hold
+        // that code unit plus all len new code units, i.e. inBytesLeft bytes
+        // (sizing it with len instead of the byte count overflowed the
+        // buffer in the second memcpy below)
+        in.resize(sizeof(QChar) + inBytesLeft);
+        inBytes = in.data();
+
+        QChar remaining = convState->state_data[0];
+        memcpy(in.data(), &remaining, sizeof(QChar));
+        memcpy(in.data() + sizeof(QChar), uc, inBytesLeft);
+
+        inBytesLeft += sizeof(QChar);
+        convState->remainingChars = 0;
+    }
+
+    int invalidCount = 0;
+    do {
+        if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+            if (errno == EINVAL && convState) {
+                // buffer ends in a surrogate
+                Q_ASSERT(inBytesLeft == 2);
+                // Save the last code unit of the input actually given to
+                // iconv. For len > 0 this is uc[len - 1]; reading it from the
+                // input buffer also covers len == 0 with a prepended pending
+                // code unit, where uc[len - 1] would be out of bounds.
+                ushort pending;
+                memcpy(&pending, inBytes + inBytesLeft - sizeof pending, sizeof pending);
+                convState->remainingChars = 1;
+                convState->state_data[0] = pending;
+                break;
+            }
+
+            switch (errno) {
+            case EILSEQ:
+                ++invalidCount;
+                // fall through
+            case EINVAL:
+                {
+                    // skip the offending code unit
+                    inBytes += sizeof(QChar);
+                    inBytesLeft -= sizeof(QChar);
+                    break;
+                }
+            case E2BIG:
+                {
+                    // output buffer full: grow it and continue where iconv
+                    // stopped. Doubling an empty buffer would not grow it,
+                    // so start from a small fixed size in that case.
+                    int offset = ba.size() - outBytesLeft;
+                    ba.resize(ba.isEmpty() ? 16 : ba.size() * 2);
+                    outBytes = ba.data() + offset;
+                    outBytesLeft = ba.size() - offset;
+                    break;
+                }
+            default:
+                {
+                    // note, cannot use qWarning() since we are implementing the codecForLocale :)
+                    perror("QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv failed");
+
+                    // reset to initial state
+                    iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+
+                    return QString(uc, len).toAscii();
+                }
+            }
+        }
+    } while (inBytesLeft != 0);
+
+    // reset to initial state
+    iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+
+    // drop the unused tail of the output buffer
+    ba.resize(ba.size() - outBytesLeft);
+
+    if (convState)
+        convState->invalidChars = invalidCount;
+
+    return ba;
+}
+
+// Returns the codec's name, "System".
+QByteArray QIconvCodec::name() const
+{
+    return QByteArray("System");
+}
+
+// Returns 0 as the MIB enum of this codec.
+int QIconvCodec::mibEnum() const
+{
+ return 0;
+}
+
+// Opens an iconv conversion descriptor between the locale's encoding and the
+// given encoding. Exactly one of to and from must be non-null; the null one
+// stands for the locale encoding.
+//
+// The locale encoding is determined by trying, in order: the empty codeset
+// name (glibc / GNU libiconv), nl_langinfo(CODESET) where available, and then
+// a series of guesses based on the LC_CTYPE locale name and the LC_ALL,
+// LC_CTYPE and LANG environment variables.
+//
+// Returns the descriptor, or (iconv_t) -1 if every attempt failed.
+iconv_t QIconvCodec::createIconv_t(const char *to, const char *from)
+{
+    Q_ASSERT((to == 0 && from != 0) || (to != 0 && from == 0));
+
+    iconv_t cd = (iconv_t) -1;
+#if defined(__GLIBC__) || defined(GNU_LIBICONV)
+    // both GLIBC and libgnuiconv will use the locale's encoding if from or to is an empty string
+    static const char empty_codeset[] = "";
+    const char *codeset = empty_codeset;
+    cd = iconv_open(to ? to : codeset, from ? from : codeset);
+#else
+    char *codeset = 0;
+#endif
+
+#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+    if (cd == (iconv_t) -1) {
+        codeset = nl_langinfo(CODESET);
+        if (codeset)
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+    }
+#endif
+
+    if (cd == (iconv_t) -1) {
+        // Very poorly defined and followed standards causes lots of
+        // code to try to get all the cases... This logic is
+        // duplicated in QTextCodec, so if you change it here, change
+        // it there too.
+
+        // Try to determine locale codeset from locale name assigned to
+        // LC_CTYPE category.
+
+        // First part is getting that locale name.  First try setlocale() which
+        // definitely knows it, but since we cannot fully trust it, get ready
+        // to fall back to environment variables.
+        char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
+
+        // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
+        // environment variables.
+        char * lang = qstrdup(qgetenv("LC_ALL").constData());
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            if (lang) delete [] lang;
+            lang = qstrdup(qgetenv("LC_CTYPE").constData());
+        }
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            if (lang) delete [] lang;
+            lang = qstrdup(qgetenv("LANG").constData());
+        }
+
+        // Now try these in order:
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        // 2. CODESET from lang if it contains a .CODESET part
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        // 4. locale (ditto)
+        // 5. check for "@euro"
+
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        codeset = ctype ? strchr(ctype, '.') : 0;
+        if (codeset && *codeset == '.') {
+            ++codeset;
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+        }
+
+        // 2. CODESET from lang if it contains a .CODESET part
+        codeset = lang ? strchr(lang, '.') : 0;
+        if (cd == (iconv_t) -1 && codeset && *codeset == '.') {
+            ++codeset;
+            cd = iconv_open(to ? to : codeset, from ? from : codeset);
+        }
+
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        if (cd == (iconv_t) -1 && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
+            cd = iconv_open(to ? to : ctype, from ? from : ctype);
+
+
+        // 4. locale (ditto)
+        if (cd == (iconv_t) -1 && lang && *lang != 0)
+            cd = iconv_open(to ? to : lang, from ? from : lang);
+
+        // 5. "@euro"
+        // Only a last resort: the "still no descriptor" test has to guard both
+        // the ctype and the lang check. Otherwise a lang containing "@euro"
+        // would replace (and leak) a descriptor opened by an earlier step.
+        if (cd == (iconv_t) -1
+            && ((ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))))
+            cd = iconv_open(to ? to : "ISO8859-15", from ? from : "ISO8859-15");
+
+        delete [] ctype;
+        delete [] lang;
+    }
+
+    return cd;
+}
+
+QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qiconvcodec_p.h b/src/corelib/codecs/qiconvcodec_p.h
new file mode 100644
index 0000000000..839bee7380
--- /dev/null
+++ b/src/corelib/codecs/qiconvcodec_p.h
@@ -0,0 +1,104 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QICONVCODEC_P_H
+#define QICONVCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of the QLibrary class. This header file may change from
+// version to version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "qtextcodec.h"
+
+#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+
+#ifdef Q_OS_MAC
+typedef void * iconv_t;
+#else
+#include <iconv.h>
+#endif
+
+QT_BEGIN_NAMESPACE
+
+// Text codec named "System" that converts between the locale's encoding and
+// UTF-16 by way of the iconv library.
+class QIconvCodec: public QTextCodec
+{
+private:
+ // codec used to turn iconv's UTF-16 output into a QString; set by the
+ // constructor, which stores ~0 here when no such codec is found
+ mutable QTextCodec *utf16Codec;
+
+public:
+ QIconvCodec();
+ ~QIconvCodec();
+
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+ QByteArray name() const;
+ int mibEnum() const;
+
+ // Opens a conversion descriptor; exactly one of to/from is non-null and
+ // the null one stands for the locale encoding. Returns (iconv_t) -1 on failure.
+ static iconv_t createIconv_t(const char *to, const char *from);
+
+ // Conversion descriptor plus a buffer for input bytes that have to be
+ // carried over to the next conversion call.
+ class IconvState
+ {
+ public:
+ IconvState(iconv_t x);
+ ~IconvState();
+ // points at array until saveChars() needs more room, then at heap memory
+ char *buffer;
+ // capacity of buffer in bytes
+ int bufferLen;
+ // iconv descriptor; closed by the destructor unless it is (iconv_t) -1
+ iconv_t cd;
+
+ // inline storage used as the initial buffer
+ char array[8];
+
+ void saveChars(const char *c, int count);
+ };
+};
+
+QT_END_NAMESPACE
+
+#endif // Q_OS_UNIX && !QT_NO_ICONV && !QT_BOOTSTRAPPED
+
+#endif // QICONVCODEC_P_H
diff --git a/src/corelib/codecs/qisciicodec.cpp b/src/corelib/codecs/qisciicodec.cpp
new file mode 100644
index 0000000000..dd2bc8d38a
--- /dev/null
+++ b/src/corelib/codecs/qisciicodec.cpp
@@ -0,0 +1,288 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qisciicodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_CODECS
+
+QT_BEGIN_NAMESPACE
+
+/*!
+ \class QIsciiCodec
+ \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
+
+ \internal
+*/
+
+
+// Describes one ISCII variant handled by QIsciiCodec.
+struct Codecs {
+ const char *name; // codec name, returned by QIsciiCodec::name()
+ ushort base; // value subtracted from / added to code points by the conversion routines
+};
+
+// One entry per supported ISCII variant. QIsciiCodec::idx selects the
+// entry; the same index is also folded into the value returned by
+// mibEnum(). Consecutive bases are 0x80 apart, matching the 0x80-entry
+// uni_to_iscii_table.
+static const Codecs codecs [] = {
+ { "Iscii-Dev", 0x900 },
+ { "Iscii-Bng", 0x980 },
+ { "Iscii-Pnj", 0xa00 },
+ { "Iscii-Gjr", 0xa80 },
+ { "Iscii-Ori", 0xb00 },
+ { "Iscii-Tml", 0xb80 },
+ { "Iscii-Tlg", 0xc00 },
+ { "Iscii-Knd", 0xc80 },
+ { "Iscii-Mlm", 0xd00 }
+};
+
+// Out-of-line destructor; the body is intentionally empty.
+QIsciiCodec::~QIsciiCodec()
+{
+}
+
+// Returns the name stored in the codecs table entry selected by idx.
+QByteArray QIsciiCodec::name() const
+{
+    const Codecs &entry = codecs[idx];
+    return QByteArray(entry.name);
+}
+
+// ISCII has no MIBenum of its own, so a negative value derived from the
+// codecs table index is returned instead: -3000 for index 0, -3001 for
+// index 1, and so on.
+int QIsciiCodec::mibEnum() const
+{
+    const int firstIsciiMib = -3000;
+    return firstIsciiMib - idx;
+}
+
+// Marker stored in iscii_to_uni_table for the single byte (0xd9) that
+// convertToUnicode() tests for explicitly after a halant.
+static const uchar inv = 0xFF;
+
+/* iscii range from 0xa0 - 0xff */
+// Indexed by (ISCII byte - 0xa0). convertToUnicode() adds the script
+// base from the codecs table to the stored value to form the resulting
+// QChar, so every entry is an offset relative to that base.
+static const uchar iscii_to_uni_table[0x60] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0e,
+ 0x0f, 0x20, 0x0d, 0x12,
+
+ 0x13, 0x14, 0x11, 0x15,
+ 0x16, 0x17, 0x18, 0x19,
+ 0x1a, 0x1b, 0x1c, 0x1d,
+ 0x1e, 0x1f, 0x20, 0x21,
+
+ 0x22, 0x23, 0x24, 0x25,
+ 0x26, 0x27, 0x28, 0x29,
+ 0x2a, 0x2b, 0x2c, 0x2d,
+ 0x2e, 0x2f, 0x5f, 0x30,
+
+ 0x31, 0x32, 0x33, 0x34,
+ 0x35, 0x36, 0x37, 0x38,
+ 0x39, inv, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43,
+
+ 0x46, 0x47, 0x48, 0x45,
+ 0x4a, 0x4b, 0x4c, 0x49,
+ 0x4d, 0x3c, 0x64, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+
+ 0x00, 0x66, 0x67, 0x68,
+ 0x69, 0x6a, 0x6b, 0x6c,
+ 0x6d, 0x6e, 0x6f, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+// Indexed by (code point - script base). convertFromUnicode() reads
+// each entry as one of three things:
+//   value > 0x80  : the ISCII byte to emit
+//   0x01 .. 0x80  : row index into uni_to_iscii_pairs (two-element entry)
+//   0x00          : no mapping; the replacement character is emitted
+static const uchar uni_to_iscii_table[0x80] = {
+ 0x00, 0xa1, 0xa2, 0xa3,
+ 0x00, 0xa4, 0xa5, 0xa6,
+ 0xa7, 0xa8, 0xa9, 0xaa,
+ 0x00, 0xae, 0xab, 0xac,
+
+ 0xad, 0xb2, 0xaf, 0xb0,
+ 0xb1, 0xb3, 0xb4, 0xb5,
+ 0xb6, 0xb7, 0xb8, 0xb9,
+ 0xba, 0xbb, 0xbc, 0xbd,
+
+ 0xbe, 0xbf, 0xc0, 0xc1,
+ 0xc2, 0xc3, 0xc4, 0xc5,
+ 0xc6, 0xc7, 0xc8, 0xc9,
+ 0xca, 0xcb, 0xcc, 0xcd,
+
+ 0xcf, 0xd0, 0xd1, 0xd2,
+ 0xd3, 0xd4, 0xd5, 0xd6,
+ 0xd7, 0xd8, 0x00, 0x00,
+ 0xe9, 0x00, 0xda, 0xdb,
+
+ 0xdc, 0xdd, 0xde, 0xdf,
+ 0x00, 0xe3, 0xe0, 0xe1,
+ 0xe2, 0xe7, 0xe4, 0xe5,
+ 0xe6, 0xe8, 0x00, 0x00,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
+ 0x05, 0x06, 0x07, 0xce,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0xea, 0x08, 0xf1, 0xf2,
+ 0xf3, 0xf4, 0xf5, 0xf6,
+ 0xf7, 0xf8, 0xf9, 0xfa,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+// Two-element rows selected by the small (<= 0x80) values found in
+// uni_to_iscii_table; row 0 is an unused placeholder. Each row lists the
+// decomposition of the code point offset named in its trailing comment,
+// written as offsets relative to the script base (0x3c being the
+// nukta offset mentioned in uni_to_iscii_table).
+static const uchar uni_to_iscii_pairs[] = {
+ 0x00, 0x00,
+ 0x15, 0x3c, // 0x958
+ 0x16, 0x3c, // 0x959
+ 0x17, 0x3c, // 0x95a
+ 0x1c, 0x3c, // 0x95b
+ 0x21, 0x3c, // 0x95c
+ 0x22, 0x3c, // 0x95d
+ 0x2b, 0x3c, // 0x95e
+ 0x64, 0x64 // 0x965
+};
+
+
+// Encodes len UTF-16 code units starting at uc into ISCII bytes for the
+// script selected by idx.
+//
+// Code units below 0xA0 are copied through unchanged. Code units inside
+// the script's 0x80-wide block are translated with uni_to_iscii_table;
+// entries that decompose into two characters are expanded through
+// uni_to_iscii_pairs. ZWNJ / ZWJ directly after a halant become a
+// second halant / a nukta; elsewhere they are dropped silently. Anything
+// else is replaced by '?' (or by 0 when ConvertInvalidToNull is set) and
+// counted as invalid.
+//
+// When state is non-null, state_data[0] carries the "previous character
+// was a halant" flag across calls and invalidChars is incremented by the
+// number of replaced characters.
+QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+    char replacement = '?';
+    bool halant = false;
+    if (state) {
+        if (state->flags & ConvertInvalidToNull)
+            replacement = 0;
+        halant = state->state_data[0];
+    }
+    int invalid = 0;
+
+    QByteArray result;
+    result.resize(2*len); //worst case
+
+    uchar *ch = reinterpret_cast<uchar *>(result.data());
+
+    const int base = codecs[idx].base;
+
+    for (int i = 0; i < len; ++i) {
+        const ushort codePoint = uc[i].unicode();
+
+        /* The low 7 bits of ISCII is plain ASCII. However, we go all the
+         * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
+         * behavior. */
+        if (codePoint < 0xA0) {
+            *ch++ = static_cast<uchar>(codePoint);
+            // A pass-through character ends any halant context, exactly as
+            // convertToUnicode() recomputes its flag for every byte.
+            halant = false;
+            continue;
+        }
+
+        const int pos = codePoint - base;
+        if (pos > 0 && pos < 0x80) {
+            uchar iscii = uni_to_iscii_table[pos];
+            if (iscii > 0x80) {
+                *ch++ = iscii;
+            } else if (iscii) {
+                // The pair table holds code point offsets, not ISCII bytes,
+                // so each half has to go through the main table as well.
+                const uchar *pair = uni_to_iscii_pairs + 2*iscii;
+                *ch++ = uni_to_iscii_table[pair[0]];
+                *ch++ = uni_to_iscii_table[pair[1]];
+            } else {
+                *ch++ = replacement;
+                ++invalid;
+            }
+        } else {
+            if (codePoint == 0x200c) { // ZWNJ
+                if (halant)
+                    // Consonant Halant ZWNJ -> Consonant Halant Halant
+                    *ch++ = 0xe8;
+            } else if (codePoint == 0x200d) { // ZWJ
+                if (halant)
+                    // Consonant Halant ZWJ -> Consonant Halant Nukta
+                    *ch++ = 0xe9;
+            } else {
+                *ch++ = replacement;
+                ++invalid;
+            }
+        }
+        halant = (pos == 0x4d);
+    }
+    result.truncate(ch - (uchar *)result.data());
+
+    if (state) {
+        state->invalidChars += invalid;
+        state->state_data[0] = halant;
+    }
+    return result;
+}
+
+// Decodes len ISCII bytes starting at chars into UTF-16 for the script
+// selected by idx.
+//
+// Bytes below 0xa0 are copied through unchanged. Other bytes are looked
+// up in iscii_to_uni_table and offset by the script base. Directly after
+// a halant (0xe8), the INV byte or a nukta (0xe9) becomes ZWJ and a
+// second halant becomes ZWNJ.
+//
+// When state is non-null, state_data[0] carries the "previous byte was a
+// halant" flag across calls.
+QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
+{
+    bool halant = false;
+    if (state) {
+        halant = state->state_data[0];
+    }
+
+    QString result;
+    result.resize(len);
+    QChar *uc = result.data();
+
+    const int base = codecs[idx].base;
+
+    for (int i = 0; i < len; ++i) {
+        ushort ch = (uchar) chars[i];
+        if (ch < 0xa0)
+            *uc++ = ch;
+        else {
+            ushort c = iscii_to_uni_table[ch - 0xa0];
+            // 0xe8 / 0xe9 are ISCII byte values, so they must be compared
+            // with the input byte; the table output never takes them.
+            if (halant && (c == inv || ch == 0xe9)) {
+                // Consonant Halant inv -> Consonant Halant ZWJ
+                // Consonant Halant Nukta -> Consonant Halant ZWJ
+                *uc++ = QChar(0x200d);
+            } else if (halant && ch == 0xe8) {
+                // Consonant Halant Halant -> Consonant Halant ZWNJ
+                *uc++ = QChar(0x200c);
+            } else {
+                *uc++ = QChar(c+base);
+            }
+        }
+        halant = (ch == 0xe8);
+    }
+    result.resize(uc - result.unicode());
+
+    if (state) {
+        state->state_data[0] = halant;
+    }
+    return result;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_CODECS
diff --git a/src/corelib/codecs/qisciicodec_p.h b/src/corelib/codecs/qisciicodec_p.h
new file mode 100644
index 0000000000..0477ca6707
--- /dev/null
+++ b/src/corelib/codecs/qisciicodec_p.h
@@ -0,0 +1,81 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QISCIICODEC_P_H
+#define QISCIICODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_CODECS
+
+// Text codec for one ISCII variant; the variant is fixed at construction.
+class QIsciiCodec : public QTextCodec {
+public:
+ // i is stored as idx and used by the implementation to index its
+ // table of supported variants.
+ explicit QIsciiCodec(int i) : idx(i) {}
+ ~QIsciiCodec();
+
+ QByteArray name() const;
+ int mibEnum() const;
+
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+private:
+ int idx; // index of the variant this codec instance handles
+};
+
+#endif // QT_NO_CODECS
+
+QT_END_NAMESPACE
+
+#endif // QISCIICODEC_P_H
diff --git a/src/corelib/codecs/qlatincodec.cpp b/src/corelib/codecs/qlatincodec.cpp
new file mode 100644
index 0000000000..aae436e7e0
--- /dev/null
+++ b/src/corelib/codecs/qlatincodec.cpp
@@ -0,0 +1,246 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qlatincodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+QT_BEGIN_NAMESPACE
+
+// Out-of-line destructor; the body is intentionally empty.
+QLatin1Codec::~QLatin1Codec()
+{
+}
+
+// Decodes len Latin-1 bytes from chars. A null chars pointer yields a
+// default-constructed QString. The converter state is not used.
+QString QLatin1Codec::convertToUnicode(const char *chars, int len, ConverterState *) const
+{
+    return chars ? QString::fromLatin1(chars, len) : QString();
+}
+
+
+// Encodes len UTF-16 code units from ch as Latin-1, one byte per unit.
+// Units above 0xff are replaced by '?' (or by 0 when ConvertInvalidToNull
+// is set) and, if state is non-null, added to state->invalidChars.
+QByteArray QLatin1Codec::convertFromUnicode(const QChar *ch, int len, ConverterState *state) const
+{
+    char replacement = '?';
+    if (state && (state->flags & ConvertInvalidToNull))
+        replacement = 0;
+
+    QByteArray encoded;
+    encoded.resize(len);
+    char *out = encoded.data();
+
+    int rejected = 0;
+    for (int pos = 0; pos < len; ++pos) {
+        const QChar &current = ch[pos];
+        if (current.unicode() <= 0xff) {
+            out[pos] = (char)current.cell();
+            continue;
+        }
+        out[pos] = replacement;
+        ++rejected;
+    }
+
+    if (state)
+        state->invalidChars += rejected;
+    return encoded;
+}
+
+// Returns the codec name.
+QByteArray QLatin1Codec::name() const
+{
+    return QByteArray("ISO-8859-1");
+}
+
+// Returns the alternative names this codec answers to, in a fixed order.
+QList<QByteArray> QLatin1Codec::aliases() const
+{
+    QList<QByteArray> names;
+    names.append(QByteArray("latin1"));
+    names.append(QByteArray("CP819"));
+    names.append(QByteArray("IBM819"));
+    names.append(QByteArray("iso-ir-100"));
+    names.append(QByteArray("csISOLatin1"));
+    return names;
+}
+
+
+// Returns the MIBenum value used for this codec.
+int QLatin1Codec::mibEnum() const
+{
+    const int latin1Mib = 4;
+    return latin1Mib;
+}
+
+
+// Out-of-line destructor; the body is intentionally empty.
+QLatin15Codec::~QLatin15Codec()
+{
+}
+
+// Decodes len ISO-8859-15 bytes from chars. The input is first decoded
+// as Latin-1, then the eight positions where the two encodings differ
+// are patched in place. A null chars pointer yields a
+// default-constructed QString. The converter state is not used.
+QString QLatin15Codec::convertToUnicode(const char* chars, int len, ConverterState *) const
+{
+    if (chars == 0)
+        return QString();
+
+    QString str = QString::fromLatin1(chars, len);
+    QChar *uc = str.data();
+    // Walk the decoded string rather than counting down the caller's len:
+    // fromLatin1() treats a negative length as "NUL-terminated", in which
+    // case len says nothing about how many characters were produced.
+    QChar * const end = uc + str.size();
+    for (; uc != end; ++uc) {
+        switch(uc->unicode()) {
+        case 0xa4:
+            *uc = 0x20ac;
+            break;
+        case 0xa6:
+            *uc = 0x0160;
+            break;
+        case 0xa8:
+            *uc = 0x0161;
+            break;
+        case 0xb4:
+            *uc = 0x017d;
+            break;
+        case 0xb8:
+            *uc = 0x017e;
+            break;
+        case 0xbc:
+            *uc = 0x0152;
+            break;
+        case 0xbd:
+            *uc = 0x0153;
+            break;
+        case 0xbe:
+            *uc = 0x0178;
+            break;
+        default:
+            break;
+        }
+    }
+    return str;
+}
+
+// Encodes length UTF-16 code units from in as ISO-8859-15, one byte per
+// unit.
+//
+// The eight characters that ISO-8859-15 adds are mapped to their byte
+// positions. The eight Latin-1 characters they displaced, and every
+// other unit at or above 0x0100, are replaced by '?' (or by 0 when
+// ConvertInvalidToNull is set). All remaining units below 0x0100 are
+// copied through.
+//
+// When state is non-null, remainingChars is reset to 0 and invalidChars
+// is increased by the number of replaced units.
+QByteArray QLatin15Codec::convertFromUnicode(const QChar *in, int length, ConverterState *state) const
+{
+    char replacement = '?';
+    if (state && (state->flags & ConvertInvalidToNull))
+        replacement = 0;
+
+    QByteArray encoded;
+    encoded.resize(length);
+    char *out = encoded.data();
+    int rejected = 0;
+
+    for (int pos = 0; pos < length; ++pos) {
+        const ushort code = in[pos].unicode();
+        char byte;
+        switch (code) {
+        // characters introduced by ISO-8859-15
+        case 0x20ac: byte = static_cast<char>(0xa4); break;
+        case 0x0160: byte = static_cast<char>(0xa6); break;
+        case 0x0161: byte = static_cast<char>(0xa8); break;
+        case 0x017d: byte = static_cast<char>(0xb4); break;
+        case 0x017e: byte = static_cast<char>(0xb8); break;
+        case 0x0152: byte = static_cast<char>(0xbc); break;
+        case 0x0153: byte = static_cast<char>(0xbd); break;
+        case 0x0178: byte = static_cast<char>(0xbe); break;
+
+        // Latin-1 characters whose positions were reassigned
+        case 0x00a4:
+        case 0x00a6:
+        case 0x00a8:
+        case 0x00b4:
+        case 0x00b8:
+        case 0x00bc:
+        case 0x00bd:
+        case 0x00be:
+            byte = replacement;
+            ++rejected;
+            break;
+
+        default:
+            if (code < 0x0100) {
+                byte = static_cast<char>(static_cast<unsigned char>(code));
+            } else {
+                byte = replacement;
+                ++rejected;
+            }
+            break;
+        }
+        out[pos] = byte;
+    }
+
+    if (state) {
+        state->remainingChars = 0;
+        state->invalidChars += rejected;
+    }
+    return encoded;
+}
+
+
+// Returns the codec name.
+QByteArray QLatin15Codec::name() const
+{
+    return QByteArray("ISO-8859-15");
+}
+
+// Returns the single alternative name this codec answers to.
+QList<QByteArray> QLatin15Codec::aliases() const
+{
+    QList<QByteArray> names;
+    names.append(QByteArray("latin9"));
+    return names;
+}
+
+// Returns the MIBenum value used for this codec.
+int QLatin15Codec::mibEnum() const
+{
+    const int latin15Mib = 111;
+    return latin15Mib;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_TEXTCODEC
diff --git a/src/corelib/codecs/qlatincodec_p.h b/src/corelib/codecs/qlatincodec_p.h
new file mode 100644
index 0000000000..676647a8d5
--- /dev/null
+++ b/src/corelib/codecs/qlatincodec_p.h
@@ -0,0 +1,94 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QLATINCODEC_P_H
+#define QLATINCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+
+// Text codec for ISO-8859-1 (Latin-1).
+class QLatin1Codec : public QTextCodec
+{
+public:
+ ~QLatin1Codec();
+
+ // Conversion between Latin-1 bytes and QString / QChar data.
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+ // Identification: primary name, alternative names, MIBenum value.
+ QByteArray name() const;
+ QList<QByteArray> aliases() const;
+ int mibEnum() const;
+};
+
+
+
+// Text codec for ISO-8859-15 (Latin-9).
+class QLatin15Codec: public QTextCodec
+{
+public:
+ ~QLatin15Codec();
+
+ // Conversion between ISO-8859-15 bytes and QString / QChar data.
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+
+ // Identification: primary name, alternative names, MIBenum value.
+ QByteArray name() const;
+ QList<QByteArray> aliases() const;
+ int mibEnum() const;
+};
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+#endif // QLATINCODEC_P_H
diff --git a/src/corelib/codecs/qsimplecodec.cpp b/src/corelib/codecs/qsimplecodec.cpp
new file mode 100644
index 0000000000..0184a5a24a
--- /dev/null
+++ b/src/corelib/codecs/qsimplecodec.cpp
@@ -0,0 +1,733 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qsimplecodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+QT_BEGIN_NAMESPACE
+
+#define LAST_MIB 2004
+
+static const struct {
+ const char *mime;
+ const char *aliases[7];
+ int mib;
+ quint16 values[128];
+} unicodevalues[QSimpleTextCodec::numSimpleCodecs] = {
+ // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
+ { "KOI8-R", { "csKOI8R", 0 }, 2084,
+ { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
+ 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
+ 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
+ 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
+ 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
+ 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
+ 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
+ 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
+ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
+ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
+ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
+ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
+ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
+ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
+ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
+ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
+ // /**/ - The BULLET OPERATOR is confused. Some people think
+ // it should be 0x2022 (BULLET).
+
+ // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
+ { "KOI8-U", { "KOI8-RU", 0 }, 2088,
+ { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
+ 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
+ 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
+ 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
+ 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
+ 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
+ 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
+ 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
+ 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
+ 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
+ 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
+ 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
+ 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
+ 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
+ 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
+ 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
+
+ // next bits generated from tables on the Unicode 2.0 CD. we can
+ // use these tables since this is part of the transition to using
+ // unicode everywhere in qt.
+
+ // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do (awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
+
+ // then I inserted the files manually.
+ { "ISO-8859-2", {"latin2", "iso-ir-101", "csISOLatin2", 0 }, 5,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
+ 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
+ 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
+ 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
+ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
+ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
+ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
+ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
+ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
+ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
+ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
+ { "ISO-8859-3", { "latin3", "iso-ir-109", "csISOLatin3", 0 }, 6,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
+ 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
+ 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
+ 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
+ 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
+ 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
+ 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
+ { "ISO-8859-4", { "latin4", "iso-ir-110", "csISOLatin4", 0 }, 7,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
+ 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
+ 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
+ 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
+ 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
+ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
+ 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+ 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
+ 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
+ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
+ 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+ 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
+ { "ISO-8859-5", { "cyrillic", "iso-ir-144", "csISOLatinCyrillic", 0 }, 8,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
+ 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
+ 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
+ { "ISO-8859-6", { "ISO-8859-6-I", "ECMA-114", "ASMO-708", "arabic", "iso-ir-127", "csISOLatinArabic", 0 }, 82,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
+ 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
+ 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
+ 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
+ 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
+ 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
+ 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
+ { "ISO-8859-7", { "ECMA-118", "greek", "iso-ir-126", "csISOLatinGreek", 0 }, 10,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
+ 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
+ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
+ 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+ 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
+ 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
+ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
+ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
+ 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
+ { "ISO-8859-8", { "ISO 8859-8-I", "iso-ir-138", "hebrew", "csISOLatinHebrew", 0 }, 85,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
+ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
+ 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
+ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
+ 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
+ { "ISO-8859-9", { "iso-ir-148", "latin5", "csISOLatin5", 0 }, 12,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
+ { "ISO-8859-10", { "iso-ir-157", "latin6", "ISO-8859-10:1992", "csISOLatin6", 0 }, 13,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
+ 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
+ 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
+ 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
+ 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
+ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
+ 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
+ 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+ 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
+ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
+ 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
+ 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
+ { "ISO-8859-13", { 0 }, 109,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
+ 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
+ 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
+ 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
+ 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
+ 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
+ 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
+ 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
+ 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
+ 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
+ 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
+ { "ISO-8859-14", { "iso-ir-199", "latin8", "iso-celtic", 0 }, 110,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
+ 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
+ 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
+ 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
+ { "ISO-8859-16", { "iso-ir-226", "latin10", 0 }, 112,
+ { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+ 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+ 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+ 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
+ 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
+ 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
+ 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
+ 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
+ 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
+ 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF} },
+
+ // next bits generated again from tables on the Unicode 3.0 CD.
+
+ // $ for a in CP* ; do (awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
+
+ { "IBM850", { "CP850", "csPC850Multilingual", 0 }, 2009,
+ { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+ 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
+ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+ 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
+ 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
+ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
+ 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
+ 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
+ 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
+ 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
+ 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
+ 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} },
+ { "IBM874", { "CP874", 0 }, -874, //### what is the mib?
+ { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
+ 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
+ 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
+ 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
+ 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
+ 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
+ 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
+ 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
+ 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
+ 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
+ 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
+ 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
+ { "IBM866", { "CP866", "csIBM866", 0 }, 2086,
+ { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
+ 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
+ 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
+ 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
+ 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
+ 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
+ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
+
+ { "windows-1250", { "CP1250", 0 }, 2250,
+ { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
+ 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
+ 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
+ 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
+ 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
+ 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
+ 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
+ 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
+ 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
+ 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
+ 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
+ { "windows-1251", { "CP1251", 0 }, 2251,
+ { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
+ 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
+ 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
+ 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
+ 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
+ 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
+ { "windows-1252", { "CP1252", 0 }, 2252,
+ { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
+ { "windows-1253", {"CP1253", 0 }, 2253,
+ { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
+ 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
+ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
+ 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
+ 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
+ 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
+ 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
+ 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
+ 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
+ { "windows-1254", { "CP1254", 0 }, 2254,
+ { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
+ { "windows-1255", { "CP1255", 0 }, 2255,
+ { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
+ 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
+ 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
+ 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
+ 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
+ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
+ 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
+ { "windows-1256", { "CP1256", 0 }, 2256,
+ { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
+ 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
+ 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
+ 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
+ 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
+ 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
+ 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
+ 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
+ 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
+ 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
+ { "windows-1257", { "CP1257", 0 }, 2257,
+ { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
+ 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
+ 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
+ 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
+ 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
+ 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
+ 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
+ 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
+ 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
+ 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
+ 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
+ { "windows-1258", { "CP1258", 0 }, 2258,
+ { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+ 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
+ 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
+ 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
+
+ { "Apple Roman", { "macintosh", "MacRoman", 0 }, -168,
+ { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+ 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+ 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+ 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+ 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+ 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+ 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+ 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+ 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+ 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+ 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+ 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+ 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+ 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+ 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
+
+
+
+ // This one is based on the charmap file
+ // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
+ // to this format by Boerre Gaup <boerre@subdimension.com>
+ { "WINSAMI2", { "WS2", 0 }, -165,
+ { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE,
+ 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
+ 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7,
+ 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F,
+ 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+ 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
+
+
+ // this one is generated from the charmap file located in /usr/share/i18n/charmaps
+ // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
+ // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
+
+ // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; (cut -c25- < TIS-620 ; cat /tmp/digits) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
+ { "TIS-620", { "ISO 8859-11", 0 }, 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
+ { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
+ 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
+ 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
+ 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
+ 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
+ 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
+ 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
+ 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
+ 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
+ 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
+ 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
+ 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
+
+ /*
+ Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
+ MIBenum: 2004
+ Source: LaserJet IIP Printer User's Manual,
+ HP part no 33471-90901, Hewlet-Packard, June 1989.
+ Alias: roman8
+ Alias: r8
+ Alias: csHPRoman8
+ */
+ { "roman8", { "hp-roman8", "csHPRoman8", 0 }, 2004,
+ { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
+ 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
+ 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
+ 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
+ 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
+ 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
+ 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
+ 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
+ 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
+ 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
+ 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
+ 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
+ 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }
+
+ // if you add more character sets at the end, change LAST_MIB above
+};
+
+/*
+    Creates a codec backed by entry \a i of the unicodevalues table.
+
+    The reverse (Unicode to byte) map starts out null; convertFromUnicode()
+    builds it the first time it is needed.
+*/
+QSimpleTextCodec::QSimpleTextCodec(int i)
+    : forwardIndex(i),
+      reverseMap(0)
+{
+}
+
+
+/*
+    Releases the reverse map. The pointer is still null when the map was
+    never built, in which case the delete is a no-op.
+*/
+QSimpleTextCodec::~QSimpleTextCodec()
+{
+    QByteArray *map = reverseMap;
+    delete map;
+}
+
+/*
+    Builds the Unicode-to-byte lookup table for entry \a forwardIndex of
+    unicodevalues.
+
+    The returned array is indexed by code point and holds the byte that
+    encodes it, or 0 when the code point has no encoding. It is sized to the
+    highest code point in the entry that lies below 0xFFFD. The caller owns
+    the returned object.
+*/
+static QByteArray *buildReverseMap(int forwardIndex)
+{
+    // Find the highest usable code point; 0xFFFD and above are ignored.
+    int highest = 0;
+    for (int k = 0; k < 128; ++k) {
+        const int cp = unicodevalues[forwardIndex].values[k];
+        if (cp > highest && cp < 0xfffd)
+            highest = cp;
+    }
+    const int size = highest + 1;
+
+    QByteArray *table = new QByteArray();
+    table->resize(size);
+
+    // The lower half maps to itself; everything above starts out unmapped.
+    for (int k = 0; k < size; ++k) {
+        if (k < 128)
+            (*table)[k] = (char)k;
+        else
+            (*table)[k] = 0;
+    }
+
+    // Invert the forward table for the upper 128 byte values.
+    for (int byte = 128; byte < 256; ++byte) {
+        const int cp = unicodevalues[forwardIndex].values[byte - 128];
+        if (cp < size)
+            (*table)[cp] = (char)(unsigned char)(byte);
+    }
+    return table;
+}
+
+/*
+    Decodes \a len bytes starting at \a chars into a QString.
+
+    Bytes below 128 are converted through QLatin1Char; bytes from 128 up are
+    translated through this codec's entry in the unicodevalues table. Returns
+    a default-constructed QString when \a chars is null or \a len is not
+    positive. The converter state is not used.
+*/
+QString QSimpleTextCodec::convertToUnicode(const char* chars, int len, ConverterState *) const
+{
+    if (chars == 0 || len <= 0)
+        return QString();
+
+    const unsigned char *src = (const unsigned char *)chars;
+    const unsigned char *const end = src + len;
+
+    QString result;
+    result.resize(len);
+    QChar *dst = result.data();
+
+    // One output character per input byte.
+    while (src != end) {
+        const unsigned char byte = *src++;
+        if (byte <= 127)
+            *dst = QLatin1Char(byte);
+        else
+            *dst = unicodevalues[forwardIndex].values[byte - 128];
+        ++dst;
+    }
+    return result;
+}
+
+/*
+    Encodes \a length UTF-16 code units starting at \a in into this codec's
+    8-bit encoding and returns the resulting bytes.
+
+    Code units below 128 are emitted unchanged. All others are looked up in
+    the reverse map, which is built on first use. A unit without an entry is
+    replaced by '?' (or by a NUL byte when \a state has ConvertInvalidToNull
+    set) and counted; the count is added to state->invalidChars when \a state
+    is given.
+
+    A null \a in or a non-positive \a length yields an empty array instead of
+    reading from \a in.
+*/
+QByteArray QSimpleTextCodec::convertFromUnicode(const QChar *in, int length, ConverterState *state) const
+{
+    const char replacement = (state && state->flags & ConvertInvalidToNull) ? 0 : '?';
+
+    // Build the reverse map lazily. If the pointer was set in the meantime,
+    // keep the installed map and discard the one just built.
+    if (!reverseMap) {
+        QByteArray *tmp = buildReverseMap(this->forwardIndex);
+        if (!reverseMap.testAndSetOrdered(0, tmp))
+            delete tmp;
+    }
+
+    // A negative count must never reach the copy loop: it would run far past
+    // the end of the (empty) output buffer. Treat bad arguments as empty input.
+    const int count = (in != 0 && length > 0) ? length : 0;
+
+    QByteArray r;
+    r.resize(count);
+    unsigned char *rp = (unsigned char *)r.data();
+    const unsigned char *rmp = (const unsigned char *)reverseMap->data();
+    const int rmsize = (int) reverseMap->size();
+
+    int invalid = 0;
+    for (int i = 0; i < count; ++i) {
+        const int u = in[i].unicode();
+        if (u < 128) {
+            rp[i] = (unsigned char)u;
+        } else {
+            // A zero entry in the reverse map means "no byte for this code point".
+            unsigned char byte = (u < rmsize) ? rmp[u] : 0;
+            if (byte == 0) {
+                byte = replacement;
+                ++invalid;
+            }
+            rp[i] = byte;
+        }
+    }
+
+    if (state) {
+        state->invalidChars += invalid;
+    }
+    return r;
+}
+
+/*
+    Returns the name stored in the mime field of this codec's
+    unicodevalues entry.
+*/
+QByteArray QSimpleTextCodec::name() const
+{
+    return QByteArray(unicodevalues[forwardIndex].mime);
+}
+
+/*
+    Returns the alias names of this codec, in table order. The alias array
+    in unicodevalues is terminated by a null pointer.
+*/
+QList<QByteArray> QSimpleTextCodec::aliases() const
+{
+    QList<QByteArray> result;
+    for (const char * const *alias = unicodevalues[forwardIndex].aliases; *alias; ++alias)
+        result.append(QByteArray(*alias));
+    return result;
+}
+
+/*
+    Returns the MIB enum recorded for this codec in the unicodevalues table.
+*/
+int QSimpleTextCodec::mibEnum() const
+{
+    const int mib = unicodevalues[forwardIndex].mib;
+    return mib;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_TEXTCODEC
diff --git a/src/corelib/codecs/qsimplecodec_p.h b/src/corelib/codecs/qsimplecodec_p.h
new file mode 100644
index 0000000000..0fa874a2cc
--- /dev/null
+++ b/src/corelib/codecs/qsimplecodec_p.h
@@ -0,0 +1,87 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QSIMPLECODEC_P_H
+#define QSIMPLECODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+
+template <typename T> class QAtomicPointer;
+
+class QSimpleTextCodec: public QTextCodec // table-driven single-byte codec; see unicodevalues in qsimplecodec.cpp
+{
+public:
+ enum { numSimpleCodecs = 30 }; // NOTE(review): presumably the number of unicodevalues entries - confirm before changing
+ explicit QSimpleTextCodec(int); // argument is the unicodevalues index, stored as forwardIndex
+ ~QSimpleTextCodec(); // deletes the reverse map, if one was built
+
+ QString convertToUnicode(const char *, int, ConverterState *) const; // bytes >= 128 go through the table; state is unused
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; // unmappable input becomes '?' or NUL and is counted in the state
+
+ QByteArray name() const; // mime field of the table entry
+ QList<QByteArray> aliases() const; // null-terminated alias list of the table entry
+ int mibEnum() const; // mib field of the table entry
+
+private:
+ int forwardIndex; // index of this codec's entry in unicodevalues
+ mutable QAtomicPointer<QByteArray> reverseMap; // Unicode-to-byte lookup, built on first convertFromUnicode() call
+};
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+#endif // QSIMPLECODEC_P_H
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
new file mode 100644
index 0000000000..6e8ffa145d
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -0,0 +1,1598 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qplatformdefs.h"
+#include "qtextcodec.h"
+#include "qtextcodec_p.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+#include "qlist.h"
+#include "qfile.h"
+#ifndef QT_NO_LIBRARY
+# include "qcoreapplication.h"
+# include "qtextcodecplugin.h"
+# include "private/qfactoryloader_p.h"
+#endif
+#include "qstringlist.h"
+
+#ifdef Q_OS_UNIX
+# include "qiconvcodec_p.h"
+#endif
+
+#include "qutfcodec_p.h"
+#include "qsimplecodec_p.h"
+#include "qlatincodec_p.h"
+#ifndef QT_NO_CODECS
+# include "qtsciicodec_p.h"
+# include "qisciicodec_p.h"
+# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+// no iconv(3) support, must build all codecs into the library
+# include "../../plugins/codecs/cn/qgb18030codec.h"
+# include "../../plugins/codecs/jp/qeucjpcodec.h"
+# include "../../plugins/codecs/jp/qjiscodec.h"
+# include "../../plugins/codecs/jp/qsjiscodec.h"
+# include "../../plugins/codecs/kr/qeuckrcodec.h"
+# include "../../plugins/codecs/tw/qbig5codec.h"
+# endif // QT_NO_ICONV
+# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
+# include "qfontlaocodec_p.h"
+# include "../../plugins/codecs/jp/qfontjpcodec.h"
+# endif
+#endif // QT_NO_CODECS
+#include "qlocale.h"
+#include "private/qmutexpool_p.h"
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <locale.h>
+#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+#include <langinfo.h>
+#endif
+
+#if defined(Q_OS_WINCE)
+# define QT_NO_SETLOCALE
+#endif
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODECPLUGIN
+Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
+ (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
+#endif
+
+
+/*
+    Returns true if \a name and \a test denote the same codec name: either
+    they are equal ignoring case, or they are equal once every
+    non-alphanumeric character is dropped and case is ignored (so
+    "ISO 8859-1" matches "iso-8859-1").
+*/
+static bool nameMatch(const QByteArray &name, const QByteArray &test)
+{
+    // fast path: identical apart from case
+    if (qstricmp(name, test) == 0)
+        return true;
+
+    const char *lhs = name.constData();
+    const char *rhs = test.constData();
+
+    for (; *lhs != '\0'; ++lhs) {
+        // separators in the first name are ignored
+        if (!isalnum((uchar)*lhs))
+            continue;
+
+        // advance the second name to its next letter or digit
+        while (*rhs != '\0' && !isalnum((uchar)*rhs))
+            ++rhs;
+        if (*rhs == '\0')
+            return false;   // second name ran out first
+
+        if (tolower((uchar)*lhs) != tolower((uchar)*rhs))
+            return false;
+        ++rhs;
+    }
+
+    // whatever is left of the second name may only be separators
+    while (*rhs != '\0' && !isalnum((uchar)*rhs))
+        ++rhs;
+    return *rhs == '\0';
+}
+
+
+/*
+    Asks the codec plugins for a codec called \a name. The first plugin key
+    that matches \a name (see nameMatch()) and whose plugin instance is a
+    QTextCodecFactoryInterface creates the codec. Returns 0 if there is no
+    such plugin or plugin support is compiled out.
+*/
+static QTextCodec *createForName(const QByteArray &name)
+{
+#ifndef QT_NO_TEXTCODECPLUGIN
+    QFactoryLoader *factoryLoader = loader();
+    const QStringList pluginKeys = factoryLoader->keys();
+    for (int idx = 0; idx < pluginKeys.size(); ++idx) {
+        const QString key = pluginKeys.at(idx);
+        if (!nameMatch(name, key.toLatin1()))
+            continue;
+
+        QTextCodecFactoryInterface *factory =
+            qobject_cast<QTextCodecFactoryInterface*>(factoryLoader->instance(key));
+        if (factory)
+            return factory->create(key);
+    }
+#else
+    Q_UNUSED(name);
+#endif
+    return 0;
+}
+
+/*
+    Asks the codec plugins for a codec with MIB enum \a mib. Plugins
+    advertise MIBs under keys of the form "MIB: <number>". Returns 0 if no
+    plugin provides the key or plugin support is compiled out.
+*/
+static QTextCodec *createForMib(int mib)
+{
+#ifndef QT_NO_TEXTCODECPLUGIN
+    const QString key = QLatin1String("MIB: ") + QString::number(mib);
+    QTextCodecFactoryInterface *factory =
+        qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(key));
+    if (factory)
+        return factory->create(key);
+#else
+    Q_UNUSED(mib);
+#endif
+    return 0;
+}
+
+// Registry of the live QTextCodec objects: allocated in setup(), filled by
+// the QTextCodec constructor and freed by QTextCodecCleanup.
+static QList<QTextCodec*> *all = 0;
+// True only while QTextCodecCleanup's destructor is deleting the codecs;
+// ~QTextCodec() warns when a codec is deleted at any other time.
+static bool destroying_is_ok = false;
+
+// Codec returned by QTextCodec::codecForLocale().
+static QTextCodec *localeMapper = 0;
+// NOTE(review): not referenced in this part of the file; presumably the
+// codec used for tr() -- confirm against qtextcodec.h.
+QTextCodec *QTextCodec::cftr = 0;
+
+
+// Helper whose only job is its destructor: it deletes all registered codecs.
+// A single instance is created through Q_GLOBAL_STATIC from setup().
+class QTextCodecCleanup
+{
+public:
+ ~QTextCodecCleanup();
+};
+
+/*
+    Deletes every codec that is still registered, then the registry
+    itself. This runs when the cleanup object is destroyed, shortly before
+    the program exits, so no QTextCodec objects are left lying around.
+*/
+QTextCodecCleanup::~QTextCodecCleanup()
+{
+    if (all) {
+        // lets ~QTextCodec() know that these deletions are legitimate
+        destroying_is_ok = true;
+
+        while (!all->isEmpty()) {
+            // unlink first: ~QTextCodec() also removes itself from the list
+            QTextCodec *codec = all->takeFirst();
+            delete codec;
+        }
+        delete all;
+        all = 0;
+        localeMapper = 0;
+
+        destroying_is_ok = false;
+    }
+}
+
+Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
+
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+// Codec for the Windows ANSI code page (CP_ACP). It registers under the
+// name "System" and is what setupLocaleMapper() picks as the locale codec
+// on Windows.
+class QWindowsLocalCodec: public QTextCodec
+{
+public:
+ QWindowsLocalCodec();
+ ~QWindowsLocalCodec();
+
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ // Slower fallback used by convertToUnicode() when the input contains an
+ // invalid sequence: converts one multibyte character at a time.
+ QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
+
+ QByteArray name() const;
+ int mibEnum() const;
+
+};
+
+// Nothing to initialise: the class has no data members, and registration in
+// the codec list is done by the QTextCodec base-class constructor.
+QWindowsLocalCodec::QWindowsLocalCodec()
+{
+}
+
+// No resources of its own to release.
+QWindowsLocalCodec::~QWindowsLocalCodec()
+{
+}
+
+/*
+    Converts \a length bytes at \a chars from the Windows ANSI code page
+    (CP_ACP) to Unicode using MultiByteToWideChar().
+
+    If \a state is non-null, state->remainingChars / state->state_data[0]
+    carry a byte that could not be converted at the end of the previous
+    chunk; it is combined with the first byte of this chunk. On successful
+    return they describe the byte withheld from the end of this chunk, if
+    any.
+
+    Input with an invalid sequence that is not at the very end is handed to
+    convertToUnicodeCharByChar(). Returns a null QString for empty input or
+    when the conversion fails.
+*/
+QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
+{
+    const char *mb = chars;
+    int mblen = length;
+
+    if (!mb || !mblen)
+        return QString();
+
+    const int wclen_auto = 4096;
+    WCHAR wc_auto[wclen_auto];
+    int wclen = wclen_auto;
+    WCHAR *wc = wc_auto;        // switched to a heap buffer if 4096 is too small
+    int len;
+    QString sp;
+    bool prepend = false;
+    char state_data = 0;
+    int remainingChars = 0;
+
+    //save the current state information
+    if (state) {
+        state_data = (char)state->state_data[0];
+        remainingChars = state->remainingChars;
+    }
+
+    //convert the pending character (if available)
+    if (state && remainingChars) {
+        char prev[3] = {0};
+        prev[0] = state_data;
+        prev[1] = mb[0];
+        remainingChars = 0;
+        len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+                                  prev, 2, wc, wclen);
+        if (len) {
+            prepend = true;
+            sp.append(QChar(wc[0]));
+            mb++;
+            mblen--;
+            wc[0] = 0;
+        }
+    }
+
+    // The chunk consisted only of the byte completing the pending character.
+    // MultiByteToWideChar() fails when asked to convert 0 bytes, which would
+    // drop the character just decoded and leave the state stale.
+    if (prepend && mblen == 0) {
+        state->state_data[0] = (char)state_data;
+        state->remainingChars = 0;
+        return sp;
+    }
+
+    while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+                                     mb, mblen, wc, wclen))) {
+        int r = GetLastError();
+        if (r == ERROR_INSUFFICIENT_BUFFER) {
+            if (wc != wc_auto) {
+                qWarning("MultiByteToWideChar: Size changed");
+                break;
+            } else {
+                wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
+                                            mb, mblen, 0, 0);
+                wc = new WCHAR[wclen];
+                // and try again...
+            }
+        } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+            //find the last non NULL character
+            while (mblen > 1 && !(mb[mblen-1]))
+                mblen--;
+            //check whether, we hit an invalid character in the middle
+            if ((mblen <= 1) || (remainingChars && state_data)) {
+                // do not leak the enlarged buffer on this exit path
+                if (wc != wc_auto)
+                    delete [] wc;
+                return convertToUnicodeCharByChar(chars, length, state);
+            }
+            //Remove the last character and try again...
+            state_data = mb[mblen-1];
+            remainingChars = 1;
+            mblen--;
+        } else {
+            // Fail.
+            qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+            break;
+        }
+    }
+    if (len <= 0) {
+        // do not leak the enlarged buffer on this exit path
+        if (wc != wc_auto)
+            delete [] wc;
+        return QString();
+    }
+    if (wc[len-1] == 0) // len - 1: we don't want terminator
+        --len;
+
+    //save the new state information
+    if (state) {
+        state->state_data[0] = (char)state_data;
+        state->remainingChars = remainingChars;
+    }
+    QString s((QChar*)wc, len);
+    if (wc != wc_auto)
+        delete [] wc;
+    if (prepend) {
+        return sp+s;
+    }
+    return s;
+}
+
+/*
+    Fallback for convertToUnicode(): converts \a length bytes at \a chars
+    from the ANSI code page one multibyte character at a time, so that a
+    single invalid sequence only loses that character.
+
+    The input is copied into a zero-terminated scratch buffer; if \a state
+    holds a pending byte from the previous chunk it is placed in front of
+    the copy and the pending count is cleared. When the last character of
+    the input cannot be translated it is stored back into \a state (if
+    given) so that the next chunk can complete it.
+
+    Returns a null QString for empty input or (on Windows CE) when
+    mbstowcs() rejects the input.
+*/
+QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
+{
+    if (!chars || !length)
+        return QString();
+
+    int copyLocation = 0;
+    int extra = 2;
+    if (state && state->remainingChars) {
+        copyLocation = state->remainingChars;
+        extra += copyLocation;
+    }
+    int newLength = length + extra;
+    char *mbcs = new char[newLength];
+    //ensure that we have a NULL terminated string
+    mbcs[newLength-1] = 0;
+    mbcs[newLength-2] = 0;
+    memcpy(&(mbcs[copyLocation]), chars, length);
+    if (copyLocation) {
+        //copy the last character from the state
+        mbcs[0] = (char)state->state_data[0];
+        state->remainingChars = 0;
+    }
+    const char *mb = mbcs;
+#ifndef Q_OS_WINCE
+    const char *next = 0;
+    QString s;
+    // CharNextExA() returns its argument unchanged at the terminating zero
+    while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
+        WCHAR wc[2] ={0};
+        int charlength = next - mb;
+        int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
+        if (len>0) {
+            s.append(QChar(wc[0]));
+        } else {
+            int r = GetLastError();
+            //check if the character being dropped is the last character
+            if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
+                state->remainingChars = 1;
+                state->state_data[0] = (char)*mb;
+            }
+        }
+        mb = next;
+    }
+#else
+    QString s;
+    int size = mbstowcs(NULL, mb, length);
+    if (size < 0) {
+        // The original Q_ASSERT("...") tested a string literal, which is
+        // always true and therefore never fired; report the failure instead.
+        qWarning("QWindowsLocalCodec: Error in CE TextCodec");
+        delete [] mbcs;     // do not leak the scratch buffer on failure
+        return QString();
+    }
+    wchar_t* ws = new wchar_t[size + 2];
+    ws[size +1] = 0;
+    ws[size] = 0;
+    size = mbstowcs(ws, mb, length);
+    for (int i=0; i< size; i++)
+        s.append(QChar(ws[i]));
+    delete [] ws;
+#endif
+    // allocated with new[], so it must be released with delete []
+    delete [] mbcs;
+    return s;
+}
+
+// Encodes len QChars starting at uc by delegating to qt_winQString2MB().
+// The ConverterState argument is unnamed and not used.
+QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *) const
+{
+ return qt_winQString2MB(uc, len);
+}
+
+
+// The name under which setupLocaleMapper() looks this codec up.
+QByteArray QWindowsLocalCodec::name() const
+{
+ return "System";
+}
+
+// NOTE(review): 0 presumably means "no IANA MIB enum assigned" for the
+// system codec -- confirm.
+int QWindowsLocalCodec::mibEnum() const
+{
+ return 0;
+}
+
+#else
+
+/*
+ Locale-name tables used by setupLocaleMapper() to guess a codec when the
+ locale name carries no explicit codeset. Each table is a list of locale
+ names terminated by a null pointer and is searched with
+ try_locale_list(), which compares the complete name exactly.
+*/
+/* locale names mostly copied from XFree86 */
+static const char * const iso8859_2locales[] = {
+ "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
+ "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
+ "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
+ "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };
+
+static const char * const iso8859_3locales[] = {
+ "eo", 0 };
+
+static const char * const iso8859_4locales[] = {
+ "ee", "ee_EE", 0 };
+
+static const char * const iso8859_5locales[] = {
+ "mk", "mk_MK", "sp", "sp_YU", 0 };
+
+static const char * const cp_1251locales[] = {
+ "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };
+
+static const char * const pt_154locales[] = {
+ "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };
+
+static const char * const iso8859_6locales[] = {
+ "ar_AA", "ar_SA", "arabic", 0 };
+
+static const char * const iso8859_7locales[] = {
+ "el", "el_GR", "greek", 0 };
+
+static const char * const iso8859_8locales[] = {
+ "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };
+
+static const char * const iso8859_9locales[] = {
+ "tr", "tr_TR", "turkish", 0 };
+
+static const char * const iso8859_13locales[] = {
+ "lt", "lt_LT", "lv", "lv_LV", 0 };
+
+static const char * const iso8859_15locales[] = {
+ "et", "et_EE",
+ // Euro countries
+ "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE",
+ "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR",
+ "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
+ "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
+ 0 };
+
+static const char * const koi8_ulocales[] = {
+ "uk", "uk_UA", "ru_UA", "ukrainian", 0 };
+
+// setupLocaleMapper() maps these locales to the "ISO 8859-11" codec name.
+static const char * const tis_620locales[] = {
+ "th", "th_TH", "thai", 0 };
+
+// static const char * const tcvnlocales[] = {
+// "vi", "vi_VN", 0 };
+
+/*
+    Returns true if \a lang is exactly equal (strcmp) to one of the entries
+    of \a locale, a list of locale names terminated by a null pointer.
+
+    An empty-string entry also ends the search, but is never reported as a
+    match (previously it made the function return true for any \a lang).
+    A null \a lang matches nothing.
+*/
+static bool try_locale_list(const char * const locale[], const char * lang)
+{
+    if (!lang)
+        return false;
+
+    for (int i = 0; locale[i] && *locale[i]; ++i) {
+        if (strcmp(locale[i], lang) == 0)
+            return true;
+    }
+    return false;
+}
+
+// For the probably_koi8_locales we have to look. the standard says
+// these are 8859-5, but almost all Russian users use KOI8-R and
+// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
+// it thinks ru_RU means.
+
+// If you read the history, it seems that many Russians blame ISO and
+// Perestroika for the confusion.
+//
+// The real bug is that some programs break if the user specifies
+// ru_RU.KOI8-R.
+
+static const char * const probably_koi8_rlocales[] = {
+ "ru", "ru_SU", "ru_RU", "russian", 0 };
+
+/*
+    Decides between KOI8-R and ISO 8859-5 for a Russian locale named \a i
+    by temporarily switching LC_CTYPE to \a i and checking how tolower()
+    maps the byte that is CYRILLIC CAPITAL LETTER YU in each encoding.
+    Falls back to KOI8-R (with a warning) when the probe is inconclusive.
+
+    The LC_CTYPE locale that was active on entry is restored before
+    returning.
+*/
+static QTextCodec * ru_RU_hack(const char * i) {
+    QTextCodec * ru_RU_codec = 0;
+
+#if !defined(QT_NO_SETLOCALE)
+    // Query the current locale (null second argument) and copy it *before*
+    // switching: setlocale(LC_CTYPE, i) returns the new locale, not the old
+    // one, so using its return value would make the restore below a no-op.
+    QByteArray origlocale(setlocale(LC_CTYPE, 0));
+    setlocale(LC_CTYPE, i);
+#endif
+    // unicode   koi8r   latin5   name
+    // 0x044E    0xC0    0xEE     CYRILLIC SMALL LETTER YU
+    // 0x042E    0xE0    0xCE     CYRILLIC CAPITAL LETTER YU
+    int latin5 = tolower(0xCE);
+    int koi8r = tolower(0xE0);
+    if (koi8r == 0xC0 && latin5 != 0xEE) {
+        ru_RU_codec = QTextCodec::codecForName("KOI8-R");
+    } else if (koi8r != 0xC0 && latin5 == 0xEE) {
+        ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
+    } else {
+        // something else again... let's assume... *throws dice*
+        ru_RU_codec = QTextCodec::codecForName("KOI8-R");
+        qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
+                 koi8r, latin5, i);
+    }
+#if !defined(QT_NO_SETLOCALE)
+    // an empty name would select the environment's locale instead of
+    // restoring, so only restore when the query succeeded
+    if (!origlocale.isEmpty())
+        setlocale(LC_CTYPE, origlocale.constData());
+#endif
+
+    return ru_RU_codec;
+}
+
+#endif
+
+#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
+/*
+    Looks up the codec called \a name. If there is none and \a name
+    contains a '@' modifier, the lookup is repeated with the part of the
+    name in front of the first '@'. Returns 0 if both attempts fail.
+*/
+static QTextCodec *checkForCodec(const char *name) {
+    if (QTextCodec *exact = QTextCodec::codecForName(name))
+        return exact;
+
+    const char *modifier = strchr(name, '@');
+    if (!modifier)
+        return 0;
+
+    QByteArray stripped(name, modifier - name);
+    return QTextCodec::codecForName(stripped.data());
+}
+#endif
+
+/* the next two functions are implicitly thread safe,
+ as they are only called by setup() which uses a mutex.
+*/
+/*
+    Chooses the codec for the current locale and stores it in localeMapper.
+
+    On Windows this is the "System" codec. Elsewhere the candidates are
+    tried in this order, stopping at the first codec found:
+      - the "System" codec (when built with iconv),
+      - the codeset reported by nl_langinfo(CODESET), where available,
+      - heuristics on the LC_CTYPE locale name and the LC_ALL / LC_CTYPE /
+        LANG environment variables (steps 1-7 below),
+      - ISO 8859-1 as the last resort.
+*/
+static void setupLocaleMapper()
+{
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+    localeMapper = QTextCodec::codecForName("System");
+#else
+
+#ifndef QT_NO_ICONV
+    localeMapper = QTextCodec::codecForName("System");
+#endif
+
+#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
+    if (!localeMapper) {
+        char *charset = nl_langinfo (CODESET);
+        if (charset)
+            localeMapper = QTextCodec::codecForName(charset);
+    }
+#endif
+
+    if (!localeMapper) {
+        // Very poorly defined and followed standards causes lots of
+        // code to try to get all the cases... This logic is
+        // duplicated in QIconvCodec, so if you change it here, change
+        // it there too.
+
+        // Try to determine locale codeset from locale name assigned to
+        // LC_CTYPE category.
+
+        // First part is getting that locale name.  First try setlocale() which
+        // definitely knows it, but since we cannot fully trust it, get ready
+        // to fall back to environment variables.
+#if !defined(QT_NO_SETLOCALE)
+        char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
+#else
+        char * ctype = qstrdup("");
+#endif
+
+        // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
+        // environment variables.
+        char * lang = qstrdup(qgetenv("LC_ALL").constData());
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            delete [] lang;     // deleting a null pointer is a no-op
+            lang = qstrdup(qgetenv("LC_CTYPE").constData());
+        }
+        if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
+            delete [] lang;
+            lang = qstrdup(qgetenv("LANG").constData());
+        }
+
+        // Now try these in order:
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        // 2. CODESET from lang if it contains a .CODESET part
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        // 4. locale (ditto)
+        // 5. check for "@euro"
+        // 6. guess locale from ctype unless ctype is "C"
+        // 7. guess locale from lang
+
+        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
+        char * codeset = ctype ? strchr(ctype, '.') : 0;
+        if (codeset && *codeset == '.')
+            localeMapper = checkForCodec(codeset + 1);
+
+        // 2. CODESET from lang if it contains a .CODESET part
+        codeset = lang ? strchr(lang, '.') : 0;
+        if (!localeMapper && codeset && *codeset == '.')
+            localeMapper = checkForCodec(codeset + 1);
+
+        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
+        if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
+            localeMapper = checkForCodec(ctype);
+
+        // 4. locale (ditto)
+        if (!localeMapper && lang && *lang != 0)
+            localeMapper = checkForCodec(lang);
+
+        // 5. "@euro"
+        // The !localeMapper guard has to cover both alternatives; otherwise a
+        // lang such as "de_DE.UTF-8@euro" would replace the codec already
+        // found from its explicit codeset in step 2.
+        if (!localeMapper
+            && ((ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))))
+            localeMapper = checkForCodec("ISO 8859-15");
+
+        // 6. guess locale from ctype unless ctype is "C"
+        // 7. guess locale from lang
+        char * try_by_name = ctype;
+        if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
+            try_by_name = lang;
+
+        // Now do the guessing.
+        if (lang && *lang && !localeMapper && try_by_name && *try_by_name) {
+            if (try_locale_list(iso8859_15locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-15");
+            else if (try_locale_list(iso8859_2locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-2");
+            else if (try_locale_list(iso8859_3locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-3");
+            else if (try_locale_list(iso8859_4locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-4");
+            else if (try_locale_list(iso8859_5locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-5");
+            else if (try_locale_list(iso8859_6locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-6");
+            else if (try_locale_list(iso8859_7locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-7");
+            else if (try_locale_list(iso8859_8locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
+            else if (try_locale_list(iso8859_9locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-9");
+            else if (try_locale_list(iso8859_13locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-13");
+            else if (try_locale_list(tis_620locales, lang))
+                localeMapper = QTextCodec::codecForName("ISO 8859-11");
+            else if (try_locale_list(koi8_ulocales, lang))
+                localeMapper = QTextCodec::codecForName("KOI8-U");
+            else if (try_locale_list(cp_1251locales, lang))
+                localeMapper = QTextCodec::codecForName("CP 1251");
+            else if (try_locale_list(pt_154locales, lang))
+                localeMapper = QTextCodec::codecForName("PT 154");
+            else if (try_locale_list(probably_koi8_rlocales, lang))
+                localeMapper = ru_RU_hack(lang);
+        }
+
+        delete [] ctype;
+        delete [] lang;
+    }
+
+    // If everything failed, we default to 8859-1
+    // We could perhaps default to 8859-15.
+    if (!localeMapper)
+        localeMapper = QTextCodec::codecForName("ISO 8859-1");
+#endif
+}
+
+
+// One-time initialisation of the codec registry: allocates the global codec
+// list, creates the cleanup object, instantiates every built-in codec and
+// finally selects the locale codec. Later calls return immediately.
+static void setup()
+{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(QMutexPool::globalInstanceGet(&all));
+#endif
+
+ // Already initialised. This check also ends the recursion: each codec
+ // constructed below runs QTextCodec::QTextCodec(), which calls setup()
+ // again after `all` has been assigned.
+ // NOTE(review): that nested call locks the same pool mutex again on this
+ // thread; presumably the pool hands out recursive mutexes -- confirm.
+ if (all)
+ return;
+
+ if (destroying_is_ok)
+ qWarning("QTextCodec: Creating new codec during codec cleanup");
+ all = new QList<QTextCodec*>;
+ // create the cleanup object to cleanup all codecs on exit
+ (void) createQTextCodecCleanup();
+
+ // The codecs below add themselves to `all` from the QTextCodec
+ // constructor, which is why the pointers returned by new are discarded.
+ // The constructor prepends, so codecs created later are found first by
+ // the front-to-back searches in codecForName() and codecForMib().
+#ifndef QT_NO_CODECS
+# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
+ // no font codecs when bootstrapping
+ (void)new QFontLaoCodec;
+# if defined(QT_NO_ICONV)
+ // no iconv(3) support, must build all codecs into the library
+ (void)new QFontGb2312Codec;
+ (void)new QFontGbkCodec;
+ (void)new QFontGb18030_0Codec;
+ (void)new QFontJis0208Codec;
+ (void)new QFontJis0201Codec;
+ (void)new QFontKsc5601Codec;
+ (void)new QFontBig5hkscsCodec;
+ (void)new QFontBig5Codec;
+# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
+# endif // Q_WS_X11
+
+ (void)new QTsciiCodec;
+
+ // NOTE(review): 9 presumably matches the number of ISCII variants
+ // QIsciiCodec supports -- confirm against qisciicodec.cpp.
+ for (int i = 0; i < 9; ++i)
+ (void)new QIsciiCodec(i);
+
+
+# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+ // no asian codecs when bootstrapping, sorry
+ (void)new QGb18030Codec;
+ (void)new QGbkCodec;
+ (void)new QGb2312Codec;
+ (void)new QEucJpCodec;
+ (void)new QJisCodec;
+ (void)new QSjisCodec;
+ (void)new QEucKrCodec;
+ (void)new QBig5Codec;
+ (void)new QBig5hkscsCodec;
+# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
+#endif // QT_NO_CODECS
+
+#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+ (void) new QWindowsLocalCodec;
+#endif // Q_OS_WIN32
+
+ (void)new QUtf16Codec;
+ (void)new QUtf16BECodec;
+ (void)new QUtf16LECodec;
+ (void)new QUtf32Codec;
+ (void)new QUtf32BECodec;
+ (void)new QUtf32LECodec;
+ (void)new QLatin15Codec;
+ (void)new QLatin1Codec;
+ (void)new QUtf8Codec;
+
+ for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
+ (void)new QSimpleTextCodec(i);
+
+#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
+ // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
+ (void) new QIconvCodec();
+#endif
+
+ // All codecs exist now, so the locale codec can be looked up by name.
+ if (!localeMapper)
+ setupLocaleMapper();
+}
+
+QTextCodec::ConverterState::~ConverterState()
+{
+    // With FreeFunction set, state_data encodes a function pointer which is
+    // called with this object; it replaces the default release of d.
+    if (flags & FreeFunction) {
+        (QTextCodecUnalignedPointer::decode(state_data))(this);
+        return;
+    }
+
+    if (d)
+        qFree(d);
+}
+
+/*!
+ \class QTextCodec
+ \brief The QTextCodec class provides conversions between text encodings.
+ \reentrant
+ \ingroup i18n
+
+ Qt uses Unicode to store, draw and manipulate strings. In many
+ situations you may wish to deal with data that uses a different
+ encoding. For example, most Japanese documents are still stored
+ in Shift-JIS or ISO 2022-JP, while Russian users often have their
+ documents in KOI8-R or Windows-1251.
+
+ Qt provides a set of QTextCodec classes to help with converting
+ non-Unicode formats to and from Unicode. You can also create your
+ own codec classes.
+
+ The supported encodings are:
+
+ \list
+ \o Apple Roman
+ \o \l{Big5 Text Codec}{Big5}
+ \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
+ \o CP949
+ \o \l{EUC-JP Text Codec}{EUC-JP}
+ \o \l{EUC-KR Text Codec}{EUC-KR}
+ \o \l{GBK Text Codec}{GB18030-0}
+ \o IBM 850
+ \o IBM 866
+ \o IBM 874
+ \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
+ \o ISO 8859-1 to 10
+ \o ISO 8859-13 to 16
+ \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
+ \o JIS X 0201
+ \o JIS X 0208
+ \o KOI8-R
+ \o KOI8-U
+ \o MuleLao-1
+ \o ROMAN8
+ \o \l{Shift-JIS Text Codec}{Shift-JIS}
+ \o TIS-620
+ \o \l{TSCII Text Codec}{TSCII}
+ \o UTF-8
+ \o UTF-16
+ \o UTF-16BE
+ \o UTF-16LE
+ \o UTF-32
+ \o UTF-32BE
+ \o UTF-32LE
+ \o Windows-1250 to 1258
+ \o WINSAMI2
+ \endlist
+
+ QTextCodecs can be used as follows to convert some locally encoded
+ string to Unicode. Suppose you have some string encoded in Russian
+ KOI8-R encoding, and want to convert it to Unicode. The simple way
+ to do it is like this:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
+
+ After this, \c string holds the text converted to Unicode.
+ Converting a string from Unicode to the local encoding is just as
+ easy:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
+
+ To read or write files in various encodings, use QTextStream and
+ its \l{QTextStream::setCodec()}{setCodec()} function. See the
+ \l{tools/codecs}{Codecs} example for an application of QTextCodec
+ to file I/O.
+
+ Some care must be taken when trying to convert the data in chunks,
+ for example, when receiving it over a network. In such cases it is
+ possible that a multi-byte character will be split over two
+ chunks. At best this might result in the loss of a character and
+ at worst cause the entire conversion to fail.
+
+ The approach to use in these situations is to create a QTextDecoder
+ object for the codec and use this QTextDecoder for the whole
+ decoding process, as shown below:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
+
+ The QTextDecoder object maintains state between chunks and therefore
+ works correctly even if a multi-byte character is split between
+ chunks.
+
+ \section1 Creating Your Own Codec Class
+
+ Support for new text encodings can be added to Qt by creating
+ QTextCodec subclasses.
+
+ The pure virtual functions describe the encoder to the system and
+ the coder is used as required in the different text file formats
+ supported by QTextStream, and under X11, for the locale-specific
+ character input and output.
+
+ To add support for another encoding to Qt, make a subclass of
+ QTextCodec and implement the functions listed in the table below.
+
+ \table
+ \header \o Function \o Description
+
+ \row \o name()
+ \o Returns the official name for the encoding. If the
+ encoding is listed in the
+ \l{IANA character-sets encoding file}, the name
+ should be the preferred MIME name for the encoding.
+
+ \row \o aliases()
+ \o Returns a list of alternative names for the encoding.
+ QTextCodec provides a default implementation that returns
+ an empty list. For example, "ISO-8859-1" has "latin1",
+ "CP819", "IBM819", and "iso-ir-100" as aliases.
+
+ \row \o mibEnum()
+ \o Return the MIB enum for the encoding if it is listed in
+ the \l{IANA character-sets encoding file}.
+
+ \row \o convertToUnicode()
+ \o Converts an 8-bit character string to Unicode.
+
+ \row \o convertFromUnicode()
+ \o Converts a Unicode string to an 8-bit character string.
+ \endtable
+
+ You may find it more convenient to make your codec class
+ available as a plugin; see \l{How to Create Qt Plugins} for
+ details.
+
+ \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
+*/
+
+/*!
+ \enum QTextCodec::ConversionFlag
+
+ \value DefaultConversion No flag is set.
+ \value ConvertInvalidToNull If this flag is set, each invalid input
+ character is output as a null character.
+ \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
+
+ \omitvalue FreeFunction
+*/
+
+/*!
+ \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
+
+ Constructs a ConverterState object initialized with the given \a flags.
+*/
+
+/*!
+ \fn QTextCodec::ConverterState::~ConverterState()
+
+ Destroys the ConverterState object.
+*/
+
+/*!
+ \nonreentrant
+
+ Constructs a QTextCodec, and gives it the highest precedence. The
+ QTextCodec should always be constructed on the heap (i.e. with \c
+ new). Qt takes ownership and will delete it when the application
+ terminates.
+*/
+QTextCodec::QTextCodec()
+{
+ // Make sure the global codec list exists before registering in it.
+ setup();
+ // Prepending puts the newest codec at the front, so the front-to-back
+ // searches in codecForName() and codecForMib() find it first.
+ all->prepend(this);
+}
+
+
+/*!
+ \nonreentrant
+
+ Destroys the QTextCodec. Note that you should not delete codecs
+ yourself: once created they become Qt's responsibility.
+*/
+QTextCodec::~QTextCodec()
+{
+ // destroying_is_ok is only true while QTextCodecCleanup's destructor
+ // runs; a deletion at any other time is reported as a caller error.
+ if (!destroying_is_ok)
+ qWarning("QTextCodec::~QTextCodec: Called by application");
+ // Unregister this codec, provided the registry still exists.
+ if (all)
+ all->removeAll(this);
+}
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForName(const char *name)
+
+ Searches all installed QTextCodec objects and returns the one
+ which best matches \a name; the match is case-insensitive. Returns
+ 0 if no codec matching the name \a name could be found.
+*/
+
+/*!
+ Searches all installed QTextCodec objects and returns the one
+ which best matches \a name; the match is case-insensitive. Returns
+ 0 if no codec matching the name \a name could be found.
+*/
+QTextCodec *QTextCodec::codecForName(const QByteArray &name)
+{
+    if (name.isEmpty())
+        return 0;
+
+    setup();
+
+    // Registered codecs first, in registry order (most recently created
+    // first); a codec matches through its name or any of its aliases.
+    for (int i = 0; i < all->size(); ++i) {
+        QTextCodec *cursor = all->at(i);
+        if (nameMatch(cursor->name(), name))
+            return cursor;
+        const QList<QByteArray> aliases = cursor->aliases();
+        // separate index: the inner loop used to shadow the outer `i`
+        for (int j = 0; j < aliases.size(); ++j)
+            if (nameMatch(aliases.at(j), name))
+                return cursor;
+    }
+
+    // Nothing registered under that name: try to create one from a plugin.
+    return createForName(name);
+}
+
+
+/*!
+ Returns the QTextCodec which matches the \link
+ QTextCodec::mibEnum() MIBenum\endlink \a mib.
+*/
+QTextCodec* QTextCodec::codecForMib(int mib)
+{
+    setup();
+
+    // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
+    // this correctly for compatibility.
+    if (mib == 1000)
+        mib = 1015;
+
+    // Registered codecs first, in registry order. (The unused ConstIterator
+    // that the loop index shadowed has been removed.)
+    for (int i = 0; i < all->size(); ++i) {
+        QTextCodec *cursor = all->at(i);
+        if (cursor->mibEnum() == mib)
+            return cursor;
+    }
+
+    // Nothing registered with that MIB: try to create one from a plugin.
+    return createForMib(mib);
+}
+
+/*!
+ Returns the list of all available codecs, by name. Call
+ QTextCodec::codecForName() to obtain the QTextCodec for the name.
+
+ The list may contain many mentions of the same codec
+ if the codec has aliases.
+
+ \sa availableMibs(), name(), aliases()
+*/
+QList<QByteArray> QTextCodec::availableCodecs()
+{
+    setup();
+
+    // name and aliases of every registered codec, in registry order
+    QList<QByteArray> names;
+    for (int idx = 0; idx < all->size(); ++idx) {
+        const QTextCodec *codec = all->at(idx);
+        names.append(codec->name());
+        names += codec->aliases();
+    }
+#ifndef QT_NO_TEXTCODECPLUGIN
+    // plus the names offered by plugins; keys of the form "MIB: <number>"
+    // advertise MIB enums rather than names and are skipped here
+    const QStringList pluginKeys = loader()->keys();
+    for (int idx = 0; idx < pluginKeys.size(); ++idx) {
+        const QString &key = pluginKeys.at(idx);
+        if (key.startsWith(QLatin1String("MIB: ")))
+            continue;
+        const QByteArray pluginName = key.toLatin1();
+        if (!names.contains(pluginName))
+            names.append(pluginName);
+    }
+#endif
+
+    return names;
+}
+
+/*!
+ Returns the list of MIBs for all available codecs. Call
+ QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
+
+ \sa availableCodecs(), mibEnum()
+*/
+QList<int> QTextCodec::availableMibs()
+{
+    setup();
+
+    // MIB enum of every registered codec, in registry order
+    QList<int> mibs;
+    for (int idx = 0; idx < all->size(); ++idx)
+        mibs.append(all->at(idx)->mibEnum());
+#ifndef QT_NO_TEXTCODECPLUGIN
+    // plus the MIBs offered by plugins under keys of the form "MIB: <number>"
+    const QStringList pluginKeys = loader()->keys();
+    for (int idx = 0; idx < pluginKeys.size(); ++idx) {
+        const QString &key = pluginKeys.at(idx);
+        if (!key.startsWith(QLatin1String("MIB: ")))
+            continue;
+        // the number starts after the 5-character "MIB: " prefix
+        const int pluginMib = key.mid(5).toInt();
+        if (!mibs.contains(pluginMib))
+            mibs.append(pluginMib);
+    }
+#endif
+
+    return mibs;
+}
+
+/*!
+ Set the codec to \a c; this will be returned by
+ codecForLocale(). If \a c is a null pointer, the codec is reset to
+ the default.
+
+ This might be needed for some applications that want to use their
+ own mechanism for setting the locale.
+
+ Setting this codec is not supported on DOS based Windows.
+
+ \sa codecForLocale()
+*/
+void QTextCodec::setCodecForLocale(QTextCodec *c)
+{
+#ifdef Q_WS_WIN
+    // not supported on DOS based Windows: leave the codec untouched
+    const bool dosBased = (QSysInfo::WindowsVersion & QSysInfo::WV_DOS_based) != 0;
+    if (dosBased)
+        return;
+#endif
+    localeMapper = c;
+    if (c)
+        return;
+
+    // a null codec means "reset": determine the default codec again
+    setupLocaleMapper();
+}
+
+/*!
+ Returns a pointer to the codec most suitable for this locale.
+
+ On Windows, the codec will be based on a system locale. On Unix
+ systems, starting with Qt 4.2, the codec will be using the \e
+ iconv library. Note that in both cases the codec's name will be
+ "System".
+*/
+
+QTextCodec* QTextCodec::codecForLocale()
+{
+    // setup() selects the locale codec as its last step if none is set yet
+    if (!localeMapper)
+        setup();
+
+    return localeMapper;
+}
+
+
+/*!
+ \fn QByteArray QTextCodec::name() const
+
+ QTextCodec subclasses must reimplement this function. It returns
+ the name of the encoding supported by the subclass.
+
+ If the codec is registered as a character set in the
+ \l{IANA character-sets encoding file} this method should
+ return the preferred mime name for the codec if defined,
+ otherwise its name.
+*/
+
+/*!
+ \fn int QTextCodec::mibEnum() const
+
+ Subclasses of QTextCodec must reimplement this function. It
+ returns the MIBenum (see \l{IANA character-sets encoding file}
+ for more information). It is important that each QTextCodec
+ subclass returns the correct unique value for this function.
+*/
+
+/*!
+ Subclasses can return a number of aliases for the codec in question.
+
+ Standard aliases for codecs can be found in the
+ \l{IANA character-sets encoding file}.
+*/
+QList<QByteArray> QTextCodec::aliases() const
+{
+ // Default implementation: a codec has no aliases unless it says so.
+ return QList<QByteArray>();
+}
+
+/*!
+ \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
+ ConverterState *state) const
+
+ QTextCodec subclasses must reimplement this function.
+
+ Converts the first \a len characters of \a chars from the
+ encoding of the subclass to Unicode, and returns the result in a
+ QString.
+
+ \a state can be 0, in which case the conversion is stateless and
+ default conversion rules should be used. If state is not 0, the
+ codec should save the state after the conversion in \a state, and
+ adjust the remainingChars and invalidChars members of the struct.
+*/
+
+/*!
+ \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
+ ConverterState *state) const
+
+ QTextCodec subclasses must reimplement this function.
+
+ Converts the first \a number of characters from the \a input array
+ from Unicode to the encoding of the subclass, and returns the result
+ in a QByteArray.
+
+ \a state can be 0 in which case the conversion is stateless and
+ default conversion rules should be used. If state is not 0, the
+ codec should save the state after the conversion in \a state, and
+ adjust the remainingChars and invalidChars members of the struct.
+*/
+
+/*!
+ Creates a QTextDecoder which stores enough state to decode chunks
+ of \c{char *} data to create chunks of Unicode data.
+
+ The caller is responsible for deleting the returned object.
+*/
+QTextDecoder* QTextCodec::makeDecoder() const
+{
+    // Heap-allocated decoder bound to this codec; the caller deletes it.
+    QTextDecoder *decoder = new QTextDecoder(this);
+    return decoder;
+}
+
+
+/*!
+ Creates a QTextEncoder which stores enough state to encode chunks
+ of Unicode data as \c{char *} data.
+
+ The caller is responsible for deleting the returned object.
+*/
+QTextEncoder* QTextCodec::makeEncoder() const
+{
+    // Heap-allocated encoder bound to this codec; the caller deletes it.
+    QTextEncoder *encoder = new QTextEncoder(this);
+    return encoder;
+}
+
+/*!
+ \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
+ ConverterState *state) const
+
+ Converts the first \a number of characters from the \a input array
+ from Unicode to the encoding of this codec, and returns the result
+ in a QByteArray.
+
+ The \a state of the convertor used is updated.
+*/
+
+/*!
+ Converts \a str from Unicode to the encoding of this codec, and
+ returns the result in a QByteArray.
+*/
+QByteArray QTextCodec::fromUnicode(const QString& str) const
+{
+    // Stateless conversion of the whole string: no ConverterState is passed.
+    const QChar *unicode = str.constData();
+    const int count = str.length();
+    return convertFromUnicode(unicode, count, 0);
+}
+
+/*!
+ \fn QString QTextCodec::toUnicode(const char *input, int size,
+ ConverterState *state) const
+
+ Converts the first \a size characters from the \a input from the
+ encoding of this codec to Unicode, and returns the result in a
+ QString.
+
+ The \a state of the convertor used is updated.
+*/
+
+/*!
+ Converts \a a from the encoding of this codec to Unicode, and
+ returns the result in a QString.
+*/
+QString QTextCodec::toUnicode(const QByteArray& a) const
+{
+    // Stateless conversion of every byte in a: no ConverterState is passed.
+    const char *bytes = a.constData();
+    const int byteCount = a.length();
+    return convertToUnicode(bytes, byteCount, 0);
+}
+
+/*!
+ Returns true if the Unicode character \a ch can be fully encoded
+ with this codec; otherwise returns false.
+*/
+bool QTextCodec::canEncode(QChar ch) const
+{
+    // Trial-encode the character; the output is discarded and only the
+    // invalid-character count left in the state is inspected.
+    ConverterState probe(ConvertInvalidToNull);
+    convertFromUnicode(&ch, 1, &probe);
+    return !probe.invalidChars;
+}
+
+/*!
+ \overload
+
+ \a s contains the string being tested for encode-ability.
+*/
+bool QTextCodec::canEncode(const QString& s) const
+{
+    // Trial-encode the whole string; the output is discarded and only the
+    // invalid-character count left in the state is inspected.
+    ConverterState probe(ConvertInvalidToNull);
+    convertFromUnicode(s.constData(), s.length(), &probe);
+    return !probe.invalidChars;
+}
+
+#ifdef QT3_SUPPORT
+/*!
+ Returns a string representing the current language and
+ sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
+
+ \sa QLocale
+*/
+const char *QTextCodec::locale()
+{
+    // One static buffer shared by all calls: up to five characters of the
+    // system locale name plus the terminating NUL.
+    static char buffer[6];
+    const QByteArray systemName = QLocale::system().name().toLatin1();
+    const int copied = qMin(systemName.length(), 5);
+    memcpy(buffer, systemName.constData(), copied);
+    buffer[copied] = '\0';
+
+    return buffer;
+}
+
+/*!
+ \overload
+*/
+
+QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
+{
+    // Converts up to lenInOut characters of uc without conversion state.
+    // The count is capped at uc.length() so the codec is never asked to
+    // read past the end of the string, in line with the clamping done by
+    // toUnicode(const QByteArray &, int).
+    const int count = qMin(uc.length(), lenInOut);
+    QByteArray result = convertFromUnicode(uc.constData(), count, 0);
+    // Out-parameter: length in bytes of the encoded result.
+    lenInOut = result.length();
+    return result;
+}
+
+/*!
+ \overload
+
+ \a a contains the source characters; \a len contains the number of
+ characters in \a a to use.
+*/
+QString QTextCodec::toUnicode(const QByteArray& a, int len) const
+{
+    // Never convert more bytes than a actually contains.
+    const int usable = qMin(a.size(), len);
+    return convertToUnicode(a.constData(), usable, 0);
+}
+#endif
+
+/*!
+ \overload
+
+ \a chars contains the source characters.
+*/
+QString QTextCodec::toUnicode(const char *chars) const
+{
+    // The input length is taken from qstrlen(); conversion is stateless.
+    return convertToUnicode(chars, int(qstrlen(chars)), 0);
+}
+
+
+/*!
+ \class QTextEncoder
+ \brief The QTextEncoder class provides a state-based encoder.
+ \reentrant
+ \ingroup i18n
+
+ A text encoder converts text from Unicode into an encoded text format
+ using a specific codec.
+
+ The encoder converts Unicode into another format, remembering any
+ state that is required between calls.
+
+ \sa QTextCodec::makeEncoder(), QTextDecoder
+*/
+
+/*!
+ \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
+
+ Constructs a text encoder for the given \a codec.
+*/
+
+/*!
+ Destroys the encoder.
+*/
+QTextEncoder::~QTextEncoder()
+{
+ // Intentionally empty: the codec pointer is not deleted here.
+}
+
+/*! \internal
+ \since 4.5
+ Determines whether the encoder encountered a failure while encoding the input. If
+ an error was encountered, the produced result is undefined, and gets converted
+ according to the conversion flags.
+ */
+bool QTextEncoder::hasFailure() const
+{
+    // A non-zero invalid-character count in the state means failure.
+    const bool sawInvalid = (state.invalidChars != 0);
+    return sawInvalid;
+}
+
+/*!
+ Converts the Unicode string \a str into an encoded QByteArray.
+*/
+QByteArray QTextEncoder::fromUnicode(const QString& str)
+{
+    // Encode the whole string, passing this encoder's conversion state.
+    return c->fromUnicode(str.constData(), str.length(), &state);
+}
+
+/*!
+ \overload
+
+ Converts \a len characters (not bytes) from \a uc, and returns the
+ result in a QByteArray.
+*/
+QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
+{
+    // Encode len characters, passing this encoder's conversion state.
+    return c->fromUnicode(uc, len, &state);
+}
+
+#ifdef QT3_SUPPORT
+/*!
+ \overload
+
+ Converts \a lenInOut characters (not bytes) from \a uc, and returns the
+ result in a QByteArray. On return, \a lenInOut is set to the length in
+ bytes of the returned QByteArray.
+*/
+QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
+{
+    // Encodes up to lenInOut characters of uc using this encoder's state.
+    // The count is capped at uc.length() so the codec is never asked to
+    // read past the end of the string.
+    const int count = qMin(uc.length(), lenInOut);
+    QByteArray result = c->fromUnicode(uc.constData(), count, &state);
+    // Out-parameter: length in bytes of the encoded result.
+    lenInOut = result.length();
+    return result;
+}
+#endif
+
+/*!
+ \class QTextDecoder
+ \brief The QTextDecoder class provides a state-based decoder.
+ \reentrant
+ \ingroup i18n
+
+ A text decoder converts text from an encoded text format into Unicode
+ using a specific codec.
+
+ The decoder converts text in this format into Unicode, remembering any
+ state that is required between calls.
+
+ \sa QTextCodec::makeDecoder(), QTextEncoder
+*/
+
+/*!
+ \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
+
+ Constructs a text decoder for the given \a codec.
+*/
+
+/*!
+ Destroys the decoder.
+*/
+QTextDecoder::~QTextDecoder()
+{
+ // Intentionally empty: the codec pointer is not deleted here.
+}
+
+/*!
+ \fn QString QTextDecoder::toUnicode(const char *chars, int len)
+
+ Converts the first \a len bytes in \a chars to Unicode, returning
+ the result.
+
+ If not all characters are used (e.g. if only part of a multi-byte
+ encoding is at the end of the characters), the decoder remembers
+ enough state to continue with the next call to this function.
+*/
+QString QTextDecoder::toUnicode(const char *chars, int len)
+{
+    // Decode through the codec, passing this decoder's conversion state.
+    QString decoded = c->toUnicode(chars, len, &state);
+    return decoded;
+}
+
+
+/*! \overload
+
+ The converted string is returned in \a target.
+ */
+void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
+{
+    // Decodes len bytes of chars into *target. UTF-8 and Latin-1 are
+    // special-cased by MIB enum; every other codec goes through the
+    // generic QTextCodec::toUnicode() and the result is assigned.
+    Q_ASSERT(target);
+    switch (c->mibEnum()) {
+    case 106: // utf8
+        static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
+        break;
+    case 4: { // latin1
+        target->resize(len);
+        ushort *data = (ushort*)target->data();
+        // Each Latin-1 byte widens to one 16-bit unit. Only chars[0..len-1]
+        // belongs to the input, so the loop starts at len - 1; starting at
+        // len would read one byte past the caller's buffer and overwrite
+        // data[len] with it.
+        for (int i = len - 1; i >= 0; --i)
+            data[i] = (uchar) chars[i];
+    } break;
+    default:
+        *target = c->toUnicode(chars, len, &state);
+    }
+}
+
+
+/*!
+ \overload
+
+ Converts the bytes in the byte array specified by \a ba to Unicode
+ and returns the result.
+*/
+QString QTextDecoder::toUnicode(const QByteArray &ba)
+{
+    // Decode every byte of ba, passing this decoder's conversion state.
+    const char *bytes = ba.constData();
+    const int byteCount = ba.length();
+    return c->toUnicode(bytes, byteCount, &state);
+}
+
+
+/*!
+ \fn QTextCodec* QTextCodec::codecForTr()
+
+ Returns the codec used by QObject::tr() on its argument. If this
+ function returns 0 (the default), tr() assumes Latin-1.
+
+ \sa setCodecForTr()
+*/
+
+/*!
+ \fn void QTextCodec::setCodecForTr(QTextCodec *c)
+ \nonreentrant
+
+ Sets the codec used by QObject::tr() on its argument to \a c. If
+ \a c is 0 (the default), tr() assumes Latin-1.
+
+ If the literal quoted text in the program is not in the Latin-1
+ encoding, this function can be used to set the appropriate
+ encoding. For example, software developed by Korean programmers
+ might use eucKR for all the text in the program, in which case the
+ main() function might look like this:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
+
+ Note that this is not the way to select the encoding that the \e
+ user has chosen. For example, to convert an application containing
+ literal English strings to Korean, all that is needed is for the
+ English strings to be passed through tr() and for translation
+ files to be loaded. For details of internationalization, see
+ \l{Internationalization with Qt}.
+
+ \sa codecForTr(), setCodecForCStrings()
+*/
+
+
+/*!
+ \fn QTextCodec* QTextCodec::codecForCStrings()
+
+ Returns the codec used by QString to convert to and from \c{const
+ char *} and QByteArrays. If this function returns 0 (the default),
+ QString assumes Latin-1.
+
+ \sa setCodecForCStrings()
+*/
+
+/*!
+ \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
+ \nonreentrant
+
+ Sets the codec used by QString to convert to and from \c{const
+ char *} and QByteArrays. If the \a codec is 0 (the default),
+ QString assumes Latin-1.
+
+ \warning Some codecs do not preserve the characters in the ASCII
+ range (0x00 to 0x7F). For example, the Japanese Shift-JIS
+ encoding maps the backslash character (0x5C) to the Yen
+ character. To avoid undesirable side-effects, we recommend
+ avoiding such codecs with setCodecForCStrings().
+
+ \sa codecForCStrings(), setCodecForTr()
+*/
+
+/*!
+ \since 4.4
+
+ Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba,
+ and returns a QTextCodec instance that is capable of decoding the html to unicode.
+ If the codec cannot be detected from the content provided, \a defaultCodec is returned.
+*/
+QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
+{
+    // Detection order: UTF-16 byte order mark, UTF-8 byte order mark, then
+    // a charset= declaration following an http-equiv= attribute within the
+    // first 512 bytes. defaultCodec is returned when nothing matches.
+    QTextCodec *c = 0;
+
+    if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
+                          || ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) {
+        c = QTextCodec::codecForMib(1015); // utf16
+    } else if (ba.size() > 2
+               && (uchar)ba[0] == 0xef
+               && (uchar)ba[1] == 0xbb
+               && (uchar)ba[2] == 0xbf) {
+        c = QTextCodec::codecForMib(106); // utf-8
+    } else {
+        // Lower-cased copy so the attribute search is case-insensitive.
+        QByteArray header = ba.left(512).toLower();
+        int pos = header.indexOf("http-equiv=");
+        if (pos != -1)
+            pos = header.indexOf("charset=", pos);
+        // The search result is tested before the keyword length is added.
+        // Adding first would turn a "not found" -1 into 7 and make the
+        // check pass with a bogus position.
+        if (pos != -1) {
+            pos += int(strlen("charset="));
+            int pos2 = header.indexOf('\"', pos+1);
+            // Without a closing quote the remainder of the header is used
+            // as the charset name.
+            QByteArray cs = (pos2 == -1) ? header.mid(pos) : header.mid(pos, pos2-pos);
+            // qDebug("found charset: %s", cs.data());
+            c = QTextCodec::codecForName(cs);
+        }
+    }
+    if (!c)
+        c = defaultCodec;
+
+    return c;
+}
+
+/*!
+ \overload
+
+ If the codec cannot be detected, this overload returns a Latin-1 QTextCodec.
+*/
+QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
+{
+    // MIB enum 4 is Latin 1, used as the fallback codec.
+    QTextCodec *latin1 = QTextCodec::codecForMib(4);
+    return codecForHtml(ba, latin1);
+}
+
+
+/*! \internal
+ \since 4.3
+ Determines whether the decoder encountered a failure while decoding the input. If
+ an error was encountered, the produced result is undefined, and gets converted
+ according to the conversion flags.
+ */
+bool QTextDecoder::hasFailure() const
+{
+    // A non-zero invalid-character count in the state means failure.
+    const bool sawInvalid = (state.invalidChars != 0);
+    return sawInvalid;
+}
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
+
+ This functionality is no longer provided by Qt. This
+ compatibility function always returns a null pointer.
+*/
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
+
+ Use the codecForName(const QByteArray &) overload instead.
+*/
+
+/*!
+ \fn QTextCodec *QTextCodec::codecForIndex(int i)
+
+ Use availableCodecs() or availableMibs() instead and iterate
+ through the resulting list.
+*/
+
+
+/*!
+ \fn QByteArray QTextCodec::mimeName() const
+
+ Use name() instead.
+*/
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_TEXTCODEC
diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h
new file mode 100644
index 0000000000..e32650fb4b
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec.h
@@ -0,0 +1,189 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QTEXTCODEC_H
+#define QTEXTCODEC_H
+
+#include <QtCore/qstring.h>
+#include <QtCore/qlist.h>
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+QT_MODULE(Core)
+
+#ifndef QT_NO_TEXTCODEC
+
+class QTextCodec;
+class QIODevice;
+
+class QTextDecoder;
+class QTextEncoder;
+
+// Abstract base class for conversions between Unicode text and a byte
+// encoding. Subclasses provide name(), mibEnum() and the two protected
+// convert*() functions; the static members look codecs up and give access
+// to the locale, tr() and C-string codecs.
+class Q_CORE_EXPORT QTextCodec
+{
+ Q_DISABLE_COPY(QTextCodec)
+public:
+ // Codec lookup by name or by MIB enum.
+ static QTextCodec* codecForName(const QByteArray &name);
+ static QTextCodec* codecForName(const char *name) { return codecForName(QByteArray(name)); }
+ static QTextCodec* codecForMib(int mib);
+
+ static QList<QByteArray> availableCodecs();
+ static QList<int> availableMibs();
+
+ static QTextCodec* codecForLocale();
+ static void setCodecForLocale(QTextCodec *c);
+
+ static QTextCodec* codecForTr();
+ static void setCodecForTr(QTextCodec *c);
+
+ static QTextCodec* codecForCStrings();
+ static void setCodecForCStrings(QTextCodec *c);
+
+ // Encoding detection for HTML; the one-argument overload falls back to
+ // the codec with MIB enum 4 (Latin 1).
+ static QTextCodec *codecForHtml(const QByteArray &ba);
+ static QTextCodec *codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec);
+
+ // Factories for stateful converters; the caller deletes the result.
+ QTextDecoder* makeDecoder() const;
+ QTextEncoder* makeEncoder() const;
+
+ bool canEncode(QChar) const;
+ bool canEncode(const QString&) const;
+
+ // Stateless convenience conversions.
+ QString toUnicode(const QByteArray&) const;
+ QString toUnicode(const char* chars) const;
+ QByteArray fromUnicode(const QString& uc) const;
+ // Flags stored in ConverterState::flags. canEncode() sets
+ // ConvertInvalidToNull for its trial conversion.
+ // NOTE(review): the meaning of IgnoreHeader and FreeFunction is not
+ // visible in this header - confirm against the codec implementations.
+ enum ConversionFlag {
+ DefaultConversion,
+ ConvertInvalidToNull = 0x80000000,
+ IgnoreHeader = 0x1,
+ FreeFunction = 0x2
+ };
+ Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag)
+
+ // Conversion state handed to the convert*() functions. The constructor
+ // zeroes every member except flags; copying is disabled.
+ struct Q_CORE_EXPORT ConverterState {
+ ConverterState(ConversionFlags f = DefaultConversion)
+ : flags(f), remainingChars(0), invalidChars(0), d(0) { state_data[0] = state_data[1] = state_data[2] = 0; }
+ ~ConverterState();
+ ConversionFlags flags;
+ int remainingChars;
+ int invalidChars;
+ uint state_data[3];
+ void *d;
+ private:
+ Q_DISABLE_COPY(ConverterState)
+ };
+
+ // Public entry points that forward to the protected virtual converters.
+ QString toUnicode(const char *in, int length, ConverterState *state = 0) const
+ { return convertToUnicode(in, length, state); }
+ QByteArray fromUnicode(const QChar *in, int length, ConverterState *state = 0) const
+ { return convertFromUnicode(in, length, state); }
+
+ virtual QByteArray name() const = 0;
+ virtual QList<QByteArray> aliases() const;
+ virtual int mibEnum() const = 0;
+
+protected:
+ // The actual conversion, implemented by each codec subclass.
+ virtual QString convertToUnicode(const char *in, int length, ConverterState *state) const = 0;
+ virtual QByteArray convertFromUnicode(const QChar *in, int length, ConverterState *state) const = 0;
+
+ QTextCodec();
+ virtual ~QTextCodec();
+
+public:
+#ifdef QT3_SUPPORT
+ // Qt 3 compatibility API; codecForContent() always returns 0.
+ static QT3_SUPPORT QTextCodec* codecForContent(const char*, int) { return 0; }
+ static QT3_SUPPORT const char* locale();
+ static QT3_SUPPORT QTextCodec* codecForName(const char* hint, int) { return codecForName(QByteArray(hint)); }
+ QT3_SUPPORT QByteArray fromUnicode(const QString& uc, int& lenInOut) const;
+ QT3_SUPPORT QString toUnicode(const QByteArray&, int len) const;
+ QT3_SUPPORT QByteArray mimeName() const { return name(); }
+ static QT3_SUPPORT QTextCodec *codecForIndex(int i) { return codecForName(availableCodecs().value(i)); }
+#endif
+
+private:
+ friend class QTextCodecCleanup;
+ // Codec returned by codecForTr() and set by setCodecForTr().
+ static QTextCodec *cftr;
+};
+Q_DECLARE_OPERATORS_FOR_FLAGS(QTextCodec::ConversionFlags)
+
+// Accessors for the tr() codec, stored in the static member cftr.
+inline QTextCodec* QTextCodec::codecForTr()
+{
+    return cftr;
+}
+inline void QTextCodec::setCodecForTr(QTextCodec *c)
+{
+    cftr = c;
+}
+// Accessors for the C-string codec, stored in QString::codecForCStrings.
+inline QTextCodec* QTextCodec::codecForCStrings()
+{
+    return QString::codecForCStrings;
+}
+inline void QTextCodec::setCodecForCStrings(QTextCodec *c)
+{
+    QString::codecForCStrings = c;
+}
+
+// Encodes Unicode through a single codec, passing the same ConverterState
+// to every call so state can carry over between chunks.
+class Q_CORE_EXPORT QTextEncoder {
+ Q_DISABLE_COPY(QTextEncoder)
+public:
+ explicit QTextEncoder(const QTextCodec *codec) : c(codec), state() {}
+ ~QTextEncoder();
+ QByteArray fromUnicode(const QString& str);
+ QByteArray fromUnicode(const QChar *uc, int len);
+#ifdef QT3_SUPPORT
+ QByteArray fromUnicode(const QString& uc, int& lenInOut);
+#endif
+ // True once the state has recorded at least one invalid character.
+ bool hasFailure() const;
+private:
+ // Codec used for every conversion.
+ const QTextCodec *c;
+ // Conversion state shared by all calls on this encoder.
+ QTextCodec::ConverterState state;
+};
+
+// Decodes bytes to Unicode through a single codec, passing the same
+// ConverterState to every call so state can carry over between chunks.
+class Q_CORE_EXPORT QTextDecoder {
+ Q_DISABLE_COPY(QTextDecoder)
+public:
+ explicit QTextDecoder(const QTextCodec *codec) : c(codec), state() {}
+ ~QTextDecoder();
+ QString toUnicode(const char* chars, int len);
+ QString toUnicode(const QByteArray &ba);
+ // Variant that writes the decoded text into *target.
+ void toUnicode(QString *target, const char *chars, int len);
+ // True once the state has recorded at least one invalid character.
+ bool hasFailure() const;
+private:
+ // Codec used for every conversion.
+ const QTextCodec *c;
+ // Conversion state shared by all calls on this decoder.
+ QTextCodec::ConverterState state;
+};
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+#endif // QTEXTCODEC_H
diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h
new file mode 100644
index 0000000000..39f643df7a
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec_p.h
@@ -0,0 +1,84 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QTEXTCODEC_P_H
+#define QTEXTCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of the QTextCodec class. This header file may change from
+// version to version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "qtextcodec.h"
+#include <string.h>
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+
+typedef void (*QTextCodecStateFreeFunction)(QTextCodec::ConverterState*);
+
+// Stores a QTextCodecStateFreeFunction pointer in, and reads it back from,
+// a uint buffer. The bytes are moved with memcpy through a quintptr rather
+// than by casting the buffer pointer.
+struct QTextCodecUnalignedPointer
+{
+    // Rebuilds the function pointer from the sizeof(quintptr) bytes at src.
+    static inline QTextCodecStateFreeFunction decode(const uint *src)
+    {
+        quintptr bits;
+        memcpy(&bits, src, sizeof(quintptr));
+        return reinterpret_cast<QTextCodecStateFreeFunction>(bits);
+    }
+    // Writes fn as sizeof(quintptr) bytes at dst.
+    static inline void encode(uint *dst, QTextCodecStateFreeFunction fn)
+    {
+        const quintptr bits = reinterpret_cast<quintptr>(fn);
+        memcpy(dst, &bits, sizeof(quintptr));
+    }
+};
+
+#endif //QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+#endif
diff --git a/src/corelib/codecs/qtextcodecplugin.cpp b/src/corelib/codecs/qtextcodecplugin.cpp
new file mode 100644
index 0000000000..7342b45a66
--- /dev/null
+++ b/src/corelib/codecs/qtextcodecplugin.cpp
@@ -0,0 +1,161 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qtextcodecplugin.h"
+#include "qstringlist.h"
+
+#ifndef QT_NO_TEXTCODECPLUGIN
+
+QT_BEGIN_NAMESPACE
+
+/*!
+ \class QTextCodecPlugin
+ \brief The QTextCodecPlugin class provides an abstract base for custom QTextCodec plugins.
+ \reentrant
+ \ingroup plugins
+
+ The text codec plugin is a simple plugin interface that makes it
+ easy to create custom text codecs that can be loaded dynamically
+ into applications.
+
+ Writing a text codec plugin is achieved by subclassing this base
+ class, reimplementing the pure virtual functions names(),
+ aliases(), createForName(), mibEnums() and createForMib(), and
+ exporting the class with the Q_EXPORT_PLUGIN2() macro. See \l{How
+ to Create Qt Plugins} for details.
+
+ See the \l{http://www.iana.org/assignments/character-sets}{IANA
+ character-sets encoding file} for more information on mime
+ names and mib enums.
+*/
+
+/*!
+ \fn QList<QByteArray> QTextCodecPlugin::names() const
+
+ Returns the list of MIME names supported by this plugin.
+
+ If a codec has several names, the extra names are returned by aliases().
+
+ \sa createForName(), aliases()
+*/
+
+/*!
+ \fn QList<QByteArray> QTextCodecPlugin::aliases() const
+
+ Returns the list of aliases supported by this plugin.
+*/
+
+/*!
+ \fn QTextCodec *QTextCodecPlugin::createForName(const QByteArray &name)
+
+ Creates a QTextCodec object for the codec called \a name. The \a name
+ must come from the list of encodings returned by names(). Encoding
+ names are case sensitive.
+
+ Example:
+
+ \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodecplugin.cpp 0
+
+ \sa names()
+*/
+
+
+/*!
+ \fn QList<int> QTextCodecPlugin::mibEnums() const
+
+ Returns the list of mib enums supported by this plugin.
+
+ \sa createForMib()
+*/
+
+/*!
+ \fn QTextCodec *QTextCodecPlugin::createForMib(int mib);
+
+ Creates a QTextCodec object for the mib enum \a mib.
+
+ See \l{http://www.iana.org/assignments/character-sets}{the
+ IANA character-sets encoding file} for more information.
+
+ \sa mibEnums()
+*/
+
+/*!
+ Constructs a text codec plugin with the given \a parent. This is
+ invoked automatically by the Q_EXPORT_PLUGIN2() macro.
+*/
+QTextCodecPlugin::QTextCodecPlugin(QObject *parent)
+ : QObject(parent)
+{
+ // Nothing beyond passing parent on to the QObject base.
+}
+
+/*!
+ Destroys the text codec plugin.
+
+ You never have to call this explicitly. Qt destroys a plugin
+ automatically when it is no longer used.
+*/
+QTextCodecPlugin::~QTextCodecPlugin()
+{
+ // Intentionally empty.
+}
+
+QStringList QTextCodecPlugin::keys() const
+{
+    // Names first, then aliases, each converted from Latin-1.
+    QList<QByteArray> encodings = names();
+    encodings += aliases();
+
+    QStringList result;
+    for (int n = 0; n < encodings.size(); ++n)
+        result.append(QString::fromLatin1(encodings.at(n)));
+
+    // MIB enums are published as "MIB: <number>", the form create() looks for.
+    const QList<int> mibList = mibEnums();
+    for (int n = 0; n < mibList.size(); ++n)
+        result.append(QLatin1String("MIB: ") + QString::number(mibList.at(n)));
+
+    return result;
+}
+
+QTextCodec *QTextCodecPlugin::create(const QString &name)
+{
+    // Keys of the form "MIB: <number>" select a codec by MIB enum; any
+    // other key is treated as a codec name.
+    static const char mibPrefix[] = "MIB: ";
+    // Length without the trailing NUL. The number is taken from directly
+    // after the full prefix, so the prefix's space is not part of the
+    // text handed to toInt().
+    const int mibPrefixLength = int(sizeof(mibPrefix)) - 1;
+    if (name.startsWith(QLatin1String(mibPrefix)))
+        return createForMib(name.mid(mibPrefixLength).toInt());
+    return createForName(name.toLatin1());
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_TEXTCODECPLUGIN
diff --git a/src/corelib/codecs/qtextcodecplugin.h b/src/corelib/codecs/qtextcodecplugin.h
new file mode 100644
index 0000000000..4600fecc3a
--- /dev/null
+++ b/src/corelib/codecs/qtextcodecplugin.h
@@ -0,0 +1,96 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QTEXTCODECPLUGIN_H
+#define QTEXTCODECPLUGIN_H
+
+#include <QtCore/qplugin.h>
+#include <QtCore/qfactoryinterface.h>
+#include <QtCore/qlist.h>
+#include <QtCore/qbytearray.h>
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+QT_MODULE(Core)
+
+#ifndef QT_NO_TEXTCODECPLUGIN
+
+class QTextCodec;
+
+// Factory interface for text codec plugins: create() returns the codec
+// that corresponds to a key.
+struct Q_CORE_EXPORT QTextCodecFactoryInterface : public QFactoryInterface
+{
+ virtual QTextCodec *create(const QString &key) = 0;
+};
+
+#define QTextCodecFactoryInterface_iid "com.trolltech.Qt.QTextCodecFactoryInterface"
+
+Q_DECLARE_INTERFACE(QTextCodecFactoryInterface, QTextCodecFactoryInterface_iid)
+
+
+// Abstract base class for codec plugins. Subclasses implement the five
+// pure virtual functions; the private keys() and create() are declared
+// here to implement the factory interface.
+class Q_CORE_EXPORT QTextCodecPlugin : public QObject, public QTextCodecFactoryInterface
+{
+ Q_OBJECT
+ Q_INTERFACES(QTextCodecFactoryInterface:QFactoryInterface)
+public:
+ explicit QTextCodecPlugin(QObject *parent = 0);
+ ~QTextCodecPlugin();
+
+ // Codecs addressed by name or alias.
+ virtual QList<QByteArray> names() const = 0;
+ virtual QList<QByteArray> aliases() const = 0;
+ virtual QTextCodec *createForName(const QByteArray &name) = 0;
+
+ // Codecs addressed by MIB enum.
+ virtual QList<int> mibEnums() const = 0;
+ virtual QTextCodec *createForMib(int mib) = 0;
+
+private:
+ QStringList keys() const;
+ QTextCodec *create(const QString &name);
+};
+
+#endif // QT_NO_TEXTCODECPLUGIN
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+#endif // QTEXTCODECPLUGIN_H
diff --git a/src/corelib/codecs/qtsciicodec.cpp b/src/corelib/codecs/qtsciicodec.cpp
new file mode 100644
index 0000000000..14d2c9c5f9
--- /dev/null
+++ b/src/corelib/codecs/qtsciicodec.cpp
@@ -0,0 +1,500 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// Most of the code here was originally written by Hans Petter Bieker,
+// and is included in Qt with the author's permission, and the grateful
+// thanks of the Trolltech team.
+
+#include "qtsciicodec_p.h"
+#include "qlist.h"
+
+#ifndef QT_NO_CODECS
+
+QT_BEGIN_NAMESPACE
+
+// Forward declarations of the table lookup helpers defined further down in
+// this file.
+static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3);
+static unsigned int qt_TSCIIToUnicode(unsigned int code, uint *s);
+
+// True when c lies in 0x80..0xfd, the byte range the decoder looks up in the
+// TSCII table. Note that c is evaluated twice.
+#define IsTSCIIChar(c) (((c) >= 0x80) && ((c) <= 0xfd))
+
+/*! \class QTsciiCodec
+ \reentrant
+ \internal
+*/
+
+/*!
+ Destroys the text codec object.
+*/
+QTsciiCodec::~QTsciiCodec()
+{
+ // Intentionally empty: nothing is released here.
+}
+
+/*!
+    Converts the first \a len characters in \a uc from Unicode to this
+    encoding, and returns the result in a byte array. The \a state contains
+    some conversion flags, and is used by the codec to maintain state
+    information.
+
+    Characters below U+0080 are copied through unchanged. For anything else
+    the longest run of up to three code units that has an entry in the
+    mapping table is collapsed into a single output byte. A code unit without
+    a mapping is written as '?' (or as 0 when ConvertInvalidToNull is set in
+    \a state) and counted in \a state's invalidChars.
+
+    Only the code units in [0, len) are read; \a uc does not have to be
+    zero-terminated.
+*/
+QByteArray QTsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+    char replacement = '?';
+    if (state && (state->flags & ConvertInvalidToNull))
+        replacement = 0;
+    int invalid = 0;
+
+    QByteArray rstr;
+    rstr.resize(len); // every input code unit yields at most one output byte
+    uchar *cursor = (uchar *)rstr.data();
+    for (int i = 0; i < len; i++) {
+        const QChar ch = uc[i];
+        uchar j = 0;
+        if (ch.row() == 0x00 && ch.cell() < 0x80) {
+            // ASCII
+            j = ch.cell();
+        } else {
+            // Look ahead only inside [0, len). A zero code unit can never be
+            // part of a combination, so it also ends the look-ahead.
+            const ushort u1 = ch.unicode();
+            const ushort u2 = (i + 1 < len) ? uc[i + 1].unicode() : 0;
+            const ushort u3 = (u2 && i + 2 < len) ? uc[i + 2].unicode() : 0;
+
+            // We have to check the combined chars first! The index is only
+            // advanced past code units that really took part in the match.
+            if (u3 && (j = qt_UnicodeToTSCII(u1, u2, u3))) {
+                i += 2;
+            } else if (u2 && (j = qt_UnicodeToTSCII(u1, u2, 0))) {
+                i++;
+            } else if ((j = qt_UnicodeToTSCII(u1, 0, 0))) {
+                // single code unit, nothing more to skip
+            } else {
+                // Error
+                j = replacement;
+                ++invalid;
+            }
+        }
+        *cursor++ = j;
+    }
+    // Combinations consumed several inputs per byte, so shrink to what was written.
+    rstr.resize(cursor - (const uchar *)rstr.constData());
+
+    if (state) {
+        state->invalidChars += invalid;
+    }
+    return rstr;
+}
+
+/*!
+    Converts the first \a len characters in \a chars from this encoding
+    to Unicode, and returns the result in a QString. The \a state contains
+    some conversion flags, and is used by the codec to maintain state
+    information.
+
+    Bytes below 0x80 are taken as ASCII. Bytes in 0x80..0xfd are expanded
+    through the mapping table into one to three code units. Bytes outside
+    that range, and bytes whose table entry is empty, produce the replacement
+    character (QChar::Null when ConvertInvalidToNull is set in \a state) and
+    are counted in \a state's invalidChars.
+*/
+QString QTsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
+{
+    QChar replacement = QChar::ReplacementCharacter;
+    if (state && (state->flags & ConvertInvalidToNull))
+        replacement = QChar::Null;
+    int invalid = 0;
+
+    QString result;
+    for (int i = 0; i < len; i++) {
+        const uchar ch = chars[i];
+        if (ch < 0x80) {
+            // ASCII
+            result += QLatin1Char(ch);
+            continue;
+        }
+
+        uint s[3];
+        const uint count = IsTSCIIChar(ch) ? qt_TSCIIToUnicode(ch, s) : 0;
+        if (count == 0) {
+            // Either outside the TSCII range or a byte with an empty table
+            // entry: report it instead of silently dropping the byte.
+            result += replacement;
+            ++invalid;
+            continue;
+        }
+
+        // TSCII
+        for (uint k = 0; k < count; ++k) {
+            if (s[k]) {
+                result += QChar(s[k]);
+            } else {
+                // hole inside a multi-unit entry
+                result += replacement;
+                ++invalid;
+            }
+        }
+    }
+
+    if (state) {
+        state->invalidChars += invalid;
+    }
+    return result;
+}
+
+/*!
+ Returns the official name for the encoding that is handled by the codec.
+
+ \sa QTextCodec::name()
+*/
+QByteArray QTsciiCodec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("TSCII");
+}
+
+/*!
+ Returns the MIB enum for the encoding.
+
+ \sa QTextCodec::mibEnum()
+*/
+int QTsciiCodec::mibEnum() const
+{
+ /* There is no MIBEnum for TSCII now */
+ // so a fixed negative value is returned instead
+ return -3197;
+}
+
+// Index of the last row of UnToTs; qt_UnicodeToTSCII() uses it as the upper
+// bound of its binary search.
+static const int UnToTsLast = 124; // 125 items -- so the last will be 124
+
+// Unicode -> TSCII. Each row holds up to three code units (unused slots are
+// 0x0000) followed by the TSCII byte they map to. The rows must stay sorted
+// on the first three columns and every key must be unique, otherwise the
+// binary search cannot find all of them.
+static const ushort UnToTs [][4] = {
+    // *Sorted* list of TSCII mapping for unicode chars
+    //FIRST  SECOND  THIRD   TSCII
+    {0x00A0, 0x0000, 0x0000, 0xA0},
+    {0x00A9, 0x0000, 0x0000, 0xA9},
+    {0x0B83, 0x0000, 0x0000, 0xB7},
+    {0x0B85, 0x0000, 0x0000, 0xAB},
+    {0x0B86, 0x0000, 0x0000, 0xAC},
+    {0x0B87, 0x0000, 0x0000, 0xAD},
+    {0x0B88, 0x0000, 0x0000, 0xAE},
+    {0x0B89, 0x0000, 0x0000, 0xAF},
+    {0x0B8A, 0x0000, 0x0000, 0xB0},
+    {0x0B8E, 0x0000, 0x0000, 0xB1},
+    {0x0B8F, 0x0000, 0x0000, 0xB2},
+    {0x0B90, 0x0000, 0x0000, 0xB3},
+    {0x0B92, 0x0000, 0x0000, 0xB4},
+    {0x0B93, 0x0000, 0x0000, 0xB5},
+    {0x0B94, 0x0000, 0x0000, 0xB6},
+    {0x0B95, 0x0000, 0x0000, 0xB8},
+    {0x0B95, 0x0B82, 0x0000, 0xEC},
+    {0x0B95, 0x0BC1, 0x0000, 0xCC},
+    {0x0B95, 0x0BC2, 0x0000, 0xDC},
+    {0x0B99, 0x0000, 0x0000, 0xB9},
+    {0x0B99, 0x0B82, 0x0000, 0xED},
+    {0x0B99, 0x0BC1, 0x0000, 0x99},
+    {0x0B99, 0x0BC2, 0x0000, 0x9B},
+    {0x0B9A, 0x0000, 0x0000, 0xBA},
+    {0x0B9A, 0x0B82, 0x0000, 0xEE},
+    {0x0B9A, 0x0BC1, 0x0000, 0xCD},
+    {0x0B9A, 0x0BC2, 0x0000, 0xDD},
+    {0x0B9C, 0x0000, 0x0000, 0x83},
+    {0x0B9C, 0x0B82, 0x0000, 0x88},
+    {0x0B9E, 0x0000, 0x0000, 0xBB},
+    {0x0B9E, 0x0B82, 0x0000, 0xEF},
+    {0x0B9E, 0x0BC1, 0x0000, 0x9A},
+    {0x0B9E, 0x0BC2, 0x0000, 0x9C},
+    {0x0B9F, 0x0000, 0x0000, 0xBC},
+    {0x0B9F, 0x0B82, 0x0000, 0xF0},
+    {0x0B9F, 0x0BBF, 0x0000, 0xCA},
+    {0x0B9F, 0x0BC0, 0x0000, 0xCB},
+    {0x0B9F, 0x0BC1, 0x0000, 0xCE},
+    {0x0B9F, 0x0BC2, 0x0000, 0xDE},
+    {0x0BA1, 0x0B82, 0x0000, 0xF2},
+    {0x0BA3, 0x0000, 0x0000, 0xBD},
+    {0x0BA3, 0x0B82, 0x0000, 0xF1},
+    {0x0BA3, 0x0BC1, 0x0000, 0xCF},
+    {0x0BA3, 0x0BC2, 0x0000, 0xDF},
+    {0x0BA4, 0x0000, 0x0000, 0xBE},
+    {0x0BA4, 0x0BC1, 0x0000, 0xD0},
+    {0x0BA4, 0x0BC2, 0x0000, 0xE0},
+    {0x0BA8, 0x0000, 0x0000, 0xBF},
+    {0x0BA8, 0x0B82, 0x0000, 0xF3},
+    {0x0BA8, 0x0BC1, 0x0000, 0xD1},
+    {0x0BA8, 0x0BC2, 0x0000, 0xE1},
+    {0x0BA9, 0x0000, 0x0000, 0xC9},
+    {0x0BA9, 0x0B82, 0x0000, 0xFD},
+    {0x0BA9, 0x0BC1, 0x0000, 0xDB},
+    {0x0BA9, 0x0BC2, 0x0000, 0xEB},
+    {0x0BAA, 0x0000, 0x0000, 0xC0},
+    {0x0BAA, 0x0B82, 0x0000, 0xF4},
+    {0x0BAA, 0x0BC1, 0x0000, 0xD2},
+    {0x0BAA, 0x0BC2, 0x0000, 0xE2},
+    {0x0BAE, 0x0000, 0x0000, 0xC1},
+    {0x0BAE, 0x0B82, 0x0000, 0xF5},
+    {0x0BAE, 0x0BC1, 0x0000, 0xD3},
+    {0x0BAE, 0x0BC2, 0x0000, 0xE3},
+    {0x0BAF, 0x0000, 0x0000, 0xC2},
+    {0x0BAF, 0x0B82, 0x0000, 0xF6},
+    {0x0BAF, 0x0BC1, 0x0000, 0xD4},
+    {0x0BAF, 0x0BC2, 0x0000, 0xE4},
+    {0x0BB0, 0x0000, 0x0000, 0xC3},
+    {0x0BB0, 0x0B82, 0x0000, 0xF7},
+    {0x0BB0, 0x0BC1, 0x0000, 0xD5},
+    {0x0BB0, 0x0BC2, 0x0000, 0xE5},
+    {0x0BB1, 0x0000, 0x0000, 0xC8},
+    {0x0BB1, 0x0B82, 0x0000, 0xFC},
+    {0x0BB1, 0x0BC1, 0x0000, 0xDA},
+    {0x0BB1, 0x0BC2, 0x0000, 0xEA},
+    {0x0BB2, 0x0000, 0x0000, 0xC4},
+    {0x0BB2, 0x0B82, 0x0000, 0xF8},
+    {0x0BB2, 0x0BC1, 0x0000, 0xD6},
+    {0x0BB2, 0x0BC2, 0x0000, 0xE6},
+    {0x0BB3, 0x0000, 0x0000, 0xC7},
+    {0x0BB3, 0x0B82, 0x0000, 0xFB},
+    {0x0BB3, 0x0BC1, 0x0000, 0xD9},
+    {0x0BB3, 0x0BC2, 0x0000, 0xE9},
+    {0x0BB4, 0x0000, 0x0000, 0xC6},
+    {0x0BB4, 0x0B82, 0x0000, 0xFA},
+    {0x0BB4, 0x0BC1, 0x0000, 0xD8},
+    {0x0BB4, 0x0BC2, 0x0000, 0xE8},
+    {0x0BB5, 0x0000, 0x0000, 0xC5},
+    {0x0BB5, 0x0B82, 0x0000, 0xF9},
+    {0x0BB5, 0x0BC1, 0x0000, 0xD7},
+    {0x0BB5, 0x0BC2, 0x0000, 0xE7},
+    {0x0BB7, 0x0000, 0x0000, 0x84},
+    {0x0BB7, 0x0B82, 0x0000, 0x89},
+    {0x0BB8, 0x0000, 0x0000, 0x85},
+    {0x0BB8, 0x0B82, 0x0000, 0x8A},
+    {0x0BB9, 0x0000, 0x0000, 0x86},
+    {0x0BB9, 0x0B82, 0x0000, 0x8B},
+    {0x0BBE, 0x0000, 0x0000, 0xA1},
+    {0x0BBF, 0x0000, 0x0000, 0xA2},
+    {0x0BC0, 0x0000, 0x0000, 0xA3},
+    {0x0BC1, 0x0000, 0x0000, 0xA4},
+    {0x0BC2, 0x0000, 0x0000, 0xA5},
+    {0x0BC6, 0x0000, 0x0000, 0xA6},
+    {0x0BC7, 0x0000, 0x0000, 0xA7},
+    {0x0BC8, 0x0000, 0x0000, 0xA8},
+    {0x0BCC, 0x0000, 0x0000, 0xAA},
+    {0x0BE6, 0x0000, 0x0000, 0x80},
+    {0x0BE7, 0x0000, 0x0000, 0x81},
+    {0x0BE7, 0x0BB7, 0x0000, 0x87},
+    {0x0BE7, 0x0BB7, 0x0B82, 0x8C},
+    {0x0BE8, 0x0000, 0x0000, 0x8D},
+    {0x0BE9, 0x0000, 0x0000, 0x8E},
+    {0x0BEA, 0x0000, 0x0000, 0x8F},
+    {0x0BEB, 0x0000, 0x0000, 0x90},
+    {0x0BEC, 0x0000, 0x0000, 0x95},
+    {0x0BED, 0x0000, 0x0000, 0x96},
+    {0x0BEE, 0x0000, 0x0000, 0x97},
+    {0x0BEF, 0x0000, 0x0000, 0x98},
+    {0x0BF0, 0x0000, 0x0000, 0x9D},
+    {0x0BF1, 0x0000, 0x0000, 0x9E},
+    {0x0BF2, 0x0000, 0x0000, 0x9F},
+    {0x2018, 0x0000, 0x0000, 0x91},
+    {0x2019, 0x0000, 0x0000, 0x92},
+    {0x201C, 0x0000, 0x0000, 0x93},
+    // 0x94 is the closing double quote; it used to repeat the 0x201C key of
+    // the row above, which left one of the two rows unreachable.
+    {0x201D, 0x0000, 0x0000, 0x94}
+};
+
+// TSCII -> Unicode. Row n describes byte 0x80 + n as up to three code units;
+// unused slots are 0x0000 and a row of all zeroes means "no mapping".
+// qt_TSCIIToUnicode() indexes the table with (code & 0x7f).
+static const ushort TsToUn [][3] = {
+    // Starting at 0x80
+    {0x0BE6, 0x0000, 0x0000},
+    {0x0BE7, 0x0000, 0x0000},
+    {0x0000, 0x0000, 0x0000}, // unknown
+    {0x0B9C, 0x0000, 0x0000},
+    {0x0BB7, 0x0000, 0x0000},
+    {0x0BB8, 0x0000, 0x0000},
+    {0x0BB9, 0x0000, 0x0000},
+    {0x0BE7, 0x0BB7, 0x0000},
+    {0x0B9C, 0x0B82, 0x0000},
+    {0x0BB7, 0x0B82, 0x0000},
+    {0x0BB8, 0x0B82, 0x0000},
+    {0x0BB9, 0x0B82, 0x0000},
+    {0x0BE7, 0x0BB7, 0x0B82},
+    {0x0BE8, 0x0000, 0x0000},
+    {0x0BE9, 0x0000, 0x0000},
+    {0x0BEA, 0x0000, 0x0000},
+    {0x0BEB, 0x0000, 0x0000},
+    {0x2018, 0x0000, 0x0000},
+    {0x2019, 0x0000, 0x0000},
+    {0x201C, 0x0000, 0x0000},
+    {0x201D, 0x0000, 0x0000}, // 0x94: closing double quote (was a second 0x201C)
+    {0x0BEC, 0x0000, 0x0000},
+    {0x0BED, 0x0000, 0x0000},
+    {0x0BEE, 0x0000, 0x0000},
+    {0x0BEF, 0x0000, 0x0000},
+    {0x0B99, 0x0BC1, 0x0000},
+    {0x0B9E, 0x0BC1, 0x0000},
+    {0x0B99, 0x0BC2, 0x0000},
+    {0x0B9E, 0x0BC2, 0x0000},
+    {0x0BF0, 0x0000, 0x0000},
+    {0x0BF1, 0x0000, 0x0000},
+    {0x0BF2, 0x0000, 0x0000},
+    {0x00A0, 0x0000, 0x0000},
+    {0x0BBE, 0x0000, 0x0000},
+    {0x0BBF, 0x0000, 0x0000},
+    {0x0BC0, 0x0000, 0x0000},
+    {0x0BC1, 0x0000, 0x0000},
+    {0x0BC2, 0x0000, 0x0000},
+    {0x0BC6, 0x0000, 0x0000},
+    {0x0BC7, 0x0000, 0x0000},
+    {0x0BC8, 0x0000, 0x0000},
+    {0x00A9, 0x0000, 0x0000},
+    {0x0BCC, 0x0000, 0x0000},
+    {0x0B85, 0x0000, 0x0000},
+    {0x0B86, 0x0000, 0x0000},
+    {0x0B87, 0x0000, 0x0000},
+    {0x0B88, 0x0000, 0x0000},
+    {0x0B89, 0x0000, 0x0000},
+    {0x0B8A, 0x0000, 0x0000},
+    {0x0B8E, 0x0000, 0x0000},
+    {0x0B8F, 0x0000, 0x0000},
+    {0x0B90, 0x0000, 0x0000},
+    {0x0B92, 0x0000, 0x0000},
+    {0x0B93, 0x0000, 0x0000},
+    {0x0B94, 0x0000, 0x0000},
+    {0x0B83, 0x0000, 0x0000},
+    {0x0B95, 0x0000, 0x0000},
+    {0x0B99, 0x0000, 0x0000},
+    {0x0B9A, 0x0000, 0x0000},
+    {0x0B9E, 0x0000, 0x0000},
+    {0x0B9F, 0x0000, 0x0000},
+    {0x0BA3, 0x0000, 0x0000},
+    {0x0BA4, 0x0000, 0x0000},
+    {0x0BA8, 0x0000, 0x0000},
+    {0x0BAA, 0x0000, 0x0000},
+    {0x0BAE, 0x0000, 0x0000},
+    {0x0BAF, 0x0000, 0x0000},
+    {0x0BB0, 0x0000, 0x0000},
+    {0x0BB2, 0x0000, 0x0000},
+    {0x0BB5, 0x0000, 0x0000},
+    {0x0BB4, 0x0000, 0x0000},
+    {0x0BB3, 0x0000, 0x0000},
+    {0x0BB1, 0x0000, 0x0000},
+    {0x0BA9, 0x0000, 0x0000},
+    {0x0B9F, 0x0BBF, 0x0000},
+    {0x0B9F, 0x0BC0, 0x0000},
+    {0x0B95, 0x0BC1, 0x0000},
+    {0x0B9A, 0x0BC1, 0x0000},
+    {0x0B9F, 0x0BC1, 0x0000},
+    {0x0BA3, 0x0BC1, 0x0000},
+    {0x0BA4, 0x0BC1, 0x0000},
+    {0x0BA8, 0x0BC1, 0x0000},
+    {0x0BAA, 0x0BC1, 0x0000},
+    {0x0BAE, 0x0BC1, 0x0000},
+    {0x0BAF, 0x0BC1, 0x0000},
+    {0x0BB0, 0x0BC1, 0x0000},
+    {0x0BB2, 0x0BC1, 0x0000},
+    {0x0BB5, 0x0BC1, 0x0000},
+    {0x0BB4, 0x0BC1, 0x0000},
+    {0x0BB3, 0x0BC1, 0x0000},
+    {0x0BB1, 0x0BC1, 0x0000},
+    {0x0BA9, 0x0BC1, 0x0000},
+    {0x0B95, 0x0BC2, 0x0000},
+    {0x0B9A, 0x0BC2, 0x0000},
+    {0x0B9F, 0x0BC2, 0x0000},
+    {0x0BA3, 0x0BC2, 0x0000},
+    {0x0BA4, 0x0BC2, 0x0000},
+    {0x0BA8, 0x0BC2, 0x0000},
+    {0x0BAA, 0x0BC2, 0x0000},
+    {0x0BAE, 0x0BC2, 0x0000},
+    {0x0BAF, 0x0BC2, 0x0000},
+    {0x0BB0, 0x0BC2, 0x0000},
+    {0x0BB2, 0x0BC2, 0x0000},
+    {0x0BB5, 0x0BC2, 0x0000},
+    {0x0BB4, 0x0BC2, 0x0000},
+    {0x0BB3, 0x0BC2, 0x0000},
+    {0x0BB1, 0x0BC2, 0x0000},
+    {0x0BA9, 0x0BC2, 0x0000},
+    {0x0B95, 0x0B82, 0x0000},
+    {0x0B99, 0x0B82, 0x0000},
+    {0x0B9A, 0x0B82, 0x0000},
+    {0x0B9E, 0x0B82, 0x0000},
+    {0x0B9F, 0x0B82, 0x0000},
+    {0x0BA3, 0x0B82, 0x0000},
+    {0x0BA1, 0x0B82, 0x0000},
+    {0x0BA8, 0x0B82, 0x0000},
+    {0x0BAA, 0x0B82, 0x0000},
+    {0x0BAE, 0x0B82, 0x0000},
+    {0x0BAF, 0x0B82, 0x0000},
+    {0x0BB0, 0x0B82, 0x0000},
+    {0x0BB2, 0x0B82, 0x0000},
+    {0x0BB5, 0x0B82, 0x0000},
+    {0x0BB4, 0x0B82, 0x0000},
+    {0x0BB3, 0x0B82, 0x0000},
+    {0x0BB1, 0x0B82, 0x0000},
+    {0x0BA9, 0x0B82, 0x0000}
+};
+
+// Compares the first len elements of s1 and s2 pairwise. Returns the
+// difference of the first pair that differs, or 0 when there is none.
+static int cmp(const ushort *s1, const ushort *s2, size_t len)
+{
+    for (size_t k = 0; k < len; ++k) {
+        const int delta = s1[k] - s2[k];
+        if (delta != 0)
+            return delta;
+    }
+    return 0;
+}
+
+// Looks up the sequence (u1, u2, u3) in UnToTs and returns the TSCII byte
+// stored for it, or 0 when the table has no such row. Unused trailing
+// positions are passed as 0.
+static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3)
+{
+    const ushort key[3] = { u1, u2, u3 };
+
+    // binary search over the sorted table, both bounds inclusive
+    int lo = 0;
+    int hi = UnToTsLast;
+    while (lo <= hi) {
+        const int mid = (lo + hi) / 2;
+        const int order = cmp(UnToTs[mid], key, 3);
+        if (order < 0)
+            lo = mid + 1;       // row sorts before the key
+        else if (order > 0)
+            hi = mid - 1;       // row sorts after the key
+        else
+            return UnToTs[mid][3];
+    }
+
+    return 0;
+}
+
+// Copies the three table slots for TSCII byte \a code into s[0..2] and
+// returns one more than the index of the last non-zero slot (0 when all
+// three are zero). The row is selected with (code & 0x7f).
+static unsigned int qt_TSCIIToUnicode(uint code, uint *s)
+{
+    const ushort *row = TsToUn[code & 0x7f];
+    int used = 0;
+    for (int k = 0; k < 3; ++k) {
+        s[k] = row[k];
+        if (row[k] != 0)
+            used = k + 1;
+    }
+    return used;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_NO_CODECS
diff --git a/src/corelib/codecs/qtsciicodec_p.h b/src/corelib/codecs/qtsciicodec_p.h
new file mode 100644
index 0000000000..8f11e485b1
--- /dev/null
+++ b/src/corelib/codecs/qtsciicodec_p.h
@@ -0,0 +1,106 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// Most of the code here was originally written by Hans Petter Bieker,
+// and is included in Qt with the author's permission, and the grateful
+// thanks of the Trolltech team.
+
+/*
+ * Copyright (C) 2000 Hans Petter Bieker. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef QTSCIICODEC_P_H
+#define QTSCIICODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists for the convenience
+// of other Qt classes. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_CODECS
+
+// QTextCodec subclass for the TSCII encoding. It declares no data members of
+// its own; the conversion functions are implemented in qtsciicodec.cpp.
+class Q_CORE_EXPORT QTsciiCodec : public QTextCodec {
+public:
+ ~QTsciiCodec();
+
+ // Identification of the codec.
+ QByteArray name() const;
+ int mibEnum() const;
+
+ // Conversions between TSCII bytes and Unicode. The ConverterState pointer
+ // may be null; the implementations check it before use.
+ QString convertToUnicode(const char *, int, ConverterState *) const;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+};
+
+#endif // QT_NO_CODECS
+
+QT_END_NAMESPACE
+
+#endif // QTSCIICODEC_P_H
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
new file mode 100644
index 0000000000..281bf75519
--- /dev/null
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -0,0 +1,634 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qutfcodec_p.h"
+#include "qlist.h"
+#include "qendian.h"
+#include "qchar.h"
+
+#ifndef QT_NO_TEXTCODEC
+
+QT_BEGIN_NAMESPACE
+
+// Empty destructor, defined out of line.
+QUtf8Codec::~QUtf8Codec()
+{
+}
+
+/*
+    Encodes the len UTF-16 code units starting at uc as UTF-8.
+
+    With a state object, a byte order mark is written first unless
+    IgnoreHeader is set, and a high surrogate that ends the input is kept in
+    state->state_data[0] (remainingChars == 1) so that the next call can
+    combine it with its low surrogate. Without a state object no mark is
+    written and a trailing lone high surrogate is replaced immediately.
+
+    Unpaired surrogates are written as '?' (0 with ConvertInvalidToNull) and
+    counted in state->invalidChars. Code points strictly between 0x10fe00 and
+    0x10ff00 are written as the single byte (u - 0x10fe00).
+*/
+QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+    uchar replacement = '?';
+    // A code unit needs at most 3 bytes; a surrogate pair needs 4 bytes for
+    // its 2 code units.
+    int rlen = 3*len;
+    int surrogate_high = -1;
+    if (state) {
+        if (state->flags & ConvertInvalidToNull)
+            replacement = 0;
+        if (!(state->flags & IgnoreHeader))
+            rlen += 3;
+        if (state->remainingChars) {
+            surrogate_high = state->state_data[0];
+            // The pending surrogate is not counted in len, but completing it
+            // (or replacing it) costs one byte more than 3*len provides.
+            rlen += 1;
+        }
+    }
+
+    QByteArray rstr;
+    rstr.resize(rlen);
+    uchar* cursor = (uchar*)rstr.data();
+    const QChar *ch = uc;
+    int invalid = 0;
+    if (state && !(state->flags & IgnoreHeader)) {
+        *cursor++ = 0xef;
+        *cursor++ = 0xbb;
+        *cursor++ = 0xbf;
+    }
+
+    const QChar *end = ch + len;
+    while (ch < end) {
+        uint u = ch->unicode();
+        if (surrogate_high >= 0) {
+            if (u >= 0xdc00 && u < 0xe000) {
+                u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000;
+                surrogate_high = -1;
+            } else {
+                // high surrogate without low: replace the surrogate only and
+                // look at the current code unit again, now with nothing
+                // pending, so that it is not lost
+                *cursor++ = replacement;
+                ++invalid;
+                surrogate_high = -1;
+                continue;
+            }
+        } else if (u >= 0xdc00 && u < 0xe000) {
+            // low surrogate without high
+            *cursor++ = replacement;
+            ++ch;
+            ++invalid;
+            continue;
+        } else if (u >= 0xd800 && u < 0xdc00) {
+            surrogate_high = u;
+            ++ch;
+            continue;
+        }
+
+        if (u < 0x80) {
+            *cursor++ = (uchar)u;
+        } else {
+            if (u < 0x0800) {
+                *cursor++ = 0xc0 | ((uchar) (u >> 6));
+            } else {
+                if (u > 0xffff) {
+                    // see QString::fromUtf8() and QString::utf8() for explanations
+                    if (u > 0x10fe00 && u < 0x10ff00) {
+                        *cursor++ = (u - 0x10fe00);
+                        ++ch;
+                        continue;
+                    } else {
+                        *cursor++ = 0xf0 | ((uchar) (u >> 18));
+                        *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
+                    }
+                } else {
+                    *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
+                }
+                *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f);
+            }
+            *cursor++ = 0x80 | ((uchar) (u&0x3f));
+        }
+        ++ch;
+    }
+
+    if (!state && surrogate_high >= 0) {
+        // Nothing can carry the surrogate over to a later call, so it is
+        // unpaired for good.
+        *cursor++ = replacement;
+    }
+
+    rstr.resize(cursor - (const uchar*)rstr.constData());
+    if (state) {
+        state->invalidChars += invalid;
+        state->flags |= IgnoreHeader;
+        state->remainingChars = 0;
+        if (surrogate_high >= 0) {
+            state->remainingChars = 1;
+            state->state_data[0] = surrogate_high;
+        }
+    }
+    return rstr;
+}
+
+/*
+    Decodes len bytes of UTF-8 from chars and appends the result to *target.
+
+    A leading byte order mark is skipped unless IgnoreHeader is set in state.
+    With a state object, an incomplete sequence at the end of the input is
+    stored (remainingChars, state_data[0] = partial code point,
+    state_data[1] = minimum legal value) and continued by the next call.
+    Without one, every byte of such a tail becomes a replacement character.
+
+    Overlong encodings, surrogates, values of 0xfffe and above that are not
+    encodable as a surrogate pair, and stray bytes produce the replacement
+    character (QChar::Null with ConvertInvalidToNull) and are counted in
+    state->invalidChars.
+*/
+void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const
+{
+    bool headerdone = false;
+    QChar replacement = QChar::ReplacementCharacter;
+    int need = 0;       // continuation bytes still expected
+    int error = -1;     // index of the lead byte of the sequence in progress
+    uint uc = 0;        // code point assembled so far
+    uint min_uc = 0;    // smallest value the current sequence length may encode
+    if (state) {
+        if (state->flags & IgnoreHeader)
+            headerdone = true;
+        if (state->flags & ConvertInvalidToNull)
+            replacement = QChar::Null;
+        need = state->remainingChars;
+        if (need) {
+            uc = state->state_data[0];
+            min_uc = state->state_data[1];
+        }
+    }
+    // >= 3, not > 3: an input that consists of nothing but the mark has to
+    // be recognised as well.
+    if (!headerdone && len >= 3
+        && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
+        // starts with a byte order mark
+        chars += 3;
+        len -= 3;
+        headerdone = true;
+    }
+
+    int originalLength = target->length();
+    QString &result = *target;
+    result.resize(originalLength + len + 1); // worst case
+    QChar *qch = result.data() + originalLength;
+    uchar ch;
+    int invalid = 0;
+
+    for (int i=0; i<len; i++) {
+        ch = chars[i];
+        if (need) {
+            if ((ch&0xc0) == 0x80) {
+                uc = (uc << 6) | (ch & 0x3f);
+                need--;
+                if (!need) {
+                    if (uc > 0xffff && uc < 0x110000) {
+                        // surrogate pair
+                        uc -= 0x10000;
+                        unsigned short high = uc/0x400 + 0xd800;
+                        unsigned short low = uc%0x400 + 0xdc00;
+
+                        // resize if necessary
+                        long where = qch - result.unicode();
+                        if (where + 2 >= result.length()) {
+                            result.resize(where + 2);
+                            qch = result.data() + where;
+                        }
+
+                        *qch++ = QChar(high);
+                        *qch++ = QChar(low);
+                    } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
+                        // error
+                        *qch++ = replacement;
+                        ++invalid;
+                    } else {
+                        *qch++ = uc;
+                    }
+                }
+            } else {
+                // error: go back to the lead byte, replace it, and let the
+                // loop continue with the byte that follows it
+                i = error;
+                *qch++ = replacement;
+                ++invalid;
+                need = 0;
+            }
+        } else {
+            if (ch < 128) {
+                *qch++ = QLatin1Char(ch);
+            } else if ((ch & 0xe0) == 0xc0) {
+                uc = ch & 0x1f;
+                need = 1;
+                error = i;
+                min_uc = 0x80;
+            } else if ((ch & 0xf0) == 0xe0) {
+                uc = ch & 0x0f;
+                need = 2;
+                error = i;
+                min_uc = 0x800;
+            } else if ((ch&0xf8) == 0xf0) {
+                uc = ch & 0x07;
+                need = 3;
+                error = i;
+                min_uc = 0x10000;
+            } else {
+                // error
+                *qch++ = replacement;
+                ++invalid;
+            }
+        }
+    }
+    if (!state && need > 0) {
+        // unterminated UTF sequence
+        for (int i = error; i < len; ++i) {
+            *qch++ = replacement;
+            ++invalid;
+        }
+    }
+    result.truncate(qch - result.unicode());
+    if (state) {
+        state->invalidChars += invalid;
+        state->remainingChars = need;
+        if (headerdone)
+            state->flags |= IgnoreHeader;
+        state->state_data[0] = need ? uc : 0;
+        state->state_data[1] = need ? min_uc : 0;
+    }
+}
+
+// Convenience overload: decodes into a fresh, empty QString by forwarding to
+// the overload that appends to a target string, and returns that string.
+QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+ QString result;
+ convertToUnicode(&result, chars, len, state);
+ return result;
+}
+
+QByteArray QUtf8Codec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-8");
+}
+
+// MIB enum value reported by the "UTF-8" codec.
+int QUtf8Codec::mibEnum() const
+{
+ return 106;
+}
+
+// Slots of ConverterState::state_data used by the UTF-16 and UTF-32 decoders
+// below: [Endian] keeps the byte order in use, [Data] keeps the bytes of a
+// code unit that was split across two calls.
+enum { Endian = 0, Data = 1 };
+
+// Empty destructor, defined out of line.
+QUtf16Codec::~QUtf16Codec()
+{
+}
+
+/*
+    Serialises the len code units at uc as UTF-16 bytes.
+
+    A byte order mark is written first unless state is given and has
+    IgnoreHeader set. When the codec's byte order is Detect, the byte order
+    of the host is used. On return, a given state has IgnoreHeader set and
+    remainingChars cleared.
+*/
+QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+    const bool writeBom = !state || !(state->flags & IgnoreHeader);
+
+    Endianness endian = e;
+    if (e == Detect)
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+    const bool bigEndian = (endian == BE);
+
+    QByteArray d;
+    d.resize(writeBom ? 2*len + 2 : 2*len);
+    char *out = d.data();
+
+    if (writeBom) {
+        QChar bom(QChar::ByteOrderMark);
+        out[0] = bigEndian ? bom.row() : bom.cell();
+        out[1] = bigEndian ? bom.cell() : bom.row();
+        out += 2;
+    }
+
+    // two bytes per code unit, most significant byte first for BE
+    for (int i = 0; i < len; ++i) {
+        const uchar hi = uc[i].row();
+        const uchar lo = uc[i].cell();
+        *(out++) = bigEndian ? hi : lo;
+        *(out++) = bigEndian ? lo : hi;
+    }
+
+    if (state) {
+        state->remainingChars = 0;
+        state->flags |= IgnoreHeader;
+    }
+    return d;
+}
+
+// Decodes len bytes of UTF-16 from chars. Bytes are paired into code units;
+// an odd trailing byte is kept in state (if given) for the next call. While
+// the header has not been handled, the first code unit is inspected for a
+// byte order mark, which also settles the byte order when it is Detect.
+QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+ Endianness endian = e;
+ bool half = false; // true while buf holds the first byte of a code unit
+ uchar buf = 0;
+ bool headerdone = false;
+ if (state) {
+ // resume from what the previous call stored
+ headerdone = state->flags & IgnoreHeader;
+ if (endian == Detect)
+ endian = (Endianness)state->state_data[Endian];
+ if (state->remainingChars) {
+ half = true;
+ buf = state->state_data[Data];
+ }
+ }
+ // header already handled but byte order still open: use the host's
+ if (headerdone && endian == Detect)
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+
+ QString result;
+ result.resize(len); // worst case
+ QChar *qch = (QChar *)result.unicode();
+ while (len--) {
+ if (half) {
+ // second byte of a pair: assemble the code unit. While endian is
+ // still Detect the big-endian layout is used for the comparison below.
+ QChar ch;
+ if (endian == LE) {
+ ch.setRow(*chars++);
+ ch.setCell(buf);
+ } else {
+ ch.setRow(buf);
+ ch.setCell(*chars++);
+ }
+ if (!headerdone) {
+ if (endian == Detect) {
+ if (ch == QChar::ByteOrderSwapped && endian != BE) {
+ endian = LE;
+ } else if (ch == QChar::ByteOrderMark && endian != LE) {
+ // ignore BOM
+ endian = BE;
+ } else {
+ // no mark: fall back to the host byte order and keep the
+ // code unit, byte-swapped if the host is little endian
+ if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
+ endian = BE;
+ } else {
+ endian = LE;
+ ch = QChar((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
+ }
+ *qch++ = ch;
+ }
+ } else if (ch != QChar::ByteOrderMark) {
+ // fixed byte order: only a leading mark is dropped
+ *qch++ = ch;
+ }
+ headerdone = true;
+ } else {
+ *qch++ = ch;
+ }
+ half = false;
+ } else {
+ // first byte of a pair: remember it
+ buf = *chars++;
+ half = true;
+ }
+ }
+ result.truncate(qch - result.unicode());
+
+ if (state) {
+ // store byte order and any dangling byte for the next call
+ if (endian != Detect)
+ state->flags |= IgnoreHeader;
+ state->state_data[Endian] = endian;
+ if (half) {
+ state->remainingChars = 1;
+ state->state_data[Data] = buf;
+ } else {
+ state->remainingChars = 0;
+ state->state_data[Data] = 0;
+ }
+ }
+ return result;
+}
+
+// MIB enum value reported by the "UTF-16" codec.
+int QUtf16Codec::mibEnum() const
+{
+ return 1015;
+}
+
+QByteArray QUtf16Codec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-16");
+}
+
+QList<QByteArray> QUtf16Codec::aliases() const
+{
+    // The one additional name this codec answers to.
+    QList<QByteArray> alternatives;
+    alternatives.append(QByteArray("ISO-10646-UCS-2"));
+    return alternatives;
+}
+
+// MIB enum value reported by the "UTF-16BE" codec.
+int QUtf16BECodec::mibEnum() const
+{
+ return 1013;
+}
+
+QByteArray QUtf16BECodec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-16BE");
+}
+
+QList<QByteArray> QUtf16BECodec::aliases() const
+{
+    // This codec has no alternative names.
+    return QList<QByteArray>();
+}
+
+// MIB enum value reported by the "UTF-16LE" codec.
+int QUtf16LECodec::mibEnum() const
+{
+ return 1014;
+}
+
+QByteArray QUtf16LECodec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-16LE");
+}
+
+QList<QByteArray> QUtf16LECodec::aliases() const
+{
+    // This codec has no alternative names.
+    return QList<QByteArray>();
+}
+
+// Empty destructor, defined out of line.
+QUtf32Codec::~QUtf32Codec()
+{
+}
+
+/*
+    Serialises the len UTF-16 code units at uc as UTF-32, four bytes per
+    code point.
+
+    A four byte byte order mark is written first unless state is given and
+    has IgnoreHeader set. When the codec's byte order is Detect, the byte
+    order of the host is used. A high surrogate directly followed by a low
+    surrogate is combined into one code point; any other code unit is written
+    with its own value. On return, a given state has IgnoreHeader set and
+    remainingChars cleared.
+*/
+QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+    Endianness endian = e;
+    if (e == Detect) {
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+    }
+    const bool writeBom = !state || !(state->flags & IgnoreHeader);
+
+    // Upper bound: every code unit is its own code point.
+    int length = 4*len;
+    if (writeBom)
+        length += 4;
+
+    QByteArray d;
+    d.resize(length);
+    char *data = d.data();
+    if (writeBom) {
+        if (endian == BE) {
+            data[0] = 0;
+            data[1] = 0;
+            data[2] = (char)0xfe;
+            data[3] = (char)0xff;
+        } else {
+            data[0] = (char)0xff;
+            data[1] = (char)0xfe;
+            data[2] = 0;
+            data[3] = 0;
+        }
+        data += 4; // the mark is four bytes long, skip all of it
+    }
+
+    for (int i = 0; i < len; ++i) {
+        uint cp = uc[i].unicode();
+        // combine only genuine pairs, so that the unit after a lone high
+        // surrogate is not swallowed
+        if (uc[i].isHighSurrogate() && i < len - 1 && uc[i + 1].isLowSurrogate())
+            cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
+        if (endian == BE) {
+            *(data++) = cp >> 24;
+            *(data++) = (cp >> 16) & 0xff;
+            *(data++) = (cp >> 8) & 0xff;
+            *(data++) = cp & 0xff;
+        } else {
+            *(data++) = cp & 0xff;
+            *(data++) = (cp >> 8) & 0xff;
+            *(data++) = (cp >> 16) & 0xff;
+            *(data++) = cp >> 24;
+        }
+    }
+
+    // Surrogate pairs used two inputs for one output: drop the unused tail
+    // instead of returning uninitialised bytes.
+    d.resize(data - d.constData());
+
+    if (state) {
+        state->remainingChars = 0;
+        state->flags |= IgnoreHeader;
+    }
+    return d;
+}
+
+/*
+    Decodes len bytes of UTF-32 from chars. Bytes are collected into groups
+    of four; an incomplete trailing group is kept in state (if given) for the
+    next call. Code points of 0x10000 and above become a surrogate pair.
+
+    Only the first complete group is treated as a possible byte order mark:
+    with byte order Detect it selects LE or BE (falling back to the host byte
+    order when it is no mark), with a fixed byte order a leading U+FEFF is
+    dropped. Later occurrences of U+FEFF are ordinary characters.
+*/
+QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+    Endianness endian = e;
+    uchar tuple[4];
+    int num = 0;
+    bool headerdone = false;
+    if (state) {
+        // resume from what the previous call stored
+        headerdone = state->flags & IgnoreHeader;
+        if (endian == Detect) {
+            endian = (Endianness)state->state_data[Endian];
+        }
+        num = state->remainingChars;
+        memcpy(tuple, &state->state_data[Data], 4);
+    }
+    if (headerdone && endian == Detect)
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+
+    QString result;
+    result.resize((num + len) >> 2 << 1); // worst case
+    QChar *qch = (QChar *)result.unicode();
+
+    const char *end = chars + len;
+    while (chars < end) {
+        tuple[num++] = *chars++;
+        if (num < 4)
+            continue;
+        num = 0;
+
+        if (!headerdone) {
+            // Whatever this first group turns out to be, the header is
+            // handled once it has been looked at.
+            headerdone = true;
+            if (endian == Detect) {
+                if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
+                    endian = LE;
+                    continue;
+                } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
+                    endian = BE;
+                    continue;
+                } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
+                    endian = BE;
+                } else {
+                    endian = LE;
+                }
+            } else if (((endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
+                continue;
+            }
+        }
+
+        uint code = (endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
+        if (code >= 0x10000) {
+            *qch++ = QChar::highSurrogate(code);
+            *qch++ = QChar::lowSurrogate(code);
+        } else {
+            *qch++ = code;
+        }
+    }
+    result.truncate(qch - result.unicode());
+
+    if (state) {
+        // store byte order and any incomplete group for the next call
+        if (endian != Detect)
+            state->flags |= IgnoreHeader;
+        state->state_data[Endian] = endian;
+        state->remainingChars = num;
+        memcpy(&state->state_data[Data], tuple, 4);
+    }
+    return result;
+}
+
+// MIB enum value reported by the "UTF-32" codec.
+int QUtf32Codec::mibEnum() const
+{
+ return 1017;
+}
+
+QByteArray QUtf32Codec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-32");
+}
+
+QList<QByteArray> QUtf32Codec::aliases() const
+{
+    // This codec has no alternative names.
+    return QList<QByteArray>();
+}
+
+// MIB enum value reported by the "UTF-32BE" codec.
+int QUtf32BECodec::mibEnum() const
+{
+ return 1018;
+}
+
+QByteArray QUtf32BECodec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-32BE");
+}
+
+QList<QByteArray> QUtf32BECodec::aliases() const
+{
+    // This codec has no alternative names.
+    return QList<QByteArray>();
+}
+
+// MIB enum value reported by the "UTF-32LE" codec.
+int QUtf32LECodec::mibEnum() const
+{
+ return 1019;
+}
+
+QByteArray QUtf32LECodec::name() const
+{
+    // Name under which this codec identifies itself.
+    return QByteArray("UTF-32LE");
+}
+
+QList<QByteArray> QUtf32LECodec::aliases() const
+{
+    // This codec has no alternative names.
+    return QList<QByteArray>();
+}
+
+
+QT_END_NAMESPACE
+
+#endif //QT_NO_TEXTCODEC
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
new file mode 100644
index 0000000000..0abcfaf144
--- /dev/null
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -0,0 +1,155 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QUTFCODEC_P_H
+#define QUTFCODEC_P_H
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include "QtCore/qtextcodec.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+
+// QTextCodec subclass for UTF-8. Everything is only declared here; the
+// definitions live outside this header.
+class QUtf8Codec : public QTextCodec
+{
+public:
+    ~QUtf8Codec();
+
+    // Codec identification.
+    QByteArray name() const;
+    int mibEnum() const;
+
+    // Conversions between encoded bytes and QChar data.
+    QString convertToUnicode(const char *chars, int len, ConverterState *state) const;
+    QByteArray convertFromUnicode(const QChar *uc, int len, ConverterState *state) const;
+
+    // Overload that receives the destination string as its first argument
+    // instead of returning a QString.
+    void convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const;
+};
+
+// Base class shared by the UTF-16 codecs. The byte-order setting is stored in
+// the protected member `e`; this class starts it out as Detect and the
+// QUtf16BECodec / QUtf16LECodec subclasses overwrite it.
+class QUtf16Codec : public QTextCodec
+{
+protected:
+    // Byte-order setting. NOTE(review): Detect presumably means "work it out
+    // from the input" -- the code that consumes `e` is not in this header.
+    enum Endianness { Detect, BE, LE };
+
+public:
+    QUtf16Codec() : e(Detect) {}
+    ~QUtf16Codec();
+
+    // Codec identification.
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+
+    // Conversions between encoded bytes and QChar data.
+    QString convertToUnicode(const char *chars, int len, ConverterState *state) const;
+    QByteArray convertFromUnicode(const QChar *uc, int len, ConverterState *state) const;
+
+protected:
+    Endianness e;
+};
+
+// UTF-16 codec whose inherited byte-order member is set to BE at construction.
+// It overrides only the identification functions.
+class QUtf16BECodec : public QUtf16Codec
+{
+public:
+    // The base constructor runs first (leaving e == Detect); then e becomes BE.
+    QUtf16BECodec() { e = BE; }
+
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+};
+
+// UTF-16 codec whose inherited byte-order member is set to LE at construction.
+// It overrides only the identification functions.
+class QUtf16LECodec : public QUtf16Codec
+{
+public:
+    // The base constructor runs first (leaving e == Detect); then e becomes LE.
+    QUtf16LECodec() { e = LE; }
+
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+};
+
+// Base class shared by the UTF-32 codecs. The byte-order setting is stored in
+// the protected member `e`; this class starts it out as Detect and the
+// QUtf32BECodec / QUtf32LECodec subclasses overwrite it.
+class QUtf32Codec : public QTextCodec
+{
+protected:
+    // Byte-order setting. NOTE(review): Detect presumably means "work it out
+    // from the input" -- the code that consumes `e` is not in this header.
+    enum Endianness { Detect, BE, LE };
+
+public:
+    QUtf32Codec() : e(Detect) {}
+    ~QUtf32Codec();
+
+    // Codec identification.
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+
+    // Conversions between encoded bytes and QChar data.
+    QString convertToUnicode(const char *chars, int len, ConverterState *state) const;
+    QByteArray convertFromUnicode(const QChar *uc, int len, ConverterState *state) const;
+
+protected:
+    Endianness e;
+};
+
+// UTF-32 codec whose inherited byte-order member is set to BE at construction.
+// It overrides only the identification functions.
+class QUtf32BECodec : public QUtf32Codec
+{
+public:
+    // The base constructor runs first (leaving e == Detect); then e becomes BE.
+    QUtf32BECodec() { e = BE; }
+
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+};
+
+// UTF-32 codec whose inherited byte-order member is set to LE at construction.
+// It overrides only the identification functions.
+class QUtf32LECodec : public QUtf32Codec
+{
+public:
+    // The base constructor runs first (leaving e == Detect); then e becomes LE.
+    QUtf32LECodec() { e = LE; }
+
+    QByteArray name() const;
+    QList<QByteArray> aliases() const;
+    int mibEnum() const;
+};
+
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE
+
+#endif // QUTFCODEC_P_H