summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs/qjiscodec.cpp
diff options
context:
space:
mode:
authorMark Brand <mabrand@mabrand.nl>2012-01-13 00:24:13 +0100
committerQt by Nokia <qt-info@nokia.com>2012-01-20 18:48:22 +0100
commit2ae91491b48a8684b3bba76664d82cedad92bfcd (patch)
tree92b03504d7d2405976d0f7a93e3623822d0f335d /src/corelib/codecs/qjiscodec.cpp
parent7ee3d8c8ecb78dd7c5ae09b04ebf1420958f0001 (diff)
move plugin text codecs to QtCore
Having plugin text codecs adds considerable complexity to configuring Qt. The plugin interface is designed for optional features, but text codecs tend to be used for essential functions. A dramatic example is loading a codec plugin from a file whose path needs to be converted by the codec. Codec plugins can also be a nuisance to builders of applications linking to static Qt. This is because the application might need to explicilty import the static codec plugins which are actually dependencies of QtCore. For these reasons, it has been decided not to have text codec plugins any longer. Change-Id: Ic6c80a9c949bd42e881e932d1edae23fe4fe4c88 Reviewed-by: Lars Knoll <lars.knoll@nokia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/codecs/qjiscodec.cpp')
-rw-r--r--src/corelib/codecs/qjiscodec.cpp367
1 files changed, 367 insertions, 0 deletions
diff --git a/src/corelib/codecs/qjiscodec.cpp b/src/corelib/codecs/qjiscodec.cpp
new file mode 100644
index 0000000000..99c756e859
--- /dev/null
+++ b/src/corelib/codecs/qjiscodec.cpp
@@ -0,0 +1,367 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the plugins of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// Most of the code here was originally written by Serika Kurusugawa,
+// a.k.a. Junji Takagi, and is included in Qt with the author's permission
+// and the grateful thanks of the Qt team.
+
+/*! \class QJisCodec
+ \reentrant
+ \internal
+*/
+
+#include "qjiscodec.h"
+#include "qlist.h"
+
+QT_BEGIN_NAMESPACE
+
+#ifndef QT_NO_TEXTCODEC
+enum {
+ Esc = 0x1b,
+ So = 0x0e, // Shift Out
+ Si = 0x0f, // Shift In
+
+ ReverseSolidus = 0x5c,
+ YenSign = 0x5c,
+ Tilde = 0x7e,
+ Overline = 0x7e
+};
+
+#define IsKana(c) (((c) >= 0xa1) && ((c) <= 0xdf))
+#define IsJisChar(c) (((c) >= 0x21) && ((c) <= 0x7e))
+
+#define QValidChar(u) ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
+
+enum Iso2022State{ Ascii, MinState = Ascii,
+ JISX0201_Latin, JISX0201_Kana,
+ JISX0208_1978, JISX0208_1983,
+ JISX0212, MaxState = JISX0212,
+ UnknownState };
+
+static const char Esc_CHARS[] = "()*+-./";
+
+static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
+static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
+static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
+static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
+static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
+static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
+static const char * const Esc_SEQ[] = { Esc_Ascii,
+ Esc_JISX0201_Latin,
+ Esc_JISX0201_Kana,
+ Esc_JISX0208_1978,
+ Esc_JISX0208_1983,
+ Esc_JISX0212 };
+
+/*! \internal */
+QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
+{
+}
+
+
+/*! \internal */
+QJisCodec::~QJisCodec()
+{
+ delete (QJpUnicodeConv*)conv;
+ conv = 0;
+}
+
+QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
+{
+ char replacement = '?';
+ if (cs) {
+ if (cs->flags & ConvertInvalidToNull)
+ replacement = 0;
+ }
+ int invalid = 0;
+
+ QByteArray result;
+ Iso2022State state = Ascii;
+ Iso2022State prev = Ascii;
+ for (int i = 0; i < len; i++) {
+ QChar ch = uc[i];
+ uint j;
+ if (ch.row() == 0x00 && ch.cell() < 0x80) {
+ // Ascii
+ if (state != JISX0201_Latin ||
+ ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
+ state = Ascii;
+ }
+ j = ch.cell();
+ } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
+ if (j < 0x80) {
+ // JIS X 0201 Latin
+ if (state != Ascii ||
+ ch.cell() == YenSign || ch.cell() == Overline) {
+ state = JISX0201_Latin;
+ }
+ } else {
+ // JIS X 0201 Kana
+ state = JISX0201_Kana;
+ j &= 0x7f;
+ }
+ } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
+ // JIS X 0208
+ state = JISX0208_1983;
+ } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
+ // JIS X 0212
+ state = JISX0212;
+ } else {
+ // Invalid
+ state = UnknownState;
+ j = replacement;
+ ++invalid;
+ }
+ if (state != prev) {
+ if (state == UnknownState) {
+ result += Esc_Ascii;
+ } else {
+ result += Esc_SEQ[state - MinState];
+ }
+ prev = state;
+ }
+ if (j < 0x0100) {
+ result += j & 0xff;
+ } else {
+ result += (j >> 8) & 0xff;
+ result += j & 0xff;
+ }
+ }
+ if (prev != Ascii) {
+ result += Esc_Ascii;
+ }
+
+ if (cs) {
+ cs->invalidChars += invalid;
+ }
+ return result;
+}
+
+QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
+{
+ uchar buf[4] = {0, 0, 0, 0};
+ int nbuf = 0;
+ Iso2022State state = Ascii, prev = Ascii;
+ bool esc = false;
+ QChar replacement = QChar::ReplacementCharacter;
+ if (cs) {
+ if (cs->flags & ConvertInvalidToNull)
+ replacement = QChar::Null;
+ nbuf = cs->remainingChars;
+ buf[0] = (cs->state_data[0] >> 24) & 0xff;
+ buf[1] = (cs->state_data[0] >> 16) & 0xff;
+ buf[2] = (cs->state_data[0] >> 8) & 0xff;
+ buf[3] = (cs->state_data[0] >> 0) & 0xff;
+ state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
+ prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
+ esc = cs->state_data[2];
+ }
+ int invalid = 0;
+
+ QString result;
+ for (int i=0; i<len; i++) {
+ uchar ch = chars[i];
+ if (esc) {
+ // Escape sequence
+ state = UnknownState;
+ switch (nbuf) {
+ case 0:
+ if (ch == '$' || strchr(Esc_CHARS, ch)) {
+ buf[nbuf++] = ch;
+ } else {
+ nbuf = 0;
+ esc = false;
+ }
+ break;
+ case 1:
+ if (buf[0] == '$') {
+ if (strchr(Esc_CHARS, ch)) {
+ buf[nbuf++] = ch;
+ } else {
+ switch (ch) {
+ case '@':
+ state = JISX0208_1978; // Esc $ @
+ break;
+ case 'B':
+ state = JISX0208_1983; // Esc $ B
+ break;
+ }
+ nbuf = 0;
+ esc = false;
+ }
+ } else {
+ if (buf[0] == '(') {
+ switch (ch) {
+ case 'B':
+ state = Ascii; // Esc (B
+ break;
+ case 'I':
+ state = JISX0201_Kana; // Esc (I
+ break;
+ case 'J':
+ state = JISX0201_Latin; // Esc (J
+ break;
+ }
+ }
+ nbuf = 0;
+ esc = false;
+ }
+ break;
+ case 2:
+ if (buf[1] == '(') {
+ switch (ch) {
+ case 'D':
+ state = JISX0212; // Esc $ (D
+ break;
+ }
+ }
+ nbuf = 0;
+ esc = false;
+ break;
+ }
+ } else {
+ if (ch == Esc) {
+ // Escape sequence
+ nbuf = 0;
+ esc = true;
+ } else if (ch == So) {
+ // Shift out
+ prev = state;
+ state = JISX0201_Kana;
+ nbuf = 0;
+ } else if (ch == Si) {
+ // Shift in
+ if (prev == Ascii || prev == JISX0201_Latin) {
+ state = prev;
+ } else {
+ state = Ascii;
+ }
+ nbuf = 0;
+ } else {
+ uint u;
+ switch (nbuf) {
+ case 0:
+ switch (state) {
+ case Ascii:
+ if (ch < 0x80) {
+ result += QLatin1Char(ch);
+ break;
+ }
+ /* fall through */
+ case JISX0201_Latin:
+ u = conv->jisx0201ToUnicode(ch);
+ result += QValidChar(u);
+ break;
+ case JISX0201_Kana:
+ u = conv->jisx0201ToUnicode(ch | 0x80);
+ result += QValidChar(u);
+ break;
+ case JISX0208_1978:
+ case JISX0208_1983:
+ case JISX0212:
+ buf[nbuf++] = ch;
+ break;
+ default:
+ result += QChar::ReplacementCharacter;
+ break;
+ }
+ break;
+ case 1:
+ switch (state) {
+ case JISX0208_1978:
+ case JISX0208_1983:
+ u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
+ result += QValidChar(u);
+ break;
+ case JISX0212:
+ u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
+ result += QValidChar(u);
+ break;
+ default:
+ result += replacement;
+ ++invalid;
+ break;
+ }
+ nbuf = 0;
+ break;
+ }
+ }
+ }
+ }
+
+ if (cs) {
+ cs->remainingChars = nbuf;
+ cs->invalidChars += invalid;
+ cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
+ cs->state_data[1] = (prev << 8) + state;
+ cs->state_data[2] = esc;
+ }
+
+ return result;
+}
+
+
+
+/*! \internal */
+int QJisCodec::_mibEnum()
+{
+ return 39;
+}
+
+/*! \internal */
+QByteArray QJisCodec::_name()
+{
+ return "ISO-2022-JP";
+}
+
+/*!
+ Returns the codec's mime name.
+*/
+QList<QByteArray> QJisCodec::_aliases()
+{
+ QList<QByteArray> list;
+ list << "JIS7"; // Qt 3 compat
+ return list;
+}
+
+#endif // QT_NO_TEXTCODEC
+
+QT_END_NAMESPACE