summaryrefslogtreecommitdiffstats
path: root/src/versit/versitutils.cpp
blob: efbd5b04cd64baa765e846c72cfdb4787f6076ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
/****************************************************************************
**
** Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies).
** Contact: http://www.qt-project.org/legal
**
** This file is part of the Qt Mobility Components.
**
** $QT_BEGIN_LICENSE:LGPL$
** GNU Lesser General Public License Usage
** This file may be used under the terms of the GNU Lesser General Public
** License version 2.1 as published by the Free Software Foundation and
** appearing in the file LICENSE.LGPL included in the packaging of this
** file. Please review the following information to ensure the GNU Lesser
** General Public License version 2.1 requirements will be met:
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU General
** Public License version 3.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of this
** file. Please review the following information to ensure the GNU General
** Public License version 3.0 requirements will be met:
** http://www.gnu.org/copyleft/gpl.html.
**
** Other Usage
** Alternatively, this file may be used in accordance with the terms and
** conditions contained in a signed written agreement between you and Nokia.
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "versitutils_p.h"
#include "qversitdocument.h"
#include "qversitproperty.h"
#include "qmobilityglobal.h"

#include <QMap>
#include <QTextCodec>
#include <QScopedPointer>

QTM_USE_NAMESPACE

// Codec for which the cached tables below were last generated; updated at the end of changeCodec().
QTextCodec* VersitUtils::m_previousCodec = 0;
// Heap-allocated list of matchers for the encoded "\r\n", "\n" and "\r" sequences; null until the
// first changeCodec() call, which (re)allocates it.
QList<QByteArrayMatcher>* VersitUtils::m_newlineList = 0;
// Encoded form of each Latin-1 character 0..255, indexed by character value; filled by changeCodec().
QByteArray VersitUtils::m_encodingMap[256];

/*!
 * Encode \a ch with \a codec, without adding a byte-order mark.
 *
 * The result is looked up in a 256-entry table that is cached for the most recently used codec;
 * if \a codec differs from that codec, the table is rebuilt first via changeCodec().
 * \a ch is interpreted as a Latin-1 character (the table is generated from QLatin1Char values).
 */
QByteArray VersitUtils::encode(char ch, QTextCodec* codec)
{
    if (codec != m_previousCodec) {
        changeCodec(codec);
    }
    // Plain char may be signed: convert through unsigned char so that bytes >= 0x80 select
    // entries 128..255 instead of producing a negative, out-of-bounds index.
    return m_encodingMap[static_cast<unsigned char>(ch)];
}

/*!
 * Encode \a ba with \a codec, without adding a byte-order mark.  \a ba is interpreted as ASCII.
 *
 * All ba.size() bytes are converted, including any embedded NUL bytes.
 */
QByteArray VersitUtils::encode(const QByteArray& ba, QTextCodec* codec)
{
    QTextCodec::ConverterState state(QTextCodec::IgnoreHeader);
    // Convert with an explicit size so the conversion does not stop at the first NUL byte, and
    // hand the codec the length of the resulting string rather than the byte count of the input:
    // the two can differ, and passing a larger count would read past the end of the string data.
    const QString text = QString::fromAscii(ba.constData(), ba.size());
    return codec->fromUnicode(text.constData(), text.length(), &state);
}

/*!
 * Returns the cached list of matchers for the DOS ("\r\n"), UNIX ("\n") and Mac ("\r") newline
 * sequences as encoded by \a codec.
 *
 * The cache is regenerated through changeCodec() when it has not been built yet or when it was
 * built for a different codec.  The returned pointer refers to the shared cache.
 */
QList<QByteArrayMatcher>* VersitUtils::newlineList(QTextCodec* codec)
{
    const bool cacheIsStale = (m_newlineList == 0) || (codec != m_previousCodec);
    if (cacheIsStale)
        changeCodec(codec);
    return m_newlineList;
}

/*!
 * Regenerates the cached, pre-encoded tables for \a codec and records \a codec as the codec the
 * caches belong to.
 *
 * Two caches are rebuilt: the per-character encoding table (one entry for each Latin-1 value
 * 0..255) and the list of newline matchers ("\r\n", "\n", "\r", in that order).
 */
void VersitUtils::changeCodec(QTextCodec* codec) {
    // Per-character table: encode every Latin-1 value once, suppressing the byte-order mark.
    QTextCodec::ConverterState converterState(QTextCodec::IgnoreHeader);
    for (int value = 0; value < 256; ++value) {
        const QChar latin1 = QLatin1Char(value);
        m_encodingMap[value] = codec->fromUnicode(&latin1, 1, &converterState);
    }

    // Newline matchers: discard the previous list (deleting a null pointer is a no-op) and
    // build a fresh one in DOS, UNIX, Mac order.
    delete m_newlineList;
    m_newlineList = new QList<QByteArrayMatcher>;
    static const char* const newlineSequences[] = { "\r\n", "\n", "\r" };
    const int sequenceCount = sizeof(newlineSequences) / sizeof(newlineSequences[0]);
    for (int index = 0; index < sequenceCount; ++index) {
        const QByteArray encodedNewline = encode(QByteArray(newlineSequences[index]), codec);
        m_newlineList->append(QByteArrayMatcher(encodedNewline));
    }

    // Mark the caches as valid for this codec only once both have been rebuilt.
    m_previousCodec = codec;
}

/*!
 * Looks up the first property in \a document whose name equals \a propertyName.
 *
 * If one is found, it is appended to \a toBeRemoved and returned.  The \a document itself is not
 * modified.  If no property matches, a default-constructed QVersitProperty is returned and
 * \a toBeRemoved is left untouched.
 */
QVersitProperty VersitUtils::takeProperty(const QVersitDocument& document,
                                          const QString& propertyName,
                                          QList<QVersitProperty>* toBeRemoved) {
    const QList<QVersitProperty> candidates = document.properties();
    for (QList<QVersitProperty>::const_iterator it = candidates.constBegin();
         it != candidates.constEnd(); ++it) {
        if (it->name() != propertyName)
            continue;
        toBeRemoved->append(*it);
        return *it;
    }
    return QVersitProperty();
}

/*!
 * Returns true iff \a bytes is a valid UTF-8 sequence.
 */
bool VersitUtils::isValidUtf8(const QByteArray& bytes) {
    int sequenceLength = 1; // number of bytes in total for a sequence
    int continuation = 0;   // number of bytes left in a continuation
    quint32 codePoint = 0;
    for (int i = 0; i < bytes.size(); i++) {
        quint8 byte = bytes[i];
        if (continuation == 0) {
            if (byte & 0x80) { // 1xxxxxxx
                if (byte & 0x40) { // 11xxxxxx
                    if (byte == 0xc0 || byte == 0xc1) // 1100000x
                        return false; // overlong 2 byte sequence
                    if (byte & 0x20) { // 111xxxxx
                        if (byte & 0x10) { // 1111xxxx
                            if (byte & 0x08) { // 11111xxx
                                // Outside unicode range
                                return false;
                            } else { // 11110xxx
                                sequenceLength = 4;
                                continuation = 3; // three more bytes
                                codePoint = byte & 0x07; // take the last 3 bits
                            }
                        } else { // 1110xxxx
                            sequenceLength = 3;
                            continuation = 2; // two more bytes
                            codePoint = byte & 0x0f; // take last 4 bits
                        }
                    } else { // 110xxxxx
                        sequenceLength = 2;
                        continuation = 1; // one more byte
                        codePoint = byte & 0x1f; // take last 5 bits
                    }
                } else { // 10xxxxxx
                    // unexpected continuation
                    return false;
                }
            } else { // 0xxxxxxx
                sequenceLength = 1;
            }
        } else { // continuation > 0
            if ((byte & 0xc0) != 0x80) // 10xxxxxx
                return false; // expected continuation not found
            codePoint = (codePoint << 6) | (byte & 0x3f); // append last 6 bits
            continuation--;
        }

        if (continuation == 0) {
            // Finished decoding a character - it's not overlong and that it's in range
            switch (sequenceLength) {
                // 1-byte sequence can't be overlong
                // 2-byte sequence has already been checked for overlongness
                case 3:
                    if (codePoint < 0x800) // overlong
                        return false;

                    // Filter out codepoints outside the Unicode range
                    if ((codePoint >= 0xd800 && codePoint <= 0xdfff) // utf-16 surrogate halves
                            || (codePoint >= 0xfffe && codePoint <= 0xffff)) { // reversed utf-16 BOM
                        return false;
                    }
                    break;
                case 4:
                    if (codePoint < 0x10000      // overlong
                        || codePoint > 0x10ffff) // above Unicode range
                        return false;
                    break;
            }
            codePoint = 0;
        }
    }
    return continuation == 0;
}