aboutsummaryrefslogtreecommitdiffstats
path: root/src/virtualkeyboard/hangul.cpp
blob: d5b820e8bcc5d2ff5102dc84b379bb228862132c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
/****************************************************************************
**
** Copyright (C) 2015 Digia Plc
** All rights reserved.
** For any questions to Digia, please use contact form at http://www.qt.io
**
** This file is part of the Qt Virtual Keyboard add-on for Qt Enterprise.
**
** Licensees holding valid Qt Enterprise licenses may use this file in
** accordance with the Qt Enterprise License Agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia.
**
** If you have questions regarding the use of this file, please use
** contact form at http://www.qt.io
**
****************************************************************************/

#include "hangul.h"

// Jamo code points of the 19 initial consonants, ordered by the initial
// index of a syllable. decompose() reads this list by index; compose()
// searches it with indexOf() to turn a Jamo back into an initial index.
const QList<ushort> Hangul::initials = QList<ushort>()
    << 0x3131 << 0x3132 << 0x3134 << 0x3137 << 0x3138 << 0x3139 << 0x3141
    << 0x3142 << 0x3143 << 0x3145 << 0x3146 << 0x3147 << 0x3148 << 0x3149
    << 0x314A << 0x314B << 0x314C << 0x314D << 0x314E;
// Jamo code points of the 28 final consonant slots, ordered by the final
// index of a syllable. Slot 0 holds 0x0000 because final index 0 means
// "syllable has no final consonant".
const QList<ushort> Hangul::finals = QList<ushort>()
    << 0x0000 << 0x3131 << 0x3132 << 0x3133 << 0x3134 << 0x3135 << 0x3136
    << 0x3137 << 0x3139 << 0x313A << 0x313B << 0x313C << 0x313D << 0x313E
    << 0x313F << 0x3140 << 0x3141 << 0x3142 << 0x3144 << 0x3145 << 0x3146
    << 0x3147 << 0x3148 << 0x314A << 0x314B << 0x314C << 0x314D << 0x314E;
// Lookup table: packed pair of medial indices (see packDoubleMedial) ->
// index of the combined medial.
const QMap<ushort, Hangul::HangulMedialIndex> Hangul::doubleMedialMap =
    Hangul::initDoubleMedialMap();
// Lookup table: packed pair of final indices (see packDoubleFinal) ->
// index of the combined final.
const QMap<ushort, Hangul::HangulFinalIndex> Hangul::doubleFinalMap =
    Hangul::initDoubleFinalMap();
// First code point of the precomposed syllable range; a character is treated
// as a syllable when (code point - SBase) lies in [0, SCount).
const int Hangul::SBase = 0xAC00;
// NOTE(review): LBase and TBase are not referenced anywhere else in this
// file; presumably kept for reference or used elsewhere -- confirm.
const int Hangul::LBase = 0x1100;
// Code point that medial index 0 maps to; medial Jamo are produced as
// VBase + medial index.
const int Hangul::VBase = 0x314F;
const int Hangul::TBase = 0x11A7;
// Number of initial, medial and final slots per syllable, respectively.
const int Hangul::LCount = 19;
const int Hangul::VCount = 21;
const int Hangul::TCount = 28;
// Number of syllables sharing one initial, and total number of syllables.
const int Hangul::NCount = Hangul::VCount * Hangul::TCount; // 588
const int Hangul::SCount = Hangul::LCount * Hangul::NCount; // 11172

// Breaks every precomposed Hangul syllable in source into the Jamo it was
// built from and returns the resulting string.
//
// Each syllable yields its initial consonant, its medial vowel and, when
// present, its final consonant. A medial or final that appears as a value
// in the double Jamo tables is emitted as the two Jamo that form it.
// Characters outside the syllable range are copied through unchanged.
QString Hangul::decompose(const QString &source)
{
    QString output;
    const int count = source.length();
    for (int pos = 0; pos < count; ++pos) {
        const QChar current = source.at(pos);
        const int syllable = (int)current.unicode() - SBase;

        // Not a precomposed syllable: pass the character through
        if (syllable < 0 || syllable >= SCount) {
            output.append(current);
            continue;
        }

        // Initial consonant
        output.append(QChar((int)initials[syllable / NCount]));

        // Medial vowel, split in two when it is a double Jamo
        const int medial = (syllable % NCount) / TCount;
        const ushort medialKey = findDoubleMedial((HangulMedialIndex)medial);
        if (medialKey == 0) {
            output.append(QChar(VBase + medial));
        } else {
            HangulMedialIndex firstVowel, secondVowel;
            unpackDoubleMedial(medialKey, firstVowel, secondVowel);
            output.append(QChar(VBase + (int)firstVowel));
            output.append(QChar(VBase + (int)secondVowel));
        }

        // Final consonant; index 0 means the syllable has none
        const int tail = syllable % TCount;
        if (tail == 0)
            continue;

        const ushort tailKey = findDoubleFinal((HangulFinalIndex)tail);
        if (tailKey == 0) {
            output.append(QChar(finals[tail]));
        } else {
            // Double final: emit both of its parts
            HangulFinalIndex firstTail, secondTail;
            unpackDoubleFinal(tailKey, firstTail, secondTail);
            output.append(QChar(finals[(int)firstTail]));
            output.append(QChar(finals[(int)secondTail]));
        }
    }
    return output;
}

// Combines Hangul Compatibility Jamo found in source into Hangul syllables
// and returns the composed string. An empty source yields a null QString.
//
// The input is scanned left to right. The character most recently written
// to the result ("last") is the syllable under construction; every following
// Jamo is either merged into it, splits it, or is appended unchanged:
//
//      L   + V  ->  LV
//      LV  + T  ->  LVT
//      LV  + V  ->  LV'     (both vowels form a double medial)
//      LVT + T  ->  LVT'    (both consonants form a double final)
//      LVT + V  ->  LV, LV  (the final, or the second half of a double
//                            final, becomes the initial of a new syllable)
//
// Anything that fits none of these rules, including every non-Jamo
// character, is appended as is and becomes the new "last".
QString Hangul::compose(const QString &source)
{
    const int len = source.length();
    if (len == 0)
        return QString();

    // Always add the initial character into buffer.
    // The last character will serve as the current
    // Hangul Syllable.
    QChar last = source.at(0);
    QString result = QString(last);

    // Go through the input buffer starting at next character
    for (int i = 1; i < len; i++) {
        const QChar ch = source.at(i);

        // Check to see if the character is Hangul Compatibility Jamo
        const ushort unicode = ch.unicode();
        if (isJamo(unicode)) {

            // Check to see if the last character is a syllable
            const ushort lastUnicode = last.unicode();
            const int SIndex = (int)lastUnicode - SBase;
            if (SIndex >= 0 && SIndex < SCount) {

                // Check to see if the syllable type is LV or LV+T
                const int TIndex = SIndex % TCount;
                if (TIndex == 0) {

                    // If the current character is final consonant, then
                    // make syllable of form LV+T
                    const int newTIndex = finals.indexOf(unicode);
                    if (newTIndex != -1) {
                        last = QChar((int)lastUnicode + newTIndex);
                        result.replace(result.length() - 1, 1, last);
                        continue;
                    }

                    // Check to see if the current character is vowel.
                    // The offset is negative for consonants, so it is range
                    // checked as a plain integer before it is turned into an
                    // enum value.
                    const int VOffsetB = (int)unicode - VBase;
                    if (VOffsetB >= 0) {
                        const HangulMedialIndex VIndexB = (HangulMedialIndex)VOffsetB;
                        if (isMedial(VIndexB)) {

                            // Some medial Jamos do not exist in the keyboard layout as is.
                            // Such Jamos can only be formed by combining the two specific Jamos,
                            // aka the double Jamos.

                            const HangulMedialIndex VIndexA =
                                    (HangulMedialIndex)((SIndex % NCount) / TCount);
                            if (isMedial(VIndexA)) {

                                // Search the double medial map if such a combination exists
                                const ushort key = packDoubleMedial(VIndexA, VIndexB);
                                const QMap<ushort, HangulMedialIndex>::ConstIterator combined =
                                        doubleMedialMap.constFind(key);
                                if (combined != doubleMedialMap.constEnd()) {

                                    // Update syllable by adding the difference between
                                    // the vowels indices
                                    const int VDiff = (int)combined.value() - (int)VIndexA;
                                    last = QChar((int)lastUnicode + VDiff * TCount);
                                    result.replace(result.length() - 1, 1, last);
                                    continue;
                                }
                            }
                        }
                    }

                } else {

                    // Check to see if current jamo is vowel
                    const int VIndex = (int)unicode - VBase;
                    if (VIndex >= 0 && VIndex < VCount) {

                        // Since some initial and final consonants use the same
                        // Unicode values, we need to check whether the previous final
                        // Jamo is actually an initial Jamo of the next syllable.
                        //
                        // Consider the following scenario:
                        //      LVT+V == not possible
                        //      LV, L+V == possible
                        int LIndex = initials.indexOf(finals[TIndex]);
                        if (LIndex >= 0 && LIndex < LCount) {

                            // Remove the previous final jamo from the syllable,
                            // making the current syllable of form LV
                            last = QChar((int)lastUnicode - TIndex);
                            result.replace(result.length() - 1, 1, last);

                            // Make new syllable of form LV
                            last = QChar(SBase + (LIndex * VCount + VIndex) * TCount);
                            result.append(last);
                            continue;
                        }

                        // Check to see if the current final Jamo is double consonant.
                        // In this scenario, the double consonant is split into parts
                        // and the second part is removed from the current syllable.
                        // Then the second part is joined with the current vowel making
                        // the new syllable of form LV.
                        const ushort key = findDoubleFinal((HangulFinalIndex)TIndex);
                        if (key) {

                            // Split the consonant into two jamos and remove the
                            // second jamo B from the current syllable
                            HangulFinalIndex TIndexA, TIndexB;
                            unpackDoubleFinal(key, TIndexA, TIndexB);
                            last = QChar((int)lastUnicode - TIndex + (int)TIndexA);
                            result.replace(result.length() - 1, 1, last);

                            // Add new syllable by combining the initial jamo
                            // and the current vowel
                            LIndex = initials.indexOf(finals[(int)TIndexB]);
                            last = QChar(SBase + (LIndex * VCount + VIndex) * TCount);
                            result.append(last);
                            continue;
                        }
                    }

                    // Check whether the current consonant can connect to the
                    // previous final consonant forming a double final consonant
                    const HangulFinalIndex TIndexA = (HangulFinalIndex)TIndex;
                    if (isFinal(TIndexA)) {

                        // indexOf() reports -1 for Jamo that are not finals;
                        // reject that before converting to an enum value.
                        const int TOffsetB = finals.indexOf(unicode);
                        if (TOffsetB >= 0) {
                            const HangulFinalIndex TIndexB = (HangulFinalIndex)TOffsetB;
                            if (isFinal(TIndexB)) {

                                // Search the double final map if such a combination exists
                                const ushort key = packDoubleFinal(TIndexA, TIndexB);
                                const QMap<ushort, HangulFinalIndex>::ConstIterator combined =
                                        doubleFinalMap.constFind(key);
                                if (combined != doubleFinalMap.constEnd()) {

                                    // Update syllable by adding the difference between
                                    // the consonant indices
                                    const int TDiff = (int)combined.value() - (int)TIndexA;
                                    last = QChar((int)lastUnicode + TDiff);
                                    result.replace(result.length() - 1, 1, last);
                                    continue;
                                }
                            }
                        }
                    }
                }

            } else {

                // The last character is not syllable.
                // Check to see if the last character is an initial consonant
                const int LIndex = initials.indexOf(lastUnicode);
                if (LIndex != -1) {

                    // If the current character is medial vowel,
                    // make syllable of form LV
                    const int VIndex = (int)unicode - VBase;
                    if (VIndex >= 0 && VIndex < VCount) {
                        last = QChar(SBase + (LIndex * VCount + VIndex) * TCount);
                        result.replace(result.length() - 1, 1, last);
                        continue;
                    }
                }

            }
        }

        // Otherwise, add the character into buffer
        last = ch;
        result.append(ch);
    }
    return result;
}

// Returns true when unicode lies in the inclusive range 0x3131..0x3163,
// which compose() treats as Hangul Compatibility Jamo.
bool Hangul::isJamo(const ushort &unicode)
{
    if (unicode < 0x3131)
        return false;
    return unicode <= 0x3163;
}

// Returns true when vowel lies between HANGUL_MEDIAL_A and HANGUL_MEDIAL_I,
// both inclusive.
bool Hangul::isMedial(HangulMedialIndex vowel)
{
    const bool outOfRange = vowel < HANGUL_MEDIAL_A || vowel > HANGUL_MEDIAL_I;
    return !outOfRange;
}

// Returns true when consonant lies between HANGUL_FINAL_KIYEOK and
// HANGUL_FINAL_HIEUH, both inclusive.
bool Hangul::isFinal(HangulFinalIndex consonant)
{
    const bool outOfRange = consonant < HANGUL_FINAL_KIYEOK || consonant > HANGUL_FINAL_HIEUH;
    return !outOfRange;
}

// Reverse lookup in doubleMedialMap: returns the packed key of the first
// entry (in key order) whose value equals vowel, or 0 when no entry maps
// to that vowel.
ushort Hangul::findDoubleMedial(HangulMedialIndex vowel)
{
    return doubleMedialMap.key(vowel, ushort(0));
}

// Reverse lookup in doubleFinalMap: returns the packed key of the first
// entry (in key order) whose value equals consonant, or 0 when no entry
// maps to that consonant.
ushort Hangul::findDoubleFinal(HangulFinalIndex consonant)
{
    return doubleFinalMap.key(consonant, ushort(0));
}

// Combines two medial indices into a single 16-bit lookup key for the
// double medial table: a goes into the low byte, b into the high byte.
// Note: the key is a table index, not a Unicode character!
ushort Hangul::packDoubleMedial(HangulMedialIndex a, HangulMedialIndex b)
{
    Q_ASSERT(isMedial(a));
    Q_ASSERT(isMedial(b));
    const ushort lowByte = (ushort)a;
    const ushort highByte = (ushort)b;
    return (ushort)(lowByte | (highByte << 8));
}

// Combines two final indices into a single 16-bit lookup key for the
// double final table: a goes into the low byte, b into the high byte.
// Note: the key is a table index, not a Unicode character!
ushort Hangul::packDoubleFinal(HangulFinalIndex a, HangulFinalIndex b)
{
    Q_ASSERT(isFinal(a));
    Q_ASSERT(isFinal(b));
    const ushort lowByte = (ushort)a;
    const ushort highByte = (ushort)b;
    return (ushort)(lowByte | (highByte << 8));
}

// Inverse of packDoubleMedial(): stores the low byte of key in a and the
// high byte in b, then asserts that both are valid medial indices.
void Hangul::unpackDoubleMedial(ushort key, HangulMedialIndex &a, HangulMedialIndex &b)
{
    const int lowByte = key & 0xFF;
    const int highByte = key >> 8;
    a = (HangulMedialIndex)lowByte;
    b = (HangulMedialIndex)highByte;
    Q_ASSERT(isMedial(a));
    Q_ASSERT(isMedial(b));
}

// Inverse of packDoubleFinal(): stores the low byte of key in a and the
// high byte in b, then asserts that both are valid final indices.
void Hangul::unpackDoubleFinal(ushort key, HangulFinalIndex &a, HangulFinalIndex &b)
{
    const int lowByte = key & 0xFF;
    const int highByte = key >> 8;
    a = (HangulFinalIndex)lowByte;
    b = (HangulFinalIndex)highByte;
    Q_ASSERT(isFinal(a));
    Q_ASSERT(isFinal(b));
}

// Builds the double medial lookup table: for each supported pair of vowels
// the packed pair (see packDoubleMedial) maps to the combined vowel.
QMap<ushort, Hangul::HangulMedialIndex> Hangul::initDoubleMedialMap()
{
    struct Combination {
        HangulMedialIndex first;
        HangulMedialIndex second;
        HangulMedialIndex combined;
    };
    const Combination combinations[] = {
        { HANGUL_MEDIAL_O, HANGUL_MEDIAL_A, HANGUL_MEDIAL_WA },
        { HANGUL_MEDIAL_O, HANGUL_MEDIAL_AE, HANGUL_MEDIAL_WAE },
        { HANGUL_MEDIAL_O, HANGUL_MEDIAL_I, HANGUL_MEDIAL_OE },
        { HANGUL_MEDIAL_U, HANGUL_MEDIAL_EO, HANGUL_MEDIAL_WEO },
        { HANGUL_MEDIAL_U, HANGUL_MEDIAL_E, HANGUL_MEDIAL_WE },
        { HANGUL_MEDIAL_U, HANGUL_MEDIAL_I, HANGUL_MEDIAL_WI },
        { HANGUL_MEDIAL_EU, HANGUL_MEDIAL_I, HANGUL_MEDIAL_YI }
    };
    const int combinationCount = (int)(sizeof(combinations) / sizeof(combinations[0]));

    QMap<ushort, HangulMedialIndex> table;
    for (int n = 0; n < combinationCount; ++n) {
        const Combination &entry = combinations[n];
        table.insert(packDoubleMedial(entry.first, entry.second), entry.combined);
    }
    return table;
}

// Builds the double final lookup table: for each supported pair of
// consonants the packed pair (see packDoubleFinal) maps to the combined
// final consonant.
QMap<ushort, Hangul::HangulFinalIndex> Hangul::initDoubleFinalMap()
{
    struct Combination {
        HangulFinalIndex first;
        HangulFinalIndex second;
        HangulFinalIndex combined;
    };
    const Combination combinations[] = {
        { HANGUL_FINAL_KIYEOK, HANGUL_FINAL_SIOS, HANGUL_FINAL_KIYEOK_SIOS },
        { HANGUL_FINAL_NIEUN, HANGUL_FINAL_CIEUC, HANGUL_FINAL_NIEUN_CIEUC },
        { HANGUL_FINAL_NIEUN, HANGUL_FINAL_HIEUH, HANGUL_FINAL_NIEUN_HIEUH },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_KIYEOK, HANGUL_FINAL_RIEUL_KIYEOK },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_MIEUM, HANGUL_FINAL_RIEUL_MIEUM },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_PIEUP, HANGUL_FINAL_RIEUL_PIEUP },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_SIOS, HANGUL_FINAL_RIEUL_SIOS },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_THIEUTH, HANGUL_FINAL_RIEUL_THIEUTH },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_PHIEUPH, HANGUL_FINAL_RIEUL_PHIEUPH },
        { HANGUL_FINAL_RIEUL, HANGUL_FINAL_HIEUH, HANGUL_FINAL_RIEUL_HIEUH },
        { HANGUL_FINAL_PIEUP, HANGUL_FINAL_SIOS, HANGUL_FINAL_PIEUP_SIOS },
        { HANGUL_FINAL_SIOS, HANGUL_FINAL_SIOS, HANGUL_FINAL_SSANGSIOS }
    };
    const int combinationCount = (int)(sizeof(combinations) / sizeof(combinations[0]));

    QMap<ushort, HangulFinalIndex> table;
    for (int n = 0; n < combinationCount; ++n) {
        const Combination &entry = combinations[n];
        table.insert(packDoubleFinal(entry.first, entry.second), entry.combined);
    }
    return table;
}