diff options
Diffstat (limited to 'src/plugins/tcime/3rdparty/tcime')
23 files changed, 1282 insertions, 0 deletions
diff --git a/src/plugins/tcime/3rdparty/tcime/COPYING b/src/plugins/tcime/3rdparty/tcime/COPYING new file mode 100644 index 00000000..72844840 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/COPYING @@ -0,0 +1,92 @@ +The project in general is under the following licenses: + +================================================================================ +Copyright 2010 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +================================================================================ + +File dict_phrases.dat is built from libTabe; the licenses of libTabe is: + +================================================================================ + + Copyrighy (c) 1999 TaBE Project. + Copyright (c) 1999 Pai-Hsiang Hsiao. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + . Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + . Neither the name of the TaBE Project nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + + + + Copyright (c) 1999 Computer Systems and Communication Lab, + Institute of Information Science, Academia Sinica. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + . Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + . Neither the name of the Computer Systems and Communication Lab + nor the names of its contributors may be used to endorse or + promote products derived from this software without specific + prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois +c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 +================================================================================ + diff --git a/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.cpp b/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.cpp new file mode 100644 index 00000000..7326b839 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.cpp @@ -0,0 +1,141 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cangjiedictionary.h" +#include "cangjietable.h" + +using namespace tcime; + +bool CangjieDictionary::_simplified = false; + +CangjieDictionary::CangjieDictionary() : + WordDictionary(), + _collator(QLocale(QLatin1String("zh_TW"))) +{ +} + +bool CangjieDictionary::simplified() const +{ + return _simplified; +} + +void CangjieDictionary::setSimplified(bool simplified) +{ + _simplified = simplified; +} + +QStringList CangjieDictionary::getWords(const QString &input) const +{ + // Look up the index in the dictionary for the specified input. + int primaryIndex = CangjieTable::getPrimaryIndex(input); + if (primaryIndex < 0 || primaryIndex >= dictionary().size()) + return QStringList(); + + // [25 * 26] char[] array; each primary entry points to a char[] + // containing words with the same primary index; then words can be looked up + // by their secondary index stored at the beginning of each char[]. + const DictionaryEntry &data = dictionary()[primaryIndex]; + if (data.isEmpty()) + return QStringList(); + + if (_simplified) + // Sort words of this primary index for simplified-cangjie. + return sortWords(data); + + int secondaryIndex = CangjieTable::getSecondaryIndex(input); + if (secondaryIndex < 0) + return QStringList(); + + // Find words match this secondary index for cangjie. 
+ return searchWords(secondaryIndex, data); +} + +class DictionaryComparator +{ +public: + explicit DictionaryComparator(const std::vector<QCollatorSortKey> &sortKeys) : + sortKeys(sortKeys) + {} + + bool operator()(int a, int b) + { + return sortKeys[a] < sortKeys[b]; + } + +private: + const std::vector<QCollatorSortKey> &sortKeys; +}; + +QStringList CangjieDictionary::sortWords(const DictionaryEntry &data) const +{ + int length = data.size() / 2; + std::vector<QCollatorSortKey> sortKeys; + QVector<int> keys; + sortKeys.reserve(length); + keys.reserve(length); + for (int i = 0; i < length; ++i) { + sortKeys.push_back(_collator.sortKey(data[length + i])); + keys.append(i); + } + DictionaryComparator dictionaryComparator(sortKeys); + std::sort(keys.begin(), keys.end(), dictionaryComparator); + + QStringList words; + for (int i = 0; i < length; ++i) + words.append(data[length + keys[i]]); + + return words; +} + +QStringList CangjieDictionary::searchWords(int secondaryIndex, const DictionaryEntry &data) const +{ + int length = data.size() / 2; + + DictionaryEntry::ConstIterator start = data.constBegin(); + DictionaryEntry::ConstIterator end = start + length; + DictionaryEntry::ConstIterator rangeStart = qBinaryFind(start, end, (DictionaryWord)secondaryIndex); + if (rangeStart == end) + return QStringList(); + + // There may be more than one words with the same index; look up words with + // the same secondary index. 
+ while (rangeStart != start) { + if (*(rangeStart - 1) != (DictionaryWord)secondaryIndex) + break; + rangeStart--; + } + + DictionaryEntry::ConstIterator rangeEnd = rangeStart + 1; + while (rangeEnd != end) { + if (*rangeEnd != (DictionaryWord)secondaryIndex) + break; + rangeEnd++; + } + + QStringList words; + words.reserve(rangeEnd - rangeStart); + for (DictionaryEntry::ConstIterator rangeIndex = rangeStart; rangeIndex < rangeEnd; ++rangeIndex) { + DictionaryEntry::ConstIterator item(rangeIndex + length); + words.append(*item); + } + + return words; +} diff --git a/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.h b/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.h new file mode 100644 index 00000000..b87013a9 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/cangjiedictionary.h @@ -0,0 +1,54 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CANGJIEDICTIONARY_H +#define CANGJIEDICTIONARY_H + +#include "worddictionary.h" +#include <QCollator> + +namespace tcime { + +/** + * Extends WordDictionary to provide cangjie word-suggestions. 
+ */ +class CangjieDictionary : public WordDictionary +{ +public: + CangjieDictionary(); + + bool simplified() const; + void setSimplified(bool simplified); + + QStringList getWords(const QString &input) const; + +private: + QStringList sortWords(const DictionaryEntry &data) const; + QStringList searchWords(int secondaryIndex, const DictionaryEntry &data) const; + +private: + QCollator _collator; + static bool _simplified; +}; + +} + +#endif // CANGJIEDICTIONARY_H diff --git a/src/plugins/tcime/3rdparty/tcime/cangjietable.cpp b/src/plugins/tcime/3rdparty/tcime/cangjietable.cpp new file mode 100644 index 00000000..ee8ae4f8 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/cangjietable.cpp @@ -0,0 +1,111 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cangjietable.h" + +using namespace tcime; + +const int CangjieTable::BASE_NUMBER = 26; +const int CangjieTable::MAX_CODE_LENGTH = 5; +const int CangjieTable::MAX_SIMPLIFIED_CODE_LENGTH = 2; + +const QMap<QChar, int> &CangjieTable::letters() +{ + static QMap<QChar, int> letters; + if (letters.isEmpty()) { + int i = 1; + letters.insert(0x65e5, i++); + letters.insert(0x6708, i++); + letters.insert(0x91d1, i++); + letters.insert(0x6728, i++); + letters.insert(0x6c34, i++); + letters.insert(0x706b, i++); + letters.insert(0x571f, i++); + letters.insert(0x7af9, i++); + letters.insert(0x6208, i++); + letters.insert(0x5341, i++); + letters.insert(0x5927, i++); + letters.insert(0x4e2d, i++); + letters.insert(0x4e00, i++); + letters.insert(0x5f13, i++); + letters.insert(0x4eba, i++); + letters.insert(0x5fc3, i++); + letters.insert(0x624b, i++); + letters.insert(0x53e3, i++); + letters.insert(0x5c38, i++); + letters.insert(0x5eff, i++); + letters.insert(0x5c71, i++); + letters.insert(0x5973, i++); + letters.insert(0x7530, i++); + letters.insert(0x96e3, i++); + letters.insert(0x535c, i++); + } + return letters; +} + +bool CangjieTable::isLetter(const QChar &c) +{ + static const QMap<QChar, int> &letters = CangjieTable::letters(); + return letters.contains(c); +} + +int CangjieTable::getPrimaryIndex(const QString &code) +{ + static const QMap<QChar, int> &letters = CangjieTable::letters(); + int length = code.length(); + if ((length < 1) || (length > MAX_CODE_LENGTH)) + return -1; + + QChar c = code.at(0); + if (!isLetter(c)) + return -1; + + // The first letter cannot be absent in the code; therefore, the numerical + // index of the first letter starts from 0 instead. 
+ int index = (letters[c] - 1) * BASE_NUMBER; + if (length < 2) + return index; + + c = code.at(length - 1); + if (!isLetter(c)) + return -1; + + return index + letters[c]; +} + +int CangjieTable::getSecondaryIndex(const QString &code) +{ + static const QMap<QChar, int> &letters = CangjieTable::letters(); + int index = 0; + int last = code.length() - 1; + for (int i = 1; i < last; i++) { + QChar c = code.at(i); + if (!isLetter(c)) + return -1; + index = index * BASE_NUMBER + letters[c]; + } + + int maxEnd = MAX_CODE_LENGTH - 1; + for (int i = last; i < maxEnd; i++) + index = index * BASE_NUMBER; + + return index; +} diff --git a/src/plugins/tcime/3rdparty/tcime/cangjietable.h b/src/plugins/tcime/3rdparty/tcime/cangjietable.h new file mode 100644 index 00000000..54adced3 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/cangjietable.h @@ -0,0 +1,79 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CANGJIETABLE_H +#define CANGJIETABLE_H + +#include <QMap> +#include <QChar> +#include <QString> + +namespace tcime { + +/** + * Defines cangjie letters and calculates the index of the given cangjie code. 
+ */ +class CangjieTable +{ + Q_DISABLE_COPY(CangjieTable) + CangjieTable() {} + + // Cangjie 25 letters with number-index starting from 1: + // 日月金木水火土竹戈十大中一弓人心手口尸廿山女田難卜 + static const QMap<QChar, int> &letters(); + static const int BASE_NUMBER; + +public: + + // Cangjie codes contain at most five letters. A cangjie code can be + // converted to a numerical code by the number-index of each letter. + // The absent letter will be indexed as 0 if the cangjie code contains less + // than five-letters. + static const int MAX_CODE_LENGTH; + static const int MAX_SIMPLIFIED_CODE_LENGTH; + + /** + * Returns {@code true} only if the given character is a valid cangjie letter. + */ + static bool isLetter(const QChar &c); + + /** + * Returns the primary index calculated by the first and last letter of + * the given cangjie code. + * + * @param code should not be null. + * @return -1 for invalid code. + */ + static int getPrimaryIndex(const QString &code); + + /** + * Returns the secondary index calculated by letters between the first and + * last letter of the given cangjie code. + * + * @param code should not be null. + * @return -1 for invalid code. 
+ */ + static int getSecondaryIndex(const QString &code); +}; + +} + +#endif // CANGJIETABLE_H diff --git a/src/plugins/tcime/3rdparty/tcime/data/java/dict_cangjie.dat b/src/plugins/tcime/3rdparty/tcime/data/java/dict_cangjie.dat Binary files differnew file mode 100644 index 00000000..1c692c48 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/java/dict_cangjie.dat diff --git a/src/plugins/tcime/3rdparty/tcime/data/java/dict_phrases.dat b/src/plugins/tcime/3rdparty/tcime/data/java/dict_phrases.dat Binary files differnew file mode 100644 index 00000000..0b34ee1f --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/java/dict_phrases.dat diff --git a/src/plugins/tcime/3rdparty/tcime/data/java/dict_zhuyin.dat b/src/plugins/tcime/3rdparty/tcime/data/java/dict_zhuyin.dat Binary files differnew file mode 100644 index 00000000..3587635e --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/java/dict_zhuyin.dat diff --git a/src/plugins/tcime/3rdparty/tcime/data/qt/dict_cangjie.dat b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_cangjie.dat Binary files differnew file mode 100644 index 00000000..f99ed965 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_cangjie.dat diff --git a/src/plugins/tcime/3rdparty/tcime/data/qt/dict_phrases.dat b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_phrases.dat Binary files differnew file mode 100644 index 00000000..463301f9 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_phrases.dat diff --git a/src/plugins/tcime/3rdparty/tcime/data/qt/dict_zhuyin.dat b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_zhuyin.dat Binary files differnew file mode 100644 index 00000000..6aee7de5 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/data/qt/dict_zhuyin.dat diff --git a/src/plugins/tcime/3rdparty/tcime/phrasedictionary.cpp b/src/plugins/tcime/3rdparty/tcime/phrasedictionary.cpp new file mode 100644 index 00000000..cdeaecdd --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/phrasedictionary.cpp @@ 
-0,0 +1,66 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phrasedictionary.h" + +using namespace tcime; + +PhraseDictionary::PhraseDictionary() : + WordDictionary() +{ +} + +QStringList PhraseDictionary::getWords(const QString &input) const +{ + if (input.length() != 1) + return QStringList(); + + // Phrases are stored in an array consisting of three character arrays. + // char[0][] contains a char[] of words to look for phrases. + // char[2][] contains a char[] of following words for char[0][]. + // char[1][] contains offsets of char[0][] words to map its following words. + // For example, there are 5 phrases: Aa, Aa', Bb, Bb', Cc. + // char[0][] { A, B, C } + // char[1][] { 0, 2, 4 } + // char[2][] { a, a', b, b', c} + const Dictionary &dict = dictionary(); + if (dict.length() != 3) + return QStringList(); + + const DictionaryEntry &words = dict[0]; + + DictionaryEntry::ConstIterator word = qBinaryFind(words, input.at(0)); + if (word == words.constEnd()) + return QStringList(); + + int index = word - words.constBegin(); + const DictionaryEntry &offsets = dict[1]; + const DictionaryEntry &phrases = dict[2]; + int offset = (int)offsets[index].unicode(); + int count = (index < offsets.length() - 1) ? 
+ ((int)offsets[index + 1].unicode() - offset) : (phrases.length() - offset); + + QStringList result; + for (int i = 0; i < count; ++i) + result.append(phrases[offset + i]); + + return result; +} diff --git a/src/plugins/tcime/3rdparty/tcime/phrasedictionary.h b/src/plugins/tcime/3rdparty/tcime/phrasedictionary.h new file mode 100644 index 00000000..06fe9578 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/phrasedictionary.h @@ -0,0 +1,43 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHRASEDICTIONARY_H +#define PHRASEDICTIONARY_H + +#include "worddictionary.h" + +namespace tcime { + +/** + * Reads a phrase dictionary and provides following-word suggestions as a list + * of characters for the given character. 
+ */ +class PhraseDictionary : public WordDictionary +{ +public: + PhraseDictionary(); + + QStringList getWords(const QString &input) const; +}; + +} + +#endif // PHRASEDICTIONARY_H diff --git a/src/plugins/tcime/3rdparty/tcime/qt_attribution.json b/src/plugins/tcime/3rdparty/tcime/qt_attribution.json new file mode 100644 index 00000000..717f87ac --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/qt_attribution.json @@ -0,0 +1,16 @@ +{ + "Id": "tcime", + "Name": "Traditional Chinese IME (tcime)", + "QDocModule": "qtvirtualkeyboard", + "Description": "Traditional Chinese IME (tcime) is an input method engine for traditional Chinese.", + "QtUsage": "Optionally used in Qt Virtual Keyboard.", + + "License": "Apache License 2.0 and BSD 3-clause \"New\" or \"Revised\" License", + "LicenseId": "Apache-2.0 AND BSD-3-Clause", + "LicenseFile": "COPYING", + "Copyright": "Copyright 2010 Google Inc. +Copyrighy (c) 1999 TaBE Project. +Copyright (c) 1999 Pai-Hsiang Hsiao. +Copyright (c) 1999 Computer Systems and Communication Lab, Institute of Information Science, Academia Sinica. 
+Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois" +} diff --git a/src/plugins/tcime/3rdparty/tcime/tcime.pro b/src/plugins/tcime/3rdparty/tcime/tcime.pro new file mode 100644 index 00000000..d9fc4f2e --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/tcime.pro @@ -0,0 +1,37 @@ +TARGET = qttcime + +CONFIG += static + +SOURCES += \ + cangjiedictionary.cpp \ + cangjietable.cpp \ + phrasedictionary.cpp \ + worddictionary.cpp \ + zhuyindictionary.cpp \ + zhuyintable.cpp + +HEADERS += \ + cangjiedictionary.h \ + cangjietable.h \ + phrasedictionary.h \ + worddictionary.h \ + zhuyindictionary.h \ + zhuyintable.h + +OTHER_FILES += \ + data/dict_cangjie.dat \ + data/dict_phrases.dat + +DEFINES += \ + QT_NO_CAST_TO_ASCII \ + QT_ASCII_CAST_WARNINGS \ + QT_NO_CAST_FROM_ASCII \ + QT_NO_CAST_FROM_BYTEARRAY + +MODULE_INCLUDEPATH = $$PWD +MODULE_DEFINES = HAVE_TCIME + +load(qt_helper_lib) + +CONFIG += qt +QT = core diff --git a/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.class b/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.class Binary files differnew file mode 100644 index 00000000..d1e70d8e --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.class diff --git a/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.java b/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.java new file mode 100644 index 00000000..6dd81212 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/tools/dict2qt.java @@ -0,0 +1,201 @@ +/****************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd +** All rights reserved. +** For any questions to The Qt Company, please use contact form at http://qt.io +** +** This file is part of the Qt Virtual Keyboard module.
+** +** Licensees holding valid commercial license for Qt may use this file in +** accordance with the Qt License Agreement provided with the Software +** or, alternatively, in accordance with the terms contained in a written +** agreement between you and The Qt Company. +** +** If you have questions regarding the use of this file, please use +** contact form at http://qt.io +** +******************************************************************************/

import java.io.BufferedInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * Command line tool that converts a TCIME dictionary (a Java char[][]
 * serialized with ObjectOutputStream) into the binary layout written below:
 * a 32-bit array count followed, for every array, by a 32-bit length and its
 * 16-bit characters. The byte order is big endian unless -le is given.
 */
public class dict2qt {

    /**
     * Parses the command line, loads the input dictionary and writes the
     * converted file. When no -o option is given the output is written to
     * the input file name.
     */
    public static void main(String[] args) {
        boolean showHelp = false;
        boolean showUsage = false;
        boolean littleEndian = false;
        String outputFileName = "";
        String inputFileName = "";
        File inputFile = null;

        if (args.length > 0) {
            for (int i = 0; i < args.length; i++) {
                if (args[i].startsWith("-")) {
                    if (args[i].equals("-h")) {
                        showHelp = true;
                        showUsage = true;
                        break;
                    } else if (args[i].equals("-o")) {
                        if (++i >= args.length) {
                            System.err.println("Error: missing argument <output file>");
                            showUsage = true;
                            break;
                        }
                        outputFileName = args[i];
                    } else if (args[i].equals("-le")) {
                        littleEndian = true;
                    } else {
                        System.err.println("Error: unknown option '" + args[i] + "'");
                        showUsage = true;
                        break;
                    }
                } else if (inputFileName.isEmpty() && i + 1 == args.length) {
                    // The input file is only accepted as the last argument.
                    inputFileName = args[i];
                } else {
                    System.err.println("Error: unexpected argument '" + args[i] + "'");
                    showUsage = true;
                    break;
                }
            }

            if (!showUsage && !showHelp) {
                if (!inputFileName.isEmpty()) {
                    inputFile = new File(inputFileName);
                    if (!inputFile.exists()) {
                        System.err.println("Error: input file does not exist '" + inputFileName + "'");
                        return;
                    }
                    if (outputFileName.isEmpty())
                        outputFileName = inputFileName;
                }

                if (inputFile == null) {
                    System.err.println("Error: missing argument file");
                    showUsage = true;
                }
            }
        } else {
            showUsage = true;
        }

        if (showUsage || showHelp) {
            if (showHelp) {
                System.err.println("TCIME dictionary converter for Qt Virtual Keyboard");
                System.err.println("");
                System.err.println("Copyright (C) 2015 The Qt Company Ltd - All rights reserved.");
                System.err.println("");
                System.err.println(" This utility converts TCIME dictionaries to Qt compatible");
                System.err.println(" format. The dictionaries are basically Java char[][] arrays");
                System.err.println(" serialized to file with ObjectOutputStream.");
                System.err.println("");
                System.err.println(" The corresponding data format in the Qt dictionary is");
                System.err.println(" QVector<QVector<ushort>>. The byte order is set to big endian");
                System.err.println(" by default, but can be changed with -le option.");
            }
            if (showUsage) {
                System.err.println("");
                System.err.println("Usage: java dict2qt [options] file");
                System.err.println("Options:");
                System.err.println(" -o <output file> Place the output into <output file>");
                System.err.println(" -le Change byte order to little endian");
                System.err.println(" -h Display help");
            }
            return;
        }

        char[][] dictionary = null;
        try {
            // loadDictionary() takes care of closing the stream.
            dictionary = loadDictionary(new FileInputStream(inputFile));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        }
        if (dictionary == null)
            return;

        int dictionarySize = calculateDictionarySize(dictionary);
        ByteBuffer buffer = ByteBuffer.allocate(dictionarySize);
        buffer.order(littleEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
        buffer.putInt(dictionary.length);
        for (int i = 0; i < dictionary.length; i++) {
            char[] data = dictionary[i];
            if (data != null) {
                buffer.putInt(data.length);
                for (int j = 0; j < data.length; j++) {
                    buffer.putChar(data[j]);
                }
            } else {
                // A missing array is written as an empty one.
                buffer.putInt(0);
            }
        }

        byte[] bytes = buffer.array();
        DataOutputStream dos = null;
        try {
            File outputFile = new File(outputFileName);
            FileOutputStream os = new FileOutputStream(outputFile);
            dos = new DataOutputStream(os);
            dos.write(bytes, 0, buffer.position());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (dos != null) {
                try {
                    dos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads a serialized char[][] from the given stream.
     *
     * The stream is always closed before returning, including when the
     * object stream cannot be created (e.g. the file has no valid
     * serialization header).
     *
     * @return the dictionary, or null if it could not be read.
     */
    static char[][] loadDictionary(InputStream ins) {
        char[][] result = null;
        ObjectInputStream oin = null;
        try {
            BufferedInputStream bis = new BufferedInputStream(ins);
            oin = new ObjectInputStream(bis);
            result = (char[][]) oin.readObject();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                // Closing the object stream closes the wrapped streams too;
                // without one, close the raw stream so it does not leak.
                if (oin != null)
                    oin.close();
                else
                    ins.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return result;
    }

    /**
     * Returns the number of bytes the converted dictionary occupies: 4 for
     * the array count, plus 4 for each array length and 2 per character.
     */
    static int calculateDictionarySize(final char[][] dictionary) {
        int result = 4;
        for (int i = 0; i < dictionary.length; i++) {
            char[] data = dictionary[i];
            result += 4;
            if (data != null)
                result += data.length * 2;
        }
        return result;
    }

}
diff --git a/src/plugins/tcime/3rdparty/tcime/worddictionary.cpp b/src/plugins/tcime/3rdparty/tcime/worddictionary.cpp new file mode 100644 index 00000000..6bc0a9e2 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/worddictionary.cpp @@ -0,0 +1,43 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "worddictionary.h" +#include <QDataStream> +#include <QFile> + +using namespace tcime; + +bool WordDictionary::load(const QString &fileName, bool littleEndian) +{ + _dictionary.clear(); + + QFile dictionaryFile(fileName); + if (!dictionaryFile.open(QIODevice::ReadOnly)) + return false; + + QDataStream ds(&dictionaryFile); + if (littleEndian) + ds.setByteOrder(QDataStream::LittleEndian); + Q_ASSERT((ds.byteOrder() == QDataStream::LittleEndian) == littleEndian); + ds >> _dictionary; + + return !_dictionary.isEmpty(); +} diff --git a/src/plugins/tcime/3rdparty/tcime/worddictionary.h b/src/plugins/tcime/3rdparty/tcime/worddictionary.h new file mode 100644 index 00000000..3f1ea43e --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/worddictionary.h @@ -0,0 +1,61 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef WORDDICTIONARY_H +#define WORDDICTIONARY_H + +#include <QVector> +#include <QString> +#include <QStringList> + +namespace tcime { + +/** + * Reads a word-dictionary and provides word-suggestions as a list of characters + * for the specified input. + */ +class WordDictionary +{ + Q_DISABLE_COPY(WordDictionary) + +protected: + typedef QChar DictionaryWord; + typedef QVector<DictionaryWord> DictionaryEntry; + typedef QVector<DictionaryEntry> Dictionary; + + const Dictionary &dictionary() const { return _dictionary; } + +public: + WordDictionary() {} + virtual ~WordDictionary() {} + + bool isEmpty() const { return _dictionary.isEmpty(); } + + virtual bool load(const QString &fileName, bool littleEndian = false); + virtual QStringList getWords(const QString &input) const = 0; + +private: + Dictionary _dictionary; +}; + +} + +#endif // WORDDICTIONARY_H diff --git a/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.cpp b/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.cpp new file mode 100644 index 00000000..1cf303c1 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.cpp @@ -0,0 +1,61 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "zhuyindictionary.h" +#include "zhuyintable.h" + +using namespace tcime; + +ZhuyinDictionary::ZhuyinDictionary() : + WordDictionary() +{ +} + +QStringList ZhuyinDictionary::getWords(const QString &input) const +{ + // Look up the syllables index; return empty string for invalid syllables. + QStringList pair = ZhuyinTable::stripTones(input); + int syllablesIndex = !pair.isEmpty() ? ZhuyinTable::getSyllablesIndex(pair[0]) : -1; + if (syllablesIndex < 0 || syllablesIndex >= dictionary().size()) + return QStringList(); + + // [22-initials * 39-finals] syllables array; each syllables entry points to + // a char[] containing words for that syllables. + const DictionaryEntry &data = dictionary()[syllablesIndex]; + if (data.isEmpty()) + return QStringList(); + + // Counts of words for each tone are stored in the array beginning. + int tone = ZhuyinTable::getTones(pair[1].at(0)); + int length = (int) data[tone].unicode(); + if (length == 0) + return QStringList(); + + int start = ZhuyinTable::getTonesCount(); + for (int i = 0; i < tone; ++i) + start += (int) data[i].unicode(); + + QStringList words; + for (int i = 0; i < length; ++i) + words.append(data[start + i]); + + return words; +} diff --git a/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.h b/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.h new file mode 100644 index 00000000..c9469b72 --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/zhuyindictionary.h @@ -0,0 +1,42 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. 
+ * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ZHUYINDICTIONARY_H +#define ZHUYINDICTIONARY_H + +#include "worddictionary.h" + +namespace tcime { + +/** + * Extends WordDictionary to provide zhuyin word-suggestions. + */ +class ZhuyinDictionary : public WordDictionary +{ +public: + ZhuyinDictionary(); + + QStringList getWords(const QString &input) const; +}; + +} + +#endif // ZHUYINDICTIONARY_H diff --git a/src/plugins/tcime/3rdparty/tcime/zhuyintable.cpp b/src/plugins/tcime/3rdparty/tcime/zhuyintable.cpp new file mode 100644 index 00000000..8198122a --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/zhuyintable.cpp @@ -0,0 +1,161 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "zhuyintable.h" +#include <QStringList> + +using namespace tcime; + +const int ZhuyinTable::INITIALS_SIZE = 22; +const QList<QChar> ZhuyinTable::yiEndingFinals = QList<QChar>() + << 0x311a << 0x311b << 0x311d << 0x311e << 0x3120 << 0x3121 << 0x3122 + << 0x3123 << 0x3124 << 0x3125; +const QList<QChar> ZhuyinTable::wuEndingFinals = QList<QChar>() + << 0x311a << 0x311b << 0x311e << 0x311f << 0x3122 << 0x3123 << 0x3124 + << 0x3125; +const QList<QChar> ZhuyinTable::yuEndingFinals = QList<QChar>() + << 0x311d << 0x3122 << 0x3123 << 0x3125; +const int ZhuyinTable::YI_FINALS_INDEX = 14; +const int ZhuyinTable::WU_FINALS_INDEX = 25; +const int ZhuyinTable::YU_FINALS_INDEX = 34; +const QChar ZhuyinTable::YI_FINALS = 0x3127; +const QChar ZhuyinTable::WU_FINALS = 0x3128; +const QChar ZhuyinTable::YU_FINALS = 0x3129; +const QList<QChar> ZhuyinTable::tones = QList<QChar>() + << ZhuyinTable::DEFAULT_TONE << 0x02d9 << 0x02ca << 0x02c7 << 0x02cb; +const QChar ZhuyinTable::DEFAULT_TONE = QLatin1Char(' '); + +int ZhuyinTable::getInitials(const QChar &initials) +{ + // Calculate the index by its distance to the first initials 'ㄅ' (b). + int index = initials.unicode() - 0x3105 + 1; + if (index >= ZhuyinTable::INITIALS_SIZE) + // Syllables starting with finals can still be valid. + return 0; + + return (index >= 0) ? index : -1; +} + +int ZhuyinTable::getFinals(const QString &finals) +{ + if (finals.length() == 0) + // Syllables ending with no finals can still be valid. + return 0; + + if (finals.length() > 2) + return -1; + + // Compute the index instead of direct lookup the whole array to save + // traversing time. First calculate the distance to the first finals + // 'ㄚ' (a). 
+ const QChar firstFinal = finals.at(0); + int index = firstFinal.unicode() - 0x311a + 1; + if (index < YI_FINALS_INDEX) + return index; + + // Check 'ㄧ' (yi), 'ㄨ' (wu) , and 'ㄩ' (yu) group finals. + QList<QChar> endingFinals; + if (firstFinal == YI_FINALS) { + index = YI_FINALS_INDEX; + endingFinals = yiEndingFinals; + } else if (firstFinal == WU_FINALS) { + index = WU_FINALS_INDEX; + endingFinals = wuEndingFinals; + } else if (firstFinal == YU_FINALS) { + index = YU_FINALS_INDEX; + endingFinals = yuEndingFinals; + } else { + return -1; + } + + if (finals.length() == 1) + return index; + + for (int i = 0; i < endingFinals.size(); ++i) { + if (finals.at(1) == endingFinals[i]) + return index + i + 1; + } + return -1; +} + +int ZhuyinTable::getSyllablesIndex(const QString &syllables) +{ + if (syllables.isEmpty()) + return -1; + + int initials = getInitials(syllables.at(0)); + if (initials < 0) + return -1; + + // Strip out initials before getting finals column-index. + int finals = getFinals((initials != 0) ? syllables.mid(1) : syllables); + if (finals < 0) + return -1; + + return (finals * INITIALS_SIZE + initials); +} + +int ZhuyinTable::getTones(const QChar &c) +{ + for (int i = 0; i < tones.size(); ++i) { + if (tones[i] == c) + return i; + } + // Treat all other characters as the default tone with the index 0. 
+ return 0; +} + +int ZhuyinTable::getTonesCount() +{ + return tones.size(); +} + +bool ZhuyinTable::isTone(const QChar &c) +{ + for (int i = 0; i < tones.size(); ++i) { + if (tones[i] == c) + return true; + } + return false; +} + +bool ZhuyinTable::isYiWuYuFinals(const QChar &c) +{ + ushort unicode = c.unicode(); + return unicode == YI_FINALS || unicode == WU_FINALS || unicode == YU_FINALS; +} + +QStringList ZhuyinTable::stripTones(const QString &input) +{ + const int last = input.length() - 1; + if (last < 0) + return QStringList(); + + QChar tone = input.at(last); + if (isTone(tone)) { + QString syllables = input.left(last); + if (syllables.length() <= 0) + return QStringList(); + return QStringList() << syllables << QString(tone); + } + // Treat the tone-less input as the default tone (tone-0). + return QStringList() << input << QString(DEFAULT_TONE); +} diff --git a/src/plugins/tcime/3rdparty/tcime/zhuyintable.h b/src/plugins/tcime/3rdparty/tcime/zhuyintable.h new file mode 100644 index 00000000..8512574e --- /dev/null +++ b/src/plugins/tcime/3rdparty/tcime/zhuyintable.h @@ -0,0 +1,74 @@ +/* + * Qt implementation of TCIME library + * This file is part of the Qt Virtual Keyboard module. + * Contact: http://www.qt.io/licensing/ + * + * Copyright (C) 2015 The Qt Company + * Copyright 2010 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ZHUYINTABLE_H +#define ZHUYINTABLE_H + +#include <QMap> +#include <QChar> +#include <QString> + +namespace tcime { + +class ZhuyinTable +{ + Q_DISABLE_COPY(ZhuyinTable) + ZhuyinTable() {} + + // All Chinese characters are mapped into a zhuyin table as described in + // http://en.wikipedia.org/wiki/Zhuyin_table. + static const int INITIALS_SIZE; + + // Finals that can be appended after 'ㄧ' (yi), 'ㄨ' (wu), or 'ㄩ' (yu). + static const QList<QChar> yiEndingFinals; + static const QList<QChar> wuEndingFinals; + static const QList<QChar> yuEndingFinals; + + // 'ㄧ' (yi) finals start from position 14 and are followed by 'ㄨ' (wu) + // finals, and 'ㄩ' (yu) finals follow after 'ㄨ' (wu) finals. + static const int YI_FINALS_INDEX; + static const int WU_FINALS_INDEX; + static const int YU_FINALS_INDEX; + + // 'ㄧ' (yi), 'ㄨ' (wu) , and 'ㄩ' (yu) finals. + static const QChar YI_FINALS; + static const QChar WU_FINALS; + static const QChar YU_FINALS; + + // Default tone and four tone symbols: '˙', 'ˊ', 'ˇ', and 'ˋ'. + static const QList<QChar> tones; + +public: + static const QChar DEFAULT_TONE; + + static int getInitials(const QChar &initials); + static int getFinals(const QString &finals); + static int getSyllablesIndex(const QString &syllables); + static int getTones(const QChar &c); + static int getTonesCount(); + static bool isTone(const QChar &c); + static bool isYiWuYuFinals(const QChar &c); + static QStringList stripTones(const QString &input); +}; + +} + +#endif // ZHUYINTABLE_H |