aboutsummaryrefslogtreecommitdiffstats
path: root/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
diff options
context:
space:
mode:
authorJarkko Koivikko <jarkko.koivikko@code-q.fi>2018-05-27 12:12:06 +0300
committerMitch Curtis <mitch.curtis@qt.io>2018-08-06 08:20:01 +0000
commit59208edaaf40be982904a6c8cad4eab2d14f938e (patch)
treea613276e1f785e6cd82d019f01dbe478c7cebc5d /src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
parent263a16009e8d08004a546954cd7b2671c26fab9e (diff)
Modularize virtual keyboard and add an extension interface
With the extension interface it is possible to add new input method and/or languages without recompiling the Qt Virtual Keyboard plugin itself. The existing input methods are now isolated into plugins. So installing a new input method is a matter of copying the corresponding extension plugin to the plugins/virtualkeyboard directory (and any collateral required by the extension plugin itself). This change also renames the HunspellInputMethod to DefaultInputMethod. This change is necessary because some other extension plugin may want to provide the default input method instead. Implementation plan =================== [x] Create virtualkeyboard-private module [x] Create plugin library [x] Define interface for input method plugin [x] Define interface for keyboard layouts [x] Move existing input methods and layouts to plugins [x] HangulInputMethod [x] HunspellInputMethod [x] Maybe create a private library for sharing between HunspellInputMethod and LipiInputMethod [x] LipiInputMethod [x] OpenWnnInputMethod [x] PinyinInputMethod [x] T9WriteInputMethod [x] TCInputMethod [o] Rename some C++ classes (too generic name?) [x] Think again replacing the QtVirtualKeyboard namespace [x] Update documentation [x] General instructions for creating a plugin [x] For the C++ interfaces too (previously removed) [ChangeLog][Important Behavior Changes] Introduce an extension interface for the virtual keyboard. All the current input methods and some special keyboard layouts (e.g. Hunspell, OpenWnn, etc.) have been moved to extensions. The extension interface allows third party to create a new input method without having to modify or rebuild the virtual keyboard. In addition, this change makes it possible to add features and languages independently by copying the desired extension to the system. [ChangeLog][Important Behavior Changes] Introduce a virtualkeyboard module, which can be linked against an extension plugin. This module provides the C++ API necessary for creating an input method. [ChangeLog][Important Behavior Changes] Wrap the entire virtual keyboard API into Qt namespace (e.g. QT_BEGIN_NAMESPACE/QT_END_NAMESPACE). Task-number: QTBUG-57602 Change-Id: I449f4429109f596a7a1df7517c81f97d4aada27c Reviewed-by: Mitch Curtis <mitch.curtis@qt.io>
Diffstat (limited to 'src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h')
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h234
1 files changed, 0 insertions, 234 deletions
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h b/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
deleted file mode 100644
index 86a8ee25..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
-#define PINYINIME_INCLUDE_DICTTRIE_H__
-
-#include <stdlib.h>
-#include "./atomdictbase.h"
-#include "./dictdef.h"
-#include "./dictlist.h"
-#include "./searchutility.h"
-#include <QFile>
-
-namespace ime_pinyin {
-
-class DictTrie : AtomDictBase {
- private:
- struct ParsingMark {
- size_t node_offset:24;
- size_t node_num:8; // Number of nodes with this spelling id given
- // by spl_id. If spl_id is a Shengmu, for nodes
- // in the first layer of DictTrie, it equals to
- // SpellingTrie::shm2full_num(); but for those
- // nodes which are not in the first layer,
- // node_num < SpellingTrie::shm2full_num().
- // For a full spelling id, node_num = 1;
- };
-
- // Used to indicate an extended mile stone.
- // An extended mile stone is used to mark a partial match in the dictionary
- // trie to speed up further potential extending.
- // For example, when the user inputs "w", a mile stone is created to mark the
- // partial match status, so that when user inputs another char 'm', it will be
- // faster to extend search space based on this mile stone.
- //
- // For partial match status of "wm", there can be more than one sub mile
- // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
- // there may be more one parsing mark used to mark these partial matchings.
- // A mile stone records the starting position in the mark list and number of
- // marks.
- struct MileStone {
- uint16 mark_start;
- uint16 mark_num;
- };
-
- DictList* dict_list_;
-
- const SpellingTrie *spl_trie_;
-
- LmaNodeLE0* root_; // Nodes for root and the first layer.
- LmaNodeGE1* nodes_ge1_; // Nodes for other layers.
-
- // An quick index from spelling id to the LmaNodeLE0 node buffer, or
- // to the root_ buffer.
- // Index length:
- // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used
- // to get the end.
- // All Shengmu ids are not indexed because they will be converted into
- // corresponding full ids.
- // So, given an id splid, the son is:
- // root_[splid_le0_index_[splid - kFullSplIdStart]]
- uint16 *splid_le0_index_;
-
- uint32 lma_node_num_le0_;
- uint32 lma_node_num_ge1_;
-
- // The first part is for homophnies, and the last top_lma_num_ items are
- // lemmas with highest scores.
- unsigned char *lma_idx_buf_;
- uint32 lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
- uint32 total_lma_num_; // Total number of lemmas in this dictionary.
- uint32 top_lmas_num_; // Number of lemma with highest scores.
-
- // Parsing mark list used to mark the detailed extended statuses.
- ParsingMark *parsing_marks_;
- // The position for next available mark.
- uint16 parsing_marks_pos_;
-
- // Mile stone list used to mark the extended status.
- MileStone *mile_stones_;
- // The position for the next available mile stone. We use positions (except 0)
- // as handles.
- MileStoneHandle mile_stones_pos_;
-
- // Get the offset of sons for a node.
- inline size_t get_son_offset(const LmaNodeGE1 *node);
-
- // Get the offset of homonious ids for a node.
- inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node);
-
- // Get the lemma id by the offset.
- inline LemmaIdType get_lemma_id(size_t id_offset);
-
- void free_resource(bool free_dict_list);
-
- bool load_dict(QFile *fp);
-
- // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
- // them into the lpi_items buffer.
- // This function is called by the search engine.
- size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
- LmaNodeLE0 *node);
-
- // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
- // them into the lpi_items buffer.
- // This function is called by inner functions extend_dict0(), extend_dict1()
- // and extend_dict2().
- size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
- size_t homo_buf_off, LmaNodeGE1 *node,
- uint16 lma_len);
-
- // Extend in the trie from level 0.
- MileStoneHandle extend_dict0(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Extend in the trie from level 1.
- MileStoneHandle extend_dict1(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Extend in the trie from level 2.
- MileStoneHandle extend_dict2(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Try to extend the given spelling id buffer, and if the given id_lemma can
- // be successfully gotten, return true;
- // The given spelling ids are all valid full ids.
- bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma);
-
-#ifdef ___BUILD_MODEL___
- bool save_dict(FILE *fp);
-#endif // ___BUILD_MODEL___
-
- static const int kMaxMileStone = 100;
- static const int kMaxParsingMark = 600;
- static const MileStoneHandle kFirstValidMileStoneHandle = 1;
-
- friend class DictParser;
- friend class DictBuilder;
-
- public:
-
- DictTrie();
- ~DictTrie();
-
-#ifdef ___BUILD_MODEL___
- // Construct the tree from the file fn_raw.
- // fn_validhzs provide the valid hanzi list. If fn_validhzs is
- // NULL, only chars in GB2312 will be included.
- bool build_dict(const char *fn_raw, const char *fn_validhzs);
-
- // Save the binary dictionary
- // Actually, the SpellingTrie/DictList instance will be also saved.
- bool save_dict(const char *filename);
-#endif // ___BUILD_MODEL___
-
- void convert_to_hanzis(char16 *str, uint16 str_len);
-
- void convert_to_scis_ids(char16 *str, uint16 str_len);
-
- // Load a binary dictionary
- // The SpellingTrie instance/DictList will be also loaded
- bool load_dict(const char *filename, LemmaIdType start_id,
- LemmaIdType end_id);
- bool load_dict_fd(int sys_fd, long start_offset, long length,
- LemmaIdType start_id, LemmaIdType end_id);
- bool close_dict() {return true;}
- size_t number_of_lemmas() {return 0;}
-
- void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
-
- MileStoneHandle extend_dict(MileStoneHandle from_handle,
- const DictExtPara *dep,
- LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max);
-
- uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
-
- uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
- uint16 splids_max, bool arg_valid);
-
- size_t predict(const char16 *last_hzs, uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used);
-
- LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/, uint16 /*count*/) {return 0;}
-
- LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/,
- bool /*selected*/) {return 0;}
-
- LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/) {return 0;}
-
- LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;}
-
- LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/) {return 0;}
-
- bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;}
-
- size_t get_total_lemma_count() {return 0;}
- void set_total_lemma_count_of_others(size_t count);
-
- void flush_cache() {}
-
- LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len);
-
- // Fill the lemmas with highest scores to the prediction buffer.
- // his_len is the history length to fill in the prediction buffer.
- size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items,
- size_t npre_max, size_t b4_used);
-};
-}
-
-#endif // PINYINIME_INCLUDE_DICTTRIE_H__