diff options
Diffstat (limited to 'src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h')
-rw-r--r-- | src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h | 234 |
1 files changed, 0 insertions, 234 deletions
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h b/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h deleted file mode 100644 index 86a8ee25..00000000 --- a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PINYINIME_INCLUDE_DICTTRIE_H__ -#define PINYINIME_INCLUDE_DICTTRIE_H__ - -#include <stdlib.h> -#include "./atomdictbase.h" -#include "./dictdef.h" -#include "./dictlist.h" -#include "./searchutility.h" -#include <QFile> - -namespace ime_pinyin { - -class DictTrie : AtomDictBase { - private: - struct ParsingMark { - size_t node_offset:24; - size_t node_num:8; // Number of nodes with this spelling id given - // by spl_id. If spl_id is a Shengmu, for nodes - // in the first layer of DictTrie, it equals to - // SpellingTrie::shm2full_num(); but for those - // nodes which are not in the first layer, - // node_num < SpellingTrie::shm2full_num(). - // For a full spelling id, node_num = 1; - }; - - // Used to indicate an extended mile stone. - // An extended mile stone is used to mark a partial match in the dictionary - // trie to speed up further potential extending. - // For example, when the user inputs "w", a mile stone is created to mark the - // partial match status, so that when user inputs another char 'm', it will be - // faster to extend search space based on this mile stone. - // - // For partial match status of "wm", there can be more than one sub mile - // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so - // there may be more one parsing mark used to mark these partial matchings. - // A mile stone records the starting position in the mark list and number of - // marks. - struct MileStone { - uint16 mark_start; - uint16 mark_num; - }; - - DictList* dict_list_; - - const SpellingTrie *spl_trie_; - - LmaNodeLE0* root_; // Nodes for root and the first layer. - LmaNodeGE1* nodes_ge1_; // Nodes for other layers. - - // An quick index from spelling id to the LmaNodeLE0 node buffer, or - // to the root_ buffer. - // Index length: - // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used - // to get the end. - // All Shengmu ids are not indexed because they will be converted into - // corresponding full ids. - // So, given an id splid, the son is: - // root_[splid_le0_index_[splid - kFullSplIdStart]] - uint16 *splid_le0_index_; - - uint32 lma_node_num_le0_; - uint32 lma_node_num_ge1_; - - // The first part is for homophnies, and the last top_lma_num_ items are - // lemmas with highest scores. - unsigned char *lma_idx_buf_; - uint32 lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte. - uint32 total_lma_num_; // Total number of lemmas in this dictionary. - uint32 top_lmas_num_; // Number of lemma with highest scores. - - // Parsing mark list used to mark the detailed extended statuses. - ParsingMark *parsing_marks_; - // The position for next available mark. - uint16 parsing_marks_pos_; - - // Mile stone list used to mark the extended status. - MileStone *mile_stones_; - // The position for the next available mile stone. We use positions (except 0) - // as handles. - MileStoneHandle mile_stones_pos_; - - // Get the offset of sons for a node. - inline size_t get_son_offset(const LmaNodeGE1 *node); - - // Get the offset of homonious ids for a node. - inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node); - - // Get the lemma id by the offset. - inline LemmaIdType get_lemma_id(size_t id_offset); - - void free_resource(bool free_dict_list); - - bool load_dict(QFile *fp); - - // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill - // them into the lpi_items buffer. - // This function is called by the search engine. - size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, - LmaNodeLE0 *node); - - // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill - // them into the lpi_items buffer. - // This function is called by inner functions extend_dict0(), extend_dict1() - // and extend_dict2(). - size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size, - size_t homo_buf_off, LmaNodeGE1 *node, - uint16 lma_len); - - // Extend in the trie from level 0. - MileStoneHandle extend_dict0(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Extend in the trie from level 1. - MileStoneHandle extend_dict1(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Extend in the trie from level 2. - MileStoneHandle extend_dict2(MileStoneHandle from_handle, - const DictExtPara *dep, LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - // Try to extend the given spelling id buffer, and if the given id_lemma can - // be successfully gotten, return true; - // The given spelling ids are all valid full ids. - bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma); - -#ifdef ___BUILD_MODEL___ - bool save_dict(FILE *fp); -#endif // ___BUILD_MODEL___ - - static const int kMaxMileStone = 100; - static const int kMaxParsingMark = 600; - static const MileStoneHandle kFirstValidMileStoneHandle = 1; - - friend class DictParser; - friend class DictBuilder; - - public: - - DictTrie(); - ~DictTrie(); - -#ifdef ___BUILD_MODEL___ - // Construct the tree from the file fn_raw. - // fn_validhzs provide the valid hanzi list. If fn_validhzs is - // NULL, only chars in GB2312 will be included. - bool build_dict(const char *fn_raw, const char *fn_validhzs); - - // Save the binary dictionary - // Actually, the SpellingTrie/DictList instance will be also saved. - bool save_dict(const char *filename); -#endif // ___BUILD_MODEL___ - - void convert_to_hanzis(char16 *str, uint16 str_len); - - void convert_to_scis_ids(char16 *str, uint16 str_len); - - // Load a binary dictionary - // The SpellingTrie instance/DictList will be also loaded - bool load_dict(const char *filename, LemmaIdType start_id, - LemmaIdType end_id); - bool load_dict_fd(int sys_fd, long start_offset, long length, - LemmaIdType start_id, LemmaIdType end_id); - bool close_dict() {return true;} - size_t number_of_lemmas() {return 0;} - - void reset_milestones(uint16 from_step, MileStoneHandle from_handle); - - MileStoneHandle extend_dict(MileStoneHandle from_handle, - const DictExtPara *dep, - LmaPsbItem *lpi_items, - size_t lpi_max, size_t *lpi_num); - - size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, - LmaPsbItem *lpi_items, size_t lpi_max); - - uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max); - - uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, - uint16 splids_max, bool arg_valid); - - size_t predict(const char16 *last_hzs, uint16 hzs_len, - NPredictItem *npre_items, size_t npre_max, - size_t b4_used); - - LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/, uint16 /*count*/) {return 0;} - - LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/, - bool /*selected*/) {return 0;} - - LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/) {return 0;} - - LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;} - - LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[], - uint16 /*lemma_len*/) {return 0;} - - bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;} - - size_t get_total_lemma_count() {return 0;} - void set_total_lemma_count_of_others(size_t count); - - void flush_cache() {} - - LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len); - - // Fill the lemmas with highest scores to the prediction buffer. - // his_len is the history length to fill in the prediction buffer. - size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items, - size_t npre_max, size_t b4_used); -}; -} - -#endif // PINYINIME_INCLUDE_DICTTRIE_H__ |