aboutsummaryrefslogtreecommitdiffstats
path: root/src/virtualkeyboard/3rdparty/pinyin/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/virtualkeyboard/3rdparty/pinyin/include')
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/atomdictbase.h269
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/dictbuilder.h171
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/dictdef.h157
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/dictlist.h120
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h234
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/lpicache.h62
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/matrixsearch.h460
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/mystdlib.h32
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/ngram.h97
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/pinyinime.h223
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/searchutility.h142
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/spellingtable.h111
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/spellingtrie.h259
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/splparser.h96
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/sync.h85
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/userdict.h434
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/utf16char.h56
-rw-r--r--src/virtualkeyboard/3rdparty/pinyin/include/utf16reader.h48
18 files changed, 0 insertions, 3056 deletions
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/atomdictbase.h b/src/virtualkeyboard/3rdparty/pinyin/include/atomdictbase.h
deleted file mode 100644
index 0a70a510..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/atomdictbase.h
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * This file defines the AtomDictBase class, which is the base class for all atom
- * dictionaries. Atom dictionaries are managed by the decoder class
- * MatrixSearch.
- *
- * When the user appends a new character to the Pinyin string, all enabled atom
- * dictionaries' extend_dict() will be called at least once to get candidates
- * ended in this step (the information of starting step is also given in the
- * parameter). Usually, when extend_dict() is called, a MileStoneHandle object
- * returned by a previous call for an earlier step is given to speed up the
- * look-up process, and a new MileStoneHandle object will be returned if
- * the extension is successful.
- *
- * A returned MileStoneHandle object should stay alive until the function
- * reset_milestones() is called and this object is notified to be reset.
- *
- * Usually, the atom dictionary can use step information to manage its
- * MileStoneHandle objects, or it can make the objects in ascendant order to
- * make the reset easier.
- *
- * When the decoder loads the dictionary, it will give a starting lemma id for
- * this atom dictionary to map an inner id to a global id. Global ids should be
- * used when an atom dictionary talks to any component outside.
- */
-#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__
-#define PINYINIME_INCLUDE_ATOMDICTBASE_H__
-
-#include <stdlib.h>
-#include "./dictdef.h"
-#include "./searchutility.h"
-
-namespace ime_pinyin {
-class AtomDictBase {
- public:
- virtual ~AtomDictBase() {}
-
- /**
- * Load an atom dictionary from a file.
- *
- * @param file_name The file name to load dictionary.
- * @param start_id The starting id used for this atom dictionary.
- * @param end_id The end id (included) which can be used for this atom
- * dictionary. User dictionary will always use the last id space, so it can
- * ignore this parameter. All other atom dictionaries should check this
- * parameter.
- * @return True if succeed.
- */
- virtual bool load_dict(const char *file_name, LemmaIdType start_id,
- LemmaIdType end_id) = 0;
-
- /**
- * Close this atom dictionary.
- *
- * @return True if succeed.
- */
- virtual bool close_dict() = 0;
-
- /**
- * Get the total number of lemmas in this atom dictionary.
- *
- * @return The total number of lemmas.
- */
- virtual size_t number_of_lemmas() = 0;
-
- /**
- * This function is called by the decoder when user deletes a character from
- * the input string, or begins a new input string.
- *
- * Different atom dictionaries may implement this function in different ways.
- * An atom dictionary can use one of these two parameters (or both) to reset
- * its corresponding MileStoneHandle objects according to its detailed
- * implementation.
- *
- * For example, if an atom dictionary uses step information to manage its
- * MileStoneHandle objects, parameter from_step can be used to identify which
- * objects should be reset; otherwise, if another atom dictionary does not
- * use the detailed step information, it only uses ascendant handles
- * (according to step. For the same step, earlier call, smaller handle), it
- * can easily reset those MileStoneHandle which are larger than from_handle.
- *
- * The decoder always resets the decoding state by step. So when it begins
- * resetting, it will call reset_milestones() of its atom dictionaries with
- * the step information, and the MileStoneHandle objects returned by the
- * earliest calling of extend_dict() for that step.
- *
- * If an atom dictionary does not implement incremental search, this function
- * can be totally ignored.
- *
- * @param from_step From which step(included) the MileStoneHandle
- * objects should be reset.
- * @param from_handle The earliest MileStoneHandle object for step from_step
- */
- virtual void reset_milestones(uint16 from_step,
- MileStoneHandle from_handle) = 0;
-
- /**
- * Used to extend in this dictionary. The handle returned should remain valid
- * until reset_milestones() is called.
- *
- * @param from_handle Its previous returned extended handle without the new
- * spelling id, it can be used to speed up the extending.
- * @param dep The parameter used for extending.
- * @param lpi_items Used to fill in the lemmas matched.
- * @param lpi_max The length of the buffer
- * @param lpi_num Used to return the newly added items.
- * @return The new mile stone for this extending. 0 if fail.
- */
- virtual MileStoneHandle extend_dict(MileStoneHandle from_handle,
- const DictExtPara *dep,
- LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num) = 0;
-
- /**
- * Get lemma items with scores according to a spelling id stream.
- * This atom dictionary does not need to sort the returned items.
- *
- * @param splid_str The spelling id stream buffer.
- * @param splid_str_len The length of the spelling id stream buffer.
- * @param lpi_items Used to return matched lemma items with scores.
- * @param lpi_max The maximum size of the buffer to return result.
- * @return The number of matched items which have been filled in to lpi_items.
- */
- virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max) = 0;
-
- /**
- * Get a lemma string (The Chinese string) by the given lemma id.
- *
- * @param id_lemma The lemma id to get the string.
- * @param str_buf The buffer to return the Chinese string.
- * @param str_max The maximum size of the buffer.
- * @return The length of the string, 0 if fail.
- */
- virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
- uint16 str_max) = 0;
-
- /**
- * Get the full spelling ids for the given lemma id.
- * If the given buffer is too short, return 0.
- *
- * @param splids Used to return the spelling ids.
- * @param splids_max The maximum buffer length of splids.
- * @param arg_valid Used to indicate if the incoming parameters have been
- * initialized and are valid. If it is true, the splids and splids_max are valid
- * and there may be half ids in splids to be updated to full ids. In this
- * case, splids_max is the number of valid ids in splids.
- * @return The number of ids in the buffer.
- */
- virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
- uint16 splids_max, bool arg_valid) = 0;
-
- /**
- * Function used for prediction.
- * No need to sort the newly added items.
- *
- * @param last_hzs The last n Chinese characters (called Hanzi); its length
- * should be less than or equal to kMaxPredictSize.
- * @param hzs_len Specifies the length (<= kMaxPredictSize) of the history.
- * @param npre_items Used to return the result.
- * @param npre_max The length of the buffer to return the result.
- * @param b4_used Number of prediction results (from npre_items[-b4_used])
- * from other atom dictionaries. An atom dictionary can just ignore it.
- * @return The number of prediction results from this atom dictionary.
- */
- virtual size_t predict(const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used) = 0;
-
- /**
- * Add a lemma to the dictionary. If the dictionary allows to add new
- * items and this item does not exist, add it.
- *
- * @param lemma_str The Chinese string of the lemma.
- * @param splids The spelling ids of the lemma.
- * @param lemma_len The length of the Chinese lemma.
- * @param count The frequency count for this lemma.
- */
- virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len, uint16 count) = 0;
-
- /**
- * Update a lemma's occurring count.
- *
- * @param lemma_id The lemma id to update.
- * @param delta_count The frequency count to adjust.
- * @param selected Indicate whether this lemma is selected by user and
- * submitted to target edit box.
- * @return The id if succeed, 0 if fail.
- */
- virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
- bool selected) = 0;
-
- /**
- * Get the lemma id for the given lemma.
- *
- * @param lemma_str The Chinese string of the lemma.
- * @param splids The spelling ids of the lemma.
- * @param lemma_len The length of the lemma.
- * @return The matched lemma id, or 0 if fail.
- */
- virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len) = 0;
-
- /**
- * Get the lemma score.
- *
- * @param lemma_id The lemma id to get score.
- * @return The score of the lemma, or 0 if fail.
- */
- virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0;
-
- /**
- * Get the lemma score.
- *
- * @param lemma_str The Chinese string of the lemma.
- * @param splids The spelling ids of the lemma.
- * @param lemma_len The length of the lemma.
- * @return The score of the lemma, or 0 if fail.
- */
- virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len) = 0;
-
- /**
- * If the dictionary allowed, remove a lemma from it.
- *
- * @param lemma_id The id of the lemma to remove.
- * @return True if succeed.
- */
- virtual bool remove_lemma(LemmaIdType lemma_id) = 0;
-
- /**
- * Get the total occurring count of this atom dictionary.
- *
- * @return The total occurring count of this atom dictionary.
- */
- virtual size_t get_total_lemma_count() = 0;
-
- /**
- * Set the total occurring count of other atom dictionaries.
- *
- * @param count The total occurring count of other atom dictionaries.
- */
- virtual void set_total_lemma_count_of_others(size_t count) = 0;
-
- /**
- * Notify this atom dictionary to flush the cached data to persistent storage
- * if necessary.
- */
- virtual void flush_cache() = 0;
-};
-}
-
-#endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dictbuilder.h b/src/virtualkeyboard/3rdparty/pinyin/include/dictbuilder.h
deleted file mode 100644
index da0d6cd3..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/dictbuilder.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__
-#define PINYINIME_INCLUDE_DICTBUILDER_H__
-
-#include <stdlib.h>
-#include "./utf16char.h"
-#include "./dictdef.h"
-#include "./dictlist.h"
-#include "./spellingtable.h"
-#include "./spellingtrie.h"
-#include "./splparser.h"
-
-namespace ime_pinyin {
-
-#ifdef ___BUILD_MODEL___
-
-#define ___DO_STATISTICS___
-
-class DictTrie;
-
-class DictBuilder {
- private:
- // The raw lemma array buffer.
- LemmaEntry *lemma_arr_;
- size_t lemma_num_;
-
- // Used to store all possible single char items.
- // Two items may have the same Hanzi while their spelling ids are different.
- SingleCharItem *scis_;
- size_t scis_num_;
-
- // In the tree, root's level is -1.
- // Lemma nodes for root, and level 0
- LmaNodeLE0 *lma_nodes_le0_;
-
- // Lemma nodes for layers whose levels are deeper than 0
- LmaNodeGE1 *lma_nodes_ge1_;
-
- // Number of used lemma nodes
- size_t lma_nds_used_num_le0_;
- size_t lma_nds_used_num_ge1_;
-
- // Used to store homophonies' ids.
- LemmaIdType *homo_idx_buf_;
- // Number of homophonies each of which only contains one Chinese character.
- size_t homo_idx_num_eq1_;
- // Number of homophonies each of which contains more than one character.
- size_t homo_idx_num_gt1_;
-
- // The items with highest scores.
- LemmaEntry *top_lmas_;
- size_t top_lmas_num_;
-
- SpellingTable *spl_table_;
- SpellingParser *spl_parser_;
-
-#ifdef ___DO_STATISTICS___
- size_t max_sonbuf_len_[kMaxLemmaSize];
- size_t max_homobuf_len_[kMaxLemmaSize];
-
- size_t total_son_num_[kMaxLemmaSize];
- size_t total_node_hasson_[kMaxLemmaSize];
- size_t total_sonbuf_num_[kMaxLemmaSize];
- size_t total_sonbuf_allnoson_[kMaxLemmaSize];
- size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
- size_t total_homo_num_[kMaxLemmaSize];
-
- size_t sonbufs_num1_; // Number of son buffer with only 1 son
- size_t sonbufs_numgt1_; // Number of son buffer with more 1 son;
-
- size_t total_lma_node_num_;
-
- void stat_init();
- void stat_print();
-#endif
-
- public:
-
- DictBuilder();
- ~DictBuilder();
-
- // Build dictionary trie from the file fn_raw. File fn_validhzs provides
- // valid chars. If fn_validhzs is NULL, only chars in GB2312 will be
- // included.
- bool build_dict(const char* fn_raw, const char* fn_validhzs,
- DictTrie *dict_trie);
-
- private:
- // Fill in the buffer with id. The caller guarantees that the parameters are
- // valid.
- void id_to_charbuf(unsigned char *buf, LemmaIdType id);
-
- // Update the offset of sons for a node.
- void set_son_offset(LmaNodeGE1 *node, size_t offset);
-
- // Update the offset of homophonies' ids for a node.
- void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset);
-
- // Format a spelling string.
- void format_spelling_str(char *spl_str);
-
- // Sort the lemma_arr by the hanzi string, and give each of unique items
- // a id. Why we need to sort the lemma list according to their Hanzi string
- // is to find items started by a given prefix string to do prediction.
- // Actually, the single char items are in another order, for example,
- // in spelling id order, etc.
- // Return value is next un-allocated idx available.
- LemmaIdType sort_lemmas_by_hz();
-
- // Build the SingleCharItem list, and fill the hanzi_scis_ids in the
- // lemma buffer lemma_arr_.
- // This function should be called after the lemma array is ready.
- // Return the number of unique SingleCharItem elements.
- size_t build_scis();
-
- // Construct a subtree using a subset of the spelling array (from
- // item_start to item_end)
- // parent is the parent node to update the necessary information
- // parent can be a member of LmaNodeLE0 or LmaNodeGE1
- bool construct_subset(void* parent, LemmaEntry* lemma_arr,
- size_t item_start, size_t item_end, size_t level);
-
-
- // Read valid Chinese Hanzis from the given file.
- // num is used to return number of chars.
- // The return buffer is sorted and caller needs to free the returned buffer.
- char16* read_valid_hanzis(const char *fn_validhzs, size_t *num);
-
-
- // Read a raw dictionary. max_item is the maximum number of items. If there
- // are more items in the dictionary, only the first max_item will be read.
- // Returned value is the number of items successfully read from the file.
- size_t read_raw_dict(const char* fn_raw, const char *fn_validhzs,
- size_t max_item);
-
- // Try to find if a character is in hzs buffer.
- bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz);
-
- // Try to find if all characters in str are in hzs buffer.
- bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len,
- const char16 *str, size_t str_len);
-
- // Get the lemmas with the highest scores.
- void get_top_lemmas();
-
- // Allocate resource to build dictionary.
- // lma_num is the number of items to be loaded
- bool alloc_resource(size_t lma_num);
-
- // Free resource.
- void free_resource();
-};
-#endif // ___BUILD_MODEL___
-}
-
-#endif // PINYINIME_INCLUDE_DICTBUILDER_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dictdef.h b/src/virtualkeyboard/3rdparty/pinyin/include/dictdef.h
deleted file mode 100644
index 5e1d7818..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/dictdef.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTDEF_H__
-#define PINYINIME_INCLUDE_DICTDEF_H__
-
-#include <stdlib.h>
-#include "./utf16char.h"
-
-namespace ime_pinyin {
-
-// Enable the following line when building the binary dictionary model.
-// #define ___BUILD_MODEL___
-
-typedef unsigned char uint8;
-typedef unsigned short uint16;
-typedef unsigned int uint32;
-
-typedef signed char int8;
-typedef short int16;
-typedef int int32;
-typedef long long int64;
-typedef unsigned long long uint64;
-
-const bool kPrintDebug0 = false;
-const bool kPrintDebug1 = false;
-const bool kPrintDebug2 = false;
-
-// The max length of a lemma.
-const size_t kMaxLemmaSize = 8;
-
-// The max length of a Pinyin (spelling).
-const size_t kMaxPinyinSize = 6;
-
-// The number of half spelling ids. For Chinese Pinyin, there are 30 half ids.
-// See SpellingTrie.h for details.
-const size_t kHalfSpellingIdNum = 29;
-
-// The maximum number of full spellings. For Chinese Pinyin, there are only
-// about 410 spellings.
-// If this value is changed to be bigger (needing more bits), please also update
-// other structures like SpellingNode, to make sure that a spelling id can be
-// stored.
-// -1 is because that 0 is never used.
-const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
-const size_t kMaxSearchSteps = 40;
-
-// One character predicts its following characters.
-const size_t kMaxPredictSize = (kMaxLemmaSize - 1);
-
-// LemmaIdType must always be size_t.
-typedef size_t LemmaIdType;
-const size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage.
-const size_t kLemmaIdComposing = 0xffffff;
-
-typedef uint16 LmaScoreType;
-typedef uint16 KeyScoreType;
-
-// Number of items with highest score are kept for prediction purpose.
-const size_t kTopScoreLemmaNum = 10;
-
-const size_t kMaxPredictNumByGt3 = 1;
-const size_t kMaxPredictNumBy3 = 2;
-const size_t kMaxPredictNumBy2 = 2;
-
-// The last lemma id (included) for the system dictionary. The system
-// dictionary's ids always start from 1.
-const LemmaIdType kSysDictIdEnd = 500000;
-
-// The first lemma id for the user dictionary.
-const LemmaIdType kUserDictIdStart = 500001;
-
-// The last lemma id (included) for the user dictionary.
-const LemmaIdType kUserDictIdEnd = 600000;
-
-typedef struct {
- uint16 half_splid:5;
- uint16 full_splid:11;
-} SpellingId, *PSpellingId;
-
-
-/**
- * We use different node types for different layers
- * Statistical data of the building result for a testing dictionary:
- * root, level 0, level 1, level 2, level 3
- * max son num of one node: 406 280 41 2 -
- * max homo num of one node: 0 90 23 2 2
- * total node num of a layer: 1 406 31766 13516 993
- * total homo num of a layer: 9 5674 44609 12667 995
- *
- * The node number for root and level 0 won't be larger than 500
- * According to the information above, two kinds of nodes can be used; one for
- * root and level 0, the other for these layers deeper than 0.
- *
- * LE = less than or equal.
- * A node occupies 16 bytes, so in total less than 16 * 500 = 8K.
- */
-struct LmaNodeLE0 {
- uint32 son_1st_off;
- uint32 homo_idx_buf_off;
- uint16 spl_idx;
- uint16 num_of_son;
- uint16 num_of_homo;
-};
-
-/**
- * GE = greater than or equal
- * A node occupies 8 bytes.
- */
-struct LmaNodeGE1 {
- uint16 son_1st_off_l; // Low bits of the son_1st_off
- uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1
- uint16 spl_idx;
- unsigned char num_of_son; // number of son nodes
- unsigned char num_of_homo; // number of homo words
- unsigned char son_1st_off_h; // high bits of the son_1st_off
- unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off
-};
-
-#ifdef ___BUILD_MODEL___
-struct SingleCharItem {
- float freq;
- char16 hz;
- SpellingId splid;
-};
-
-struct LemmaEntry {
- LemmaIdType idx_by_py;
- LemmaIdType idx_by_hz;
- char16 hanzi_str[kMaxLemmaSize + 1];
-
- // The SingleCharItem id for each Hanzi.
- uint16 hanzi_scis_ids[kMaxLemmaSize];
-
- uint16 spl_idx_arr[kMaxLemmaSize + 1];
- char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
- unsigned char hz_str_len;
- float freq;
-};
-#endif // ___BUILD_MODEL___
-
-} // namespace ime_pinyin
-
-#endif // PINYINIME_INCLUDE_DICTDEF_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dictlist.h b/src/virtualkeyboard/3rdparty/pinyin/include/dictlist.h
deleted file mode 100644
index 1c1daef4..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/dictlist.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTLIST_H__
-#define PINYINIME_INCLUDE_DICTLIST_H__
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "./dictdef.h"
-#include "./searchutility.h"
-#include "./spellingtrie.h"
-#include "./utf16char.h"
-
-namespace ime_pinyin {
-
-class DictList {
- private:
- bool initialized_;
-
- const SpellingTrie *spl_trie_;
-
- // Number of SingleCharItem. The first is blank, because id 0 is invalid.
- uint32 scis_num_;
- char16 *scis_hz_;
- SpellingId *scis_splid_;
-
- // The large memory block to store the word list.
- char16 *buf_;
-
- // Starting position of those words whose lengths are i+1, counted in
- // char16
- uint32 start_pos_[kMaxLemmaSize + 1];
-
- uint32 start_id_[kMaxLemmaSize + 1];
-
- int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
-
- bool alloc_resource(size_t buf_size, size_t scim_num);
-
- void free_resource();
-
-#ifdef ___BUILD_MODEL___
- // Calculate the requested memory, including the start_pos[] buffer.
- size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
-
- void fill_scis(const SingleCharItem *scis, size_t scis_num);
-
- // Copy the related content to the inner buffer
- // It should be called after calculate_size()
- void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
-
- // Find the starting position for the buffer of those 2-character Chinese word
- // whose first character is the given Chinese character.
- char16* find_pos2_startedbyhz(char16 hz_char);
-#endif
-
- // Find the starting position for the buffer of those words whose lengths are
- // word_len. The given parameter cmp_func decides how many characters from
- // beginning will be used to compare.
- char16* find_pos_startedbyhzs(const char16 last_hzs[],
- size_t word_Len,
- int (*cmp_func)(const void *, const void *));
-
- public:
-
- DictList();
- ~DictList();
-
- bool save_list(FILE *fp);
- bool load_list(QFile *fp);
-
-#ifdef ___BUILD_MODEL___
- // Init the list from the LemmaEntry array.
- // lemma_arr should have been sorted by the hanzi_str, and have been given
- // ids from 1
- bool init_list(const SingleCharItem *scis, size_t scis_num,
- const LemmaEntry *lemma_arr, size_t lemma_num);
-#endif
-
- // Get the hanzi string for the given id
- uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
-
- void convert_to_hanzis(char16 *str, uint16 str_len);
-
- void convert_to_scis_ids(char16 *str, uint16 str_len);
-
- // last_hzs stores the last n Chinese characters history, its length should be
- // less than or equal to kMaxPredictSize.
- // hzs_len specifies the length(<= kMaxPredictSize).
- // predict_buf is used to store the result.
- // buf_len specifies the buffer length.
- // b4_used specifies how many items before predict_buf have been used.
- // Returned value is the number of newly added items.
- size_t predict(const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used);
-
- // If half_splid is a valid half spelling id, return those full spelling
- // ids which share this half id.
- uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
- uint16 *splids, uint16 max_splids);
-
- LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
-};
-}
-
-#endif // PINYINIME_INCLUDE_DICTLIST_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h b/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
deleted file mode 100644
index 86a8ee25..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/dicttrie.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
-#define PINYINIME_INCLUDE_DICTTRIE_H__
-
-#include <stdlib.h>
-#include "./atomdictbase.h"
-#include "./dictdef.h"
-#include "./dictlist.h"
-#include "./searchutility.h"
-#include <QFile>
-
-namespace ime_pinyin {
-
-class DictTrie : AtomDictBase {
- private:
- struct ParsingMark {
- size_t node_offset:24;
- size_t node_num:8; // Number of nodes with this spelling id given
- // by spl_id. If spl_id is a Shengmu, for nodes
- // in the first layer of DictTrie, it equals to
- // SpellingTrie::shm2full_num(); but for those
- // nodes which are not in the first layer,
- // node_num < SpellingTrie::shm2full_num().
- // For a full spelling id, node_num = 1;
- };
-
- // Used to indicate an extended mile stone.
- // An extended mile stone is used to mark a partial match in the dictionary
- // trie to speed up further potential extending.
- // For example, when the user inputs "w", a mile stone is created to mark the
- // partial match status, so that when user inputs another char 'm', it will be
- // faster to extend search space based on this mile stone.
- //
- // For partial match status of "wm", there can be more than one sub mile
- // stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
- // there may be more than one parsing mark used to mark these partial matchings.
- // A mile stone records the starting position in the mark list and number of
- // marks.
- struct MileStone {
- uint16 mark_start;
- uint16 mark_num;
- };
-
- DictList* dict_list_;
-
- const SpellingTrie *spl_trie_;
-
- LmaNodeLE0* root_; // Nodes for root and the first layer.
- LmaNodeGE1* nodes_ge1_; // Nodes for other layers.
-
- // A quick index from spelling id to the LmaNodeLE0 node buffer, or
- // to the root_ buffer.
- // Index length:
- // SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used
- // to get the end.
- // All Shengmu ids are not indexed because they will be converted into
- // corresponding full ids.
- // So, given an id splid, the son is:
- // root_[splid_le0_index_[splid - kFullSplIdStart]]
- uint16 *splid_le0_index_;
-
- uint32 lma_node_num_le0_;
- uint32 lma_node_num_ge1_;
-
- // The first part is for homophonies, and the last top_lmas_num_ items are
- // lemmas with highest scores.
- unsigned char *lma_idx_buf_;
- uint32 lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
- uint32 total_lma_num_; // Total number of lemmas in this dictionary.
- uint32 top_lmas_num_; // Number of lemma with highest scores.
-
- // Parsing mark list used to mark the detailed extended statuses.
- ParsingMark *parsing_marks_;
- // The position for next available mark.
- uint16 parsing_marks_pos_;
-
- // Mile stone list used to mark the extended status.
- MileStone *mile_stones_;
- // The position for the next available mile stone. We use positions (except 0)
- // as handles.
- MileStoneHandle mile_stones_pos_;
-
- // Get the offset of sons for a node.
- inline size_t get_son_offset(const LmaNodeGE1 *node);
-
- // Get the offset of homophonies' ids for a node.
- inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node);
-
- // Get the lemma id by the offset.
- inline LemmaIdType get_lemma_id(size_t id_offset);
-
- void free_resource(bool free_dict_list);
-
- bool load_dict(QFile *fp);
-
- // Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
- // them into the lpi_items buffer.
- // This function is called by the search engine.
- size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
- LmaNodeLE0 *node);
-
- // Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
- // them into the lpi_items buffer.
- // This function is called by inner functions extend_dict0(), extend_dict1()
- // and extend_dict2().
- size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
- size_t homo_buf_off, LmaNodeGE1 *node,
- uint16 lma_len);
-
- // Extend in the trie from level 0.
- MileStoneHandle extend_dict0(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Extend in the trie from level 1.
- MileStoneHandle extend_dict1(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Extend in the trie from level 2.
- MileStoneHandle extend_dict2(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- // Try to extend the given spelling id buffer, and if the given id_lemma can
- // be successfully gotten, return true;
- // The given spelling ids are all valid full ids.
- bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma);
-
-#ifdef ___BUILD_MODEL___
- bool save_dict(FILE *fp);
-#endif // ___BUILD_MODEL___
-
- static const int kMaxMileStone = 100;
- static const int kMaxParsingMark = 600;
- static const MileStoneHandle kFirstValidMileStoneHandle = 1;
-
- friend class DictParser;
- friend class DictBuilder;
-
- public:
-
- DictTrie();
- ~DictTrie();
-
-#ifdef ___BUILD_MODEL___
- // Construct the tree from the file fn_raw.
- // fn_validhzs provide the valid hanzi list. If fn_validhzs is
- // NULL, only chars in GB2312 will be included.
- bool build_dict(const char *fn_raw, const char *fn_validhzs);
-
- // Save the binary dictionary
- // Actually, the SpellingTrie/DictList instance will be also saved.
- bool save_dict(const char *filename);
-#endif // ___BUILD_MODEL___
-
- void convert_to_hanzis(char16 *str, uint16 str_len);
-
- void convert_to_scis_ids(char16 *str, uint16 str_len);
-
- // Load a binary dictionary
- // The SpellingTrie instance/DictList will be also loaded
- bool load_dict(const char *filename, LemmaIdType start_id,
- LemmaIdType end_id);
- bool load_dict_fd(int sys_fd, long start_offset, long length,
- LemmaIdType start_id, LemmaIdType end_id);
- bool close_dict() {return true;}
- size_t number_of_lemmas() {return 0;}
-
- void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
-
- MileStoneHandle extend_dict(MileStoneHandle from_handle,
- const DictExtPara *dep,
- LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max);
-
- uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
-
- uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
- uint16 splids_max, bool arg_valid);
-
- size_t predict(const char16 *last_hzs, uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used);
-
- LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/, uint16 /*count*/) {return 0;}
-
- LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/,
- bool /*selected*/) {return 0;}
-
- LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/) {return 0;}
-
- LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;}
-
- LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[],
- uint16 /*lemma_len*/) {return 0;}
-
- bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;}
-
- size_t get_total_lemma_count() {return 0;}
- void set_total_lemma_count_of_others(size_t count);
-
- void flush_cache() {}
-
- LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len);
-
- // Fill the lemmas with highest scores to the prediction buffer.
- // his_len is the history length to fill in the prediction buffer.
- size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items,
- size_t npre_max, size_t b4_used);
-};
-}
-
-#endif // PINYINIME_INCLUDE_DICTTRIE_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/lpicache.h b/src/virtualkeyboard/3rdparty/pinyin/include/lpicache.h
deleted file mode 100644
index 60735971..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/lpicache.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
-#define PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
-
-#include <stdlib.h>
-#include "./searchutility.h"
-#include "./spellingtrie.h"
-
-namespace ime_pinyin {
-
-// Used to cache LmaPsbItem list for half spelling ids.
-class LpiCache {
- private:
- static LpiCache *instance_;
- static const int kMaxLpiCachePerId = 15;
-
- LmaPsbItem *lpi_cache_;
- uint16 *lpi_cache_len_;
-
- public:
- LpiCache();
- ~LpiCache();
-
- static LpiCache& get_instance();
-
- // Test if the LPI list of the given splid has been cached.
- // If splid is a full spelling id, it returns false, because we only cache
- // list for half ids.
- bool is_cached(uint16 splid);
-
- // Put LPI list to cahce. If the length of the list, lpi_num, is longer than
- // the cache buffer. the list will be truncated, and function returns the
- // maximum length of the cache buffer.
- // Note: splid must be a half id, and lpi_items must be not NULL. The
- // caller of this function should guarantee this.
- size_t put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num);
-
- // Get the cached list for the given half id.
- // Return the length of the cached buffer.
- // Note: splid must be a half id, and lpi_items must be not NULL. The
- // caller of this function should guarantee this.
- size_t get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max);
-};
-
-} // namespace
-
-#endif // PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/matrixsearch.h b/src/virtualkeyboard/3rdparty/pinyin/include/matrixsearch.h
deleted file mode 100644
index 61e78aa6..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/matrixsearch.h
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
-#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
-
-#include <stdlib.h>
-#include "./atomdictbase.h"
-#include "./dicttrie.h"
-#include "./searchutility.h"
-#include "./spellingtrie.h"
-#include "./splparser.h"
-
-namespace ime_pinyin {
-
-static const size_t kMaxRowNum = kMaxSearchSteps;
-
-typedef struct {
- // MileStoneHandle objects for the system and user dictionaries.
- MileStoneHandle dict_handles[2];
- // From which DMI node. -1 means it's from root.
- PoolPosType dmi_fr;
- // The spelling id for the Pinyin string from the previous DMI to this node.
- // If it is a half id like Shengmu, the node pointed by dict_node is the first
- // node with this Shengmu,
- uint16 spl_id;
- // What's the level of the dict node. Level of root is 0, but root is never
- // recorded by dict_node.
- unsigned char dict_level:7;
- // If this node is for composing phrase, this bit is 1.
- unsigned char c_phrase:1;
- // Whether the spl_id is parsed with a split character at the end.
- unsigned char splid_end_split:1;
- // What's the length of the spelling string for this match, for the whole
- // word.
- unsigned char splstr_len:7;
- // Used to indicate whether all spelling ids from the root are full spelling
- // ids. This information is useful for keymapping mode(not finished). Because
- // in this mode, there is no clear boundaries, we prefer those results which
- // have full spelling ids.
- unsigned char all_full_id:1;
-} DictMatchInfo, *PDictMatchInfo;
-
-typedef struct MatrixNode {
- LemmaIdType id;
- float score;
- MatrixNode *from;
- // From which DMI node. Used to trace the spelling segmentation.
- PoolPosType dmi_fr;
- uint16 step;
-} MatrixNode, *PMatrixNode;
-
-typedef struct {
- // The MatrixNode position in the matrix pool
- PoolPosType mtrx_nd_pos;
- // The DictMatchInfo position in the DictMatchInfo pool.
- PoolPosType dmi_pos;
- uint16 mtrx_nd_num;
- uint16 dmi_num:15;
- // Used to indicate whether there are dmi nodes in this step with full
- // spelling id. This information is used to decide whether a substring of a
- // valid Pinyin should be extended.
- //
- // Example1: shoudao
- // When the last char 'o' is added, the parser will find "dao" is a valid
- // Pinyin, and because all dmi nodes at location 'd' (including those for
- // "shoud", and those for "d") have Shengmu id only, so it is not necessary
- // to extend "ao", otherwise the result may be "shoud ao", that is not
- // reasonable.
- //
- // Example2: hengao
- // When the last 'o' is added, the parser finds "gao" is a valid Pinyin.
- // Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi
- // nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus
- // "heng ao" can also be the result.
- //
- // Similarly, "ganga" is expanded to "gang a".
- //
- // For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi
- // nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it
- // is not valid either). If the parser uses break in the loop, the result
- // always be "xian"; but if the parser uses continue in the loop, "xi an" will
- // also be tried. This behaviour can be set via the function
- // set_xi_an_switch().
- uint16 dmi_has_full_id:1;
- // Points to a MatrixNode of the current step to indicate which choice the
- // user selects.
- MatrixNode *mtrx_nd_fixed;
-} MatrixRow, *PMatrixRow;
-
-// When user inputs and selects candidates, the fixed lemma ids are stored in
-// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many
-// lemmas from the beginning are fixed. If user deletes Pinyin characters one
-// by one from the end, these fixed lemmas can be unlocked one by one when
-// necessary. Whenever user deletes a Chinese character and its spelling string
-// in these fixed lemmas, all fixed lemmas will be merged together into a unit
-// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing
-// phrase will be the first lemma in the sentence. Because it contains some
-// modified lemmas (by deleting a character), these merged lemmas are called
-// sub lemmas (sublma), and each of them are represented individually, so that
-// when user deletes Pinyin characters from the end, these sub lemmas can also
-// be unlocked one by one.
-typedef struct {
- uint16 spl_ids[kMaxRowNum];
- uint16 spl_start[kMaxRowNum];
- char16 chn_str[kMaxRowNum]; // Chinese string.
- uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters.
- size_t sublma_num;
- uint16 length; // Counted in Chinese characters.
-} ComposingPhrase, *TComposingPhrase;
-
-class MatrixSearch {
- private:
- // If it is true, prediction list by string whose length is greater than 1
- // will be limited to a reasonable number.
- static const bool kPredictLimitGt1 = false;
-
- // If it is true, the engine will prefer long history based prediction,
- // for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are
- // based on the two-character history.
- static const bool kPreferLongHistoryPredict = true;
-
- // If it is true, prediction will only be based on user dictionary. this flag
- // is for debug purpose.
- static const bool kOnlyUserDictPredict = false;
-
- // The maximum buffer to store LmaPsbItems.
- static const size_t kMaxLmaPsbItems = 1450;
-
- // How many rows for each step.
- static const size_t kMaxNodeARow = 5;
-
- // The maximum length of the sentence candidates counted in chinese
- // characters
- static const size_t kMaxSentenceLength = 16;
-
- // The size of the matrix node pool.
- static const size_t kMtrxNdPoolSize = 200;
-
- // The size of the DMI node pool.
- static const size_t kDmiPoolSize = 800;
-
- // Used to indicate whether this object has been initialized.
- bool inited_;
-
- // Spelling trie.
- const SpellingTrie *spl_trie_;
-
- // Used to indicate this switcher status: when "xian" is parseed, should
- // "xi an" also be extended. Default is false.
- // These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string
- // should be valid for a FULL spelling, or a combination of two spellings,
- // first of which is a FULL id too. So even it is true, "da" will never be
- // split into "d a", because "d" is not a full spelling id.
- bool xi_an_enabled_;
-
- // System dictionary.
- DictTrie* dict_trie_;
-
- // User dictionary.
- AtomDictBase* user_dict_;
-
- // Spelling parser.
- SpellingParser* spl_parser_;
-
- // The maximum allowed length of spelling string (such as a Pinyin string).
- size_t max_sps_len_;
-
- // The maximum allowed length of a result Chinese string.
- size_t max_hzs_len_;
-
- // Pinyin string. Max length: kMaxRowNum - 1
- char pys_[kMaxRowNum];
-
- // The length of the string that has been decoded successfully.
- size_t pys_decoded_len_;
-
- // Shared buffer for multiple purposes.
- size_t *share_buf_;
-
- MatrixNode *mtrx_nd_pool_;
- PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool
- DictMatchInfo *dmi_pool_;
- PoolPosType dmi_pool_used_; // How many items used in the pool
-
- MatrixRow *matrix_; // The first row is for starting
-
- DictExtPara *dep_; // Parameter used to extend DMI nodes.
-
- NPredictItem *npre_items_; // Used to do prediction
- size_t npre_items_len_;
-
- // The starting positions and lemma ids for the full sentence candidate.
- size_t lma_id_num_;
- uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids.
- LemmaIdType lma_id_[kMaxRowNum];
- size_t fixed_lmas_;
-
- // If fixed_lmas_ is bigger than i, Element i is used to indicate whether
- // the i'th lemma id in lma_id_ is the first candidate for that step.
- // If all candidates are the first one for that step, the whole string can be
- // decoded by the engine automatically, so no need to add it to user
- // dictionary. (We are considering to add it to user dictionary in the
- // future).
- uint8 fixed_lmas_no1_[kMaxRowNum];
-
- // Composing phrase
- ComposingPhrase c_phrase_;
-
- // If dmi_c_phrase_ is true, the decoder will try to match the
- // composing phrase (And definitely it will match successfully). If it
- // is false, the decoder will try to match lemmas items in dictionaries.
- bool dmi_c_phrase_;
-
- // The starting positions and spelling ids for the first full sentence
- // candidate.
- size_t spl_id_num_; // Number of splling ids
- uint16 spl_start_[kMaxRowNum]; // Starting positions
- uint16 spl_id_[kMaxRowNum]; // Spelling ids
- // Used to remember the last fixed position, counted in Hanzi.
- size_t fixed_hzs_;
-
- // Lemma Items with possibility score, two purposes:
- // 1. In Viterbi decoding, this buffer is used to get all possible candidates
- // for current step;
- // 2. When the search is done, this buffer is used to get candiates from the
- // first un-fixed step and show them to the user.
- LmaPsbItem lpi_items_[kMaxLmaPsbItems];
- size_t lpi_total_;
-
- // Assign the pointers with NULL. The caller makes sure that all pointers are
- // not valid before calling it. This function only will be called in the
- // construction function and free_resource().
- void reset_pointers_to_null();
-
- bool alloc_resource();
-
- void free_resource();
-
- // Reset the search space totally.
- bool reset_search0();
-
- // Reset the search space from ch_pos step. For example, if the original
- // input Pinyin is "an", reset_search(1) will reset the search space to the
- // result of "a". If the given position is out of range, return false.
- // if clear_fixed_this_step is true, and the ch_pos step is a fixed step,
- // clear its fixed status. if clear_dmi_his_step is true, clear the DMI nodes.
- // If clear_mtrx_this_sTep is true, clear the mtrx nodes of this step.
- // The DMI nodes will be kept.
- //
- // Note: this function should not destroy content of pys_.
- bool reset_search(size_t ch_pos, bool clear_fixed_this_step,
- bool clear_dmi_this_step, bool clear_mtrx_this_step);
-
- // Delete a part of the content in pys_.
- void del_in_pys(size_t start, size_t len);
-
- // Delete a spelling id and its corresponding Chinese character, and merge
- // the fixed lemmas into the composing phrase.
- // del_spl_pos indicates which spelling id needs to be delete.
- // This function will update the lemma and spelling segmentation information.
- // The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within
- // the fixed lemmas.
- void merge_fixed_lmas(size_t del_spl_pos);
-
- // Get spelling start posistions and ids. The result will be stored in
- // spl_id_num_, spl_start_[], spl_id_[].
- // fixed_hzs_ will be also assigned.
- void get_spl_start_id();
-
- // Get all lemma ids with match the given spelling id stream(shorter than the
- // maximum length of a word).
- // If pfullsent is not NULL, means the full sentence candidate may be the
- // same with the coming lemma string, if so, remove that lemma.
- // The result is sorted in descendant order by the frequency score.
- size_t get_lpis(const uint16* splid_str, size_t splid_str_len,
- LmaPsbItem* lma_buf, size_t max_lma_buf,
- const char16 *pfullsent, bool sort_by_psb);
-
- uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
-
- uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
- uint16 splids_max, bool arg_valid);
-
-
- // Extend a DMI node with a spelling id. ext_len is the length of the rows
- // to extend, actually, it is the size of the spelling string of splid.
- // return value can be 1 or 0.
- // 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in
- // the pool).
- // 0 means either the dmi node can not be extended with splid, or the splid
- // is a Shengmu id, which is only used to get lpi_items, or the result node
- // in DictTrie has no son, it is not nccessary to keep the new DMI.
- //
- // This function modifies the content of lpi_items_ and lpi_total_.
- // lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size.
- // The function's returned value has no relation with the value of lpi_num.
- //
- // If dmi == NULL, this function will extend the root node of DictTrie
- //
- // This function will not change dmi_nd_pool_used_. Please change it after
- // calling this function if necessary.
- //
- // The caller should guarantees that NULL != dep.
- size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s);
-
- // Extend dmi for the composing phrase.
- size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s);
-
- // Extend a MatrixNode with the give LmaPsbItem list.
- // res_row is the destination row number.
- // This function does not change mtrx_nd_pool_used_. Please change it after
- // calling this function if necessary.
- // return 0 always.
- size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
- size_t lpi_num, PoolPosType dmi_fr, size_t res_row);
-
-
- // Try to find a dmi node at step_to position, and the found dmi node should
- // match the given spelling id strings.
- PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num);
-
- bool add_char(char ch);
- bool prepare_add_char(char ch);
-
- // Called after prepare_add_char, so the input char has been saved.
- bool add_char_qwerty();
-
- // Prepare candidates from the last fixed hanzi position.
- void prepare_candidates();
-
- // Is the character in step pos a splitter character?
- // The caller guarantees that the position is valid.
- bool is_split_at(uint16 pos);
-
- void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles,
- PoolPosType dmi_fr,
- uint16 spl_id, uint16 node_num, unsigned char dict_level,
- bool splid_end_split, unsigned char splstr_len,
- unsigned char all_full_id);
-
- size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num,
- char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len);
-
- // Add the first candidate to the user dictionary.
- bool try_add_cand0_to_userdict();
-
- // Add a user lemma to the user dictionary. This lemma is a subset of
- // candidate 0. lma_from is from which lemma in lma_ids_, lma_num is the
- // number of lemmas to be combined together as a new lemma. The caller
- // gurantees that the combined new lemma's length is less or equal to
- // kMaxLemmaSize.
- bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score);
-
- // Update dictionary frequencies.
- void update_dict_freq();
-
- void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level);
-
- public:
- MatrixSearch();
- ~MatrixSearch();
-
- bool init(const char *fn_sys_dict, const char *fn_usr_dict);
-
- bool init_fd(int sys_fd, long start_offset, long length,
- const char *fn_usr_dict);
-
- void init_user_dictionary(const char *fn_usr_dict);
-
- bool is_user_dictionary_enabled() const;
-
- void set_max_lens(size_t max_sps_len, size_t max_hzs_len);
-
- void close();
-
- void flush_cache();
-
- void set_xi_an_switch(bool xi_an_enabled);
-
- bool get_xi_an_switch();
-
- // Reset the search space. Equivalent to reset_search(0).
- // If inited, always return true;
- bool reset_search();
-
- // Search a Pinyin string.
- // Return value is the position successfully parsed.
- size_t search(const char *py, size_t py_len);
-
- // Used to delete something in the Pinyin string kept by the engine, and do
- // a re-search.
- // Return value is the new length of Pinyin string kept by the engine which
- // is parsed successfully.
- // If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin
- // character needs to be deleted. If is_pos_in_splid is true, all Pinyin
- // characters for pos-th spelling id needs to be deleted.
- // If the deleted character(s) is just after a fixed lemma or sub lemma in
- // composing phrase, clear_fixed_this_step indicates whether we needs to
- // unlock the last fixed lemma or sub lemma.
- // If is_pos_in_splid is false, and pos-th character is in the range for the
- // fixed lemmas or composing string, this function will do nothing and just
- // return the result of the previous search.
- size_t delsearch(size_t pos, bool is_pos_in_splid,
- bool clear_fixed_this_step);
-
- // Get the number of candiates, called after search().
- size_t get_candidate_num();
-
- // Get the Pinyin string stored by the engine.
- // *decoded_len returns the length of the successfully decoded string.
- const char* get_pystr(size_t *decoded_len);
-
- // Get the spelling boundaries for the first sentence candidate.
- // Number of spellings will be returned. The number of valid elements in
- // spl_start is one more than the return value because the last one is used
- // to indicate the beginning of the next un-input speling.
- // For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] .
- size_t get_spl_start(const uint16 *&spl_start);
-
- // Get one candiate string. If full sentence candidate is available, it will
- // be the first one.
- char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len);
-
- // Get the first candiate, which is a "full sentence".
- // retstr_len is not NULL, it will be used to return the string length.
- // If only_unfixed is true, only unfixed part will be fetched.
- char16* get_candidate0(char16* cand_str, size_t max_len,
- uint16 *retstr_len, bool only_unfixed);
-
- // Choose a candidate. The decoder will do a search after the fixed position.
- size_t choose(size_t cand_id);
-
- // Cancel the last choosing operation, and return the new number of choices.
- size_t cancel_last_choice();
-
- // Get the length of fixed Hanzis.
- size_t get_fixedlen();
-
- size_t get_predicts(const char16 fixed_buf[],
- char16 predict_buf[][kMaxPredictSize + 1],
- size_t buf_len);
-};
-}
-
-#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/mystdlib.h b/src/virtualkeyboard/3rdparty/pinyin/include/mystdlib.h
deleted file mode 100644
index dfcf980b..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/mystdlib.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_MYSTDLIB_H__
-#define PINYINIME_INCLUDE_MYSTDLIB_H__
-
-#include <stdlib.h>
-
-namespace ime_pinyin {
-
-void myqsort(void *p, size_t n, size_t es,
- int (*cmp)(const void *, const void *));
-
-void *mybsearch(const void *key, const void *base,
- size_t nmemb, size_t size,
- int (*compar)(const void *, const void *));
-}
-
-#endif // PINYINIME_INCLUDE_MYSTDLIB_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/ngram.h b/src/virtualkeyboard/3rdparty/pinyin/include/ngram.h
deleted file mode 100644
index 1d3a86e6..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/ngram.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_NGRAM_H__
-#define PINYINIME_INCLUDE_NGRAM_H__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "./dictdef.h"
-#include <QFile>
-
-namespace ime_pinyin {
-
-typedef unsigned char CODEBOOK_TYPE;
-
-static const size_t kCodeBookSize = 256;
-
-class NGram {
- public:
- // The maximum score of a lemma item.
- static const LmaScoreType kMaxScore = 0x3fff;
-
- // In order to reduce the storage size, the original log value is amplified by
- // kScoreAmplifier, and we use LmaScoreType to store.
- // After this process, an item with a lower score has a higher frequency.
- static const int kLogValueAmplifier = -800;
-
- // System words' total frequency. It is not the real total frequency, instead,
- // It is only used to adjust system lemmas' scores when the user dictionary's
- // total frequency changes.
- // In this version, frequencies of system lemmas are fixed. We are considering
- // to make them changable in next version.
- static const size_t kSysDictTotalFreq = 100000000;
-
- private:
-
- static NGram* instance_;
-
- bool initialized_;
- uint32 idx_num_;
-
- size_t total_freq_none_sys_;
-
- // Score compensation for system dictionary lemmas.
- // Because after user adds some user lemmas, the total frequency changes, and
- // we use this value to normalize the score.
- float sys_score_compensation_;
-
-#ifdef ___BUILD_MODEL___
- double *freq_codes_df_;
-#endif
- LmaScoreType *freq_codes_;
- CODEBOOK_TYPE *lma_freq_idx_;
-
- public:
- NGram();
- ~NGram();
-
- static NGram& get_instance();
-
- bool save_ngram(FILE *fp);
- bool load_ngram(QFile *fp);
-
- // Set the total frequency of all none system dictionaries.
- void set_total_freq_none_sys(size_t freq_none_sys);
-
- float get_uni_psb(LemmaIdType lma_id);
-
- // Convert a probability to score. Actually, the score will be limited to
- // kMaxScore, but at runtime, we also need float expression to get accurate
- // value of the score.
- // After the conversion, a lower score indicates a higher probability of the
- // item.
- static float convert_psb_to_score(double psb);
-
-#ifdef ___BUILD_MODEL___
- // For constructing the unigram mode model.
- bool build_unigram(LemmaEntry *lemma_arr, size_t num,
- LemmaIdType next_idx_unused);
-#endif
-};
-}
-
-#endif // PINYINIME_INCLUDE_NGRAM_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/pinyinime.h b/src/virtualkeyboard/3rdparty/pinyin/include/pinyinime.h
deleted file mode 100644
index e376c20c..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/pinyinime.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_ANDPYIME_H__
-#define PINYINIME_INCLUDE_ANDPYIME_H__
-
-#include <stdlib.h>
-#include "./dictdef.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- namespace ime_pinyin {
-
- /**
- * Open the decoder engine via the system and user dictionary file names.
- *
- * @param fn_sys_dict The file name of the system dictionary.
- * @param fn_usr_dict The file name of the user dictionary.
- * @return true if open the decoder engine successfully.
- */
- bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict);
-
- /**
- * Open the decoder engine via the system dictionary FD and user dictionary
- * file name. Because on Android, the system dictionary is embedded in the
- * whole application apk file.
- *
- * @param sys_fd The file in which the system dictionary is embedded.
- * @param start_offset The starting position of the system dictionary in the
- * file sys_fd.
- * @param length The length of the system dictionary in the file sys_fd,
- * counted in byte.
- * @return true if succeed.
- */
- bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
- const char *fn_usr_dict);
-
- /**
- * Close the decoder engine.
- */
- void im_close_decoder();
-
- /**
- * Set maximum limitations for decoding. If this function is not called,
- * default values will be used. For example, due to screen size limitation,
- * the UI engine of the IME can only show a certain number of letters(input)
- * to decode, and a certain number of Chinese characters(output). If after
- * user adds a new letter, the input or the output string is longer than the
- * limitations, the engine will discard the recent letter.
- *
- * @param max_sps_len Maximum length of the spelling string(Pinyin string).
- * @max_hzs_len Maximum length of the decoded Chinese character string.
- */
- void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len);
-
- /**
- * Flush cached data to persistent memory. Because at runtime, in order to
- * achieve best performance, some data is only store in memory.
- */
- void im_flush_cache();
-
- /**
- * Use a spelling string(Pinyin string) to search. The engine will try to do
- * an incremental search based on its previous search result, so if the new
- * string has the same prefix with the previous one stored in the decoder,
- * the decoder will only continue the search from the end of the prefix.
- * If the caller needs to do a brand new search, please call im_reset_search()
- * first. Calling im_search() is equivalent to calling im_add_letter() one by
- * one.
- *
- * @param sps_buf The spelling string buffer to decode.
- * @param sps_len The length of the spelling string buffer.
- * @return The number of candidates.
- */
- size_t im_search(const char* sps_buf, size_t sps_len);
-
- /**
- * Make a delete operation in the current search result, and make research if
- * necessary.
- *
- * @param pos The posistion of char in spelling string to delete, or the
- * position of spelling id in result string to delete.
- * @param is_pos_in_splid Indicate whether the pos parameter is the position
- * in the spelling string, or the position in the result spelling id string.
- * @return The number of candidates.
- */
- size_t im_delsearch(size_t pos, bool is_pos_in_splid,
- bool clear_fixed_this_step);
-
- /**
- * Reset the previous search result.
- */
- void im_reset_search();
-
- /**
- * Add a Pinyin letter to the current spelling string kept by decoder. If the
- * decoder fails in adding the letter, it will do nothing. im_get_sps_str()
- * can be used to get the spelling string kept by decoder currently.
- *
- * @param ch The letter to add.
- * @return The number of candidates.
- */
- size_t im_add_letter(char ch);
-
- /**
- * Get the spelling string kept by the decoder.
- *
- * @param decoded_len Used to return how many characters in the spelling
- * string is successfully parsed.
- * @return The spelling string kept by the decoder.
- */
- const char *im_get_sps_str(size_t *decoded_len);
-
- /**
- * Get a candidate(or choice) string.
- *
- * @param cand_id The id to get a candidate. Started from 0. Usually, id 0
- * is a sentence-level candidate.
- * @param cand_str The buffer to store the candidate.
- * @param max_len The maximum length of the buffer.
- * @return cand_str if succeeds, otherwise NULL.
- */
- char16* im_get_candidate(size_t cand_id, char16* cand_str,
- size_t max_len);
-
- /**
- * Get the segmentation information(the starting positions) of the spelling
- * string.
- *
- * @param spl_start Used to return the starting posistions.
- * @return The number of spelling ids. If it is L, there will be L+1 valid
- * elements in spl_start, and spl_start[L] is the posistion after the end of
- * the last spelling id.
- */
- size_t im_get_spl_start_pos(const uint16 *&spl_start);
-
- /**
- * Choose a candidate and make it fixed. If the candidate does not match
- * the end of all spelling ids, new candidates will be provided from the
- * first unfixed position. If the candidate matches the end of the all
- * spelling ids, there will be only one new candidates, or the whole fixed
- * sentence.
- *
- * @param cand_id The id of candidate to select and make it fixed.
- * @return The number of candidates. If after the selection, the whole result
- * string has been fixed, there will be only one candidate.
- */
- size_t im_choose(size_t cand_id);
-
- /**
- * Cancel the last selection, or revert the last operation of im_choose().
- *
- * @return The number of candidates.
- */
- size_t im_cancel_last_choice();
-
- /**
- * Get the number of fixed spelling ids, or Chinese characters.
- *
- * @return The number of fixed spelling ids, of Chinese characters.
- */
- size_t im_get_fixed_len();
-
- /**
- * Cancel the input state and reset the search workspace.
- */
- bool im_cancel_input();
-
- /**
- * Get prediction candiates based on the given fixed Chinese string as the
- * history.
- *
- * @param his_buf The history buffer to do the prediction. It should be ended
- * with '\0'.
- * @param pre_buf Used to return prediction result list.
- * @return The number of predicted result string.
- */
- size_t im_get_predicts(const char16 *his_buf,
- char16 (*&pre_buf)[kMaxPredictSize + 1]);
-
- /**
- * Enable Shengmus in ShouZiMu mode.
- */
- void im_enable_shm_as_szm(bool enable);
-
- /**
- * Enable Yunmus in ShouZiMu mode.
- */
- void im_enable_ym_as_szm(bool enable);
-
- /**
- * Initializes or uninitializes the user dictionary.
- *
- * @param fn_usr_dict The file name of the user dictionary.
- */
- void im_init_user_dictionary(const char *fn_usr_dict);
-
- /**
- * Returns the current status of user dictinary.
- */
- bool im_is_user_dictionary_enabled(void);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // PINYINIME_INCLUDE_ANDPYIME_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/searchutility.h b/src/virtualkeyboard/3rdparty/pinyin/include/searchutility.h
deleted file mode 100644
index f1357107..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/searchutility.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
-#define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
-
-#include <stdlib.h>
-#include "./spellingtrie.h"
-
-namespace ime_pinyin {
-
-// Type used to identify the size of a pool, such as id pool, etc.
-typedef uint16 PoolPosType;
-
-// Type used to identify a parsing mile stone in an atom dictionary.
-typedef uint16 MileStoneHandle;
-
-// Type used to express a lemma and its probability score.
-typedef struct {
- size_t id:(kLemmaIdSize * 8);
- size_t lma_len:4;
- uint16 psb; // The score, the lower psb, the higher possibility.
- // For single character items, we may also need Hanzi.
- // For multiple characer items, ignore it.
- char16 hanzi;
-} LmaPsbItem, *PLmaPsbItem;
-
-// LmaPsbItem extended with string.
-typedef struct {
- LmaPsbItem lpi;
- char16 str[kMaxLemmaSize + 1];
-} LmaPsbStrItem, *PLmaPsbStrItem;
-
-
-typedef struct {
- float psb;
- char16 pre_hzs[kMaxPredictSize];
- uint16 his_len; // The length of the history used to do the prediction.
-} NPredictItem, *PNPredictItem;
-
-// Parameter structure used to extend in a dictionary. All dictionaries
-// receives the same DictExtPara and a dictionary specific MileStoneHandle for
-// extending.
-//
-// When the user inputs a new character, AtomDictBase::extend_dict() will be
-// called at least once for each dictionary.
-//
-// For example, when the user inputs "wm", extend_dict() will be called twice,
-// and the DictExtPara parameter are as follows respectively:
-// 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
-// splid_end_split = false; id_start = wa(the first id start with 'w');
-// id_num = number of ids starting with 'w'.
-// 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
-// splid_end_split = false; id_start = wa; id_num = number of ids starting with
-// 'w'.
-//
-// For string "women", one of the cases of the DictExtPara parameter is:
-// splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
-// step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
-//
-typedef struct {
- // Spelling ids for extending, there are splids_extended + 1 ids in the
- // buffer.
- // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max,
- // but for a composing phrase, there can kMaxSearchSteps spelling ids.
- uint16 splids[kMaxSearchSteps];
-
- // Number of ids that have been used before. splids[splids_extended] is the
- // newly added id for the current extension.
- uint16 splids_extended;
-
- // The step span of the extension. It is also the size of the string for
- // the newly added spelling id.
- uint16 ext_len;
-
- // The step number for the current extension. It is also the ending position
- // in the input Pinyin string for the substring of spelling ids in splids[].
- // For example, when the user inputs "women", step_no = 4.
- // This parameter may useful to manage the MileStoneHandle list for each
- // step. When the user deletes a character from the string, MileStoneHandle
- // objects for the the steps after that character should be reset; when the
- // user begins a new string, all MileStoneHandle objects should be reset.
- uint16 step_no;
-
- // Indicate whether the newly added spelling ends with a splitting character
- bool splid_end_split;
-
- // If the newly added id is a half id, id_start is the first id of the
- // corresponding full ids; if the newly added id is a full id, id_start is
- // that id.
- uint16 id_start;
-
- // If the newly added id is a half id, id_num is the number of corresponding
- // ids; if it is a full id, id_num == 1.
- uint16 id_num;
-}DictExtPara, *PDictExtPara;
-
-bool is_system_lemma(LemmaIdType lma_id);
-bool is_user_lemma(LemmaIdType lma_id);
-bool is_composing_lemma(LemmaIdType lma_id);
-
-int cmp_lpi_with_psb(const void *p1, const void *p2);
-int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
-int cmp_lpi_with_id(const void *p1, const void *p2);
-int cmp_lpi_with_hanzi(const void *p1, const void *p2);
-
-int cmp_lpsi_with_str(const void *p1, const void *p2);
-
-int cmp_hanzis_1(const void *p1, const void *p2);
-int cmp_hanzis_2(const void *p1, const void *p2);
-int cmp_hanzis_3(const void *p1, const void *p2);
-int cmp_hanzis_4(const void *p1, const void *p2);
-int cmp_hanzis_5(const void *p1, const void *p2);
-int cmp_hanzis_6(const void *p1, const void *p2);
-int cmp_hanzis_7(const void *p1, const void *p2);
-int cmp_hanzis_8(const void *p1, const void *p2);
-
-int cmp_npre_by_score(const void *p1, const void *p2);
-int cmp_npre_by_hislen_score(const void *p1, const void *p2);
-int cmp_npre_by_hanzi_score(const void *p1, const void *p2);
-
-
-size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num);
-
-size_t align_to_size_t(size_t size);
-
-} // namespace
-
-#endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/spellingtable.h b/src/virtualkeyboard/3rdparty/pinyin/include/spellingtable.h
deleted file mode 100644
index fd79c6ef..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/spellingtable.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__
-#define PINYINIME_INCLUDE_SPELLINGTABLE_H__
-
-#include <stdlib.h>
-#include "./dictdef.h"
-
-namespace ime_pinyin {
-
-#ifdef ___BUILD_MODEL___
-
-const size_t kMaxSpellingSize = kMaxPinyinSize;
-
-typedef struct {
- char str[kMaxSpellingSize + 1];
- double freq;
-} RawSpelling, *PRawSpelling;
-
-// This class is used to store the spelling strings
-// The length of the input spelling string should be less or equal to the
-// spelling_size_ (set by init_table). If the input string is too long,
-// we only keep its first spelling_size_ chars.
-class SpellingTable {
- private:
- static const size_t kNotSupportNum = 3;
- static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1];
-
- bool need_score_;
-
- size_t spelling_max_num_;
-
- RawSpelling *raw_spellings_;
-
- // Used to store spelling strings. If the spelling table needs to calculate
- // score, an extra char after each spelling string is the score.
- // An item with a lower score has a higher probability.
- char *spelling_buf_;
- size_t spelling_size_;
-
- double total_freq_;
-
- size_t spelling_num_;
-
- double score_amplifier_;
-
- unsigned char average_score_;
-
- // If frozen is true, put_spelling() and contain() are not allowed to call.
- bool frozen_;
-
- size_t get_hash_pos(const char* spelling_str);
- size_t hash_pos_next(size_t hash_pos);
- void free_resource();
- public:
- SpellingTable();
- ~SpellingTable();
-
- // pure_spl_size is the pure maximum spelling string size. For example,
- // "zhuang" is the longgest item in Pinyin, so pure_spl_size should be 6.
- // spl_max_num is the maximum number of spelling strings to store.
- // need_score is used to indicate whether the caller needs to calculate a
- // score for each spelling.
- bool init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score);
-
- // Put a spelling string to the table.
- // It always returns false if called after arrange() withtout a new
- // init_table() operation.
- // freq is the spelling's occuring count.
- // If the spelling has been in the table, occuring count will accumulated.
- bool put_spelling(const char* spelling_str, double spl_count);
-
- // Test whether a spelling string is in the table.
- // It always returns false, when being called after arrange() withtout a new
- // init_table() operation.
- bool contain(const char* spelling_str);
-
- // Sort the spelling strings and put them from the begin of the buffer.
- // Return the pointer of the sorted spelling strings.
- // item_size and spl_num return the item size and number of spelling.
- // Because each spelling uses a '\0' as terminator, the returned item_size is
- // at least one char longer than the spl_size parameter specified by
- // init_table(). If the table is initialized to calculate score, item_size
- // will be increased by 1, and current_spl_str[item_size - 1] stores an
- // unsinged char score.
- // An item with a lower score has a higher probability.
- // Do not call put_spelling() and contains() after arrange().
- const char* arrange(size_t *item_size, size_t *spl_num);
-
- float get_score_amplifier();
-
- unsigned char get_average_score();
-};
-#endif // ___BUILD_MODEL___
-}
-
-#endif // PINYINIME_INCLUDE_SPELLINGTABLE_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/spellingtrie.h b/src/virtualkeyboard/3rdparty/pinyin/include/spellingtrie.h
deleted file mode 100644
index f943a24d..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/spellingtrie.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_SPELLINGTRIE_H__
-#define PINYINIME_INCLUDE_SPELLINGTRIE_H__
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "./dictdef.h"
-#include <QFile>
-
-namespace ime_pinyin {
-
-static const unsigned short kFullSplIdStart = kHalfSpellingIdNum + 1;
-
-// Node used for the trie of spellings
-struct SpellingNode {
- SpellingNode *first_son;
- // The spelling id for each node. If you need more bits to store
- // spelling id, please adjust this structure.
- uint16 spelling_idx:11;
- uint16 num_of_son:5;
- char char_this_node;
- unsigned char score;
-};
-
-class SpellingTrie {
- private:
- static const int kMaxYmNum = 64;
- static const size_t kValidSplCharNum = 26;
-
- static const uint16 kHalfIdShengmuMask = 0x01;
- static const uint16 kHalfIdYunmuMask = 0x02;
- static const uint16 kHalfIdSzmMask = 0x04;
-
- // Map from half spelling id to single char.
- // For half ids of Zh/Ch/Sh, map to z/c/s (low case) respectively.
- // For example, 1 to 'A', 2 to 'B', 3 to 'C', 4 to 'c', 5 to 'D', ...,
- // 28 to 'Z', 29 to 'z'.
- // [0] is not used to achieve better efficiency.
- static const char kHalfId2Sc_[kFullSplIdStart + 1];
-
- static unsigned char char_flags_[];
- static SpellingTrie* instance_;
-
- // The spelling table
- char *spelling_buf_;
-
- // The size of longest spelling string, includes '\0' and an extra char to
- // store score. For example, "zhuang" is the longgest item in Pinyin list,
- // so spelling_size_ is 8.
- // Structure: The string ended with '\0' + score char.
- // An item with a lower score has a higher probability.
- uint32 spelling_size_;
-
- // Number of full spelling ids.
- uint32 spelling_num_;
-
- float score_amplifier_;
- unsigned char average_score_;
-
- // The Yunmu id list for the spelling ids (for half ids of Shengmu,
- // the Yunmu id is 0).
- // The length of the list is spelling_num_ + kFullSplIdStart,
- // so that spl_ym_ids_[splid] is the Yunmu id of the splid.
- uint8 *spl_ym_ids_;
-
- // The Yunmu table.
- // Each Yunmu will be assigned with Yunmu id from 1.
- char *ym_buf_;
- size_t ym_size_; // The size of longest Yunmu string, '\0'included.
- size_t ym_num_;
-
- // The spelling string just queried
- char *splstr_queried_;
-
- // The spelling string just queried
- char16 *splstr16_queried_;
-
- // The root node of the spelling tree
- SpellingNode* root_;
-
- // If a none qwerty key such as a fnction key like ENTER is given, this node
- // will be used to indicate that this is not a QWERTY node.
- SpellingNode* dumb_node_;
-
- // If a splitter key is pressed, this node will be used to indicate that this
- // is a splitter key.
- SpellingNode* splitter_node_;
-
- // Used to get the first level sons.
- SpellingNode* level1_sons_[kValidSplCharNum];
-
- // The full spl_id range for specific half id.
- // h2f means half to full.
- // A half id can be a ShouZiMu id (id to represent the first char of a full
- // spelling, including Shengmu and Yunmu), or id of zh/ch/sh.
- // [1..kFullSplIdStart-1] is the arrange of half id.
- uint16 h2f_start_[kFullSplIdStart];
- uint16 h2f_num_[kFullSplIdStart];
-
- // Map from full id to half id.
- uint16 *f2h_;
-
-#ifdef ___BUILD_MODEL___
- // How many node used to build the trie.
- size_t node_num_;
-#endif
-
- SpellingTrie();
-
- void free_son_trie(SpellingNode* node);
-
- // Construct a subtree using a subset of the spelling array (from
- // item_star to item_end).
- // Member spelliing_buf_ and spelling_size_ should be valid.
- // parent is used to update its num_of_son and score.
- SpellingNode* construct_spellings_subset(size_t item_start, size_t item_end,
- size_t level, SpellingNode *parent);
- bool build_f2h();
-
- // The caller should guarantee ch >= 'A' && ch <= 'Z'
- bool is_shengmu_char(char ch) const;
-
- // The caller should guarantee ch >= 'A' && ch <= 'Z'
- bool is_yunmu_char(char ch) const;
-
-#ifdef ___BUILD_MODEL___
- // Given a spelling string, return its Yunmu string.
- // The caller guaratees spl_str is valid.
- const char* get_ym_str(const char *spl_str);
-
- // Build the Yunmu list, and the mapping relation between the full ids and the
- // Yunmu ids. This functin is called after the spelling trie is built.
- bool build_ym_info();
-#endif
-
- friend class SpellingParser;
- friend class SmartSplParser;
- friend class SmartSplParser2;
-
- public:
- ~SpellingTrie();
-
- inline static bool is_valid_spl_char(char ch) {
- return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
- }
-
- // The caller guarantees that the two chars are valid spelling chars.
- inline static bool is_same_spl_char(char ch1, char ch2) {
- return ch1 == ch2 || ch1 - ch2 == 'a' - 'A' || ch2 - ch1 == 'a' - 'A';
- }
-
- // Construct the tree from the input pinyin array
- // The given string list should have been sorted.
- // score_amplifier is used to convert a possibility value into score.
- // average_score is the average_score of all spellings. The dumb node is
- // assigned with this score.
- bool construct(const char* spelling_arr, size_t item_size, size_t item_num,
- float score_amplifier, unsigned char average_score);
-
- // Test if the given id is a valid spelling id.
- // If function returns true, the given splid may be updated like this:
- // When 'A' is not enabled in ShouZiMu mode, the parsing result for 'A' is
- // first given as a half id 1, but because 'A' is a one-char Yunmu and
- // it is a valid id, it needs to updated to its corresponding full id.
- bool if_valid_id_update(uint16 *splid) const;
-
- // Test if the given id is a half id.
- bool is_half_id(uint16 splid) const;
-
- bool is_full_id(uint16 splid) const;
-
- // Test if the given id is a one-char Yunmu id (obviously, it is also a half
- // id), such as 'A', 'E' and 'O'.
- bool is_half_id_yunmu(uint16 splid) const;
-
- // Test if this char is a ShouZiMu char. This ShouZiMu char may be not enabled.
- // For Pinyin, only i/u/v is not a ShouZiMu char.
- // The caller should guarantee that ch >= 'A' && ch <= 'Z'
- bool is_szm_char(char ch) const;
-
- // Test If this char is enabled in ShouZiMu mode.
- // The caller should guarantee that ch >= 'A' && ch <= 'Z'
- bool szm_is_enabled(char ch) const;
-
- // Enable/disable Shengmus in ShouZiMu mode(using the first char of a spelling
- // to input).
- void szm_enable_shm(bool enable);
-
- // Enable/disable Yunmus in ShouZiMu mode.
- void szm_enable_ym(bool enable);
-
- // Test if this char is enabled in ShouZiMu mode.
- // The caller should guarantee ch >= 'A' && ch <= 'Z'
- bool is_szm_enabled(char ch) const;
-
- // Return the number of full ids for the given half id.
- uint16 half2full_num(uint16 half_id) const;
-
- // Return the number of full ids for the given half id, and fill spl_id_start
- // to return the first full id.
- uint16 half_to_full(uint16 half_id, uint16 *spl_id_start) const;
-
- // Return the corresponding half id for the given full id.
- // Not frequently used, low efficient.
- // Return 0 if fails.
- uint16 full_to_half(uint16 full_id) const;
-
- // To test whether a half id is compatible with a full id.
- // Generally, when half_id == full_to_half(full_id), return true.
- // But for "Zh, Ch, Sh", if fussy mode is on, half id for 'Z' is compatible
- // with a full id like "Zhe". (Fussy mode is not ready).
- bool half_full_compatible(uint16 half_id, uint16 full_id) const;
-
- static const SpellingTrie* get_cpinstance();
-
- static SpellingTrie& get_instance();
-
- // Save to the file stream
- bool save_spl_trie(FILE *fp);
-
- // Load from the file stream
- bool load_spl_trie(QFile *fp);
-
- // Get the number of spellings
- size_t get_spelling_num();
-
- // Return the Yunmu id for the given Yunmu string.
- // If the string is not valid, return 0;
- uint8 get_ym_id(const char* ym_str);
-
- // Get the readonly Pinyin string for a given spelling id
- const char* get_spelling_str(uint16 splid);
-
- // Get the readonly Pinyin string for a given spelling id
- const char16* get_spelling_str16(uint16 splid);
-
- // Get Pinyin string for a given spelling id. Return the length of the
- // string, and fill-in '\0' at the end.
- size_t get_spelling_str16(uint16 splid, char16 *splstr16,
- size_t splstr16_len);
-};
-}
-
-#endif // PINYINIME_INCLUDE_SPELLINGTRIE_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/splparser.h b/src/virtualkeyboard/3rdparty/pinyin/include/splparser.h
deleted file mode 100644
index d783bd73..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/splparser.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_SPLPARSER_H__
-#define PINYINIME_INCLUDE_SPLPARSER_H__
-
-#include "./dictdef.h"
-#include "./spellingtrie.h"
-
-namespace ime_pinyin {
-
-class SpellingParser {
- protected:
- const SpellingTrie *spl_trie_;
-
- public:
- SpellingParser();
-
- // Given a string, parse it into a spelling id stream.
- // If the whole string are sucessfully parsed, last_is_pre will be true;
- // if the whole string is not fullly parsed, last_is_pre will return whether
- // the last part of the string is a prefix of a full spelling string. For
- // example, given string "zhengzhon", "zhon" is not a valid speling, but it is
- // the prefix of "zhong".
- //
- // If splstr starts with a character not in ['a'-z'] (it is a split char),
- // return 0.
- // Split char can only appear in the middle of the string or at the end.
- uint16 splstr_to_idxs(const char *splstr, uint16 str_len, uint16 splidx[],
- uint16 start_pos[], uint16 max_size, bool &last_is_pre);
-
- // Similar to splstr_to_idxs(), the only difference is that splstr_to_idxs()
- // convert single-character Yunmus into half ids, while this function converts
- // them into full ids.
- uint16 splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 splidx[],
- uint16 start_pos[], uint16 max_size, bool &last_is_pre);
-
- // Similar to splstr_to_idxs(), the only difference is that this function
- // uses char16 instead of char8.
- uint16 splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 splidx[],
- uint16 start_pos[], uint16 max_size, bool &last_is_pre);
-
- // Similar to splstr_to_idxs_f(), the only difference is that this function
- // uses char16 instead of char8.
- uint16 splstr16_to_idxs_f(const char16 *splstr16, uint16 str_len,
- uint16 splidx[], uint16 start_pos[],
- uint16 max_size, bool &last_is_pre);
-
- // If the given string is a spelling, return the id, others, return 0.
- // If the give string is a single char Yunmus like "A", and the char is
- // enabled in ShouZiMu mode, the returned spelling id will be a half id.
- // When the returned spelling id is a half id, *is_pre returns whether it
- // is a prefix of a full spelling string.
- uint16 get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre);
-
- // If the given string is a spelling, return the id, others, return 0.
- // If the give string is a single char Yunmus like "a", no matter the char
- // is enabled in ShouZiMu mode or not, the returned spelling id will be
- // a full id.
- // When the returned spelling id is a half id, *p_is_pre returns whether it
- // is a prefix of a full spelling string.
- uint16 get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre);
-
- // Splitter chars are not included.
- bool is_valid_to_parse(char ch);
-
- // When auto-correction is not enabled, get_splid_by_str() will be called to
- // return the single result. When auto-correction is enabled, this function
- // will be called to get the results. Auto-correction is not ready.
- // full_id_num returns number of full spelling ids.
- // is_pre returns whether the given string is the prefix of a full spelling
- // string.
- // If splstr starts with a character not in [a-zA-Z] (it is a split char),
- // return 0.
- // Split char can only appear in the middle of the string or at the end.
- // The caller should guarantee NULL != splstr && str_len > 0 && NULL != splidx
- uint16 get_splids_parallel(const char *splstr, uint16 str_len,
- uint16 splidx[], uint16 max_size,
- uint16 &full_id_num, bool &is_pre);
-};
-}
-
-#endif // PINYINIME_INCLUDE_SPLPARSER_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/sync.h b/src/virtualkeyboard/3rdparty/pinyin/include/sync.h
deleted file mode 100644
index bf42d1f1..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/sync.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_SYNC_H__
-#define PINYINIME_INCLUDE_SYNC_H__
-
-#define ___SYNC_ENABLED___
-
-#ifdef ___SYNC_ENABLED___
-
-#include "userdict.h"
-
-namespace ime_pinyin {
-
-// Class for user dictionary synchronization
-// This class is not thread safe
-// Normal invoking flow will be
-// begin() ->
-// put_lemmas() x N ->
-// {
-// get_lemmas() ->
-// [ get_last_got_count() ] ->
-// clear_last_got() ->
-// } x N ->
-// finish()
-class Sync {
- public:
- Sync();
- ~Sync();
-
- static const int kUserDictMaxLemmaCount = 5000;
- static const int kUserDictMaxLemmaSize = 200000;
- static const int kUserDictRatio = 20;
-
- bool begin(const char * filename);
-
- // Merge lemmas downloaded from sync server into local dictionary
- // lemmas, lemmas string encoded in UTF16LE
- // len, length of lemmas string
- // Return how many lemmas merged successfully
- int put_lemmas(char16 * lemmas, int len);
-
- // Get local new user lemmas into UTF16LE string
- // str, buffer ptr to store new user lemmas
- // size, size of buffer
- // Return length of returned buffer in measure of UTF16LE
- int get_lemmas(char16 * str, int size);
-
- // Return lemmas count in last get_lemmas()
- int get_last_got_count();
-
- // Return total lemmas count need get_lemmas()
- int get_total_count();
-
- // Clear lemmas got by recent get_lemmas()
- void clear_last_got();
-
- void finish();
-
- int get_capacity();
-
- private:
- UserDict * userdict_;
- char * dictfile_;
- int last_count_;
-};
-
-}
-
-#endif
-
-#endif // PINYINIME_INCLUDE_SYNC_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/userdict.h b/src/virtualkeyboard/3rdparty/pinyin/include/userdict.h
deleted file mode 100644
index db010912..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/userdict.h
+++ /dev/null
@@ -1,434 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_USERDICT_H__
-#define PINYINIME_INCLUDE_USERDICT_H__
-
-#define ___CACHE_ENABLED___
-#define ___SYNC_ENABLED___
-#define ___PREDICT_ENABLED___
-
-// Debug performance for operations
-// #define ___DEBUG_PERF___
-
-#ifdef _WIN32
-#include <time.h>
-#include <winsock.h> // timeval
-#else
-#include <pthread.h>
-#include <sys/time.h>
-#endif
-#include "atomdictbase.h"
-
-namespace ime_pinyin {
-
-class UserDict : public AtomDictBase {
- public:
- UserDict();
- ~UserDict();
-
- bool load_dict(const char *file_name, LemmaIdType start_id,
- LemmaIdType end_id);
-
- bool close_dict();
-
- size_t number_of_lemmas();
-
- void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
-
- MileStoneHandle extend_dict(MileStoneHandle from_handle,
- const DictExtPara *dep, LmaPsbItem *lpi_items,
- size_t lpi_max, size_t *lpi_num);
-
- size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max);
-
- uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
- uint16 str_max);
-
- uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
- uint16 splids_max, bool arg_valid);
-
- size_t predict(const char16 last_hzs[], uint16 hzs_len,
- NPredictItem *npre_items, size_t npre_max,
- size_t b4_used);
-
- // Full spelling ids are required
- LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len, uint16 count);
-
- LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
- bool selected);
-
- LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len);
-
- LmaScoreType get_lemma_score(LemmaIdType lemma_id);
-
- LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len);
-
- bool remove_lemma(LemmaIdType lemma_id);
-
- size_t get_total_lemma_count();
- void set_total_lemma_count_of_others(size_t count);
-
- void flush_cache();
-
- void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
- uint32 reclaim_ratio);
-
- void reclaim();
-
- void defragment();
-
-#ifdef ___SYNC_ENABLED___
- void clear_sync_lemmas(unsigned int start, unsigned int end);
-
- int get_sync_count();
-
- LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len, uint16 count, uint64 lmt);
- /**
- * Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
- *
- * @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
- * @param len length of lemmas string in UTF-16LE
- * @return newly added lemma count
- */
- int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);
-
- /**
- * Get lemmas need sync to a UTF-16LE string of above format.
- * Note: input buffer (str) must not be too small. If str is too small to
- * contain single one lemma, there might be a dead loop.
- *
- * @param str buffer to write lemmas
- * @param size buffer size in UTF-16LE
- * @param count output value of lemma returned
- * @return UTF-16LE string length
- */
- int get_sync_lemmas_in_utf16le_string_from_beginning(
- char16 * str, int size, int * count);
-
-#endif
-
- struct UserDictStat {
- uint32 version;
- const char * file_name;
- struct timeval load_time;
- struct timeval last_update;
- uint32 disk_size;
- uint32 lemma_count;
- uint32 lemma_size;
- uint32 delete_count;
- uint32 delete_size;
-#ifdef ___SYNC_ENABLED___
- uint32 sync_count;
-#endif
- uint32 reclaim_ratio;
- uint32 limit_lemma_count;
- uint32 limit_lemma_size;
- };
-
- bool state(UserDictStat * stat);
-
- private:
- uint32 total_other_nfreq_;
- struct timeval load_time_;
- LemmaIdType start_id_;
- uint32 version_;
- uint8 * lemmas_;
-
- // In-Memory-Only flag for each lemma
- static const uint8 kUserDictLemmaFlagRemove = 1;
- // Inuse lemmas' offset
- uint32 * offsets_;
- // Highest bit in offset tells whether corresponding lemma is removed
- static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
- // Maximum possible for the offset
- static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
- // Bit width for last modified time, from 1 to 16
- static const uint32 kUserDictLMTBitWidth = 16;
- // Granularity for last modified time in second
- static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
- // Maximum frequency count
- static const uint16 kUserDictMaxFrequency = 0xFFFF;
-
-#define COARSE_UTC(year, month, day, hour, minute, second) \
- ( \
- (year - 1970) * 365 * 24 * 60 * 60 + \
- (month - 1) * 30 * 24 * 60 * 60 + \
- (day - 1) * 24 * 60 * 60 + \
- (hour - 0) * 60 * 60 + \
- (minute - 0) * 60 + \
- (second - 0) \
- )
- static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
-
- // Correspond to offsets_
- uint32 * scores_;
- // Following two fields are only valid in memory
- uint32 * ids_;
-#ifdef ___PREDICT_ENABLED___
- uint32 * predicts_;
-#endif
-#ifdef ___SYNC_ENABLED___
- uint32 * syncs_;
- size_t sync_count_size_;
-#endif
- uint32 * offsets_by_id_;
-
- size_t lemma_count_left_;
- size_t lemma_size_left_;
-
- const char * dict_file_;
-
- // Be sure size is 4xN
- struct UserDictInfo {
- // When limitation reached, how much percentage will be reclaimed (1 ~ 100)
- uint32 reclaim_ratio;
- // maximum lemma count, 0 means no limitation
- uint32 limit_lemma_count;
- // Maximum lemma size, it's different from
- // whole disk file size or in-mem dict size
- // 0 means no limitation
- uint32 limit_lemma_size;
- // Total lemma count including deleted and inuse
- // Also indicate offsets_ size
- uint32 lemma_count;
- // Total size of lemmas including used and freed
- uint32 lemma_size;
- // Freed lemma count
- uint32 free_count;
- // Freed lemma size in byte
- uint32 free_size;
-#ifdef ___SYNC_ENABLED___
- uint32 sync_count;
-#endif
- int32 total_nfreq;
- } dict_info_;
-
- static const uint32 kUserDictVersion = 0x0ABCDEF0;
-
- static const uint32 kUserDictPreAlloc = 32;
- static const uint32 kUserDictAverageNchar = 8;
-
- enum UserDictState {
- // Keep in order
- USER_DICT_NONE = 0,
- USER_DICT_SYNC,
-#ifdef ___SYNC_ENABLED___
- USER_DICT_SYNC_DIRTY,
-#endif
- USER_DICT_SCORE_DIRTY,
- USER_DICT_OFFSET_DIRTY,
- USER_DICT_LEMMA_DIRTY,
-
- USER_DICT_DEFRAGMENTED,
- } state_;
-
- struct UserDictSearchable {
- uint16 splids_len;
- uint16 splid_start[kMaxLemmaSize];
- uint16 splid_count[kMaxLemmaSize];
- // Compact inital letters for both FuzzyCompareSpellId and cache system
- uint32 signature[kMaxLemmaSize / 4];
- };
-
-#ifdef ___CACHE_ENABLED___
- enum UserDictCacheType {
- USER_DICT_CACHE,
- USER_DICT_MISS_CACHE,
- };
-
- static const int kUserDictCacheSize = 4;
- static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
-
- struct UserDictMissCache {
- uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
- uint16 head, tail;
- } miss_caches_[kMaxLemmaSize];
-
- struct UserDictCache {
- uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
- uint32 offsets[kUserDictCacheSize];
- uint32 lengths[kUserDictCacheSize];
- // Ring buffer
- uint16 head, tail;
- } caches_[kMaxLemmaSize];
-
- void cache_init();
-
- void cache_push(UserDictCacheType type,
- UserDictSearchable *searchable,
- uint32 offset, uint32 length);
-
- bool cache_hit(UserDictSearchable *searchable,
- uint32 *offset, uint32 *length);
-
- bool load_cache(UserDictSearchable *searchable,
- uint32 *offset, uint32 *length);
-
- void save_cache(UserDictSearchable *searchable,
- uint32 offset, uint32 length);
-
- void reset_cache();
-
- bool load_miss_cache(UserDictSearchable *searchable);
-
- void save_miss_cache(UserDictSearchable *searchable);
-
- void reset_miss_cache();
-#endif
-
- LmaScoreType translate_score(int f);
-
- int extract_score_freq(int raw_score);
-
- uint64 extract_score_lmt(int raw_score);
-
- inline int build_score(uint64 lmt, int freq);
-
- inline int64 utf16le_atoll(uint16 *s, int len);
-
- inline int utf16le_lltoa(int64 v, uint16 *s, int size);
-
- LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len, uint16 count, uint64 lmt);
-
- size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
- LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);
-
- int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
-
- int _get_lemma_score(LemmaIdType lemma_id);
-
- int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
- const UserDictSearchable *searchable);
-
- bool is_prefix_spell_id(const uint16 * fullids,
- uint16 fulllen, const UserDictSearchable *searchable);
-
- uint32 get_dict_file_size(UserDictInfo * info);
-
- bool reset(const char *file);
-
- bool validate(const char *file);
-
- bool load(const char *file, LemmaIdType start_id);
-
- bool is_valid_state();
-
- bool is_valid_lemma_id(LemmaIdType id);
-
- LemmaIdType get_max_lemma_id();
-
- void set_lemma_flag(uint32 offset, uint8 flag);
-
- char get_lemma_flag(uint32 offset);
-
- char get_lemma_nchar(uint32 offset);
-
- uint16 * get_lemma_spell_ids(uint32 offset);
-
- uint16 * get_lemma_word(uint32 offset);
-
- // Prepare searchable to fasten locate process
- void prepare_locate(UserDictSearchable *searchable,
- const uint16 * splids, uint16 len);
-
- // Compare initial letters only
- int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
- const UserDictSearchable *searchable);
-
- // Compare exactly two spell ids
- // First argument must be a full id spell id
- bool equal_spell_id(const uint16 * fullids,
- uint16 fulllen, const UserDictSearchable *searchable);
-
- // Find first item by initial letters
- int32 locate_first_in_offsets(const UserDictSearchable *searchable);
-
- LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
- uint16 lemma_len, uint16 count, uint64 lmt);
-
- // Check if a lemma is in dictionary
- int32 locate_in_offsets(char16 lemma_str[],
- uint16 splid_str[], uint16 lemma_len);
-
- bool remove_lemma_by_offset_index(int offset_index);
-#ifdef ___PREDICT_ENABLED___
- uint32 locate_where_to_insert_in_predicts(const uint16 * words,
- int lemma_len);
-
- int32 locate_first_in_predicts(const uint16 * words, int lemma_len);
-
- void remove_lemma_from_predict_list(uint32 offset);
-#endif
-#ifdef ___SYNC_ENABLED___
- void queue_lemma_for_sync(LemmaIdType id);
-
- void remove_lemma_from_sync_list(uint32 offset);
-
- void write_back_sync(int fd);
-#endif
- void write_back_score(int fd);
- void write_back_offset(int fd);
- void write_back_lemma(int fd);
- void write_back_all(int fd);
- void write_back();
-
- struct UserDictScoreOffsetPair {
- int score;
- uint32 offset_index;
- };
-
- inline void swap(UserDictScoreOffsetPair * sop, int i, int j);
-
- void shift_down(UserDictScoreOffsetPair * sop, int i, int n);
-
- // On-disk format for each lemma
- // +-------------+
- // | Version (4) |
- // +-------------+
- // +-----------+-----------+--------------------+-------------------+
- // | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
- // +-----------+-----------+--------------------+-------------------+
- // ...
- // +-----------------------+ +-------------+ <---Offset of offset
- // | Offset1 by_splids (4) | ... | OffsetN (4) |
- // +-----------------------+ +-------------+
-#ifdef ___PREDICT_ENABLED___
- // +----------------------+ +-------------+
- // | Offset1 by_lemma (4) | ... | OffsetN (4) |
- // +----------------------+ +-------------+
-#endif
- // +------------+ +------------+
- // | Score1 (4) | ... | ScoreN (4) |
- // +------------+ +------------+
-#ifdef ___SYNC_ENABLED___
- // +-------------+ +-------------+
- // | NewAdd1 (4) | ... | NewAddN (4) |
- // +-------------+ +-------------+
-#endif
- // +----------------+
- // | Dict Info (4x) |
- // +----------------+
-};
-}
-
-#endif
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/utf16char.h b/src/virtualkeyboard/3rdparty/pinyin/include/utf16char.h
deleted file mode 100644
index 7e957db5..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/utf16char.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_UTF16CHAR_H__
-#define PINYINIME_INCLUDE_UTF16CHAR_H__
-
-#include <stdlib.h>
-
-namespace ime_pinyin {
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- typedef unsigned short char16;
-
- // Get a token from utf16_str,
- // Returned pointer is a '\0'-terminated utf16 string, or NULL
- // *utf16_str_next returns the next part of the string for further tokenizing
- char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
- char16 **utf16_str_next);
-
- int utf16_atoi(const char16 *utf16_str);
-
- float utf16_atof(const char16 *utf16_str);
-
- size_t utf16_strlen(const char16 *utf16_str);
-
- int utf16_strcmp(const char16 *str1, const char16 *str2);
- int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size);
-
- char16* utf16_strcpy(char16 *dst, const char16 *src);
- char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size);
-
-
- char* utf16_strcpy_tochar(char *dst, const char16 *src);
-
-#ifdef __cplusplus
-}
-#endif
-}
-
-#endif // PINYINIME_INCLUDE_UTF16CHAR_H__
diff --git a/src/virtualkeyboard/3rdparty/pinyin/include/utf16reader.h b/src/virtualkeyboard/3rdparty/pinyin/include/utf16reader.h
deleted file mode 100644
index b6d6719e..00000000
--- a/src/virtualkeyboard/3rdparty/pinyin/include/utf16reader.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef PINYINIME_INCLUDE_UTF16READER_H__
-#define PINYINIME_INCLUDE_UTF16READER_H__
-
-#include <stdio.h>
-#include "./utf16char.h"
-
-namespace ime_pinyin {
-
-class Utf16Reader {
- private:
- FILE *fp_;
- char16 *buffer_;
- size_t buffer_total_len_;
- size_t buffer_next_pos_;
-
- // Always less than buffer_total_len_ - buffer_next_pos_
- size_t buffer_valid_len_;
-
- public:
- Utf16Reader();
- ~Utf16Reader();
-
- // filename is the name of the file to open.
- // buffer_len specifies how long buffer should be allocated to speed up the
- // future reading
- bool open(const char* filename, size_t buffer_len);
- char16* readline(char16* read_buf, size_t max_len);
- bool close();
-};
-}
-
-#endif // PINYINIME_INCLUDE_UTF16READER_H__