diff options
Diffstat (limited to 'src/plugins/pinyin/3rdparty/pinyin/share/spellingtrie.cpp')
-rw-r--r-- | src/plugins/pinyin/3rdparty/pinyin/share/spellingtrie.cpp | 831 |
1 files changed, 831 insertions, 0 deletions
diff --git a/src/plugins/pinyin/3rdparty/pinyin/share/spellingtrie.cpp b/src/plugins/pinyin/3rdparty/pinyin/share/spellingtrie.cpp new file mode 100644 index 00000000..355c56be --- /dev/null +++ b/src/plugins/pinyin/3rdparty/pinyin/share/spellingtrie.cpp @@ -0,0 +1,831 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "../include/dictdef.h" + +#ifdef _WIN32 +#define snprintf _snprintf +#endif + +#ifdef ___BUILD_MODEL___ +#include "../include/spellingtable.h" +#endif + +#include "../include/spellingtrie.h" + +namespace ime_pinyin { + +SpellingTrie* SpellingTrie::instance_ = NULL; + +// z/c/s is for Zh/Ch/Sh +const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] = + "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz"; + +// Bit 0 : is it a Shengmu char? +// Bit 1 : is it a Yunmu char? (one char is a Yunmu) +// Bit 2 : is it enabled in ShouZiMu(first char) mode? +unsigned char SpellingTrie::char_flags_[] = { + // a b c d e f g + 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, + // h i j k l m n + 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, + // o p q r s t + 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, + // u v w x y z + 0x00, 0x00, 0x01, 0x01, 0x01, 0x01 +}; + +int compare_spl(const void* p1, const void* p2) { + return strcmp((const char*)(p1), (const char*)(p2)); +} + +SpellingTrie::SpellingTrie() { + spelling_buf_ = NULL; + spelling_size_ = 0; + spelling_num_ = 0; + spl_ym_ids_ = NULL; + splstr_queried_ = NULL; + splstr16_queried_ = NULL; + root_ = NULL; + dumb_node_ = NULL; + splitter_node_ = NULL; + instance_ = NULL; + ym_buf_ = NULL; + f2h_ = NULL; + + szm_enable_shm(true); + szm_enable_ym(true); + +#ifdef ___BUILD_MODEL___ + node_num_ = 0; +#endif +} + +SpellingTrie::~SpellingTrie() { + if (NULL != spelling_buf_) + delete [] spelling_buf_; + + if (NULL != splstr_queried_) + delete [] splstr_queried_; + + if (NULL != splstr16_queried_) + delete [] splstr16_queried_; + + if (NULL != spl_ym_ids_) + delete [] spl_ym_ids_; + + if (NULL != root_) { + free_son_trie(root_); + delete root_; + } + + if (NULL != dumb_node_) { + delete [] dumb_node_; + } + + if (NULL != splitter_node_) { + delete [] splitter_node_; + } + + if (NULL != instance_) { + delete instance_; + instance_ = NULL; + } + + if (NULL != ym_buf_) + delete [] ym_buf_; + + if (NULL != f2h_) + delete [] f2h_; +} + +bool SpellingTrie::if_valid_id_update(uint16 *splid) const { + if (NULL == splid || 0 == *splid) + return false; + + if (*splid >= kFullSplIdStart) + return true; + if (*splid < kFullSplIdStart) { + char ch = kHalfId2Sc_[*splid]; + if (ch > 'Z') { + return true; + } else { + if (szm_is_enabled(ch)) { + return true; + } else if (is_yunmu_char(ch)) { + assert(h2f_num_[*splid] > 0); + *splid = h2f_start_[*splid]; + return true; + } + } + } + return false; +} + +bool SpellingTrie::is_half_id(uint16 splid) const { + if (0 == splid || splid >= kFullSplIdStart) + return false; + + return true; +} + +bool SpellingTrie::is_full_id(uint16 splid) const { + if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_) + return false; + return true; +} + +bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const { + uint16 half_fr_full = full_to_half(full_id); + + if (half_fr_full == half_id) + return true; + + // &~0x20 is used to conver the char to upper case. + // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S. + char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20)); + char ch_h = kHalfId2Sc_[half_id]; + if (ch_f == ch_h) + return true; + + return false; +} + +bool SpellingTrie::is_half_id_yunmu(uint16 splid) const { + if (0 == splid || splid >= kFullSplIdStart) + return false; + + char ch = kHalfId2Sc_[splid]; + // If ch >= 'a', that means the half id is one of Zh/Ch/Sh + if (ch >= 'a') { + return false; + } + + return char_flags_[ch - 'A'] & kHalfIdYunmuMask; +} + +bool SpellingTrie::is_shengmu_char(char ch) const { + return char_flags_[ch - 'A'] & kHalfIdShengmuMask; +} + +bool SpellingTrie::is_yunmu_char(char ch) const { + return char_flags_[ch - 'A'] & kHalfIdYunmuMask; +} + +bool SpellingTrie::is_szm_char(char ch) const { + return is_shengmu_char(ch) || is_yunmu_char(ch); +} + +bool SpellingTrie::szm_is_enabled(char ch) const { + return char_flags_[ch - 'A'] & kHalfIdSzmMask; +} + +void SpellingTrie::szm_enable_shm(bool enable) { + if (enable) { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_shengmu_char(ch)) + char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; + } + } else { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_shengmu_char(ch)) + char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); + } + } +} + +void SpellingTrie::szm_enable_ym(bool enable) { + if (enable) { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_yunmu_char(ch)) + char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; + } + } else { + for (char ch = 'A'; ch <= 'Z'; ch++) { + if (is_yunmu_char(ch)) + char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); + } + } +} + +bool SpellingTrie::is_szm_enabled(char ch) const { + return char_flags_[ch - 'A'] & kHalfIdSzmMask; +} + +const SpellingTrie* SpellingTrie::get_cpinstance() { + return &get_instance(); +} + +SpellingTrie& SpellingTrie::get_instance() { + if (NULL == instance_) + instance_ = new SpellingTrie(); + + return *instance_; +} + +uint16 SpellingTrie::half2full_num(uint16 half_id) const { + if (NULL == root_ || half_id >= kFullSplIdStart) + return 0; + return h2f_num_[half_id]; +} + +uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const { + if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart) + return 0; + + *spl_id_start = h2f_start_[half_id]; + return h2f_num_[half_id]; +} + +uint16 SpellingTrie::full_to_half(uint16 full_id) const { + if (NULL == root_ || full_id < kFullSplIdStart || + full_id > spelling_num_ + kFullSplIdStart) + return 0; + + return f2h_[full_id - kFullSplIdStart]; +} + +void SpellingTrie::free_son_trie(SpellingNode* node) { + if (NULL == node) + return; + + for (size_t pos = 0; pos < node->num_of_son; pos++) { + free_son_trie(node->first_son + pos); + } + + if (NULL != node->first_son) + delete [] node->first_son; +} + +bool SpellingTrie::construct(const char* spelling_arr, size_t item_size, + size_t item_num, float score_amplifier, + unsigned char average_score) { + if (spelling_arr == NULL) + return false; + + memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart); + memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart); + + // If the arr is the same as the buf, means this function is called by + // load_table(), the table data are ready; otherwise the array should be + // saved. + if (spelling_arr != spelling_buf_) { + if (NULL != spelling_buf_) + delete [] spelling_buf_; + spelling_buf_ = new char[item_size * item_num]; + if (NULL == spelling_buf_) + return false; + memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num); + } + + spelling_size_ = item_size; + spelling_num_ = item_num; + + score_amplifier_ = score_amplifier; + average_score_ = average_score; + + if (NULL != splstr_queried_) + delete [] splstr_queried_; + splstr_queried_ = new char[spelling_size_]; + if (NULL == splstr_queried_) + return false; + + if (NULL != splstr16_queried_) + delete [] splstr16_queried_; + splstr16_queried_ = new char16[spelling_size_]; + if (NULL == splstr16_queried_) + return false; + + // First, sort the buf to ensure they are in ascendant order + qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl); + +#ifdef ___BUILD_MODEL___ + node_num_ = 1; +#endif + + root_ = new SpellingNode(); + memset(root_, 0, sizeof(SpellingNode)); + + dumb_node_ = new SpellingNode(); + memset(dumb_node_, 0, sizeof(SpellingNode)); + dumb_node_->score = average_score_; + + splitter_node_ = new SpellingNode(); + memset(splitter_node_, 0, sizeof(SpellingNode)); + splitter_node_->score = average_score_; + + memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum); + + root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_); + + // Root's score should be cleared. + root_->score = 0; + + if (NULL == root_->first_son) + return false; + + h2f_start_[0] = h2f_num_[0] = 0; + + if (!build_f2h()) + return false; + +#ifdef ___BUILD_MODEL___ + if (kPrintDebug0) { + printf("---SpellingTrie Nodes: %d\n", (int)node_num_); + } + return build_ym_info(); +#else + return true; +#endif +} + +#ifdef ___BUILD_MODEL___ +const char* SpellingTrie::get_ym_str(const char *spl_str) { + bool start_ZCS = false; + if (is_shengmu_char(*spl_str)) { + if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str) + start_ZCS = true; + spl_str += 1; + if (start_ZCS && 'h' == *spl_str) + spl_str += 1; + } + return spl_str; +} + +bool SpellingTrie::build_ym_info() { + bool sucess; + SpellingTable *spl_table = new SpellingTable(); + + sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false); + assert(sucess); + + for (uint16 pos = 0; pos < spelling_num_; pos++) { + const char *spl_str = spelling_buf_ + spelling_size_ * pos; + spl_str = get_ym_str(spl_str); + if ('\0' != spl_str[0]) { + sucess = spl_table->put_spelling(spl_str, 0); + assert(sucess); + } + } + + size_t ym_item_size; // '\0' is included + size_t ym_num; + const char* ym_buf; + ym_buf = spl_table->arrange(&ym_item_size, &ym_num); + + if (NULL != ym_buf_) + delete [] ym_buf_; + ym_buf_ = new char[ym_item_size * ym_num]; + if (NULL == ym_buf_) { + delete spl_table; + return false; + } + + memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num); + ym_size_ = ym_item_size; + ym_num_ = ym_num; + + delete spl_table; + + // Generate the maping from the spelling ids to the Yunmu ids. + if (spl_ym_ids_) + delete spl_ym_ids_; + spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart]; + if (NULL == spl_ym_ids_) + return false; + + memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart)); + + for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) { + const char *str = get_spelling_str(id); + + str = get_ym_str(str); + if ('\0' != str[0]) { + uint8 ym_id = get_ym_id(str); + spl_ym_ids_[id] = ym_id; + assert(ym_id > 0); + } else { + spl_ym_ids_[id] = 0; + } + } + return true; +} +#endif + +SpellingNode* SpellingTrie::construct_spellings_subset( + size_t item_start, size_t item_end, size_t level, SpellingNode* parent) { + if (level >= spelling_size_ || item_end <= item_start || NULL == parent) + return NULL; + + SpellingNode *first_son = NULL; + uint16 num_of_son = 0; + unsigned char min_son_score = 255; + + const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start; + char char_for_node = spelling_last_start[level]; + assert((char_for_node >= 'A' && char_for_node <= 'Z') || + 'h' == char_for_node); + + // Scan the array to find how many sons + for (size_t i = item_start + 1; i < item_end; i++) { + const char *spelling_current = spelling_buf_ + spelling_size_ * i; + char char_current = spelling_current[level]; + if (char_current != char_for_node) { + num_of_son++; + char_for_node = char_current; + } + } + num_of_son++; + + // Allocate memory +#ifdef ___BUILD_MODEL___ + node_num_ += num_of_son; +#endif + first_son = new SpellingNode[num_of_son]; + memset(first_son, 0, sizeof(SpellingNode)*num_of_son); + + // Now begin construct tree + size_t son_pos = 0; + + spelling_last_start = spelling_buf_ + spelling_size_ * item_start; + char_for_node = spelling_last_start[level]; + + bool spelling_endable = true; + if (spelling_last_start[level + 1] != '\0') + spelling_endable = false; + + size_t item_start_next = item_start; + + for (size_t i = item_start + 1; i < item_end; i++) { + const char *spelling_current = spelling_buf_ + spelling_size_ * i; + char char_current = spelling_current[level]; + assert(is_valid_spl_char(char_current)); + + if (char_current != char_for_node) { + // Construct a node + SpellingNode *node_current = first_son + son_pos; + node_current->char_this_node = char_for_node; + + // For quick search in the first level + if (0 == level) + level1_sons_[char_for_node - 'A'] = node_current; + + if (spelling_endable) { + node_current->spelling_idx = kFullSplIdStart + item_start_next; + } + + if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) { + size_t real_start = item_start_next; + if (spelling_last_start[level + 1] == '\0') + real_start++; + + node_current->first_son = + construct_spellings_subset(real_start, i, level + 1, + node_current); + + if (real_start == item_start_next + 1) { + uint16 score_this = static_cast<unsigned char>( + spelling_last_start[spelling_size_ - 1]); + if (score_this < node_current->score) + node_current->score = score_this; + } + } else { + node_current->first_son = NULL; + node_current->score = static_cast<unsigned char>( + spelling_last_start[spelling_size_ - 1]); + } + + if (node_current->score < min_son_score) + min_son_score = node_current->score; + + bool is_half = false; + if (level == 0 && is_szm_char(char_for_node)) { + node_current->spelling_idx = + static_cast<uint16>(char_for_node - 'A' + 1); + + if (char_for_node > 'C') + node_current->spelling_idx++; + if (char_for_node > 'S') + node_current->spelling_idx++; + + h2f_num_[node_current->spelling_idx] = i - item_start_next; + is_half = true; + } else if (level == 1 && char_for_node == 'h') { + char ch_level0 = spelling_last_start[0]; + uint16 part_id = 0; + if (ch_level0 == 'C') + part_id = 'C' - 'A' + 1 + 1; + else if (ch_level0 == 'S') + part_id = 'S' - 'A' + 1 + 2; + else if (ch_level0 == 'Z') + part_id = 'Z' - 'A' + 1 + 3; + if (0 != part_id) { + node_current->spelling_idx = part_id; + h2f_num_[node_current->spelling_idx] = i - item_start_next; + is_half = true; + } + } + + if (is_half) { + if (h2f_num_[node_current->spelling_idx] > 0) + h2f_start_[node_current->spelling_idx] = + item_start_next + kFullSplIdStart; + else + h2f_start_[node_current->spelling_idx] = 0; + } + + // for next sibling + spelling_last_start = spelling_current; + char_for_node = char_current; + item_start_next = i; + spelling_endable = true; + if (spelling_current[level + 1] != '\0') + spelling_endable = false; + + son_pos++; + } + } + + // the last one + SpellingNode *node_current = first_son + son_pos; + node_current->char_this_node = char_for_node; + + // For quick search in the first level + if (0 == level) + level1_sons_[char_for_node - 'A'] = node_current; + + if (spelling_endable) { + node_current->spelling_idx = kFullSplIdStart + item_start_next; + } + + if (spelling_last_start[level + 1] != '\0' || + item_end - item_start_next > 1) { + size_t real_start = item_start_next; + if (spelling_last_start[level + 1] == '\0') + real_start++; + + node_current->first_son = + construct_spellings_subset(real_start, item_end, level + 1, + node_current); + + if (real_start == item_start_next + 1) { + uint16 score_this = static_cast<unsigned char>( + spelling_last_start[spelling_size_ - 1]); + if (score_this < node_current->score) + node_current->score = score_this; + } + } else { + node_current->first_son = NULL; + node_current->score = static_cast<unsigned char>( + spelling_last_start[spelling_size_ - 1]); + } + + if (node_current->score < min_son_score) + min_son_score = node_current->score; + + assert(son_pos + 1 == num_of_son); + + bool is_half = false; + if (level == 0 && szm_is_enabled(char_for_node)) { + node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1); + + if (char_for_node > 'C') + node_current->spelling_idx++; + if (char_for_node > 'S') + node_current->spelling_idx++; + + h2f_num_[node_current->spelling_idx] = item_end - item_start_next; + is_half = true; + } else if (level == 1 && char_for_node == 'h') { + char ch_level0 = spelling_last_start[0]; + uint16 part_id = 0; + if (ch_level0 == 'C') + part_id = 'C' - 'A' + 1 + 1; + else if (ch_level0 == 'S') + part_id = 'S' - 'A' + 1 + 2; + else if (ch_level0 == 'Z') + part_id = 'Z' - 'A' + 1 + 3; + if (0 != part_id) { + node_current->spelling_idx = part_id; + h2f_num_[node_current->spelling_idx] = item_end - item_start_next; + is_half = true; + } + } + if (is_half) { + if (h2f_num_[node_current->spelling_idx] > 0) + h2f_start_[node_current->spelling_idx] = + item_start_next + kFullSplIdStart; + else + h2f_start_[node_current->spelling_idx] = 0; + } + + parent->num_of_son = num_of_son; + parent->score = min_son_score; + return first_son; +} + +bool SpellingTrie::save_spl_trie(FILE *fp) { + if (NULL == fp || NULL == spelling_buf_) + return false; + + if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1) + return false; + + if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1) + return false; + + if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1) + return false; + + if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1) + return false; + + if (fwrite(spelling_buf_, sizeof(char) * spelling_size_, + spelling_num_, fp) != spelling_num_) + return false; + + return true; +} + +bool SpellingTrie::load_spl_trie(QFile *fp) { + if (NULL == fp) + return false; + + if (fp->read((char *)&spelling_size_, sizeof(uint32)) != sizeof(uint32)) + return false; + + if (fp->read((char *)&spelling_num_, sizeof(uint32)) != sizeof(uint32)) + return false; + + if (fp->read((char *)&score_amplifier_, sizeof(float)) != sizeof(float)) + return false; + + if (fp->read((char *)&average_score_, sizeof(unsigned char)) != sizeof(unsigned char)) + return false; + + if (NULL != spelling_buf_) + delete [] spelling_buf_; + + spelling_buf_ = new char[spelling_size_ * spelling_num_]; + if (NULL == spelling_buf_) + return false; + + if (fp->read((char *)spelling_buf_, spelling_size_ * spelling_num_) != spelling_size_ * spelling_num_) + return false; + + return construct(spelling_buf_, spelling_size_, spelling_num_, + score_amplifier_, average_score_); +} + +bool SpellingTrie::build_f2h() { + if (NULL != f2h_) + delete [] f2h_; + f2h_ = new uint16[spelling_num_]; + if (NULL == f2h_) + return false; + + for (uint16 hid = 0; hid < kFullSplIdStart; hid++) { + for (uint16 fid = h2f_start_[hid]; + fid < h2f_start_[hid] + h2f_num_[hid]; fid++) + f2h_[fid - kFullSplIdStart] = hid; + } + + return true; +} + +size_t SpellingTrie::get_spelling_num() { + return spelling_num_; +} + +uint8 SpellingTrie::get_ym_id(const char *ym_str) { + if (NULL == ym_str || NULL == ym_buf_) + return 0; + + for (uint8 pos = 0; pos < ym_num_; pos++) + if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0) + return pos + 1; + + return 0; +} + +const char* SpellingTrie::get_spelling_str(uint16 splid) { + splstr_queried_[0] = '\0'; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + snprintf(splstr_queried_, spelling_size_, "%s", + spelling_buf_ + splid * spelling_size_); + } else { + if (splid == 'C' - 'A' + 1 + 1) { + snprintf(splstr_queried_, spelling_size_, "%s", "Ch"); + } else if (splid == 'S' - 'A' + 1 + 2) { + snprintf(splstr_queried_, spelling_size_, "%s", "Sh"); + } else if (splid == 'Z' - 'A' + 1 + 3) { + snprintf(splstr_queried_, spelling_size_, "%s", "Zh"); + } else { + if (splid > 'C' - 'A' + 1) + splid--; + if (splid > 'S' - 'A' + 1) + splid--; + splstr_queried_[0] = 'A' + splid - 1; + splstr_queried_[1] = '\0'; + } + } + return splstr_queried_; +} + +const char16* SpellingTrie::get_spelling_str16(uint16 splid) { + splstr16_queried_[0] = '\0'; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + for (size_t pos = 0; pos < spelling_size_; pos++) { + splstr16_queried_[pos] = static_cast<char16> + (spelling_buf_[splid * spelling_size_ + pos]); + } + } else { + if (splid == 'C' - 'A' + 1 + 1) { + splstr16_queried_[0] = static_cast<char16>('C'); + splstr16_queried_[1] = static_cast<char16>('h'); + splstr16_queried_[2] = static_cast<char16>('\0'); + } else if (splid == 'S' - 'A' + 1 + 2) { + splstr16_queried_[0] = static_cast<char16>('S'); + splstr16_queried_[1] = static_cast<char16>('h'); + splstr16_queried_[2] = static_cast<char16>('\0'); + } else if (splid == 'Z' - 'A' + 1 + 3) { + splstr16_queried_[0] = static_cast<char16>('Z'); + splstr16_queried_[1] = static_cast<char16>('h'); + splstr16_queried_[2] = static_cast<char16>('\0'); + } else { + if (splid > 'C' - 'A' + 1) + splid--; + if (splid > 'S' - 'A' + 1) + splid--; + splstr16_queried_[0] = 'A' + splid - 1; + splstr16_queried_[1] = '\0'; + } + } + return splstr16_queried_; +} + +size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16, + size_t splstr16_len) { + if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0; + + if (splid >= kFullSplIdStart) { + splid -= kFullSplIdStart; + for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) { + splstr16[pos] = static_cast<char16> + (spelling_buf_[splid * spelling_size_ + pos]); + if (static_cast<char16>('\0') == splstr16[pos]) { + return pos; + } + } + } else { + if (splid == 'C' - 'A' + 1 + 1) { + splstr16[0] = static_cast<char16>('C'); + splstr16[1] = static_cast<char16>('h'); + splstr16[2] = static_cast<char16>('\0'); + return 2; + } else if (splid == 'S' - 'A' + 1 + 2) { + splstr16[0] = static_cast<char16>('S'); + splstr16[1] = static_cast<char16>('h'); + splstr16[2] = static_cast<char16>('\0'); + return 2; + } else if (splid == 'Z' - 'A' + 1 + 3) { + splstr16[0] = static_cast<char16>('Z'); + splstr16[1] = static_cast<char16>('h'); + splstr16[2] = static_cast<char16>('\0'); + return 2; + } else { + if (splid > 'C' - 'A' + 1) + splid--; + if (splid > 'S' - 'A' + 1) + splid--; + splstr16[0] = 'A' + splid - 1; + splstr16[1] = '\0'; + return 1; + } + } + + // Not reachable. + return 0; +} + +} // namespace ime_pinyin |