aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp')
-rw-r--r--src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp527
1 files changed, 493 insertions, 34 deletions
diff --git a/src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp b/src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp
index 46cbf49c..6387ee16 100644
--- a/src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp
+++ b/src/plugins/hunspell/hunspellinputmethod/hunspellworker.cpp
@@ -28,17 +28,308 @@
****************************************************************************/
#include <QtHunspellInputMethod/private/hunspellworker_p.h>
-#include <QLoggingCategory>
#include <QVector>
#include <QTextCodec>
#include <QFileInfo>
#include <QRegularExpression>
#include <QTime>
+#include <QFile>
+#include <QDir>
+#include <QtAlgorithms>
QT_BEGIN_NAMESPACE
namespace QtVirtualKeyboard {
-Q_DECLARE_LOGGING_CATEGORY(lcHunspell)
+HunspellWordList::HunspellWordList(int limit) :
+ _index(0),
+ _limit(limit)
+{
+}
+
+HunspellWordList::HunspellWordList(HunspellWordList &other)
+{
+ *this = other;
+}
+
+HunspellWordList &HunspellWordList::operator=(HunspellWordList &other)
+{
+ if (this != &other) {
+ QMutexLocker guard(&_lock);
+ QMutexLocker otherGuard(&other._lock);
+ _list = other._list;
+ _flags = other._flags;
+ _index = other._index;
+ _limit = other._limit;
+ _searchIndex = other._searchIndex;
+ }
+ return *this;
+}
+
+int HunspellWordList::index() const
+{
+ return _index < _list.size() ? _index : -1;
+}
+
+void HunspellWordList::setIndex(int index)
+{
+ QMutexLocker guard(&_lock);
+ _index = index;
+}
+
+bool HunspellWordList::clear()
+{
+ QMutexLocker guard(&_lock);
+ bool result = !_list.isEmpty();
+ _list.clear();
+ _flags.clear();
+ _index = 0;
+ _searchIndex.clear();
+ return result;
+}
+
+bool HunspellWordList::clearSuggestions()
+{
+ QMutexLocker guard(&_lock);
+ if (_list.isEmpty())
+ return false;
+
+ _searchIndex.clear();
+ if (_list.size() > 1) {
+ QString word = _list.at(0);
+ Flags flags = _flags.at(0);
+ _list.clear();
+ _flags.clear();
+ if (!word.isEmpty()) {
+ _index = 0;
+ _list.append(word);
+ _flags.append(flags);
+ }
+ return true;
+ } else if (_list.at(0).isEmpty()) {
+ _list.clear();
+ _flags.clear();
+ _index = 0;
+ return true;
+ }
+ return false;
+}
+
+bool HunspellWordList::hasSuggestions() const
+{
+ return _list.size() > 1;
+}
+
+int HunspellWordList::size() const
+{
+ return _list.size();
+}
+
+int HunspellWordList::isEmpty() const
+{
+ return _list.isEmpty() || _list.at(0).isEmpty();
+}
+
+bool HunspellWordList::contains(const QString &word)
+{
+ QMutexLocker guard(&_lock);
+
+ // Use index search when the search index is available.
+ // This provides a lot faster search than QList::contains().
+ // Search index is available when it has been rebuilt using
+ // rebuildSearchIndex() method. Search index is automatically
+ // cleared when the word list is modified.
+ if (!_searchIndex.isEmpty()) {
+ Q_ASSERT(_searchIndex.size() == _list.size());
+
+ SearchContext searchContext(word, _list);
+ return std::binary_search(_searchIndex.begin(), _searchIndex.end(), -1, [searchContext](const int &a, const int &b) {
+ const QString &wordA = (a == -1) ? searchContext.word : searchContext.list[a];
+ const QString &wordB = (b == -1) ? searchContext.word : searchContext.list[b];
+ return wordA.compare(wordB, Qt::CaseInsensitive) < 0;
+ });
+ }
+
+ return _list.contains(word, Qt::CaseInsensitive);
+}
+
+QString HunspellWordList::findWordCompletion(const QString &word)
+{
+ QMutexLocker guard(&_lock);
+
+ if (!_searchIndex.isEmpty()) {
+ Q_ASSERT(_searchIndex.size() == _list.size());
+
+ SearchContext searchContext(word, _list);
+ auto match = std::lower_bound(_searchIndex.begin(), _searchIndex.end(), -1, [searchContext](const int &a, const int &b) {
+ const QString &wordA = (a == -1) ? searchContext.word : searchContext.list[a];
+ const QString &wordB = (b == -1) ? searchContext.word : searchContext.list[b];
+ return wordA.compare(wordB, Qt::CaseInsensitive) < 0;
+ });
+
+ if (match == _searchIndex.end())
+ return QString();
+
+ if (!word.compare(_list[*match], Qt::CaseInsensitive)) {
+ match++;
+ if (match == _searchIndex.end())
+ return QString();
+ }
+
+ return _list[*match].startsWith(word, Qt::CaseInsensitive) ? _list[*match] : QString();
+ }
+
+ QString bestMatch;
+ for (int i = 0, count = _list.size(); i < count; ++i) {
+ const QString &wordB(_list[i]);
+ if (wordB.length() > bestMatch.length() &&
+ word.length() < wordB.length() &&
+ wordB.startsWith(word, Qt::CaseInsensitive))
+ bestMatch = wordB;
+ }
+
+ return bestMatch;
+}
+
+int HunspellWordList::indexOfWord(const QString &word)
+{
+ QMutexLocker guard(&_lock);
+
+ if (!_searchIndex.isEmpty()) {
+ Q_ASSERT(_searchIndex.size() == _list.size());
+
+ SearchContext searchContext(word, _list);
+ auto match = std::lower_bound(_searchIndex.begin(), _searchIndex.end(), -1, [searchContext](int a, int b) {
+ const QString &wordA = (a == -1) ? searchContext.word : searchContext.list[a];
+ const QString &wordB = (b == -1) ? searchContext.word : searchContext.list[b];
+ return wordA.compare(wordB, Qt::CaseInsensitive) < 0;
+ });
+ return (match != _searchIndex.end()) ? *match : -1;
+ }
+
+ return _list.indexOf(word);
+}
+
+QString HunspellWordList::wordAt(int index)
+{
+ QMutexLocker guard(&_lock);
+
+ return index >= 0 && index < _list.size() ? _list.at(index) : QString();
+}
+
+void HunspellWordList::wordAt(int index, QString &word, Flags &flags)
+{
+ QMutexLocker guard(&_lock);
+ Q_ASSERT(index >= 0 && index < _list.size());
+
+ word = _list.at(index);
+ flags = _flags.at(index);
+}
+
+const HunspellWordList::Flags &HunspellWordList::wordFlagsAt(int index)
+{
+ QMutexLocker guard(&_lock);
+
+ return _flags[index];
+}
+
+void HunspellWordList::appendWord(const QString &word, const Flags &flags)
+{
+ QMutexLocker guard(&_lock);
+
+ _searchIndex.clear();
+ if (_limit > 0) {
+ while (_list.size() >= _limit) {
+ _list.removeAt(0);
+ _flags.removeAt(0);
+ }
+ }
+ _list.append(word);
+ _flags.append(flags);
+}
+
+void HunspellWordList::insertWord(int index, const QString &word, const Flags &flags)
+{
+ QMutexLocker guard(&_lock);
+ Q_ASSERT(_limit == 0);
+
+ _searchIndex.clear();
+ _list.insert(index, word);
+ _flags.insert(index, flags);
+}
+
+void HunspellWordList::updateWord(int index, const QString &word, const Flags &flags)
+{
+ Q_ASSERT(index >= 0);
+ QMutexLocker guard(&_lock);
+
+ if (index < _list.size()) {
+ if (word != _list[index])
+ _searchIndex.clear();
+ _list[index] = word;
+ _flags[index] = flags;
+ } else {
+ _searchIndex.clear();
+ _list.append(word);
+ _flags.append(flags);
+ }
+}
+
+void HunspellWordList::moveWord(int from, int to)
+{
+ QMutexLocker guard(&_lock);
+
+ if (from < 0 || from >= _list.size())
+ return;
+ if (to < 0 || to >= _list.size())
+ return;
+ if (from == to)
+ return;
+
+ _searchIndex.clear();
+ _list.move(from, to);
+ _flags.move(from, to);
+}
+
+int HunspellWordList::removeWord(const QString &word)
+{
+ QMutexLocker guard(&_lock);
+ int removeCount = 0;
+ for (int i = 0, count = _list.size(); i < count;) {
+ if (!_list[i].compare(word, Qt::CaseInsensitive)) {
+ _list.removeAt(i);
+ _flags.removeAt(i);
+ --count;
+ ++removeCount;
+ } else {
+ ++i;
+ }
+ }
+ if (removeCount > 0)
+ _searchIndex.clear();
+ return removeCount;
+}
+
+void HunspellWordList::removeWordAt(int index)
+{
+ QMutexLocker guard(&_lock);
+
+ _list.removeAt(index);
+}
+
+void HunspellWordList::rebuildSearchIndex()
+{
+ QMutexLocker guard(&_lock);
+ _searchIndex.clear();
+
+ if (_list.isEmpty())
+ return;
+
+ _searchIndex.resize(_list.size());
+ std::iota(_searchIndex.begin(), _searchIndex.end(), 0);
+
+ const QStringList list(_list);
+ std::sort(_searchIndex.begin(), _searchIndex.end(), [list](int a, int b) { return list[a].compare(list[b], Qt::CaseInsensitive) < 0; });
+}
/*!
\class QtVirtualKeyboard::HunspellTask
@@ -69,9 +360,6 @@ void HunspellLoadDictionaryTask::run()
qCDebug(lcHunspell) << "HunspellLoadDictionaryTask::run(): locale:" << locale;
- QTime perf;
- perf.start();
-
if (*hunspellPtr) {
Hunspell_destroy(*hunspellPtr);
*hunspellPtr = nullptr;
@@ -97,13 +385,11 @@ void HunspellLoadDictionaryTask::run()
by the QTextCodec.
*/
if (!QTextCodec::codecForName(Hunspell_get_dic_encoding(*hunspellPtr))) {
- qCWarning(lcHunspell) << "The Hunspell dictionary" << dicPath << "cannot be used because it uses an unknown text codec" << QString(Hunspell_get_dic_encoding(*hunspellPtr));
+ qCWarning(lcHunspell) << "The Hunspell dictionary" << dicPath << "cannot be used because it uses an unknown text codec" << QLatin1String(Hunspell_get_dic_encoding(*hunspellPtr));
Hunspell_destroy(*hunspellPtr);
*hunspellPtr = nullptr;
}
}
-
- qCDebug(lcHunspell) << "HunspellLoadDictionaryTask::run(): time:" << perf.elapsed() << "ms";
} else {
qCWarning(lcHunspell) << "Hunspell dictionary is missing for" << locale << ". Search paths" << searchPaths;
}
@@ -118,11 +404,11 @@ void HunspellLoadDictionaryTask::run()
void HunspellBuildSuggestionsTask::run()
{
- QTime perf;
- perf.start();
+ if (wordList->isEmpty())
+ return;
- wordList->list.append(word);
- wordList->index = 0;
+ wordList->clearSuggestions();
+ QString word = wordList->wordAt(0);
/* Select text codec based on the dictionary encoding.
Hunspell_get_dic_encoding() should always return at least
@@ -138,34 +424,45 @@ void HunspellBuildSuggestionsTask::run()
/* Collect word candidates from the Hunspell suggestions.
Insert word completions in the beginning of the list.
*/
- const int firstWordCompletionIndex = wordList->list.length();
+ const int firstWordCompletionIndex = wordList->size();
int lastWordCompletionIndex = firstWordCompletionIndex;
bool suggestCapitalization = false;
for (int i = 0; i < n; i++) {
QString wordCandidate(textCodec->toUnicode(slst[i]));
- wordCandidate.replace(QChar(0x2019), '\'');
- if (wordCandidate.compare(word) != 0) {
- QString normalizedWordCandidate = removeAccentsAndDiacritics(wordCandidate);
- /* Prioritize word Capitalization */
- if (!suggestCapitalization && !wordCandidate.compare(word, Qt::CaseInsensitive)) {
- wordList->list.insert(1, wordCandidate);
+ wordCandidate.replace(QChar(0x2019), QLatin1Char('\''));
+ QString normalizedWordCandidate = removeAccentsAndDiacritics(wordCandidate);
+ /* Prioritize word Capitalization */
+ if (!wordCandidate.compare(word, Qt::CaseInsensitive)) {
+ if (suggestCapitalization) {
+ bool wordCandidateIsCapital = wordCandidate.at(0).isUpper();
+ bool wordIsCapital = word.at(0).isUpper();
+ if (wordCandidateIsCapital == wordIsCapital) {
+ if (wordCandidateIsCapital)
+ wordCandidate = wordCandidate.toLower();
+ else
+ wordCandidate[0] = wordCandidate.at(0).toUpper();
+ }
+ wordList->insertWord(1, wordCandidate);
lastWordCompletionIndex++;
suggestCapitalization = true;
- /* Prioritize word completions, missing punctuation or missing accents */
- } else if (normalizedWordCandidate.startsWith(word) ||
- wordCandidate.contains(QChar('\''))) {
- wordList->list.insert(lastWordCompletionIndex++, wordCandidate);
- } else {
- wordList->list.append(wordCandidate);
}
+ /* Prioritize word completions, missing punctuation or missing accents */
+ } else if ((normalizedWordCandidate.length() > word.length() &&
+ normalizedWordCandidate.startsWith(word)) ||
+ wordCandidate.contains(QLatin1Char('\''))) {
+ wordList->insertWord(lastWordCompletionIndex++, wordCandidate);
+ } else {
+ wordList->appendWord(wordCandidate);
}
}
/* Prioritize words with missing spaces next to word completions.
*/
- for (int i = lastWordCompletionIndex; i < wordList->list.length(); i++) {
- if (QString(wordList->list.at(i)).replace(" ", "").compare(word) == 0) {
+ for (int i = lastWordCompletionIndex; i < wordList->size(); i++) {
+ QString wordCandidate(wordList->wordAt(i));
+ if (wordCandidate.contains(QLatin1String(" "))) {
+ wordList->updateWord(i, wordCandidate, wordList->wordFlagsAt(i) | HunspellWordList::CompoundWord);
if (i != lastWordCompletionIndex) {
- wordList->list.move(i, lastWordCompletionIndex);
+ wordList->moveWord(i, lastWordCompletionIndex);
}
lastWordCompletionIndex++;
}
@@ -178,21 +475,28 @@ void HunspellBuildSuggestionsTask::run()
which may be suboptimal for the purpose, but gives some clue
how much the suggested word differs from the given word.
*/
- if (autoCorrect && wordList->list.length() > 1 && (!spellCheck(word) || suggestCapitalization)) {
- if (lastWordCompletionIndex > firstWordCompletionIndex || levenshteinDistance(word, wordList->list.at(firstWordCompletionIndex)) < 3)
- wordList->index = firstWordCompletionIndex;
+ if (autoCorrect && wordList->size() > 1 && (!spellCheck(word) || suggestCapitalization)) {
+ if (lastWordCompletionIndex > firstWordCompletionIndex || levenshteinDistance(word, wordList->wordAt(firstWordCompletionIndex)) < 3)
+ wordList->setIndex(firstWordCompletionIndex);
}
}
Hunspell_free_list(hunspell, &slst, n);
- qCDebug(lcHunspell) << "HunspellBuildSuggestionsTask::run(): time:" << perf.elapsed() << "ms";
+ for (int i = 0, count = wordList->size(); i < count; ++i) {
+ HunspellWordList::Flags flags;
+ wordList->wordAt(i, word, flags);
+ if (flags.testFlag(HunspellWordList::CompoundWord))
+ continue;
+ if (Hunspell_spell(hunspell, textCodec->fromUnicode(word).constData()) != 0)
+ wordList->updateWord(i, word, wordList->wordFlagsAt(i) | HunspellWordList::SpellCheckOk);
+ }
}
bool HunspellBuildSuggestionsTask::spellCheck(const QString &word)
{
if (!hunspell)
return false;
- if (word.contains(QRegularExpression("[0-9]")))
+ if (word.contains(QRegularExpression(QLatin1Literal("[0-9]"))))
return true;
return Hunspell_spell(hunspell, textCodec->fromUnicode(word).constData()) != 0;
}
@@ -243,7 +547,140 @@ QString HunspellBuildSuggestionsTask::removeAccentsAndDiacritics(const QString&
void HunspellUpdateSuggestionsTask::run()
{
- emit updateSuggestions(wordList->list, wordList->index);
+ emit updateSuggestions(wordList, tag);
+}
+
+void HunspellAddWordTask::run()
+{
+ const QTextCodec *textCodec;
+ textCodec = QTextCodec::codecForName(Hunspell_get_dic_encoding(hunspell));
+ if (!textCodec)
+ return;
+
+ QString tmpWord;
+ tmpWord.reserve(64);
+ for (int i = 0, count = wordList->size(); i < count; ++i) {
+ const QString word(wordList->wordAt(i));
+ if (word.length() < 2)
+ continue;
+ Hunspell_add(hunspell, textCodec->fromUnicode(word).constData());
+ if (HunspellAddWordTask::alternativeForm(word, tmpWord))
+ Hunspell_add(hunspell, textCodec->fromUnicode(tmpWord).constData());
+ }
+}
+
+bool HunspellAddWordTask::alternativeForm(const QString &word, QString &alternativeForm)
+{
+ if (word.length() < 2)
+ return false;
+ if (!word.mid(1).isLower())
+ return false;
+
+ const QChar initial(word.at(0));
+ const QChar newInitial = initial.isUpper() ? initial.toLower() : initial.toUpper();
+ if (newInitial == initial)
+ return false;
+
+ alternativeForm.truncate(0);
+ alternativeForm.append(word);
+ alternativeForm[0] = newInitial;
+
+ return true;
+}
+
+void HunspellRemoveWordTask::run()
+{
+ const QTextCodec *textCodec;
+ textCodec = QTextCodec::codecForName(Hunspell_get_dic_encoding(hunspell));
+ if (!textCodec)
+ return;
+
+ QString tmpWord;
+ tmpWord.reserve(64);
+ for (int i = 0, count = wordList->size(); i < count; ++i) {
+ const QString word(wordList->wordAt(i));
+ if (word.isEmpty())
+ continue;
+ Hunspell_remove(hunspell, textCodec->fromUnicode(word).constData());
+ if (HunspellAddWordTask::alternativeForm(word, tmpWord))
+ Hunspell_remove(hunspell, textCodec->fromUnicode(tmpWord).constData());
+ }
+}
+
+void HunspellLoadWordListTask::run()
+{
+ wordList->clear();
+
+ QFile inputFile(filePath);
+ if (inputFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
+ QTextStream inStream(&inputFile);
+ inStream.setCodec(QTextCodec::codecForName("UTF-8"));
+ QString word;
+ word.reserve(64);
+ while (inStream.readLineInto(&word)) {
+ if (!word.isEmpty())
+ wordList->appendWord(word);
+ }
+ inputFile.close();
+ }
+}
+
+void HunspellSaveWordListTask::run()
+{
+ QFile outputFile(filePath);
+ if (!QFileInfo::exists(filePath))
+ QDir().mkpath(QFileInfo(filePath).absoluteDir().path());
+ if (outputFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
+ QTextStream outStream(&outputFile);
+ outStream.setCodec(QTextCodec::codecForName("UTF-8"));
+ for (int i = 0, count = wordList->size(); i < count; ++i) {
+ const QString word(wordList->wordAt(i));
+ outStream << word.toUtf8() << '\n';
+ }
+ outputFile.close();
+ }
+}
+
+void HunspellFilterWordTask::run()
+{
+ if (filterList->isEmpty())
+ return;
+
+ filterList->rebuildSearchIndex();
+
+ for (int i = startIndex, count = wordList->size(); i < count;) {
+ if (filterList->contains(wordList->wordAt(i))) {
+ wordList->removeWordAt(i);
+ --count;
+ } else {
+ ++i;
+ }
+ }
+}
+
+void HunspellBoostWordTask::run()
+{
+ if (boostList->isEmpty())
+ return;
+
+ boostList->rebuildSearchIndex();
+
+ const QString word(wordList->wordAt(0));
+ const QString wordCompletion(boostList->findWordCompletion(word));
+ if (!wordCompletion.isEmpty()) {
+ int from = wordList->indexOfWord(wordCompletion);
+ if (from != 1) {
+ int to;
+ for (to = 1; to < wordList->size() && wordList->wordAt(to).startsWith(word); ++to)
+ ;
+ if (from != -1) {
+ if (to < from)
+ wordList->moveWord(from, to);
+ } else {
+ wordList->insertWord(to, wordCompletion, HunspellWordList::SpellCheckOk);
+ }
+ }
+ }
}
/*!
@@ -253,11 +690,13 @@ void HunspellUpdateSuggestionsTask::run()
HunspellWorker::HunspellWorker(QObject *parent) :
QThread(parent),
+ idleSema(),
taskSema(),
taskLock(),
hunspell(nullptr)
{
abort = false;
+ qRegisterMetaType<QSharedPointer<HunspellWordList>>("QSharedPointer<HunspellWordList>");
}
HunspellWorker::~HunspellWorker()
@@ -282,12 +721,30 @@ void HunspellWorker::removeAllTasks()
taskList.clear();
}
+void HunspellWorker::waitForAllTasks()
+{
+ qCDebug(lcHunspell) << "waitForAllTasks enter";
+ while (isRunning()) {
+ idleSema.acquire();
+ QMutexLocker guard(&taskLock);
+ if (taskList.isEmpty()) {
+ idleSema.release();
+ break;
+ }
+ idleSema.release();
+ }
+ qCDebug(lcHunspell) << "waitForAllTasks leave";
+}
+
void HunspellWorker::run()
{
+ QTime perf;
while (!abort) {
+ idleSema.release();
taskSema.acquire();
if (abort)
break;
+ idleSema.acquire();
QSharedPointer<HunspellTask> currentTask;
{
QMutexLocker guard(&taskLock);
@@ -304,7 +761,9 @@ void HunspellWorker::run()
currentTask->hunspell = hunspell;
else
continue;
+ perf.start();
currentTask->run();
+ qCDebug(lcHunspell) << QString(QLatin1String(currentTask->metaObject()->className()) + "::run(): time:").toLatin1().constData() << perf.elapsed() << "ms";
}
}
if (hunspell) {