From 580797d1f27bd4bdcac874ab06d2b67a491b50e3 Mon Sep 17 00:00:00 2001
From: Michal Klocek
Date: Tue, 19 Apr 2016 19:32:51 +0200
Subject: Adds qwebengine_convert_dict tool

This is a tool which converts hunspell 'dic' dictionaries
to binary 'bdic' format, which is required by spellchecker
in chromium. Tool is compiled as a qt console application.
It reuses linking information produced by gyp.

This tool cannot be used in cross-builds due to limitations
of the gyp-based build system.

Change-Id: Ibee8cbc6048b522c0e4fe22b21c91c649c8515bc
Reviewed-by: Joerg Bornemann
---
Reviewer note: re-flowed from a whitespace-collapsed copy of the patch, so the
indentation of context lines is a best guess. main.cpp and the .pro file were
edited during review; the blob ids on their "index" lines were not recomputed.

 src/core/qtwebengine.gypi                          |   2 +-
 src/src.pro                                        |   7 +
 src/tools/qwebengine_convert_dict/main.cpp         | 210 +++++++++++++++++++++
 .../qwebengine_convert_dict.pro                    |  22 +++
 4 files changed, 240 insertions(+), 1 deletion(-)
 create mode 100644 src/tools/qwebengine_convert_dict/main.cpp
 create mode 100644 src/tools/qwebengine_convert_dict/qwebengine_convert_dict.pro

diff --git a/src/core/qtwebengine.gypi b/src/core/qtwebengine.gypi
index 7ed12cadb..4077431b1 100644
--- a/src/core/qtwebengine.gypi
+++ b/src/core/qtwebengine.gypi
@@ -8,7 +8,7 @@
   'dependencies': [
     '<(chromium_src_dir)/base/base.gyp:base',
     '<(chromium_src_dir)/base/third_party/dynamic_annotations/dynamic_annotations.gyp:dynamic_annotations',
-    '<(chromium_src_dir)/chrome/tools/convert_dict/convert_dict.gyp:convert_dict',
+    '<(chromium_src_dir)/chrome/tools/convert_dict/convert_dict.gyp:convert_dict_lib',
     '<(chromium_src_dir)/components/components.gyp:devtools_discovery',
     '<(chromium_src_dir)/components/components.gyp:devtools_http_handler',
     '<(chromium_src_dir)/components/components.gyp:error_page_renderer',
diff --git a/src/src.pro b/src/src.pro
index 6a6a6abb8..64c1703fe 100644
--- a/src/src.pro
+++ b/src/src.pro
@@ -17,6 +17,13 @@ SUBDIRS += core \
            webengine_experimental_plugin \
            plugins
 
+# allow only desktop builds of qwebengine_convert_dict
+# osx does not use hunspell
+!contains(WEBENGINE_CONFIG, no_spellcheck):!osx:!cross_compile {
+    SUBDIRS += qwebengine_convert_dict
+    qwebengine_convert_dict.subdir = tools/qwebengine_convert_dict
+    qwebengine_convert_dict.depends = core
+}
 
 isQMLTestSupportApiEnabled() {
     webengine_testsupport_plugin.subdir = webengine/plugin/testsupport
diff --git a/src/tools/qwebengine_convert_dict/main.cpp b/src/tools/qwebengine_convert_dict/main.cpp
new file mode 100644
index 000000000..2142b5f0d
--- /dev/null
+++ b/src/tools/qwebengine_convert_dict/main.cpp
@@ -0,0 +1,210 @@
+/******************************************************************************
+** This is just a slightly modified version of convert_dict.cc
+** chromium/chrome/tools/convert_dict/convert_dict.cc
+**
+** Original work:
+** Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+** Modified work:
+** Copyright (C) 2016 The Qt Company Ltd.
+**
+** Use of this source code is governed by a BSD-style license that can be
+** found in the LICENSE file.
+**
+** This tool converts Hunspell .aff/.dic pairs to a combined binary dictionary
+** format (.bdic). This format is more compact, and can be more efficiently
+** read by the client application.
+**
+******************************************************************************/
+
+// NOTE(review): text inside angle brackets (header names, cast target types,
+// usage placeholders) was missing from the reviewed copy of this file and has
+// been reconstructed from the symbols used below - confirm against upstream.
+#include <base/at_exit.h>
+#include <base/files/file_path.h>
+#include <base/files/file_util.h>
+#include <base/i18n/icu_util.h>
+#include <base/path_service.h>
+#include <build/build_config.h>
+#include <chrome/tools/convert_dict/aff_reader.h>
+#include <chrome/tools/convert_dict/dic_reader.h>
+#include <third_party/hunspell/google/bdict_reader.h>
+#include <third_party/hunspell/google/bdict_writer.h>
+
+#include <QTextStream>
+#include <QLibraryInfo>
+#include <QDir>
+
+// see also src/core/type_conversion.h
+// Converts a Qt path string to base::FilePath's string type: a wide string with
+// native separators when Q_OS_WIN is defined, QString::toStdString() otherwise.
+inline base::FilePath::StringType toFilePathString(const QString &str)
+{
+#if defined(Q_OS_WIN)
+    return QDir::toNativeSeparators(str).toStdWString();
+#else
+    return str.toStdString();
+#endif
+}
+
+// Builds a base::FilePath from a Qt path string.
+inline base::FilePath toFilePath(const QString &str)
+{
+    return base::FilePath(toFilePathString(str));
+}
+
+// Converts a base::string16 to a QString: through QString::fromStdWString when
+// OS_WIN is defined, through QString::fromUtf16 otherwise.
+inline QString toQt(const base::string16 &string)
+{
+#if defined(OS_WIN)
+    return QString::fromStdWString(string.data());
+#else
+    return QString::fromUtf16(string.data());
+#endif
+}
+
+// Converts a std::string to a QString through QString::fromStdString.
+inline QString toQt(const std::string &string)
+{
+    return QString::fromStdString(string);
+}
+
+// Compares the given word list with the serialized trie to make sure they
+// are the same.
+//
+// org_words:  the words, each paired with its affix indices, to look for.
+// serialized: the BDict data to check.
+// out:        receives a message describing the first mismatch found.
+//
+// Returns true when the BDict is valid and its word iterator yields every entry
+// of org_words, in order, with identical affix indices. Words the iterator may
+// still hold after org_words is exhausted are not inspected.
+inline bool VerifyWords(const convert_dict::DicReader::WordList& org_words,
+                        const std::string& serialized, QTextStream& out)
+{
+    hunspell::BDictReader reader;
+    if (!reader.Init(reinterpret_cast<const unsigned char*>(serialized.data()),
+                     serialized.size())) {
+        out << "BDict is invalid" << endl;
+        return false;
+    }
+    hunspell::WordIterator iter = reader.GetAllWordIterator();
+
+    int affix_ids[hunspell::BDict::MAX_AFFIXES_PER_WORD];
+
+    static const int buf_size = 128;
+    char buf[buf_size];
+    for (size_t i = 0; i < org_words.size(); i++) {
+        int affix_matches = iter.Advance(buf, buf_size, affix_ids);
+        if (affix_matches == 0) {
+            out << "Found the end before we expected" << endl;
+            return false;
+        }
+
+        if (org_words[i].first != buf) {
+            out << "Word doesn't match, word #" << buf << endl;
+            return false;
+        }
+
+        if (affix_matches != static_cast<int>(org_words[i].second.size())) {
+            out << "Different number of affix indices, word #" << buf << endl;
+            return false;
+        }
+
+        // Check the individual affix indices.
+        for (size_t affix_index = 0; affix_index < org_words[i].second.size();
+             affix_index++) {
+            if (affix_ids[affix_index] != org_words[i].second[affix_index]) {
+                out << "Index doesn't match, word #" << buf << endl;
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+// Converts the .dic/.aff pair named by argv[1] into the .bdic file named by
+// argv[2]. The .aff and .dic paths are derived from argv[1] by replacing its
+// extension. Returns 0 on success and 1 on a usage error or on any read,
+// verification or write failure.
+int main(int argc, char *argv[])
+{
+    QTextStream out(stdout);
+
+    if (argc != 3) {
+        out << "Usage: qwebengine_convert_dict <dic file> <bdic file>\n\nExample:\n"
+               "qwebengine_convert_dict ./en-US.dic ./en-US.bdic\nwill read en-US.dic, "
+               "en-US.dic_delta, and en-US.aff from the current directory and generate "
+               "en-US.bdic\n" << endl;
+        return 1;
+    }
+
+    PathService::Override(base::DIR_QT_LIBRARY_DATA,
+                          toFilePath(QLibraryInfo::location(QLibraryInfo::DataPath) %
+                                     QLatin1String("/resources")));
+
+    base::AtExitManager exit_manager;
+    base::i18n::InitializeICU();
+
+    base::FilePath file_in_path = toFilePath(argv[1]);
+    base::FilePath file_out_path = toFilePath(argv[2]);
+    base::FilePath aff_path = file_in_path.ReplaceExtension(FILE_PATH_LITERAL(".aff"));
+
+    out << "Reading " << toQt(aff_path.value()) << endl;
+    convert_dict::AffReader aff_reader(aff_path);
+
+    if (!aff_reader.Read()) {
+        out << "Unable to read the aff file." << endl;
+        return 1;
+    }
+
+    base::FilePath dic_path = file_in_path.ReplaceExtension(FILE_PATH_LITERAL(".dic"));
+    out << "Reading " << toQt(dic_path.value()) << endl;
+
+    // DicReader will also read the .dic_delta file.
+    convert_dict::DicReader dic_reader(dic_path);
+    if (!dic_reader.Read(&aff_reader)) {
+        out << "Unable to read the dic file." << endl;
+        return 1;
+    }
+
+    hunspell::BDictWriter writer;
+    writer.SetComment(aff_reader.comments());
+    writer.SetAffixRules(aff_reader.affix_rules());
+    writer.SetAffixGroups(aff_reader.GetAffixGroups());
+    writer.SetReplacements(aff_reader.replacements());
+    writer.SetOtherCommands(aff_reader.other_commands());
+    writer.SetWords(dic_reader.words());
+
+    out << "Serializing..." << endl;
+
+    std::string serialized = writer.GetBDict();
+
+    out << "Verifying..." << endl;
+
+    if (!VerifyWords(dic_reader.words(), serialized, out)) {
+        out << "ERROR converting, the dictionary does not check out OK." << endl;
+        return 1;
+    }
+
+    out << "Writing " << toQt(file_out_path.value()) << endl;
+    FILE *out_file = base::OpenFile(file_out_path, "wb");
+    if (!out_file) {
+        out << "ERROR writing file" << endl;
+        return 1;
+    }
+
+    // Check the write and the close at run time rather than with a debug-only
+    // assertion, so that a truncated .bdic is never reported as a success.
+    const size_t written = fwrite(serialized.data(), 1, serialized.size(), out_file);
+    const bool closed = base::CloseFile(out_file);
+    if (written != serialized.size() || !closed) {
+        out << "ERROR writing file" << endl;
+        return 1;
+    }
+
+    out << "Success. Dictionary converted." << endl;
+    return 0;
+}
+
diff --git a/src/tools/qwebengine_convert_dict/qwebengine_convert_dict.pro b/src/tools/qwebengine_convert_dict/qwebengine_convert_dict.pro
new file mode 100644
index 000000000..de125cc76
--- /dev/null
+++ b/src/tools/qwebengine_convert_dict/qwebengine_convert_dict.pro
@@ -0,0 +1,22 @@
+option(host_build)
+
+# Look for linking information produced by gyp for our target according to core_generated.gyp
+!include($$OUT_PWD/../../core/$$getConfigDir()/QtWebEngineCore_linking.pri) {
+    error("Could not find the linking information that gyp should have generated.")
+}
+# remove object files from linking information
+OBJECTS =
+
+# Fixme: -Werror=unused-parameter in core
+QMAKE_CXXFLAGS_WARN_ON =
+
+# Issue with some template compilation, somebody smart should look at it
+win32: DEFINES += NOMINMAX
+
+CHROMIUM_SRC_DIR = $$QTWEBENGINE_ROOT/$$getChromiumSrcDir()
+INCLUDEPATH += $$CHROMIUM_SRC_DIR
+
+SOURCES += \
+    main.cpp
+
+load(qt_tool)
-- 
cgit v1.2.3