diff options
author | Tuomas Tuononen <tuomas.tuononen@code-q.fi> | 2015-09-21 12:20:13 +0300 |
---|---|---|
committer | Tuomas Tuononen <tuomas.tuononen@code-q.fi> | 2015-10-16 14:16:29 +0000 |
commit | b728b8fbcd7d6fe501f33f41ffbaac45e6771909 (patch) | |
tree | acf04ea92bfb7362977e480b1e9cd049f24a4170 | |
parent | cee89070011a5b0d5f907a6acaf965f7cc1eecd1 (diff) |
SpeechRecognition: Add support for resetting engine adaptation state
- Add QSpeechRecognitionEngine::resetAdaptationState()
- Add QSpeechRecognitionPluginEngine::resetAdaptationState()
- Implement the feature for PocketSphinx
Change-Id: I68411064fb99014558be40f9187a453fcc546799
Reviewed-by: Andrew Knight <andrew.knight@intopalo.com>
-rwxr-xr-x | src/asr/qspeechrecognition.cpp | 2 | ||||
-rwxr-xr-x | src/asr/qspeechrecognition_p.h | 7 | ||||
-rwxr-xr-x | src/asr/qspeechrecognitionengine.cpp | 16 | ||||
-rw-r--r-- | src/asr/qspeechrecognitionengine.h | 1 | ||||
-rwxr-xr-x | src/asr/qspeechrecognitionengine_p.h | 2 | ||||
-rwxr-xr-x | src/asr/qspeechrecognitionmanager.cpp | 6 | ||||
-rwxr-xr-x | src/asr/qspeechrecognitionmanager_p.h | 1 | ||||
-rwxr-xr-x | src/asr/qspeechrecognitionpluginengine.cpp | 7 | ||||
-rw-r--r-- | src/asr/qspeechrecognitionpluginengine.h | 1 | ||||
-rwxr-xr-x | src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp | 18 | ||||
-rwxr-xr-x | src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h | 3 |
11 files changed, 64 insertions, 0 deletions
diff --git a/src/asr/qspeechrecognition.cpp b/src/asr/qspeechrecognition.cpp index ad7c39f..2d3a53d 100755 --- a/src/asr/qspeechrecognition.cpp +++ b/src/asr/qspeechrecognition.cpp @@ -343,6 +343,7 @@ QSpeechRecognitionEngine *QSpeechRecognition::createEngine(const QString &name, return 0; // Already exists QSpeechRecognitionEngineImpl *engine = new QSpeechRecognitionEngineImpl(name, this); connect(engine, &QSpeechRecognitionEngineImpl::requestSetParameter, d->m_managerInterface, &QSpeechRecognitionManagerInterface::onSetEngineParameter); + connect(engine, &QSpeechRecognitionEngineImpl::requestResetAdaptationState, d->m_managerInterface, &QSpeechRecognitionManagerInterface::onResetEngineAdaptationState); d->m_engines.insert(name, engine); emit d->m_managerInterface->createEngine(name, providerName, parameters); return engine; @@ -657,6 +658,7 @@ QSpeechRecognitionPrivate::QSpeechRecognitionPrivate(): QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::reset, m_manager, &QSpeechRecognitionManager::reset, Qt::QueuedConnection); QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::dispatchMessage, m_manager, &QSpeechRecognitionManager::dispatchMessage, Qt::QueuedConnection); QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::setEngineParameter, m_manager, &QSpeechRecognitionManager::setEngineParameter, Qt::QueuedConnection); + QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::resetEngineAdaptationState, m_manager, &QSpeechRecognitionManager::resetEngineAdaptationState, Qt::QueuedConnection); m_managerThread->start(); } diff --git a/src/asr/qspeechrecognition_p.h b/src/asr/qspeechrecognition_p.h index 60f01ab..fad6125 100755 --- a/src/asr/qspeechrecognition_p.h +++ b/src/asr/qspeechrecognition_p.h @@ -166,6 +166,12 @@ public slots: if (engine) emit setEngineParameter(engine->name(), key, value); } + void onResetEngineAdaptationState() + { + QSpeechRecognitionEngineImpl *engine = 
qobject_cast<QSpeechRecognitionEngineImpl *>(QObject::sender()); + if (engine) + emit resetEngineAdaptationState(engine->name()); + } signals: void setSession(int session); @@ -181,6 +187,7 @@ signals: void reset(); void dispatchMessage(const QString &engineName, const QString &message, const QVariantMap &parameters); void setEngineParameter(const QString &engineName, const QString &key, const QVariant &value); + void resetEngineAdaptationState(const QString &engineName); private: QSpeechRecognitionPrivate *m_speech; diff --git a/src/asr/qspeechrecognitionengine.cpp b/src/asr/qspeechrecognitionengine.cpp index 5ecdce6..f7d822c 100755 --- a/src/asr/qspeechrecognitionengine.cpp +++ b/src/asr/qspeechrecognitionengine.cpp @@ -250,6 +250,17 @@ Q_DEFINE_ASR_ENGINE_PARAMETER(AudioInputDevices); \sa supportedParameters() */ +/*! + \fn void QSpeechRecognitionEngine::resetAdaptationState() + + Reset engine adaptation state to its initial value. + + If the engine has been adapted to user speech or audio path features, + the adaptation state is returned where it was after the first initialization, + before any audio data was fed to it. Any files that are used for storing the + adaptation data are deleted. 
+*/ + QSpeechRecognitionEngine::QSpeechRecognitionEngine(QObject *parent): QObject(parent) { @@ -291,6 +302,11 @@ QVariant QSpeechRecognitionEngineImpl::parameter(const QString &key) const return m_parameters.value(key); } +void QSpeechRecognitionEngineImpl::resetAdaptationState() +{ + emit requestResetAdaptationState(); +} + bool QSpeechRecognitionEngineImpl::isCreated() { return m_created; diff --git a/src/asr/qspeechrecognitionengine.h b/src/asr/qspeechrecognitionengine.h index 7681f17..afea163 100644 --- a/src/asr/qspeechrecognitionengine.h +++ b/src/asr/qspeechrecognitionengine.h @@ -61,6 +61,7 @@ public: Q_INVOKABLE virtual bool setParameter(const QString &key, const QVariant &value) = 0; Q_INVOKABLE virtual QVariant parameter(const QString &key) const = 0; Q_INVOKABLE virtual QList<QString> supportedParameters() const = 0; + Q_INVOKABLE virtual void resetAdaptationState() = 0; virtual bool isCreated() = 0; // Common engine parameter keys: diff --git a/src/asr/qspeechrecognitionengine_p.h b/src/asr/qspeechrecognitionengine_p.h index 7944865..8a23837 100755 --- a/src/asr/qspeechrecognitionengine_p.h +++ b/src/asr/qspeechrecognitionengine_p.h @@ -56,6 +56,7 @@ public: bool setParameter(const QString &key, const QVariant &value); QList<QString> supportedParameters() const; QVariant parameter(const QString &key) const; + void resetAdaptationState(); bool isCreated(); // Internal API: void setCreated(bool created); @@ -65,6 +66,7 @@ public: QVariantMap m_parameters; signals: void requestSetParameter(const QString &key, const QVariant &value); + void requestResetAdaptationState(); }; QT_END_NAMESPACE diff --git a/src/asr/qspeechrecognitionmanager.cpp b/src/asr/qspeechrecognitionmanager.cpp index f3738db..687bafe 100755 --- a/src/asr/qspeechrecognitionmanager.cpp +++ b/src/asr/qspeechrecognitionmanager.cpp @@ -326,6 +326,12 @@ void QSpeechRecognitionManager::setEngineParameter(const QString &engineName, co } } +void 
QSpeechRecognitionManager::resetEngineAdaptationState(const QString &engineName) +{ + if (QSpeechRecognitionPluginEngine *engine = m_engines.value(engineName, 0)) + engine->resetAdaptationState(); +} + void QSpeechRecognitionManager::onProcess() { QSet<QSpeechRecognitionPluginEngine*> engines = m_enginesToProcess; diff --git a/src/asr/qspeechrecognitionmanager_p.h b/src/asr/qspeechrecognitionmanager_p.h index 79f561d..86b2b7c 100755 --- a/src/asr/qspeechrecognitionmanager_p.h +++ b/src/asr/qspeechrecognitionmanager_p.h @@ -99,6 +99,7 @@ public slots: void reset(); void dispatchMessage(const QString &engineName, const QString &message, const QVariantMap &parameters); void setEngineParameter(const QString &engineName, const QString &key, const QVariant &value); + void resetEngineAdaptationState(const QString &engineName); private slots: void onProcess(); diff --git a/src/asr/qspeechrecognitionpluginengine.cpp b/src/asr/qspeechrecognitionpluginengine.cpp index 5cdb30b..7bd1f85 100755 --- a/src/asr/qspeechrecognitionpluginengine.cpp +++ b/src/asr/qspeechrecognitionpluginengine.cpp @@ -161,6 +161,13 @@ QT_BEGIN_NAMESPACE if \l stopListening() was called before this method. */ +/*! \fn void QSpeechRecognitionPluginEngine::resetAdaptationState() + + Reset engine adaptation state to its initial value. + + See QSpeechRecognitionEngine::resetAdaptationState(). +*/ + /*! \fn bool QSpeechRecognitionPluginEngine::process() Performs any run-time tasks the engine needs to do, like audio processing. 
diff --git a/src/asr/qspeechrecognitionpluginengine.h b/src/asr/qspeechrecognitionpluginengine.h index c7dba78..1a90719 100644 --- a/src/asr/qspeechrecognitionpluginengine.h +++ b/src/asr/qspeechrecognitionpluginengine.h @@ -71,6 +71,7 @@ public: virtual void abortListening() = 0; virtual void unmute(qint64 timestamp) = 0; virtual void reset() = 0; + virtual void resetAdaptationState() = 0; virtual bool process() = 0; Q_SIGNALS: diff --git a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp index 8f6a799..7320b9b 100755 --- a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp +++ b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp @@ -88,6 +88,7 @@ QSpeechRecognitionEnginePocketSphinx::QSpeechRecognitionEnginePocketSphinx(const m_debugAudioFile(0), m_sessionStarted(false), m_cmnVec(0), + m_initialCmnVec(0), m_cmnSize(0) { const QVariantMap &engineParams = QSpeechRecognitionPluginEngine::parameters(); @@ -108,6 +109,7 @@ QSpeechRecognitionEnginePocketSphinx::~QSpeechRecognitionEnginePocketSphinx() disconnect(&m_inputFileDecoder); ps_free(m_decoder); delete[] m_cmnVec; + delete[] m_initialCmnVec; } bool QSpeechRecognitionEnginePocketSphinx::init(QString *errorString) @@ -166,6 +168,8 @@ bool QSpeechRecognitionEnginePocketSphinx::init(QString *errorString) feat_t *feat = ps_get_feat(m_decoder); m_cmnSize = feat_cepsize(feat); m_cmnVec = new mfcc_t[m_cmnSize]; + m_initialCmnVec = new mfcc_t[m_cmnSize]; + cmn_prior_get(feat->cmn_struct, m_initialCmnVec); m_cmnFilePath = dataDirectory().absoluteFilePath(QLatin1String("pocketsphinx_") + name() + QLatin1String("_cmn")); // Attempt to load adapted cepstrum means from the data file. 
The default values are not @@ -375,6 +379,11 @@ void QSpeechRecognitionEnginePocketSphinx::reset() m_grammar = 0; } +void QSpeechRecognitionEnginePocketSphinx::resetAdaptationState() +{ + resetCmn(); +} + bool QSpeechRecognitionEnginePocketSphinx::process() { if (m_sessionStarted && !m_muted) @@ -545,4 +554,13 @@ void QSpeechRecognitionEnginePocketSphinx::loadCmn() dataFile.close(); } +// Reset cepstrum means to their default values +void QSpeechRecognitionEnginePocketSphinx::resetCmn() +{ + feat_t *feat = ps_get_feat(m_decoder); + cmn_prior_set(feat->cmn_struct, m_initialCmnVec); + QFile dataFile(m_cmnFilePath); + dataFile.remove(); +} + QT_END_NAMESPACE diff --git a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h index 899130c..eb13998 100755 --- a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h +++ b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h @@ -67,6 +67,7 @@ public: void abortListening(); void unmute(qint64 timestamp); void reset(); + void resetAdaptationState(); bool process(); // Internal API: @@ -89,6 +90,7 @@ private: void processAudio(const void *data, size_t dataSize); void storeCmn(); void loadCmn(); + void resetCmn(); int m_session; bool m_muted; ps_decoder_t *m_decoder; @@ -104,6 +106,7 @@ private: bool m_sessionStarted; QString m_cmnFilePath; mfcc_t *m_cmnVec; + mfcc_t *m_initialCmnVec; int m_cmnSize; }; |