summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTuomas Tuononen <tuomas.tuononen@code-q.fi>2015-09-21 12:20:13 +0300
committerTuomas Tuononen <tuomas.tuononen@code-q.fi>2015-10-16 14:16:29 +0000
commitb728b8fbcd7d6fe501f33f41ffbaac45e6771909 (patch)
treeacf04ea92bfb7362977e480b1e9cd049f24a4170
parentcee89070011a5b0d5f907a6acaf965f7cc1eecd1 (diff)
SpeechRecognition: Add support for resetting engine adaptation state
- Add QSpeechRecognitionEngine::resetAdaptationState()
- Add QSpeechRecognitionPluginEngine::resetAdaptationState()
- Implement the feature for PocketSphinx

Change-Id: I68411064fb99014558be40f9187a453fcc546799
Reviewed-by: Andrew Knight <andrew.knight@intopalo.com>
-rwxr-xr-xsrc/asr/qspeechrecognition.cpp2
-rwxr-xr-xsrc/asr/qspeechrecognition_p.h7
-rwxr-xr-xsrc/asr/qspeechrecognitionengine.cpp16
-rw-r--r--src/asr/qspeechrecognitionengine.h1
-rwxr-xr-xsrc/asr/qspeechrecognitionengine_p.h2
-rwxr-xr-xsrc/asr/qspeechrecognitionmanager.cpp6
-rwxr-xr-xsrc/asr/qspeechrecognitionmanager_p.h1
-rwxr-xr-xsrc/asr/qspeechrecognitionpluginengine.cpp7
-rw-r--r--src/asr/qspeechrecognitionpluginengine.h1
-rwxr-xr-xsrc/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp18
-rwxr-xr-xsrc/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h3
11 files changed, 64 insertions, 0 deletions
diff --git a/src/asr/qspeechrecognition.cpp b/src/asr/qspeechrecognition.cpp
index ad7c39f..2d3a53d 100755
--- a/src/asr/qspeechrecognition.cpp
+++ b/src/asr/qspeechrecognition.cpp
@@ -343,6 +343,7 @@ QSpeechRecognitionEngine *QSpeechRecognition::createEngine(const QString &name,
return 0; // Already exists
QSpeechRecognitionEngineImpl *engine = new QSpeechRecognitionEngineImpl(name, this);
connect(engine, &QSpeechRecognitionEngineImpl::requestSetParameter, d->m_managerInterface, &QSpeechRecognitionManagerInterface::onSetEngineParameter);
+ connect(engine, &QSpeechRecognitionEngineImpl::requestResetAdaptationState, d->m_managerInterface, &QSpeechRecognitionManagerInterface::onResetEngineAdaptationState);
d->m_engines.insert(name, engine);
emit d->m_managerInterface->createEngine(name, providerName, parameters);
return engine;
@@ -657,6 +658,7 @@ QSpeechRecognitionPrivate::QSpeechRecognitionPrivate():
QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::reset, m_manager, &QSpeechRecognitionManager::reset, Qt::QueuedConnection);
QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::dispatchMessage, m_manager, &QSpeechRecognitionManager::dispatchMessage, Qt::QueuedConnection);
QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::setEngineParameter, m_manager, &QSpeechRecognitionManager::setEngineParameter, Qt::QueuedConnection);
+ QObject::connect(m_managerInterface, &QSpeechRecognitionManagerInterface::resetEngineAdaptationState, m_manager, &QSpeechRecognitionManager::resetEngineAdaptationState, Qt::QueuedConnection);
m_managerThread->start();
}
diff --git a/src/asr/qspeechrecognition_p.h b/src/asr/qspeechrecognition_p.h
index 60f01ab..fad6125 100755
--- a/src/asr/qspeechrecognition_p.h
+++ b/src/asr/qspeechrecognition_p.h
@@ -166,6 +166,12 @@ public slots:
if (engine)
emit setEngineParameter(engine->name(), key, value);
}
+ // Slot: re-emits a reset request as resetEngineAdaptationState(<engine name>).
+ // The engine is identified from QObject::sender(); if the sender is not a
+ // QSpeechRecognitionEngineImpl, nothing is emitted.
+ void onResetEngineAdaptationState()
+ {
+     QSpeechRecognitionEngineImpl *source = qobject_cast<QSpeechRecognitionEngineImpl *>(QObject::sender());
+     if (!source)
+         return;
+     emit resetEngineAdaptationState(source->name());
+ }
signals:
void setSession(int session);
@@ -181,6 +187,7 @@ signals:
void reset();
void dispatchMessage(const QString &engineName, const QString &message, const QVariantMap &parameters);
void setEngineParameter(const QString &engineName, const QString &key, const QVariant &value);
+ void resetEngineAdaptationState(const QString &engineName);
private:
QSpeechRecognitionPrivate *m_speech;
diff --git a/src/asr/qspeechrecognitionengine.cpp b/src/asr/qspeechrecognitionengine.cpp
index 5ecdce6..f7d822c 100755
--- a/src/asr/qspeechrecognitionengine.cpp
+++ b/src/asr/qspeechrecognitionengine.cpp
@@ -250,6 +250,17 @@ Q_DEFINE_ASR_ENGINE_PARAMETER(AudioInputDevices);
\sa supportedParameters()
*/
+/*!
+ \fn void QSpeechRecognitionEngine::resetAdaptationState()
+
+ Reset engine adaptation state to its initial value.
+
+ If the engine has been adapted to user speech or audio path features,
+ the adaptation state is returned to what it was after the first initialization,
+ before any audio data was fed to it. Any files that are used for storing the
+ adaptation data are deleted.
+*/
+
QSpeechRecognitionEngine::QSpeechRecognitionEngine(QObject *parent):
QObject(parent)
{
@@ -291,6 +302,11 @@ QVariant QSpeechRecognitionEngineImpl::parameter(const QString &key) const
return m_parameters.value(key);
}
+// Application-facing implementation of resetAdaptationState(): it performs no
+// reset work itself and only raises requestResetAdaptationState() for whoever
+// is connected to that signal.
+void QSpeechRecognitionEngineImpl::resetAdaptationState()
+{
+    emit requestResetAdaptationState();
+}
+
bool QSpeechRecognitionEngineImpl::isCreated()
{
return m_created;
diff --git a/src/asr/qspeechrecognitionengine.h b/src/asr/qspeechrecognitionengine.h
index 7681f17..afea163 100644
--- a/src/asr/qspeechrecognitionengine.h
+++ b/src/asr/qspeechrecognitionengine.h
@@ -61,6 +61,7 @@ public:
Q_INVOKABLE virtual bool setParameter(const QString &key, const QVariant &value) = 0;
Q_INVOKABLE virtual QVariant parameter(const QString &key) const = 0;
Q_INVOKABLE virtual QList<QString> supportedParameters() const = 0;
+ Q_INVOKABLE virtual void resetAdaptationState() = 0;
virtual bool isCreated() = 0;
// Common engine parameter keys:
diff --git a/src/asr/qspeechrecognitionengine_p.h b/src/asr/qspeechrecognitionengine_p.h
index 7944865..8a23837 100755
--- a/src/asr/qspeechrecognitionengine_p.h
+++ b/src/asr/qspeechrecognitionengine_p.h
@@ -56,6 +56,7 @@ public:
bool setParameter(const QString &key, const QVariant &value);
QList<QString> supportedParameters() const;
QVariant parameter(const QString &key) const;
+ void resetAdaptationState();
bool isCreated();
// Internal API:
void setCreated(bool created);
@@ -65,6 +66,7 @@ public:
QVariantMap m_parameters;
signals:
void requestSetParameter(const QString &key, const QVariant &value);
+ void requestResetAdaptationState();
};
QT_END_NAMESPACE
diff --git a/src/asr/qspeechrecognitionmanager.cpp b/src/asr/qspeechrecognitionmanager.cpp
index f3738db..687bafe 100755
--- a/src/asr/qspeechrecognitionmanager.cpp
+++ b/src/asr/qspeechrecognitionmanager.cpp
@@ -326,6 +326,12 @@ void QSpeechRecognitionManager::setEngineParameter(const QString &engineName, co
}
}
+// Resets the adaptation state of the plugin engine registered under
+// \a engineName. Names that are not present in m_engines are ignored.
+void QSpeechRecognitionManager::resetEngineAdaptationState(const QString &engineName)
+{
+    QSpeechRecognitionPluginEngine *target = m_engines.value(engineName, 0);
+    if (!target)
+        return;
+    target->resetAdaptationState();
+}
+
void QSpeechRecognitionManager::onProcess()
{
QSet<QSpeechRecognitionPluginEngine*> engines = m_enginesToProcess;
diff --git a/src/asr/qspeechrecognitionmanager_p.h b/src/asr/qspeechrecognitionmanager_p.h
index 79f561d..86b2b7c 100755
--- a/src/asr/qspeechrecognitionmanager_p.h
+++ b/src/asr/qspeechrecognitionmanager_p.h
@@ -99,6 +99,7 @@ public slots:
void reset();
void dispatchMessage(const QString &engineName, const QString &message, const QVariantMap &parameters);
void setEngineParameter(const QString &engineName, const QString &key, const QVariant &value);
+ void resetEngineAdaptationState(const QString &engineName);
private slots:
void onProcess();
diff --git a/src/asr/qspeechrecognitionpluginengine.cpp b/src/asr/qspeechrecognitionpluginengine.cpp
index 5cdb30b..7bd1f85 100755
--- a/src/asr/qspeechrecognitionpluginengine.cpp
+++ b/src/asr/qspeechrecognitionpluginengine.cpp
@@ -161,6 +161,13 @@ QT_BEGIN_NAMESPACE
if \l stopListening() was called before this method.
*/
+/*! \fn void QSpeechRecognitionPluginEngine::resetAdaptationState()
+
+ Reset engine adaptation state to its initial value.
+
+ See QSpeechRecognitionEngine::resetAdaptationState().
+*/
+
/*! \fn bool QSpeechRecognitionPluginEngine::process()
Performs any run-time tasks the engine needs to do, like audio processing.
diff --git a/src/asr/qspeechrecognitionpluginengine.h b/src/asr/qspeechrecognitionpluginengine.h
index c7dba78..1a90719 100644
--- a/src/asr/qspeechrecognitionpluginengine.h
+++ b/src/asr/qspeechrecognitionpluginengine.h
@@ -71,6 +71,7 @@ public:
virtual void abortListening() = 0;
virtual void unmute(qint64 timestamp) = 0;
virtual void reset() = 0;
+ virtual void resetAdaptationState() = 0;
virtual bool process() = 0;
Q_SIGNALS:
diff --git a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp
index 8f6a799..7320b9b 100755
--- a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp
+++ b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.cpp
@@ -88,6 +88,7 @@ QSpeechRecognitionEnginePocketSphinx::QSpeechRecognitionEnginePocketSphinx(const
m_debugAudioFile(0),
m_sessionStarted(false),
m_cmnVec(0),
+ m_initialCmnVec(0),
m_cmnSize(0)
{
const QVariantMap &engineParams = QSpeechRecognitionPluginEngine::parameters();
@@ -108,6 +109,7 @@ QSpeechRecognitionEnginePocketSphinx::~QSpeechRecognitionEnginePocketSphinx()
disconnect(&m_inputFileDecoder);
ps_free(m_decoder);
delete[] m_cmnVec;
+ delete[] m_initialCmnVec;
}
bool QSpeechRecognitionEnginePocketSphinx::init(QString *errorString)
@@ -166,6 +168,8 @@ bool QSpeechRecognitionEnginePocketSphinx::init(QString *errorString)
feat_t *feat = ps_get_feat(m_decoder);
m_cmnSize = feat_cepsize(feat);
m_cmnVec = new mfcc_t[m_cmnSize];
+ m_initialCmnVec = new mfcc_t[m_cmnSize];
+ cmn_prior_get(feat->cmn_struct, m_initialCmnVec);
m_cmnFilePath = dataDirectory().absoluteFilePath(QLatin1String("pocketsphinx_")
+ name() + QLatin1String("_cmn"));
// Attempt to load adapted cepstrum means from the data file. The default values are not
@@ -375,6 +379,11 @@ void QSpeechRecognitionEnginePocketSphinx::reset()
m_grammar = 0;
}
+// Plugin-engine entry point for resetting adaptation. For PocketSphinx the
+// only adaptation state handled here is the cepstral mean data, so the call
+// is delegated to resetCmn().
+void QSpeechRecognitionEnginePocketSphinx::resetAdaptationState()
+{
+    resetCmn();
+}
+
bool QSpeechRecognitionEnginePocketSphinx::process()
{
if (m_sessionStarted && !m_muted)
@@ -545,4 +554,13 @@ void QSpeechRecognitionEnginePocketSphinx::loadCmn()
dataFile.close();
}
+// Reset cepstrum means to their default values.
+//
+// Restores the decoder's CMN prior from m_initialCmnVec (the values read with
+// cmn_prior_get() in init()) and removes the file at m_cmnFilePath.
+void QSpeechRecognitionEnginePocketSphinx::resetCmn()
+{
+    // m_initialCmnVec stays 0 until init() has allocated and filled it, so a
+    // reset requested before (or after a failed) init() must not hand a null
+    // decoder or a null vector to the PocketSphinx API.
+    if (m_decoder && m_initialCmnVec) {
+        feat_t *feat = ps_get_feat(m_decoder);
+        cmn_prior_set(feat->cmn_struct, m_initialCmnVec);
+    }
+
+    // The path is only assigned in init(); skip the removal while it is still
+    // empty. A missing file is not an error, so the result is not checked.
+    if (!m_cmnFilePath.isEmpty())
+        QFile::remove(m_cmnFilePath);
+}
+
QT_END_NAMESPACE
diff --git a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h
index 899130c..eb13998 100755
--- a/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h
+++ b/src/plugins/asr/pocketsphinx/qspeechrecognitionengine_pocketsphinx.h
@@ -67,6 +67,7 @@ public:
void abortListening();
void unmute(qint64 timestamp);
void reset();
+ void resetAdaptationState();
bool process();
// Internal API:
@@ -89,6 +90,7 @@ private:
void processAudio(const void *data, size_t dataSize);
void storeCmn();
void loadCmn();
+ void resetCmn();
int m_session;
bool m_muted;
ps_decoder_t *m_decoder;
@@ -104,6 +106,7 @@ private:
bool m_sessionStarted;
QString m_cmnFilePath;
mfcc_t *m_cmnVec;
+ mfcc_t *m_initialCmnVec;
int m_cmnSize;
};