From a99d17fa754ea25f2729c934788a1693401657d0 Mon Sep 17 00:00:00 2001 From: Tuomas Tuononen Date: Thu, 26 Nov 2015 12:57:17 +0200 Subject: flite: Move audio output and thread handling to a generic base class - Other engine integrations can later use the common parts - Replace the use of QGlobalStatic with a reference counted singleton object for flite processor instance. This prevents QAudioOutput from crashing on application exit if TTS is still speaking, as QGlobalStatic is destroyed too late. - Implement generic support for pause/resume by blocking audio output Change-Id: I573e9dececb51263db825d53d6a17da175c3e684 Reviewed-by: Jeremy Whiting Reviewed-by: Frederik Gladhorn --- src/plugins/tts/common/qtexttospeechprocessor.cpp | 310 +++++++++++++++++++ src/plugins/tts/common/qtexttospeechprocessor_p.h | 132 ++++++++ src/plugins/tts/flite/flite.pro | 8 +- src/plugins/tts/flite/qtexttospeech_flite.cpp | 331 ++------------------- src/plugins/tts/flite/qtexttospeech_flite.h | 51 +--- .../tts/flite/qtexttospeech_flite_processor.cpp | 203 +++++++++++++ .../tts/flite/qtexttospeech_flite_processor.h | 95 ++++++ 7 files changed, 782 insertions(+), 348 deletions(-) create mode 100644 src/plugins/tts/common/qtexttospeechprocessor.cpp create mode 100644 src/plugins/tts/common/qtexttospeechprocessor_p.h create mode 100644 src/plugins/tts/flite/qtexttospeech_flite_processor.cpp create mode 100644 src/plugins/tts/flite/qtexttospeech_flite_processor.h diff --git a/src/plugins/tts/common/qtexttospeechprocessor.cpp b/src/plugins/tts/common/qtexttospeechprocessor.cpp new file mode 100644 index 0000000..cbb7819 --- /dev/null +++ b/src/plugins/tts/common/qtexttospeechprocessor.cpp @@ -0,0 +1,310 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the Qt Speech module of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:LGPL3$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPLv3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or later as published by the Free +** Software Foundation and appearing in the file LICENSE.GPL included in +** the packaging of this file. Please review the following information to +** ensure the GNU General Public License version 2.0 requirements will be +** met: http://www.gnu.org/licenses/gpl-2.0.html. 
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qtexttospeechprocessor_p.h"
+
+#include
+
+QT_BEGIN_NAMESPACE
+
+QTextToSpeechProcessor::QTextToSpeechProcessor():
+    m_stop(true),
+    m_idle(true),
+    m_paused(false),
+    m_rate(0),
+    m_pitch(0),
+    m_volume(100),
+    m_audio(0),
+    m_audioBuffer(0)
+{
+}
+
+QTextToSpeechProcessor::~QTextToSpeechProcessor()
+{
+}
+
+void QTextToSpeechProcessor::say(const QString &text, int voiceId)
+{
+    if (isInterruptionRequested())
+        return;
+    QMutexLocker lock(&m_lock);
+    bool wasPaused = m_paused;
+    m_stop = true; // Cancel any previous utterance
+    m_idle = false;
+    m_paused = false;
+    m_nextText = text;
+    m_nextVoice = voiceId;
+    // If the speech was paused, one signal is needed to release the pause
+    // and another to start processing the new text.
+    m_speakSem.release(wasPaused ? 2 : 1);
+}
+
+void QTextToSpeechProcessor::stop()
+{
+    QMutexLocker lock(&m_lock);
+    m_stop = true;
+    m_paused = false;
+    m_nextText.clear();
+    m_speakSem.release();
+}
+
+void QTextToSpeechProcessor::pause()
+{
+    QMutexLocker lock(&m_lock);
+    m_paused = true;
+    m_speakSem.release();
+}
+
+void QTextToSpeechProcessor::resume()
+{
+    QMutexLocker lock(&m_lock);
+    m_paused = false;
+    m_speakSem.release();
+}
+
+bool QTextToSpeechProcessor::setRate(float rate)
+{
+    QMutexLocker lock(&m_lock);
+    if (rate >= -1.0 && rate <= 1.0) {
+        if (updateRate(rate)) {
+            m_rate = rate;
+            return true;
+        }
+    }
+    return false;
+}
+
+bool QTextToSpeechProcessor::setPitch(float pitch)
+{
+    QMutexLocker lock(&m_lock);
+    if (pitch >= -1.0 && pitch <= 1.0) {
+        if (updatePitch(pitch)) {
+            m_pitch = pitch;
+            return true;
+        }
+    }
+    return false;
+}
+
+bool QTextToSpeechProcessor::setVolume(int volume)
+{
+    QMutexLocker lock(&m_lock);
+    if (volume >= 0 && volume <= 100) {
+        if (updateVolume(volume)) {
+            m_volume = volume;
+            return true;
+        }
+    }
+    return false;
+}
+
+bool QTextToSpeechProcessor::isIdle() const
+{
+    QMutexLocker lock(&m_lock);
+    return m_idle;
+}
+
+float QTextToSpeechProcessor::rate() const
+{
+    QMutexLocker lock(&m_lock);
+    return m_rate;
+}
+
+float QTextToSpeechProcessor::pitch() const
+{
+    QMutexLocker lock(&m_lock);
+    return m_pitch;
+}
+
+int QTextToSpeechProcessor::volume() const
+{
+    QMutexLocker lock(&m_lock);
+    return m_volume;
+}
+
+void QTextToSpeechProcessor::start(QThread::Priority priority)
+{
+    QThread::start(priority);
+}
+
+void QTextToSpeechProcessor::exit(int retcode)
+{
+    QThread::exit(retcode);
+    QThread::requestInterruption();
+    stop();
+    if (!QThread::wait(5000)) {
+        QThread::terminate();
+        QThread::wait();
+    }
+}
+
+void QTextToSpeechProcessor::run()
+{
+    int statusCode = 0;
+    forever {
+        m_lock.lock();
+        if (!m_speakSem.tryAcquire()) {
+            m_idle = true;
+            m_lock.unlock();
+            emit notSpeaking(statusCode); // Going idle
+            m_speakSem.acquire();
+            m_lock.lock();
+        }
+        if (isInterruptionRequested()) {
+            if (m_audio) {
+                delete m_audio;
+                m_audio = 0;
+                m_audioBuffer = 0;
+            }
+            m_lock.unlock();
+            break;
+        }
+        m_stop = false;
+        if (!m_nextText.isEmpty()) {
+            QString text = m_nextText;
+            int voice = m_nextVoice;
+            m_nextText.clear();
+            m_lock.unlock();
+            statusCode = processText(text, voice);
+        } else {
+            m_lock.unlock();
+        }
+    }
+}
+
+bool QTextToSpeechProcessor::audioStart(int sampleRate, int channelCount, QString *errorString)
+{
+    QMutexLocker lock(&m_lock);
+    QAudioFormat format;
+    format.setSampleRate(sampleRate);
+    format.setChannelCount(channelCount);
+    format.setSampleSize(16);
+    format.setSampleType(QAudioFormat::SignedInt);
+    format.setCodec("audio/pcm");
+    if (errorString)
+        *errorString = QString();
+    delete m_audio; // Deleting a null pointer is a no-op, so no guard is needed
+    m_audio = new QAudioOutput(format);
+    m_audioBuffer = m_audio->start();
+    updateVolume(m_volume);
+    if (m_audioBuffer && m_audio->state() == QAudio::IdleState)
+        return true;
+    if (errorString)
+        *errorString = QLatin1String("Failed to start audio output (error ")
+            + QString::number(m_audio->error()) + QLatin1Char(')');
+    delete m_audio;
+    m_audio = 0;
+    m_audioBuffer = 0;
+    return false;
+}
+
+bool QTextToSpeechProcessor::audioOutput(const char *data, qint64 dataSize, QString *errorString)
+{
+    bool ret = true;
+    qint64 bytesWritten = 0; // Same width as dataSize and QIODevice::write()
+    QString error;
+    forever {
+        m_lock.lock();
+        if (m_paused && m_audio) { // Without an open output there is nothing to suspend
+            m_audio->suspend();
+            do {
+                m_lock.unlock();
+                m_speakSem.acquire(); // Wait for any command
+                m_lock.lock();
+            } while (m_paused);
+            m_audio->resume();
+        }
+        if (m_stop || !m_audio || !m_audioBuffer
+            || m_audio->state() == QAudio::StoppedState || isInterruptionRequested()) {
+            if (m_audio && m_audio->error() != QAudio::NoError) { // m_audio is null if never started
+                error = QLatin1String("Audio error (")
+                    + QString::number(m_audio->error()) + QLatin1Char(')');
+            }
+            m_lock.unlock();
+            ret = false;
+            break;
+        }
+        const qint64 written = m_audioBuffer->write(data + bytesWritten, dataSize - bytesWritten);
+        bytesWritten += qMax(qint64(0), written); // write() returns -1 on error; never rewind
+        m_lock.unlock();
+        if (bytesWritten >= dataSize)
+            break;
+        QThread::msleep(50);
+    }
+    if (errorString)
+        *errorString = error;
+    return ret;
+}
+
+void QTextToSpeechProcessor::audioStop(bool abort)
+{
+    QMutexLocker lock(&m_lock);
+    if (m_audio) {
+        if (abort) {
+            m_audio->reset(); // Discard buffered audio
+        } else {
+            // TODO: Find a way to reliably check if all the audio has been written out before stopping
+            m_audioBuffer->write(QByteArray(1024, 0));
+            QThread::msleep(200);
+            m_audio->stop();
+        }
+        delete m_audio;
+        m_audio = 0;
+        m_audioBuffer = 0;
+    }
+}
+
+bool QTextToSpeechProcessor::updateRate(float rate)
+{
+    Q_UNUSED(rate);
+    return true;
+}
+
+bool QTextToSpeechProcessor::updatePitch(float pitch)
+{
+    Q_UNUSED(pitch);
+    return true;
+}
+
+bool QTextToSpeechProcessor::updateVolume(int volume)
+{
+    if (m_audio)
+        m_audio->setVolume(((qreal)volume) / 100.0);
+    return true;
+}
+
+
+QT_END_NAMESPACE
diff --git a/src/plugins/tts/common/qtexttospeechprocessor_p.h b/src/plugins/tts/common/qtexttospeechprocessor_p.h new file mode 100644 index 0000000..094fe73 --- /dev/null +++ 
b/src/plugins/tts/common/qtexttospeechprocessor_p.h @@ -0,0 +1,132 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the Qt Speech module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL3$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPLv3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or later as published by the Free +** Software Foundation and appearing in the file LICENSE.GPL included in +** the packaging of this file. Please review the following information to +** ensure the GNU General Public License version 2.0 requirements will be +** met: http://www.gnu.org/licenses/gpl-2.0.html. 
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QTEXTTOSPEECHPROCESSOR_P_H
+#define QTEXTTOSPEECHPROCESSOR_P_H
+
+#include "qvoice.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+QT_BEGIN_NAMESPACE
+
+// A common base class for text-to-speech engine integrations
+// that require audio output implementation and thread handling.
+//
+// QAudioOutput is used for audio, and each call to say() cancels
+// any previous processing. The public interface is thread-safe.
+class QTextToSpeechProcessor : public QThread {
+    Q_OBJECT
+
+public:
+    struct VoiceInfo
+    {
+        int id;
+        QString name;
+        QString locale;
+        QVoice::Gender gender;
+        QVoice::Age age;
+    };
+    QTextToSpeechProcessor();
+    ~QTextToSpeechProcessor();
+    void say(const QString &text, int voiceId);
+    void stop();
+    void pause();
+    void resume();
+    bool isIdle() const;
+    bool setRate(float rate);
+    bool setPitch(float pitch);
+    bool setVolume(int volume);
+    float rate() const;
+    float pitch() const;
+    int volume() const;
+    virtual const QVector<VoiceInfo> &voices() const = 0;
+
+protected:
+    // These are re-implemented QThread methods.
+    // exit() waits until the processor thread finishes or the wait times out.
+    void start(QThread::Priority = QThread::InheritPriority);
+    void exit(int retcode = 0);
+
+    // These methods can be used for audio output.
+    // audioOutput() blocks until all the audio has been written or processing
+    // is interrupted.
+    bool audioStart(int sampleRate, int channelCount, QString *errorString = 0);
+    bool audioOutput(const char* data, qint64 dataSize, QString *errorString = 0);
+    void audioStop(bool abort = false);
+
+    // These methods should be re-implemented if the parameters need
+    // to be changed while TTS is speaking. By default, updateVolume() just
+    // changes the QAudioOutput volume. The other methods do nothing by default.
+    virtual bool updateRate(float rate);
+    virtual bool updatePitch(float pitch);
+    virtual bool updateVolume(int volume);
+
+    // This method is called from the internal processor thread, and should block
+    // until the given text has been processed or processing is interrupted.
+    virtual int processText(const QString &text, int voiceId) = 0;
+
+signals:
+    // This signal is emitted when the processor goes to idle state, i.e. when no
+    // new text is set to be spoken. The parameter is the latest return value of
+    // processText(). As the signal is emitted from the internal thread, the recipient
+    // should call isIdle() to get updated state.
+    void notSpeaking(int statusCode);
+
+private:
+    void run() Q_DECL_OVERRIDE;
+    mutable QMutex m_lock;
+    bool m_stop;   // Guarded by m_lock; volatile is not a synchronization primitive
+    bool m_idle;   // Guarded by m_lock
+    bool m_paused; // Guarded by m_lock
+    float m_rate;
+    float m_pitch;
+    int m_volume;
+    QSemaphore m_speakSem;
+    QString m_nextText;
+    int m_nextVoice;
+    QAudioOutput *m_audio;
+    QIODevice *m_audioBuffer;
+};
+
+QT_END_NAMESPACE
+
+#endif
diff --git a/src/plugins/tts/flite/flite.pro b/src/plugins/tts/flite/flite.pro index 8f07abf..f80bea0 100644 --- a/src/plugins/tts/flite/flite.pro +++ b/src/plugins/tts/flite/flite.pro @@ -7,11 +7,15 @@ load(qt_plugin) HEADERS += \ qtexttospeech_flite.h \ - qtexttospeech_flite_plugin.h + qtexttospeech_flite_plugin.h \ + qtexttospeech_flite_processor.h \ + ../common/qtexttospeechprocessor_p.h SOURCES += \ qtexttospeech_flite.cpp \ - qtexttospeech_flite_plugin.cpp + qtexttospeech_flite_plugin.cpp \ + qtexttospeech_flite_processor.cpp \ + ../common/qtexttospeechprocessor.cpp OTHER_FILES += \ flite_plugin.json diff --git a/src/plugins/tts/flite/qtexttospeech_flite.cpp b/src/plugins/tts/flite/qtexttospeech_flite.cpp index 86f6974..364727f 100755 --- a/src/plugins/tts/flite/qtexttospeech_flite.cpp +++ b/src/plugins/tts/flite/qtexttospeech_flite.cpp @@ -36,292 +36,19 @@ #include "qtexttospeech_flite.h" -#include -#include -#include -#include -#include -#include 
-#include - -#include - -// en_US voice: -extern "C" cst_voice *register_cmu_us_kal16(); -extern "C" void unregister_cmu_us_kal16(cst_voice *vox); - QT_BEGIN_NAMESPACE -// Class that handles global Flite initialization and -// creates the processor thread. -class FliteLoader -{ -public: - struct FliteVoiceInfo { - cst_voice *vox; - void (*unregister_func)(cst_voice *vox); - QString name; - QString localeName; - QString gender; - QString age; - }; - FliteLoader() - { - flite_init(); - FliteVoiceInfo voice_enus = { register_cmu_us_kal16(), unregister_cmu_us_kal16, "kal16", "en_US", "male", "adult" }; - if (voice_enus.vox) - m_voices.append(voice_enus); - m_processor = new FliteProcessor(); - QObject::connect(m_processor, &QThread::finished, &QThread::deleteLater); - m_processor->start(); - } - ~FliteLoader() - { - foreach (const FliteLoader::FliteVoiceInfo &voice, m_voices) - voice.unregister_func(voice.vox); - m_processor->exit(); - } - const QVector &voices() const - { - return m_voices; - } - FliteProcessor *processor() const - { - return m_processor; - } -private: - QVector m_voices; - FliteProcessor *m_processor; -}; - -Q_GLOBAL_STATIC(FliteLoader, fliteLoader) - -FliteProcessor::FliteProcessor(): - m_stop(true), - m_idle(true), - m_rate(0), - m_pitch(0), - m_volume(100) -{ -} - -FliteProcessor::~FliteProcessor() -{ -} - -void FliteProcessor::say(cst_voice *voice, const QString &text) -{ - if (isInterruptionRequested()) - return; - QMutexLocker lock(&m_lock); - m_stop = true; // Cancel any previous utterance - m_idle = false; - m_nextText = text; - m_nextVoice = voice; - setRateForVoice(m_nextVoice, m_rate); - setPitchForVoice(m_nextVoice, m_pitch); - m_speakSem.release(); -} - -void FliteProcessor::stop() -{ - QMutexLocker lock(&m_lock); - m_stop = true; - m_nextText.clear(); - m_nextVoice = 0; - m_speakSem.release(); -} - -bool FliteProcessor::setRate(float rate) -{ - QMutexLocker lock(&m_lock); - if (rate >= -1.0 && rate <= 1.0) { - m_rate = rate; - return 
true; - } - return false; -} - -bool FliteProcessor::setPitch(float pitch) -{ - QMutexLocker lock(&m_lock); - if (pitch >= -1.0 && pitch <= 1.0) { - m_pitch = pitch; - return true; - } - return false; -} - -bool FliteProcessor::setVolume(int volume) -{ - QMutexLocker lock(&m_lock); - if (volume >= 0 && volume <= 100) { - m_volume = volume; - if (m_audio) - m_audio->setVolume(((qreal)m_volume) / 100.0); - return true; - } - return false; -} - -void FliteProcessor::exit() -{ - QThread::exit(); - requestInterruption(); - stop(); -} - -bool FliteProcessor::isIdle() -{ - QMutexLocker lock(&m_lock); - return m_idle; -} - -float FliteProcessor::rate() -{ - QMutexLocker lock(&m_lock); - return m_rate; -} - -float FliteProcessor::pitch() -{ - QMutexLocker lock(&m_lock); - return m_pitch; -} - -int FliteProcessor::volume() -{ - QMutexLocker lock(&m_lock); - return m_volume; -} - -void FliteProcessor::run() -{ - forever { - m_lock.lock(); - if (!m_speakSem.tryAcquire()) { - m_idle = true; - m_lock.unlock(); - emit notSpeaking(); // Going idle - m_speakSem.acquire(); - m_lock.lock(); - } - if (isInterruptionRequested()) { - m_lock.unlock(); - return; - } - m_stop = false; - if (!m_nextText.isEmpty() && m_nextVoice) { - cst_audio_streaming_info *asi; - QString text = m_nextText; - cst_voice *voice = m_nextVoice; - m_nextText.clear(); - m_nextVoice = 0; - m_lock.unlock(); - asi = new_audio_streaming_info(); - asi->asc = FliteProcessor::fliteAudioCb; - asi->userdata = (void *)this; - feat_set(voice->features, "streaming_info", audio_streaming_info_val(asi)); - flite_text_to_speech(text.toUtf8().constData(), voice, "none"); - } else { - m_lock.unlock(); - } - } -} - -void FliteProcessor::setRateForVoice(cst_voice *voice, float rate) -{ - float stretch = 1.0; - Q_ASSERT(rate >= -1.0 && rate <= 1.0); - // Stretch multipliers taken from Speech Dispatcher - if (rate < 0) - stretch -= rate * 2; - if (rate > 0) - stretch -= rate * (100.0 / 175.0); - feat_set_float(voice->features, 
"duration_stretch", stretch); -} - -void FliteProcessor::setPitchForVoice(cst_voice *voice, float pitch) -{ - float f0; - Q_ASSERT(pitch >= -1.0 && pitch <= 1.0); - // Conversion taken from Speech Dispatcher - f0 = (pitch * 80) + 100; - feat_set_float(voice->features, "int_f0_target_mean", f0); -} - -int FliteProcessor::audioOutput(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi) -{ - Q_UNUSED(asi); - int ret = CST_AUDIO_STREAM_CONT; - if (start == 0) { - m_lock.lock(); - QAudioFormat format; - format.setSampleRate(w->sample_rate); - format.setChannelCount(w->num_channels); - format.setSampleSize(16); - format.setSampleType(QAudioFormat::SignedInt); - format.setCodec("audio/pcm"); - m_audio = new QAudioOutput(format); - m_audio->setVolume(((qreal)m_volume) / 100.0); - m_audioBuffer = m_audio->start(); - m_lock.unlock(); - } - int bytesToWrite = size * sizeof(short); - int bytesWritten = 0; - forever { - m_lock.lock(); - if (m_stop || !m_audioBuffer - || m_audio->state() == QAudio::StoppedState || isInterruptionRequested()) { - m_lock.unlock(); - ret = CST_AUDIO_STREAM_STOP; - break; - } - bytesWritten += m_audioBuffer->write((const char*)(&w->samples[start + bytesWritten/sizeof(short)]), bytesToWrite - bytesWritten); - m_lock.unlock(); - if (bytesWritten >= bytesToWrite) - break; - QThread::msleep(200); - } - m_lock.lock(); - if (m_stop || last == 1) { - if (m_stop) { - m_audio->reset(); // Discard buffered audio - } else { - // TODO: Find a way to reliably check if all the audio has been written out before stopping - m_audioBuffer->write(QByteArray(1024, 0)); - QThread::msleep(200); - m_audio->stop(); - } - delete m_audio; - m_audio = 0; - m_audioBuffer = 0; - } - m_lock.unlock(); - return ret; -} - -int FliteProcessor::fliteAudioCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi) -{ - FliteProcessor *processor = static_cast(asi->userdata); - if (processor) - return processor->audioOutput(w, 
start, size, last, asi); - return CST_AUDIO_STREAM_STOP; -} - QTextToSpeechEngineFlite::QTextToSpeechEngineFlite( const QVariantMap &parameters, QObject *parent) : QTextToSpeechEngine(parent), - m_state(QTextToSpeech::Ready) + m_state(QTextToSpeech::Ready), + m_processor(QTextToSpeechProcessorFlite::instance()) { Q_UNUSED(parameters); } QTextToSpeechEngineFlite::~QTextToSpeechEngineFlite() { - } QVector QTextToSpeechEngineFlite::availableLocales() const @@ -337,48 +64,54 @@ QVector QTextToSpeechEngineFlite::availableVoices() const void QTextToSpeechEngineFlite::say(const QString &text) { int id = QTextToSpeechEngine::voiceData(m_currentVoice).toInt(); - cst_voice *voiceData = fliteLoader()->voices()[id].vox; m_state = QTextToSpeech::Speaking; emit stateChanged(m_state); - fliteLoader()->processor()->say(voiceData, text); + m_processor->say(text, id); } void QTextToSpeechEngineFlite::stop() { - fliteLoader()->processor()->stop(); + m_processor->stop(); m_state = QTextToSpeech::Ready; emit stateChanged(m_state); } void QTextToSpeechEngineFlite::pause() { - // Not supported, just stop: - stop(); + if (m_state == QTextToSpeech::Speaking) { + m_processor->pause(); + m_state = QTextToSpeech::Paused; + emit stateChanged(m_state); + } } void QTextToSpeechEngineFlite::resume() { - + if (m_state == QTextToSpeech::Paused) { + m_processor->resume(); + m_state = QTextToSpeech::Speaking; + emit stateChanged(m_state); + } } double QTextToSpeechEngineFlite::rate() const { - return fliteLoader()->processor()->rate(); + return m_processor->rate(); } bool QTextToSpeechEngineFlite::setRate(double rate) { - return fliteLoader()->processor()->setRate(rate); + return m_processor->setRate(rate); } double QTextToSpeechEngineFlite::pitch() const { - return fliteLoader()->processor()->pitch(); + return m_processor->pitch(); } bool QTextToSpeechEngineFlite::setPitch(double pitch) { - return fliteLoader()->processor()->setPitch(pitch); + return m_processor->setPitch(pitch); } QLocale 
QTextToSpeechEngineFlite::locale() const @@ -406,12 +139,12 @@ bool QTextToSpeechEngineFlite::setLocale(const QLocale &locale) int QTextToSpeechEngineFlite::volume() const { - return fliteLoader()->processor()->volume(); + return m_processor->volume(); } bool QTextToSpeechEngineFlite::setVolume(int volume) { - return fliteLoader()->processor()->setVolume(volume); + return m_processor->setVolume(volume); } QVoice QTextToSpeechEngineFlite::voice() const @@ -438,14 +171,13 @@ QTextToSpeech::State QTextToSpeechEngineFlite::state() const bool QTextToSpeechEngineFlite::init(QString *errorString) { int i = 0; - QVector voices = fliteLoader()->voices(); - foreach (const FliteLoader::FliteVoiceInfo &fliteVoice, voices) { - QVoice::Age age = QVoice::Other; - QVoice::Gender gender = QVoice::Unknown; - QString name = fliteVoice.name; - QLocale locale(fliteVoice.localeName); - QVoice voice = QTextToSpeechEngine::createVoice(name, gender, age, QVariant(i)); - m_voices.insert(fliteVoice.localeName, voice); + const QVector &voices = m_processor->voices(); + foreach (const QTextToSpeechProcessor::VoiceInfo &voiceInfo, voices) { + QString name = voiceInfo.name; + QLocale locale(voiceInfo.locale); + QVoice voice = QTextToSpeechEngine::createVoice(name, voiceInfo.gender, voiceInfo.age, + QVariant(voiceInfo.id)); + m_voices.insert(voiceInfo.locale, voice); if (!m_locales.contains(locale)) m_locales.append(locale); // Use the first available locale/voice as a fallback @@ -457,16 +189,17 @@ bool QTextToSpeechEngineFlite::init(QString *errorString) } // Attempt to switch to the system locale setLocale(QLocale::system()); - connect(fliteLoader()->processor(), &FliteProcessor::notSpeaking, + connect(m_processor.data(), &QTextToSpeechProcessor::notSpeaking, this, &QTextToSpeechEngineFlite::onNotSpeaking); if (errorString) - *errorString = QStringLiteral(""); + *errorString = QString(); return true; } -void QTextToSpeechEngineFlite::onNotSpeaking() +void 
QTextToSpeechEngineFlite::onNotSpeaking(int statusCode) { - if (m_state != QTextToSpeech::Ready && fliteLoader()->processor()->isIdle()) { + Q_UNUSED(statusCode); + if (m_state != QTextToSpeech::Ready && m_processor->isIdle()) { m_state = QTextToSpeech::Ready; emit stateChanged(m_state); } diff --git a/src/plugins/tts/flite/qtexttospeech_flite.h b/src/plugins/tts/flite/qtexttospeech_flite.h index ff94bef..311ebbd 100755 --- a/src/plugins/tts/flite/qtexttospeech_flite.h +++ b/src/plugins/tts/flite/qtexttospeech_flite.h @@ -37,63 +37,19 @@ #ifndef QTEXTTOSPEECHENGINE_FLITE_H #define QTEXTTOSPEECHENGINE_FLITE_H +#include "qtexttospeech_flite_processor.h" #include "qtexttospeechengine.h" #include "qvoice.h" #include #include #include -#include -#include -#include -#include -#include +#include #include QT_BEGIN_NAMESPACE -class FliteProcessor : public QThread { - Q_OBJECT - -public: - FliteProcessor(); - ~FliteProcessor(); - void say(cst_voice *voice, const QString &text); - void stop(); - bool setRate(float rate); - bool setPitch(float pitch); - bool setVolume(int volume); - void exit(); - bool isIdle(); - float rate(); - float pitch(); - int volume(); - -signals: - void notSpeaking(); - -private: - QMutex m_lock; - bool m_stop; - bool m_idle; - float m_rate; - float m_pitch; - int m_volume; - QSemaphore m_speakSem; - QString m_nextText; - cst_voice *m_nextVoice; - QAudioOutput *m_audio; - QIODevice *m_audioBuffer; - void run(); - void setRateForVoice(cst_voice *voice, float rate); - void setPitchForVoice(cst_voice *voice, float pitch); - int audioOutput(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi); - static int fliteAudioCb(const cst_wave *w, int start, int size, - int last, cst_audio_streaming_info *asi); -}; - class QTextToSpeechEngineFlite : public QTextToSpeechEngine { Q_OBJECT @@ -125,10 +81,11 @@ public: bool init(QString *errorString); public slots: - void onNotSpeaking(); + void onNotSpeaking(int statusCode); private: 
QTextToSpeech::State m_state; + QSharedPointer m_processor; QLocale m_currentLocale; QVector m_locales; QVoice m_currentVoice; diff --git a/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp b/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp new file mode 100644 index 0000000..5cb051d --- /dev/null +++ b/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp @@ -0,0 +1,203 @@
+/****************************************************************************
+**
+** Copyright (C) 2015 The Qt Company Ltd.
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the Qt Speech module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL3$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see http://www.qt.io/terms-conditions. For further
+** information use the contact form at http://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPLv3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or later as published by the Free
+** Software Foundation and appearing in the file LICENSE.GPL included in
+** the packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 2.0 requirements will be
+** met: http://www.gnu.org/licenses/gpl-2.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qtexttospeech_flite_processor.h"
+#include "qtexttospeech_flite_plugin.h"
+
+#include
+#include
+#include
+
+#include
+
+// en_US voice:
+extern "C" cst_voice *register_cmu_us_kal16();
+extern "C" void unregister_cmu_us_kal16(cst_voice *vox);
+
+QT_BEGIN_NAMESPACE
+
+QWeakPointer<QTextToSpeechProcessorFlite> QTextToSpeechProcessorFlite::m_instance;
+QMutex QTextToSpeechProcessorFlite::m_instanceLock;
+
+QSharedPointer<QTextToSpeechProcessorFlite> QTextToSpeechProcessorFlite::instance()
+{
+    // QWeakPointer is reentrant but not thread-safe: every access to the shared
+    // m_instance, including the first lookup and the sanity checks below,
+    // must happen while m_instanceLock is held.
+    QMutexLocker lock(&m_instanceLock);
+    QSharedPointer<QTextToSpeechProcessorFlite> inst = m_instance.toStrongRef();
+    if (inst.isNull()) {
+        inst = QSharedPointer<QTextToSpeechProcessorFlite>(new QTextToSpeechProcessorFlite());
+        m_instance = inst;
+    }
+    Q_ASSERT(inst);
+    Q_ASSERT(inst == m_instance);
+    return inst;
+}
+
+QTextToSpeechProcessorFlite::QTextToSpeechProcessorFlite():
+    m_initialized(false),
+    m_currentVoice(-1)
+{
+    if (init()) {
+        m_initialized = true;
+        start();
+    } else {
+        deinit();
+    }
+}
+
+QTextToSpeechProcessorFlite::~QTextToSpeechProcessorFlite()
+{
+    if (m_initialized) {
+        exit();
+        deinit();
+    }
+}
+
+const QVector<QTextToSpeechProcessor::VoiceInfo> &QTextToSpeechProcessorFlite::voices() const
+{
+    return m_voices;
+}
+
+int QTextToSpeechProcessorFlite::fliteOutputCb(const cst_wave *w, int start, int size,
+                                               int last, cst_audio_streaming_info *asi)
+{
+    QTextToSpeechProcessorFlite *processor = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata);
+    if (processor)
+        return processor->fliteOutput(w, start, size, last, asi);
+    return CST_AUDIO_STREAM_STOP;
+}
+
+int QTextToSpeechProcessorFlite::fliteOutput(const cst_wave *w, int start, int size,
+                                             int last, cst_audio_streaming_info *asi)
+{
+    Q_UNUSED(asi);
+    QString errorString;
+    if (start == 0) {
+        if (!audioStart(w->sample_rate, w->num_channels, &errorString)) {
+            if (!errorString.isEmpty())
+                qCCritical(lcSpeechTtsFlite) << errorString;
+            return CST_AUDIO_STREAM_STOP;
+        }
+    }
+    int bytesToWrite = size * sizeof(short);
+    if (!audioOutput(reinterpret_cast<const char *>(&w->samples[start]), bytesToWrite, &errorString)) {
+        if (!errorString.isEmpty())
+            qCCritical(lcSpeechTtsFlite) << errorString;
+        audioStop(true); // Abort audio output
+        return CST_AUDIO_STREAM_STOP;
+    }
+    if (last == 1)
+        audioStop();
+    return CST_AUDIO_STREAM_CONT;
+}
+
+int QTextToSpeechProcessorFlite::processText(const QString &text, int voiceId)
+{
+    qCDebug(lcSpeechTtsFlite) << "processText() begin";
+    if (voiceId >= 0 && voiceId < m_fliteVoices.size()) {
+        const FliteVoice &voiceInfo = m_fliteVoices.at(voiceId);
+        cst_voice *voice = voiceInfo.vox;
+        cst_audio_streaming_info *asi = new_audio_streaming_info();
+        asi->asc = QTextToSpeechProcessorFlite::fliteOutputCb;
+        asi->userdata = static_cast<void *>(this);
+        feat_set(voice->features, "streaming_info", audio_streaming_info_val(asi));
+        setRateForVoice(voice, rate());
+        setPitchForVoice(voice, pitch());
+        flite_text_to_speech(text.toUtf8().constData(), voice, "none");
+    }
+    qCDebug(lcSpeechTtsFlite) << "processText() end";
+    return 0;
+}
+
+void QTextToSpeechProcessorFlite::setRateForVoice(cst_voice *voice, float rate)
+{
+    float stretch = 1.0;
+    Q_ASSERT(rate >= -1.0 && rate <= 1.0);
+    // Stretch multipliers taken from Speech Dispatcher
+    if (rate < 0)
+        stretch -= rate * 2;
+    if (rate > 0)
+        stretch -= rate * (100.0 / 175.0);
+    feat_set_float(voice->features, "duration_stretch", stretch);
+}
+
+void QTextToSpeechProcessorFlite::setPitchForVoice(cst_voice *voice, float pitch)
+{
+    float f0;
+    Q_ASSERT(pitch >= -1.0 && pitch <= 1.0);
+    // Conversion taken from Speech Dispatcher
+    f0 = (pitch * 80) + 100;
+    feat_set_float(voice->features, "int_f0_target_mean", f0);
+}
+
+bool QTextToSpeechProcessorFlite::init()
+{
+    flite_init();
+    FliteVoice voice_enus = {
+        register_cmu_us_kal16(),
+        unregister_cmu_us_kal16,
+        "kal16",
+        QLocale(QLocale::English, QLocale::UnitedStates).name(),
+        QVoice::Male,
+        QVoice::Adult
+    };
+    if (voice_enus.vox) // Registration can fail; a null voice must not reach processText()/deinit()
+        m_fliteVoices.append(voice_enus);
+
+    int totalVoiceCount = 0;
+    foreach (const FliteVoice &voice, m_fliteVoices) {
+        QTextToSpeechProcessor::VoiceInfo voiceInfo;
+        voiceInfo.name = voice.name;
+        voiceInfo.locale = voice.locale;
+        voiceInfo.age = voice.age;
+        voiceInfo.gender = voice.gender;
+        voiceInfo.id = totalVoiceCount++; // Ids are indexes into m_fliteVoices
+        m_voices.append(voiceInfo);
+    }
+    return true;
+}
+
+void QTextToSpeechProcessorFlite::deinit()
+{
+    foreach (const FliteVoice &voice, m_fliteVoices)
+        voice.unregister_func(voice.vox);
+    m_fliteVoices.clear();
+    m_voices.clear();
+}
+
+QT_END_NAMESPACE
diff --git a/src/plugins/tts/flite/qtexttospeech_flite_processor.h b/src/plugins/tts/flite/qtexttospeech_flite_processor.h new file mode 100644 index 0000000..6eb1263 --- /dev/null +++ b/src/plugins/tts/flite/qtexttospeech_flite_processor.h @@ -0,0 +1,95 @@ +/**************************************************************************** +** +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ +** +** This file is part of the Qt Speech module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL3$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. 
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPLv3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or later as published by the Free +** Software Foundation and appearing in the file LICENSE.GPL included in +** the packaging of this file. Please review the following information to +** ensure the GNU General Public License version 2.0 requirements will be +** met: http://www.gnu.org/licenses/gpl-2.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QTEXTTOSPEECHPROCESSOR_FLITE_H +#define QTEXTTOSPEECHPROCESSOR_FLITE_H + +#include "../common/qtexttospeechprocessor_p.h" + +#include "qtexttospeechengine.h" +#include "qvoice.h" + +#include +#include +#include +#include + +#include + +QT_BEGIN_NAMESPACE + +// This is a reference counted singleton class. +// The instance is automatically deleted when no users remain. 
+class QTextToSpeechProcessorFlite : public QTextToSpeechProcessor {
+    Q_OBJECT
+
+public:
+    // Returns the shared processor, creating it if none is currently alive.
+    // Callers keep the processor alive by holding on to the returned pointer.
+    static QSharedPointer<QTextToSpeechProcessorFlite> instance();
+    ~QTextToSpeechProcessorFlite();
+    // Descriptions of the voices registered by init(); a voice's id is its
+    // index in the flite voice table.
+    const QVector<QTextToSpeechProcessor::VoiceInfo> &voices() const Q_DECL_OVERRIDE;
+
+private:
+    // Private so that instances are only created through instance().
+    QTextToSpeechProcessorFlite();
+    // Static callback installed as flite's streaming handler; forwards to
+    // fliteOutput() on the processor stored in asi->userdata.
+    static int fliteOutputCb(const cst_wave *w, int start, int size,
+        int last, cst_audio_streaming_info *asi);
+    // Writes one chunk of synthesized samples to the audio output.
+    int fliteOutput(const cst_wave *w, int start, int size,
+        int last, cst_audio_streaming_info *asi);
+    // Synthesizes text with the voice at index voiceId.
+    int processText(const QString &text, int voiceId) Q_DECL_OVERRIDE;
+    // Apply a rate / pitch in [-1, 1] to the given flite voice.
+    void setRateForVoice(cst_voice *voice, float rate);
+    void setPitchForVoice(cst_voice *voice, float pitch);
+    // Register / unregister the flite voices and fill / clear the voice tables.
+    bool init();
+    void deinit();
+
+private:
+    // One registered flite voice together with the metadata reported for it.
+    struct FliteVoice {
+        cst_voice *vox;                          // handle returned by the register function
+        void (*unregister_func)(cst_voice *vox); // called with vox in deinit()
+        QString name;
+        QString locale;
+        QVoice::Gender gender;
+        QVoice::Age age;
+    };
+    // Weak reference to the live instance; guarded by m_instanceLock.
+    static QWeakPointer<QTextToSpeechProcessorFlite> m_instance;
+    static QMutex m_instanceLock;
+    // True once init() has succeeded in the constructor.
+    bool m_initialized;
+    // Parallel tables: m_voices[i] describes m_fliteVoices[i].
+    QVector<QTextToSpeechProcessor::VoiceInfo> m_voices;
+    QVector<FliteVoice> m_fliteVoices;
+    // Initialized to -1 by the constructor.
+    int m_currentVoice;
+};
+
+QT_END_NAMESPACE
+
+#endif
-- 
cgit v1.2.3