summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVolker Hilsheimer <volker.hilsheimer@qt.io>2023-01-18 15:04:41 +0100
committerVolker Hilsheimer <volker.hilsheimer@qt.io>2023-02-19 19:36:36 +0100
commitc03afcc297bf250baff8d0693e4db0c8cc77eeed (patch)
tree257c98299d9c94a3c998b13a5ef19a7d5acd1b78
parentea5c48e518789c3387ed9c9d21978eda122e9782 (diff)
Add QTextToSpeech::synthesize to produce PCM data rather than audio
The function starts the synthesis as an asynchronous process, and emits a signal 'synthesized()' (or calls a functor) with a chunk of PCM data as a QByteArray, and the QAudioFormat in which the data is encoded. This requires a dependency on Qt Multimedia for Qt Speech for all platforms; it has so far been required only with flite and winrt backends. Implemented for all engines, except speechd and macos engines where it's not possible - these engines don't provide access to the data. The test case verifies that the implementation is asynchronous, and that it produces a reasonable amount of data. Since this involves timer-based measurements, values need to be compared with some appropriate margins. The QML documentation of this API is omitted on purpose; the QAudioFormat type is not available in QML, and we don't want to encourage users to operate on raw bytes from QML anyway. [ChangeLog][QtTextToSpeech][QTextToSpeech] Added the ability to produce PCM data as a QByteArray. The QtTextToSpeech module now depends on QtMultimedia on all platforms. Fixes: QTBUG-109837 Change-Id: I308a3e18998827089c0f75789b720f1bd36e3c46 Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
-rw-r--r--src/plugins/tts/android/jar/src/org/qtproject/qt/android/speech/QtTextToSpeech.java54
-rw-r--r--src/plugins/tts/android/src/CMakeLists.txt1
-rw-r--r--src/plugins/tts/android/src/android_plugin.json3
-rw-r--r--src/plugins/tts/android/src/qtexttospeech_android.cpp80
-rw-r--r--src/plugins/tts/android/src/qtexttospeech_android.h4
-rw-r--r--src/plugins/tts/darwin/CMakeLists.txt1
-rw-r--r--src/plugins/tts/darwin/darwin_plugin.json3
-rw-r--r--src/plugins/tts/darwin/qtexttospeech_darwin.h4
-rw-r--r--src/plugins/tts/darwin/qtexttospeech_darwin.mm77
-rw-r--r--src/plugins/tts/flite/flite_plugin.json3
-rw-r--r--src/plugins/tts/flite/qtexttospeech_flite.cpp9
-rw-r--r--src/plugins/tts/flite/qtexttospeech_flite.h1
-rw-r--r--src/plugins/tts/flite/qtexttospeech_flite_processor.cpp91
-rw-r--r--src/plugins/tts/flite/qtexttospeech_flite_processor.h18
-rw-r--r--src/plugins/tts/macos/CMakeLists.txt1
-rw-r--r--src/plugins/tts/macos/qtexttospeech_macos.h1
-rw-r--r--src/plugins/tts/macos/qtexttospeech_macos.mm5
-rw-r--r--src/plugins/tts/mock/mock_plugin.json1
-rw-r--r--src/plugins/tts/mock/qtexttospeech_mock.cpp17
-rw-r--r--src/plugins/tts/mock/qtexttospeech_mock.h2
-rw-r--r--src/plugins/tts/sapi/qtexttospeech_sapi.cpp119
-rw-r--r--src/plugins/tts/sapi/qtexttospeech_sapi.h2
-rw-r--r--src/plugins/tts/sapi/sapi_plugin.json3
-rw-r--r--src/plugins/tts/speechdispatcher/qtexttospeech_speechd.cpp5
-rw-r--r--src/plugins/tts/speechdispatcher/qtexttospeech_speechd.h1
-rw-r--r--src/plugins/tts/winrt/qtexttospeech_winrt.cpp45
-rw-r--r--src/plugins/tts/winrt/qtexttospeech_winrt.h1
-rw-r--r--src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp4
-rw-r--r--src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h2
-rw-r--r--src/plugins/tts/winrt/winrt_plugin.json3
-rw-r--r--src/tts/CMakeLists.txt1
-rw-r--r--src/tts/qtexttospeech.cpp145
-rw-r--r--src/tts/qtexttospeech.h74
-rw-r--r--src/tts/qtexttospeech_p.h3
-rw-r--r--src/tts/qtexttospeechengine.h7
-rw-r--r--tests/auto/qtexttospeech/tst_qtexttospeech.cpp184
36 files changed, 920 insertions, 55 deletions
diff --git a/src/plugins/tts/android/jar/src/org/qtproject/qt/android/speech/QtTextToSpeech.java b/src/plugins/tts/android/jar/src/org/qtproject/qt/android/speech/QtTextToSpeech.java
index a8640ef..d14add6 100644
--- a/src/plugins/tts/android/jar/src/org/qtproject/qt/android/speech/QtTextToSpeech.java
+++ b/src/plugins/tts/android/jar/src/org/qtproject/qt/android/speech/QtTextToSpeech.java
@@ -5,6 +5,7 @@ package org.qtproject.qt.android.speech;
import android.content.ContentResolver;
import android.content.Context;
+import android.media.AudioFormat;
import android.provider.Settings;
import android.provider.Settings.SettingNotFoundException;
import android.speech.tts.TextToSpeech;
@@ -15,6 +16,7 @@ import android.os.Build;
import android.os.Bundle;
import android.util.Log;
import java.lang.Float;
+import java.io.File;
import java.util.Locale;
import java.util.List;
import java.util.ArrayList;
@@ -23,12 +25,16 @@ import java.util.Set;
public class QtTextToSpeech
{
private static final String UTTERANCE_ID = "UtteranceId";
+ private static final String SYNTHESIZE_ID = "SynthesizeId";
// Native callback functions
native public void notifyError(long id, long reason);
native public void notifyReady(long id);
native public void notifySpeaking(long id);
native public void notifyRangeStart(long id, int start, int end, int frame);
+ native public void notifyBeginSynthesis(long id, int sampleRateInHz, int audioFormat, int channelCount);
+ native public void notifyAudioAvailable(long id, byte[] bytes);
+ native public void notifyEndSynthesis(long id);
private TextToSpeech mTts;
private final long mId;
@@ -62,6 +68,8 @@ public class QtTextToSpeech
Log.d(utteranceTAG, "onDone");
if (utteranceId.equals(UTTERANCE_ID)) {
notifyReady(mId);
+ } else if (utteranceId.equals(SYNTHESIZE_ID)) {
+ notifyEndSynthesis(mId);
}
}
@@ -96,6 +104,36 @@ public class QtTextToSpeech
notifyRangeStart(mId, start, end, frame);
}
}
+
+ @Override
+ public void onBeginSynthesis(String utteranceId, int sampleRateInHz, int audioFormat, int channelCount) {
+ Log.d(utteranceTAG, "onBeginSynthesis");
+ if (utteranceId.equals(SYNTHESIZE_ID)) {
+ switch (audioFormat) {
+ case AudioFormat.ENCODING_PCM_8BIT:
+ audioFormat = 1; // QAudioFormat::UInt8
+ break;
+ case AudioFormat.ENCODING_PCM_16BIT:
+ audioFormat = 2; // QAudioFormat::Int16;
+ break;
+ case AudioFormat.ENCODING_PCM_FLOAT:
+ audioFormat = 4; // QAudioFormat::Float;
+ break;
+ default:
+ audioFormat = 0; // QAudioFormat::Unknown;
+ }
+
+ notifyBeginSynthesis(mId, sampleRateInHz, audioFormat, channelCount);
+ }
+ }
+
+ @Override
+ public void onAudioAvailable(String utteranceId, byte[] bytes) {
+ Log.d(utteranceTAG, "onAudioAvailable");
+ if (utteranceId.equals(SYNTHESIZE_ID)) {
+ notifyAudioAvailable(mId, bytes);
+ }
+ }
};
QtTextToSpeech(final Context context, final long id, String engine) {
@@ -139,6 +177,22 @@ public class QtTextToSpeech
notifyError(mId, 3); // QTextToSpeech::ErrorReason::Input
}
+ public int synthesize(String text)
+ {
+ Log.d(TAG, "TTS synthesize(): " + text);
+ int result = -1;
+
+ Bundle params = new Bundle();
+ params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, mVolume);
+ File file = new File("/dev/null");
+ result = mTts.synthesizeToFile(text, params, file, SYNTHESIZE_ID);
+
+ Log.d(TAG, "TTS synthesize() result: " + Integer.toString(result));
+ if (result == TextToSpeech.ERROR)
+ notifyError(mId, 3); // QTextToSpeech::ErrorReason::Input
+ return -1;
+ }
+
public void stop()
{
Log.d(TAG, "Stopping TTS");
diff --git a/src/plugins/tts/android/src/CMakeLists.txt b/src/plugins/tts/android/src/CMakeLists.txt
index 6ba9172..538a435 100644
--- a/src/plugins/tts/android/src/CMakeLists.txt
+++ b/src/plugins/tts/android/src/CMakeLists.txt
@@ -11,6 +11,7 @@ qt_internal_add_plugin(QTextToSpeechEngineAndroid
Qt::Core
Qt::CorePrivate
Qt::TextToSpeech
+ Qt::Multimedia
)
add_dependencies(QTextToSpeechEngineAndroid QtAndroidTextToSpeech)
diff --git a/src/plugins/tts/android/src/android_plugin.json b/src/plugins/tts/android/src/android_plugin.json
index 099e0a8..d478826 100644
--- a/src/plugins/tts/android/src/android_plugin.json
+++ b/src/plugins/tts/android/src/android_plugin.json
@@ -5,6 +5,7 @@
"Priority": 100,
"Capabilities": [
"Speak",
- "WordByWordProgress"
+ "WordByWordProgress",
+ "Synthesize"
]
}
diff --git a/src/plugins/tts/android/src/qtexttospeech_android.cpp b/src/plugins/tts/android/src/qtexttospeech_android.cpp
index b627385..3c2eae8 100644
--- a/src/plugins/tts/android/src/qtexttospeech_android.cpp
+++ b/src/plugins/tts/android/src/qtexttospeech_android.cpp
@@ -67,6 +67,58 @@ static void notifyRangeStart(JNIEnv *env, jobject thiz, jlong id, jint start, ji
}
Q_DECLARE_JNI_NATIVE_METHOD(notifyRangeStart)
+static void notifyBeginSynthesis(JNIEnv *env, jobject thiz, jlong id, int sampleRateInHz, int audioFormat, int channelCount)
+{
+ Q_UNUSED(env);
+ Q_UNUSED(thiz);
+
+ QTextToSpeechEngineAndroid *const tts = (*textToSpeechMap)[id];
+ if (!tts)
+ return;
+
+ QAudioFormat format;
+ format.setSampleRate(sampleRateInHz);
+ format.setSampleFormat(QAudioFormat::SampleFormat(audioFormat));
+ format.setChannelCount(channelCount);
+
+ QMetaObject::invokeMethod(tts, "processNotifyBeginSynthesis", Qt::AutoConnection,
+ Q_ARG(QAudioFormat, format));
+}
+Q_DECLARE_JNI_NATIVE_METHOD(notifyBeginSynthesis)
+
+static void notifyAudioAvailable(JNIEnv *env, jobject thiz, jlong id, jbyteArray bytes)
+{
+ Q_UNUSED(thiz);
+
+ QTextToSpeechEngineAndroid *const tts = (*textToSpeechMap)[id];
+ if (!tts)
+ return;
+
+ const auto sz = env->GetArrayLength(bytes);
+ QByteArray byteArray(sz, Qt::Initialization::Uninitialized);
+ env->GetByteArrayRegion(bytes, 0, sz, reinterpret_cast<jbyte *>(byteArray.data()));
+
+ QMetaObject::invokeMethod(tts, "processNotifyAudioAvailable", Qt::AutoConnection,
+ Q_ARG(QByteArray, byteArray));
+}
+Q_DECLARE_JNI_NATIVE_METHOD(notifyAudioAvailable)
+
+static void notifyEndSynthesis(JNIEnv *env, jobject thiz, jlong id)
+{
+ Q_UNUSED(env);
+ Q_UNUSED(thiz);
+
+ QTextToSpeechEngineAndroid *const tts = (*textToSpeechMap)[id];
+ if (!tts)
+ return;
+
+ // Queued so that pending processNotifyAudioAvailable
+ // invocations get processed first.
+ QMetaObject::invokeMethod(tts, "processNotifyReady", Qt::QueuedConnection);
+}
+Q_DECLARE_JNI_NATIVE_METHOD(notifyEndSynthesis)
+
+
Q_DECL_EXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void */*reserved*/)
{
static bool initialized = false;
@@ -95,6 +147,9 @@ Q_DECL_EXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void */*reserved*/)
Q_JNI_NATIVE_METHOD(notifyReady),
Q_JNI_NATIVE_METHOD(notifySpeaking),
Q_JNI_NATIVE_METHOD(notifyRangeStart),
+ Q_JNI_NATIVE_METHOD(notifyBeginSynthesis),
+ Q_JNI_NATIVE_METHOD(notifyAudioAvailable),
+ Q_JNI_NATIVE_METHOD(notifyEndSynthesis),
})) {
return JNI_ERR;
}
@@ -134,6 +189,28 @@ void QTextToSpeechEngineAndroid::say(const QString &text)
m_speech.callMethod<void>("say", QJniObject::fromString(m_text).object<jstring>());
}
+void QTextToSpeechEngineAndroid::synthesize(const QString &text)
+{
+ if (text.isEmpty())
+ return;
+
+ m_errorReason = QTextToSpeech::ErrorReason::NoError;
+ m_text = text;
+ m_speech.callMethod<int>("synthesize", QJniObject::fromString(m_text).object<jstring>());
+}
+
+void QTextToSpeechEngineAndroid::processNotifyBeginSynthesis(const QAudioFormat &format)
+{
+ m_format = format;
+ setState(QTextToSpeech::Synthesizing);
+}
+
+void QTextToSpeechEngineAndroid::processNotifyAudioAvailable(const QByteArray &bytes)
+{
+ Q_ASSERT(m_format.isValid());
+ emit synthesized(m_format, bytes);
+}
+
QTextToSpeech::State QTextToSpeechEngineAndroid::state() const
{
return m_state;
@@ -179,6 +256,8 @@ void QTextToSpeechEngineAndroid::setError(QTextToSpeech::ErrorReason reason, con
void QTextToSpeechEngineAndroid::processNotifyReady()
{
+ if (m_state == QTextToSpeech::Synthesizing)
+ m_format = {};
if (m_state != QTextToSpeech::Paused)
setState(QTextToSpeech::Ready);
}
@@ -232,6 +311,7 @@ void QTextToSpeechEngineAndroid::stop(QTextToSpeech::BoundaryHint boundaryHint)
m_speech.callMethod<void>("stop");
setState(QTextToSpeech::Ready);
+ m_format = {};
}
void QTextToSpeechEngineAndroid::pause(QTextToSpeech::BoundaryHint boundaryHint)
diff --git a/src/plugins/tts/android/src/qtexttospeech_android.h b/src/plugins/tts/android/src/qtexttospeech_android.h
index 9569a8f..c86ffb1 100644
--- a/src/plugins/tts/android/src/qtexttospeech_android.h
+++ b/src/plugins/tts/android/src/qtexttospeech_android.h
@@ -26,6 +26,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
@@ -48,6 +49,8 @@ public Q_SLOTS:
void processNotifyError(int reason);
void processNotifySpeaking();
void processNotifyRangeStart(int start, int end, int frame);
+ void processNotifyBeginSynthesis(const QAudioFormat &format);
+ void processNotifyAudioAvailable(const QByteArray &bytes);
private:
void setState(QTextToSpeech::State state);
@@ -59,6 +62,7 @@ private:
QTextToSpeech::ErrorReason m_errorReason = QTextToSpeech::ErrorReason::Initialization;
QString m_errorString;
QString m_text;
+ QAudioFormat m_format;
};
Q_DECLARE_JNI_CLASS(QtTextToSpeech, "org/qtproject/qt/android/speech/QtTextToSpeech")
diff --git a/src/plugins/tts/darwin/CMakeLists.txt b/src/plugins/tts/darwin/CMakeLists.txt
index 9c2cf2f..c16e8f0 100644
--- a/src/plugins/tts/darwin/CMakeLists.txt
+++ b/src/plugins/tts/darwin/CMakeLists.txt
@@ -13,5 +13,6 @@ qt_internal_add_plugin(QTextToSpeechDarwinPlugin
Qt::Core
Qt::Gui
Qt::TextToSpeech
+ Qt::Multimedia
${FWAVFoundation}
)
diff --git a/src/plugins/tts/darwin/darwin_plugin.json b/src/plugins/tts/darwin/darwin_plugin.json
index 52e0087..98b428a 100644
--- a/src/plugins/tts/darwin/darwin_plugin.json
+++ b/src/plugins/tts/darwin/darwin_plugin.json
@@ -5,6 +5,7 @@
"Priority": 100,
"Capabilities": [
"Speak",
- "WordByWordProgress"
+ "WordByWordProgress",
+ "Synthesize"
]
}
diff --git a/src/plugins/tts/darwin/qtexttospeech_darwin.h b/src/plugins/tts/darwin/qtexttospeech_darwin.h
index ab6a713..cb07732 100644
--- a/src/plugins/tts/darwin/qtexttospeech_darwin.h
+++ b/src/plugins/tts/darwin/qtexttospeech_darwin.h
@@ -10,6 +10,7 @@
Q_FORWARD_DECLARE_OBJC_CLASS(AVSpeechSynthesizer);
Q_FORWARD_DECLARE_OBJC_CLASS(AVSpeechSynthesisVoice);
+Q_FORWARD_DECLARE_OBJC_CLASS(AVSpeechUtterance);
QT_BEGIN_NAMESPACE
@@ -24,6 +25,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
@@ -48,12 +50,14 @@ private:
AVSpeechSynthesisVoice *fromQVoice(const QVoice &voice) const;
QVoice toQVoice(AVSpeechSynthesisVoice *avVoice) const;
void setError(QTextToSpeech::ErrorReason reason, const QString &string);
+ AVSpeechUtterance *prepareUtterance(const QString &text);
AVSpeechSynthesizer *m_speechSynthesizer;
QVoice m_voice;
QTextToSpeech::State m_state = QTextToSpeech::Error;
QTextToSpeech::ErrorReason m_errorReason = QTextToSpeech::ErrorReason::Initialization;
QString m_errorString;
+ QAudioFormat m_format;
double m_pitch = 0.0;
double m_actualPitch = 1.0;
diff --git a/src/plugins/tts/darwin/qtexttospeech_darwin.mm b/src/plugins/tts/darwin/qtexttospeech_darwin.mm
index ca24165..8446687 100644
--- a/src/plugins/tts/darwin/qtexttospeech_darwin.mm
+++ b/src/plugins/tts/darwin/qtexttospeech_darwin.mm
@@ -6,6 +6,7 @@
#include "qtexttospeech_darwin.h"
#include <QtCore/QCoreApplication>
+#include <QtMultimedia/QAudioFormat>
@interface QDarwinSpeechSynthesizerDelegate : NSObject <AVSpeechSynthesizerDelegate>
@end
@@ -112,10 +113,8 @@ QTextToSpeechEngineDarwin::~QTextToSpeechEngineDarwin()
[m_speechSynthesizer release];
}
-void QTextToSpeechEngineDarwin::say(const QString &text)
+AVSpeechUtterance *QTextToSpeechEngineDarwin::prepareUtterance(const QString &text)
{
- stop(QTextToSpeech::BoundaryHint::Default);
-
// Qt pitch: [-1.0, 1.0], 0 is normal
// AVF range: [0.5, 2.0], 1.0 is normal
const double desiredPitch = 1.0 + (m_pitch >= 0 ? m_pitch : (m_pitch * 0.5));
@@ -158,9 +157,79 @@ void QTextToSpeechEngineDarwin::say(const QString &text)
utterance.volume = m_volume;
utterance.voice = fromQVoice(m_voice);
+ return utterance;
+}
+
+void QTextToSpeechEngineDarwin::say(const QString &text)
+{
+ stop(QTextToSpeech::BoundaryHint::Default);
+
+ AVSpeechUtterance *utterance = prepareUtterance(text);
[m_speechSynthesizer speakUtterance:utterance];
}
+void QTextToSpeechEngineDarwin::synthesize(const QString &text)
+{
+ AVSpeechUtterance *utterance = prepareUtterance(text);
+ m_format = {};
+
+ const auto bufferCallback = ^(AVAudioBuffer *buffer){
+ setState(QTextToSpeech::Synthesizing);
+
+ if (!m_format.isValid()) {
+ const AVAudioFormat *format = buffer.format;
+ if (format.channelCount == 1)
+ m_format.setChannelConfig(QAudioFormat::ChannelConfigMono);
+ else
+ m_format.setChannelCount(format.channelCount);
+ m_format.setSampleRate(format.sampleRate);
+ m_format.setSampleFormat([&format]{
+ switch (format.commonFormat) {
+ case AVAudioPCMFormatFloat32:
+ return QAudioFormat::Float;
+ case AVAudioPCMFormatInt16:
+ return QAudioFormat::Int16;
+ case AVAudioPCMFormatInt32:
+ return QAudioFormat::Int32;
+ case AVAudioPCMFormatFloat64:
+ return QAudioFormat::Unknown;
+ case AVAudioOtherFormat: {
+ const id bitKey = format.settings[@"AVLinearPCMBitDepthKey"];
+ const id isFloatKey = format.settings[@"AVLinearPCMIsFloatKey"];
+ if ([isFloatKey isEqual:@(YES)]) {
+ if ([bitKey isEqual:@(32)])
+ return QAudioFormat::Float;
+ } else if ([bitKey isEqual:@(8)]) {
+ return QAudioFormat::UInt8;
+ } else if ([bitKey isEqual:@(16)]) {
+ return QAudioFormat::Int16;
+ } else if ([bitKey isEqual:@(32)]) {
+ return QAudioFormat::Int32;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return QAudioFormat::Unknown;
+ }());
+ if (!m_format.isValid())
+ qWarning() << "Audio arrived with invalid format:" << format.settings;
+ }
+
+ const AudioBufferList *bufferList = buffer.audioBufferList;
+ for (UInt32 i = 0; i < bufferList->mNumberBuffers; ++i) {
+ const AudioBuffer &buffer = bufferList->mBuffers[i];
+ // we expect all buffers to have the same number of channels
+ if (int(buffer.mNumberChannels) != m_format.channelCount())
+ continue;
+ emit synthesized(m_format, QByteArray::fromRawData(static_cast<const char *>(buffer.mData), buffer.mDataByteSize));
+ }
+ };
+ [m_speechSynthesizer writeUtterance:utterance
+ toBufferCallback:bufferCallback];
+}
+
void QTextToSpeechEngineDarwin::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
@@ -321,6 +390,8 @@ void QTextToSpeechEngineDarwin::setState(QTextToSpeech::State state)
return;
m_state = state;
+ if (m_state == QTextToSpeech::Ready)
+ m_format = {};
emit stateChanged(m_state);
}
diff --git a/src/plugins/tts/flite/flite_plugin.json b/src/plugins/tts/flite/flite_plugin.json
index 902f5d3..b9fd6ef 100644
--- a/src/plugins/tts/flite/flite_plugin.json
+++ b/src/plugins/tts/flite/flite_plugin.json
@@ -5,6 +5,7 @@
"Priority": 50,
"Capabilities": [
"Speak",
- "WordByWordProgress"
+ "WordByWordProgress",
+ "Synthesize"
]
}
diff --git a/src/plugins/tts/flite/qtexttospeech_flite.cpp b/src/plugins/tts/flite/qtexttospeech_flite.cpp
index 76c28ed..5925aea 100644
--- a/src/plugins/tts/flite/qtexttospeech_flite.cpp
+++ b/src/plugins/tts/flite/qtexttospeech_flite.cpp
@@ -33,6 +33,8 @@ QTextToSpeechEngineFlite::QTextToSpeechEngineFlite(const QVariantMap &parameters
&QTextToSpeechEngineFlite::setError);
connect(m_processor.get(), &QTextToSpeechProcessorFlite::sayingWord, this,
&QTextToSpeechEngine::sayingWord);
+ connect(m_processor.get(), &QTextToSpeechProcessorFlite::synthesized, this,
+ &QTextToSpeechEngine::synthesized);
// Read voices from processor before moving it to a separate thread
const QList<QTextToSpeechProcessorFlite::VoiceInfo> voices = m_processor->voices();
@@ -83,6 +85,13 @@ void QTextToSpeechEngineFlite::say(const QString &text)
Q_ARG(double, rate()), Q_ARG(double, volume()));
}
+void QTextToSpeechEngineFlite::synthesize(const QString &text)
+{
+ QMetaObject::invokeMethod(m_processor.get(), "synthesize", Qt::QueuedConnection, Q_ARG(QString, text),
+ Q_ARG(int, voiceData(voice()).toInt()), Q_ARG(double, pitch()),
+ Q_ARG(double, rate()), Q_ARG(double, volume()));
+}
+
void QTextToSpeechEngineFlite::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
diff --git a/src/plugins/tts/flite/qtexttospeech_flite.h b/src/plugins/tts/flite/qtexttospeech_flite.h
index a9f8522..5e1c094 100644
--- a/src/plugins/tts/flite/qtexttospeech_flite.h
+++ b/src/plugins/tts/flite/qtexttospeech_flite.h
@@ -27,6 +27,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
diff --git a/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp b/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp
index b45a8d8..3c0d974 100644
--- a/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp
+++ b/src/plugins/tts/flite/qtexttospeech_flite_processor.cpp
@@ -41,8 +41,8 @@ void QTextToSpeechProcessorFlite::startTokenTimer()
m_tokenTimer.start(qMax(token.startTime - playedTime, 0), Qt::PreciseTimer, this);
}
-int QTextToSpeechProcessorFlite::fliteOutputCb(const cst_wave *w, int start, int size,
- int last, cst_audio_streaming_info *asi)
+int QTextToSpeechProcessorFlite::audioOutputCb(const cst_wave *w, int start, int size,
+ int last, cst_audio_streaming_info *asi)
{
QTextToSpeechProcessorFlite *processor = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata);
if (processor) {
@@ -72,13 +72,13 @@ int QTextToSpeechProcessorFlite::fliteOutputCb(const cst_wave *w, int start, int
}
asi->item = item_next(asi->item);
}
- return processor->fliteOutput(w, start, size, last, asi);
+ return processor->audioOutput(w, start, size, last, asi);
}
return CST_AUDIO_STREAM_STOP;
}
-int QTextToSpeechProcessorFlite::fliteOutput(const cst_wave *w, int start, int size,
- int last, cst_audio_streaming_info *asi)
+int QTextToSpeechProcessorFlite::audioOutput(const cst_wave *w, int start, int size,
+ int last, cst_audio_streaming_info *asi)
{
Q_UNUSED(asi);
Q_ASSERT(QThread::currentThread() == thread());
@@ -87,14 +87,19 @@ int QTextToSpeechProcessorFlite::fliteOutput(const cst_wave *w, int start, int s
if (start == 0 && !initAudio(w->sample_rate, w->num_channels))
return CST_AUDIO_STREAM_STOP;
- int bytesToWrite = size * sizeof(short);
- QString errorString;
- if (!audioOutput((const char *)(&w->samples[start]), bytesToWrite, errorString)) {
- setError(QTextToSpeech::ErrorReason::Playback, errorString);
+ const qsizetype bytesToWrite = size * sizeof(short);
+
+ if (!m_audioBuffer->write(reinterpret_cast<const char *>(&w->samples[start]), bytesToWrite)) {
+ setError(QTextToSpeech::ErrorReason::Playback,
+ QCoreApplication::translate("QTextToSpeech", "Audio streaming error."));
stop();
return CST_AUDIO_STREAM_STOP;
}
+ // Stats for debugging
+ ++numberChunks;
+ totalBytes += bytesToWrite;
+
if (last == 1) {
qCDebug(lcSpeechTtsFlite) << "last data chunk written";
m_audioBuffer->close();
@@ -102,6 +107,41 @@ int QTextToSpeechProcessorFlite::fliteOutput(const cst_wave *w, int start, int s
return CST_AUDIO_STREAM_CONT;
}
+int QTextToSpeechProcessorFlite::dataOutputCb(const cst_wave *w, int start, int size,
+ int last, cst_audio_streaming_info *asi)
+{
+ QTextToSpeechProcessorFlite *processor = static_cast<QTextToSpeechProcessorFlite *>(asi->userdata);
+ if (processor)
+ return processor->dataOutput(w, start, size, last, asi);
+ return CST_AUDIO_STREAM_STOP;
+}
+
+int QTextToSpeechProcessorFlite::dataOutput(const cst_wave *w, int start, int size,
+ int last, cst_audio_streaming_info *)
+{
+ if (start == 0)
+ emit stateChanged(QTextToSpeech::Synthesizing);
+
+ QAudioFormat format;
+ if (w->num_channels == 1)
+ format.setChannelConfig(QAudioFormat::ChannelConfigMono);
+ else
+ format.setChannelCount(w->num_channels);
+ format.setSampleRate(w->sample_rate);
+ format.setSampleFormat(QAudioFormat::Int16);
+
+ if (!format.isValid())
+ return CST_AUDIO_STREAM_STOP;
+
+ const qsizetype bytesToWrite = size * format.bytesPerSample();
+ emit synthesized(format, QByteArray(reinterpret_cast<const char *>(&w->samples[start]), bytesToWrite));
+
+ if (last == 1)
+ emit stateChanged(QTextToSpeech::Ready);
+
+ return CST_AUDIO_STREAM_CONT;
+}
+
void QTextToSpeechProcessorFlite::timerEvent(QTimerEvent *event)
{
if (event->timerId() != m_tokenTimer.timerId()) {
@@ -121,22 +161,7 @@ void QTextToSpeechProcessorFlite::timerEvent(QTimerEvent *event)
startTokenTimer();
}
-bool QTextToSpeechProcessorFlite::audioOutput(const char *data, qint64 dataSize, QString &errorString)
-{
- // Send data
- if (!m_audioBuffer->write(data, dataSize)) {
- errorString = QCoreApplication::translate("QTextToSpeech", "Audio streaming error.");
- return false;
- }
-
- // Stats for debugging
- ++numberChunks;
- totalBytes += dataSize;
-
- return true;
-}
-
-void QTextToSpeechProcessorFlite::processText(const QString &text, int voiceId, double pitch, double rate)
+void QTextToSpeechProcessorFlite::processText(const QString &text, int voiceId, double pitch, double rate, OutputHandler outputHandler)
{
qCDebug(lcSpeechTtsFlite) << "processText() begin";
if (!checkVoice(voiceId))
@@ -150,7 +175,7 @@ void QTextToSpeechProcessorFlite::processText(const QString &text, int voiceId,
const VoiceInfo &voiceInfo = m_voices.at(voiceId);
cst_voice *voice = voiceInfo.vox;
cst_audio_streaming_info *asi = new_audio_streaming_info();
- asi->asc = QTextToSpeechProcessorFlite::fliteOutputCb;
+ asi->asc = outputHandler;
asi->userdata = (void *)this;
feat_set(voice->features, "streaming_info", audio_streaming_info_val(asi));
setRateForVoice(voice, rate);
@@ -493,7 +518,19 @@ void QTextToSpeechProcessorFlite::say(const QString &text, int voiceId, double p
return;
m_volume = volume;
- processText(text, voiceId, pitch, rate);
+ processText(text, voiceId, pitch, rate, QTextToSpeechProcessorFlite::audioOutputCb);
+}
+
+void QTextToSpeechProcessorFlite::synthesize(const QString &text, int voiceId, double pitch, double rate, double volume)
+{
+ if (text.isEmpty())
+ return;
+
+ if (!checkVoice(voiceId))
+ return;
+
+ m_volume = volume;
+ processText(text, voiceId, pitch, rate, QTextToSpeechProcessorFlite::dataOutputCb);
}
QT_END_NAMESPACE
diff --git a/src/plugins/tts/flite/qtexttospeech_flite_processor.h b/src/plugins/tts/flite/qtexttospeech_flite_processor.h
index 8a54b44..0da6987 100644
--- a/src/plugins/tts/flite/qtexttospeech_flite_processor.h
+++ b/src/plugins/tts/flite/qtexttospeech_flite_processor.h
@@ -44,6 +44,7 @@ public:
};
Q_INVOKABLE void say(const QString &text, int voiceId, double pitch, double rate, double volume);
+ Q_INVOKABLE void synthesize(const QString &text, int voiceId, double pitch, double rate, double volume);
Q_INVOKABLE void pause();
Q_INVOKABLE void resume();
Q_INVOKABLE void stop();
@@ -52,16 +53,18 @@ public:
static constexpr QTextToSpeech::State audioStateToTts(QAudio::State audioState);
private:
- // Process a single text
- void processText(const QString &text, int voiceId, double pitch, double rate);
-
// Flite callbacks
- static int fliteOutputCb(const cst_wave *w, int start, int size,
+ static int audioOutputCb(const cst_wave *w, int start, int size,
+ int last, cst_audio_streaming_info *asi);
+ static int dataOutputCb(const cst_wave *w, int start, int size,
int last, cst_audio_streaming_info *asi);
- int fliteOutput(const cst_wave *w, int start, int size,
- int last, cst_audio_streaming_info *asi);
- bool audioOutput(const char *data, qint64 dataSize, QString &errorString);
+ using OutputHandler = decltype(QTextToSpeechProcessorFlite::audioOutputCb);
+ // Process a single text
+ void processText(const QString &text, int voiceId, double pitch, double rate, OutputHandler outputHandler);
+ int audioOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
+ int dataOutput(const cst_wave *w, int start, int size, int last, cst_audio_streaming_info *asi);
+
void setRateForVoice(cst_voice *voice, float rate);
void setPitchForVoice(cst_voice *voice, float pitch);
@@ -85,6 +88,7 @@ Q_SIGNALS:
void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString);
void stateChanged(QTextToSpeech::State);
void sayingWord(qsizetype begin, qsizetype length);
+ void synthesized(const QAudioFormat &format, const QByteArray &array);
protected:
void timerEvent(QTimerEvent *event) override;
diff --git a/src/plugins/tts/macos/CMakeLists.txt b/src/plugins/tts/macos/CMakeLists.txt
index ccf66d6..3e7cd70 100644
--- a/src/plugins/tts/macos/CMakeLists.txt
+++ b/src/plugins/tts/macos/CMakeLists.txt
@@ -12,4 +12,5 @@ qt_internal_add_plugin(QTextToSpeechMacOSPlugin
Qt::Core
Qt::Gui
Qt::TextToSpeech
+ Qt::Multimedia
)
diff --git a/src/plugins/tts/macos/qtexttospeech_macos.h b/src/plugins/tts/macos/qtexttospeech_macos.h
index 231351b..6b707c0 100644
--- a/src/plugins/tts/macos/qtexttospeech_macos.h
+++ b/src/plugins/tts/macos/qtexttospeech_macos.h
@@ -30,6 +30,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
diff --git a/src/plugins/tts/macos/qtexttospeech_macos.mm b/src/plugins/tts/macos/qtexttospeech_macos.mm
index b08296a..bedb414 100644
--- a/src/plugins/tts/macos/qtexttospeech_macos.mm
+++ b/src/plugins/tts/macos/qtexttospeech_macos.mm
@@ -150,6 +150,11 @@ void QTextToSpeechEngineMacOS::say(const QString &text)
speaking();
}
+void QTextToSpeechEngineMacOS::synthesize(const QString &)
+{
+ setError(QTextToSpeech::ErrorReason::Configuration, tr("Synthesize not supported"));
+}
+
void QTextToSpeechEngineMacOS::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
if (speechSynthesizer.isSpeaking || m_state == QTextToSpeech::Paused) {
diff --git a/src/plugins/tts/mock/mock_plugin.json b/src/plugins/tts/mock/mock_plugin.json
index 43c0d9e..7785016 100644
--- a/src/plugins/tts/mock/mock_plugin.json
+++ b/src/plugins/tts/mock/mock_plugin.json
@@ -5,6 +5,7 @@
"Priority": -1,
"Capabilities": [
"Speak",
+ "Synthesize",
"WordByWordProgress"
]
}
diff --git a/src/plugins/tts/mock/qtexttospeech_mock.cpp b/src/plugins/tts/mock/qtexttospeech_mock.cpp
index d3b8967..1118f50 100644
--- a/src/plugins/tts/mock/qtexttospeech_mock.cpp
+++ b/src/plugins/tts/mock/qtexttospeech_mock.cpp
@@ -69,6 +69,19 @@ void QTextToSpeechEngineMock::say(const QString &text)
emit stateChanged(m_state);
}
+void QTextToSpeechEngineMock::synthesize(const QString &text)
+{
+ m_text = text;
+ m_currentIndex = 0;
+ m_timer.start(wordTime(), Qt::PreciseTimer, this);
+ m_state = QTextToSpeech::Synthesizing;
+ emit stateChanged(m_state);
+
+ m_format.setSampleRate(22050);
+ m_format.setChannelConfig(QAudioFormat::ChannelConfigMono);
+ m_format.setSampleFormat(QAudioFormat::Int16);
+}
+
void QTextToSpeechEngineMock::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
@@ -112,7 +125,7 @@ void QTextToSpeechEngineMock::timerEvent(QTimerEvent *e)
return;
}
- Q_ASSERT(m_state == QTextToSpeech::Speaking);
+ Q_ASSERT(m_state == QTextToSpeech::Speaking || m_state == QTextToSpeech::Synthesizing);
Q_ASSERT(m_text.length());
// Find start of next word, skipping punctuations. This is good enough for testing.
@@ -124,6 +137,8 @@ void QTextToSpeechEngineMock::timerEvent(QTimerEvent *e)
sayingWord(m_currentIndex, nextSpace - m_currentIndex);
m_currentIndex = nextSpace + match.captured().length();
+ emit synthesized(m_format, QByteArray(m_format.bytesForDuration(wordTime() * 1000), 0));
+
if (m_currentIndex >= m_text.length()) {
// done speaking all words
m_timer.stop();
diff --git a/src/plugins/tts/mock/qtexttospeech_mock.h b/src/plugins/tts/mock/qtexttospeech_mock.h
index 0511416..dece5f4 100644
--- a/src/plugins/tts/mock/qtexttospeech_mock.h
+++ b/src/plugins/tts/mock/qtexttospeech_mock.h
@@ -21,6 +21,7 @@ public:
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
@@ -59,6 +60,7 @@ private:
QString m_errorString;
bool m_pauseRequested = false;
qsizetype m_currentIndex = -1;
+ QAudioFormat m_format;
};
QT_END_NAMESPACE
diff --git a/src/plugins/tts/sapi/qtexttospeech_sapi.cpp b/src/plugins/tts/sapi/qtexttospeech_sapi.cpp
index 00b2e84..0e92547 100644
--- a/src/plugins/tts/sapi/qtexttospeech_sapi.cpp
+++ b/src/plugins/tts/sapi/qtexttospeech_sapi.cpp
@@ -22,6 +22,7 @@ QT_BEGIN_NAMESPACE
#ifdef Q_CC_MINGW // from sphelper.h
static const GUID CLSD_SpVoice = {0x96749377, 0x3391, 0x11d2,{0x9e, 0xe3, 0x0, 0xc0, 0x4f, 0x79, 0x73, 0x96}};
+const GUID SPDFID_WaveFormatEx = {0xC31ADBAE, 0x527F, 0x4ff5,{0xA2, 0x30, 0xF6, 0x2B, 0xB6, 0x1F, 0xF7, 0x0C}};
static inline HRESULT SpGetTokenFromId(const WCHAR *pszTokenId, ISpObjectToken **cpToken, BOOL fCreateIfNotExist = FALSE)
{
@@ -54,7 +55,6 @@ inline void SpClearEvent(SPEVENT *pe)
break;
}
}
-
#endif // Q_CC_MINGW
QTextToSpeechEngineSapi::QTextToSpeechEngineSapi(const QVariantMap &, QObject *)
@@ -117,6 +117,123 @@ void QTextToSpeechEngineSapi::say(const QString &text)
QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure."));
}
+void QTextToSpeechEngineSapi::synthesize(const QString &text)
+{
+ class OutputStream : public ISpStreamFormat
+ {
+ ULONG m_ref = 1;
+ qint64 m_pos = 0;
+ qint64 m_length = 0;
+ QTextToSpeechEngineSapi *m_engine = nullptr;
+ QAudioFormat m_format;
+
+ public:
+ OutputStream(QTextToSpeechEngineSapi *engine)
+ : m_engine(engine)
+ {
+ m_format.setChannelConfig(QAudioFormat::ChannelConfigMono);
+ m_format.setSampleRate(16000);
+ m_format.setSampleFormat(QAudioFormat::Int16);
+ }
+ virtual ~OutputStream() = default;
+
+ // IUnknown
+ ULONG AddRef() override { return ++m_ref; }
+ ULONG Release() override {
+ if (!--m_ref) {
+ delete this;
+ return 0;
+ }
+ return m_ref;
+ }
+
+ HRESULT QueryInterface(REFIID riid, VOID **ppvInterface) override
+ {
+ if (!ppvInterface)
+ return E_POINTER;
+
+ if (riid == __uuidof(IUnknown)) {
+ *ppvInterface = static_cast<IUnknown*>(this);
+ } else if (riid == __uuidof(IStream)) {
+ *ppvInterface = static_cast<IStream *>(this);
+ } else if (riid == __uuidof(ISpStreamFormat)) {
+ *ppvInterface = static_cast<ISpStreamFormat *>(this);
+ } else {
+ *ppvInterface = nullptr;
+ return E_NOINTERFACE;
+ }
+ AddRef();
+ return S_OK;
+ }
+
+ // IStream
+ HRESULT Read(void *,ULONG,ULONG *) override { return E_NOTIMPL; }
+ HRESULT Write(const void *pv,ULONG cb,ULONG *pcbWritten) override
+ {
+ emit m_engine->synthesized(m_format, QByteArray(static_cast<const char *>(pv), cb));
+ *pcbWritten = cb;
+ return S_OK;
+ }
+ HRESULT Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin, ULARGE_INTEGER *plibNewPosition) override
+ {
+ qint64 move = dlibMove.QuadPart;
+ switch (dwOrigin) {
+ case STREAM_SEEK_SET:
+ m_pos = move;
+ break;
+ case STREAM_SEEK_CUR:
+ m_pos += move;
+ break;
+ case STREAM_SEEK_END:
+ m_pos = m_length + move;
+ break;
+ }
+ (*plibNewPosition).QuadPart = m_pos;
+ return S_OK;
+ }
+ HRESULT SetSize(ULARGE_INTEGER) override { return E_NOTIMPL; }
+ HRESULT CopyTo(IStream *,ULARGE_INTEGER,ULARGE_INTEGER *,ULARGE_INTEGER *) override { return E_NOTIMPL; }
+ HRESULT Commit(DWORD) override { return E_NOTIMPL; }
+ HRESULT Revert(void) override { return E_NOTIMPL; }
+ HRESULT LockRegion(ULARGE_INTEGER,ULARGE_INTEGER,DWORD) override { return E_NOTIMPL; }
+ HRESULT UnlockRegion(ULARGE_INTEGER,ULARGE_INTEGER,DWORD) override { return E_NOTIMPL; }
+ HRESULT Stat(STATSTG *,DWORD) override { return E_NOTIMPL; }
+ HRESULT Clone(IStream **) override { return E_NOTIMPL; }
+
+ // ISpStreamFormat
+ HRESULT GetFormat(GUID *pguidFormatId,WAVEFORMATEX **ppCoMemWaveFormatEx) override
+ {
+ *pguidFormatId = SPDFID_WaveFormatEx;
+ WAVEFORMATEX *format = static_cast<WAVEFORMATEX *>(CoTaskMemAlloc(sizeof(WAVEFORMATEX)));
+ format->wFormatTag = WAVE_FORMAT_PCM;
+ format->nChannels = m_format.channelCount();
+ format->nSamplesPerSec = m_format.sampleRate();
+ format->wBitsPerSample = m_format.bytesPerSample() * 8;
+ format->nBlockAlign = format->nChannels * format->wBitsPerSample / 8;
+ format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
+ format->cbSize = 0; // amount of extra format information
+
+ *ppCoMemWaveFormatEx = format;
+ return S_OK;
+ }
+ };
+
+ if (text.isEmpty())
+ return;
+
+ currentText = text;
+ const QString prefix = u"<pitch absmiddle=\"%1\"/>"_qs.arg(m_pitch * 10);
+ textOffset = prefix.length();
+ currentText.prepend(prefix);
+
+ OutputStream *outputStream = new OutputStream(this);
+ m_voice->SetOutput(outputStream, false);
+ HRESULT hr = m_voice->Speak(currentText.toStdWString().data(), SPF_ASYNC, NULL);
+ if (!SUCCEEDED(hr))
+ setError(QTextToSpeech::ErrorReason::Input,
+ QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure."));
+}
+
void QTextToSpeechEngineSapi::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
diff --git a/src/plugins/tts/sapi/qtexttospeech_sapi.h b/src/plugins/tts/sapi/qtexttospeech_sapi.h
index 6b139d2..dd0cea5 100644
--- a/src/plugins/tts/sapi/qtexttospeech_sapi.h
+++ b/src/plugins/tts/sapi/qtexttospeech_sapi.h
@@ -29,6 +29,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
@@ -47,6 +48,7 @@ public:
QString errorString() const override;
HRESULT STDMETHODCALLTYPE NotifyCallback(WPARAM /*wParam*/, LPARAM /*lParam*/) override;
+ friend class OutputStream;
private:
bool isSpeaking() const;
diff --git a/src/plugins/tts/sapi/sapi_plugin.json b/src/plugins/tts/sapi/sapi_plugin.json
index c16b87f..95de0bf 100644
--- a/src/plugins/tts/sapi/sapi_plugin.json
+++ b/src/plugins/tts/sapi/sapi_plugin.json
@@ -5,6 +5,7 @@
"Priority": 50,
"Capabilities": [
"Speak",
- "WordByWordProgress"
+ "WordByWordProgress",
+ "Synthesize"
]
}
diff --git a/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.cpp b/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.cpp
index 3026d39..f93ea1c 100644
--- a/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.cpp
+++ b/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.cpp
@@ -151,6 +151,11 @@ void QTextToSpeechEngineSpeechd::say(const QString &text)
QCoreApplication::translate("QTextToSpeech", "Text synthesizing failure."));
}
+void QTextToSpeechEngineSpeechd::synthesize(const QString &)
+{
+ setError(QTextToSpeech::ErrorReason::Configuration, tr("Synthesize not supported"));
+}
+
void QTextToSpeechEngineSpeechd::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
diff --git a/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.h b/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.h
index 1860e9d..b9aaf9f 100644
--- a/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.h
+++ b/src/plugins/tts/speechdispatcher/qtexttospeech_speechd.h
@@ -28,6 +28,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt.cpp b/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
index b98ffe8..fdbaad0 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
@@ -472,6 +472,51 @@ void QTextToSpeechEngineWinRT::say(const QString &text)
});
}
+void QTextToSpeechEngineWinRT::synthesize(const QString &text)
+{
+ Q_D(QTextToSpeechEngineWinRT);
+
+ HRESULT hr = S_OK;
+
+ HStringReference nativeText(reinterpret_cast<LPCWSTR>(text.utf16()), text.length());
+
+ ComPtr<IAsyncOperation<SpeechSynthesisStream*>> synthOperation;
+ hr = d->synth->SynthesizeTextToStreamAsync(nativeText.Get(), &synthOperation);
+ if (!SUCCEEDED(hr)) {
+ d->setError(QTextToSpeech::ErrorReason::Input,
+ QCoreApplication::translate("QTextToSpeech", "Speech synthesizing failure."));
+ return;
+ }
+
+ // The source will wait for the data resulting from the synthOperation, and emits
+ // streamReady when data is available. This starts a QAudioSink, which pulls the data.
+ d->audioSource.Attach(new AudioSource(synthOperation));
+
+ connect(d->audioSource.Get(), &AudioSource::streamReady, this, [d, this](const QAudioFormat &format){
+ if (d->state != QTextToSpeech::Synthesizing) {
+ d->state = QTextToSpeech::Synthesizing;
+ emit stateChanged(d->state);
+ }
+ });
+ connect(d->audioSource.Get(), &AudioSource::readyRead, this, [d, this](){
+ Q_ASSERT(d->state == QTextToSpeech::Synthesizing);
+ const QByteArray data = d->audioSource->read(d->audioSource->bytesAvailable());
+ emit synthesized(d->audioSource->format(), data);
+ if (d->audioSource->atEnd())
+ d->audioSource->close();
+ });
+ connect(d->audioSource.Get(), &AudioSource::aboutToClose, this, [d, this]{
+ if (d->state != QTextToSpeech::Ready) {
+ d->state = QTextToSpeech::Ready;
+ emit stateChanged(d->state);
+ }
+ });
+ connect(d->audioSource.Get(), &AudioSource::errorInStream, this, [d]{
+ d->setError(QTextToSpeech::ErrorReason::Input,
+ QCoreApplication::translate("QTextToSpeech", "Error synthesizing audio stream."));
+ });
+}
+
void QTextToSpeechEngineWinRT::stop(QTextToSpeech::BoundaryHint boundaryHint)
{
Q_UNUSED(boundaryHint);
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt.h b/src/plugins/tts/winrt/qtexttospeech_winrt.h
index 39cdeab..045f8e0 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt.h
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt.h
@@ -28,6 +28,7 @@ public:
QList<QLocale> availableLocales() const override;
QList<QVoice> availableVoices() const override;
void say(const QString &text) override;
+ void synthesize(const QString &text) override;
void stop(QTextToSpeech::BoundaryHint boundaryHint) override;
void pause(QTextToSpeech::BoundaryHint boundaryHint) override;
void resume() override;
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
index b66a78a..a680ffe 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
@@ -173,7 +173,7 @@ qint64 AudioSource::readData(char *data, qint64 maxlen)
bool AudioSource::atEnd() const
{
// not done as long as QIODevice's buffer is not empty
- if (!QIODevice::atEnd())
+ if (!QIODevice::atEnd() && QIODevice::bytesAvailable())
return false;
// If we get here, bytesAvailable() has returned 0, so our buffers are
@@ -279,8 +279,8 @@ HRESULT AudioSource::Invoke(IAsyncOperation<SpeechSynthesisStream*> *operation,
// we are buffered, but we don't want QIODevice to buffer as well
open(QIODevice::ReadOnly|QIODevice::Unbuffered);
- fetchMore();
emit streamReady(audioFormat);
+ fetchMore();
return S_OK;
}
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
index 489b265..a9d5ebf 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
@@ -43,6 +43,8 @@ public:
bool atEnd() const override;
qint64 bytesAvailable() const override;
+ QAudioFormat format() const { return audioFormat; }
+
enum PauseState {
NoPause,
PauseRequested,
diff --git a/src/plugins/tts/winrt/winrt_plugin.json b/src/plugins/tts/winrt/winrt_plugin.json
index 5e4104b..f47e3f6 100644
--- a/src/plugins/tts/winrt/winrt_plugin.json
+++ b/src/plugins/tts/winrt/winrt_plugin.json
@@ -5,6 +5,7 @@
"Priority": 80,
"Capabilities": [
"Speak",
- "WordByWordProgress"
+ "WordByWordProgress",
+ "Synthesize"
]
}
diff --git a/src/tts/CMakeLists.txt b/src/tts/CMakeLists.txt
index 71e3d11..b8e92ea 100644
--- a/src/tts/CMakeLists.txt
+++ b/src/tts/CMakeLists.txt
@@ -16,6 +16,7 @@ qt_internal_add_module(TextToSpeech
Qt::QmlIntegration
PUBLIC_LIBRARIES
Qt::Core
+ Qt::Multimedia
PRIVATE_MODULE_INTERFACE
Qt::CorePrivate
)
diff --git a/src/tts/qtexttospeech.cpp b/src/tts/qtexttospeech.cpp
index e6599b7..29d2fd3 100644
--- a/src/tts/qtexttospeech.cpp
+++ b/src/tts/qtexttospeech.cpp
@@ -70,9 +70,13 @@ void QTextToSpeechPrivate::setEngineProvider(const QString &engine, const QVaria
// Connect signals directly from the engine to the public API signals
if (m_engine) {
- QObject::connect(m_engine, &QTextToSpeechEngine::stateChanged, q, &QTextToSpeech::stateChanged);
+ QObject::connect(m_engine, &QTextToSpeechEngine::stateChanged,
+ q, [this](QTextToSpeech::State newState){
+ updateState(newState);
+ });
QObject::connect(m_engine, &QTextToSpeechEngine::errorOccurred, q, &QTextToSpeech::errorOccurred);
QObject::connect(m_engine, &QTextToSpeechEngine::sayingWord, q, &QTextToSpeech::sayingWord);
+ QObject::connect(m_engine, &QTextToSpeechEngine::synthesized, q, &QTextToSpeech::synthesized);
}
}
@@ -139,6 +143,19 @@ void QTextToSpeechPrivate::loadPluginMetadata(QMultiHash<QString, QCborMap> &lis
}
}
+void QTextToSpeechPrivate::updateState(QTextToSpeech::State newState)
+{
+ Q_Q(QTextToSpeech);
+ if (newState == QTextToSpeech::Ready && m_slotObject) {
+ // If we are done synthesizing and the functor-overload was used,
+ // clear the temporary connection.
+ m_slotObject->destroyIfLastRef();
+ m_slotObject = nullptr;
+ m_engine->disconnect(m_synthesizeConnection);
+ }
+ emit q->stateChanged(newState);
+}
+
/*!
\class QTextToSpeech
\brief The QTextToSpeech class provides a convenient access to text-to-speech engines.
@@ -209,11 +226,13 @@ void QTextToSpeechPrivate::loadPluginMetadata(QMultiHash<QString, QCborMap> &lis
\brief This enum describes the current state of the text-to-speech engine.
- \value Ready The synthesizer is ready to start a new text. This is
- also the state after a text was finished.
- \value Speaking Text is being spoken.
- \value Paused The synthesis was paused and can be resumed with \l resume().
- \value Error An error has occurred. Details are given by \l errorReason().
+ \value Ready The synthesizer is ready to start a new text. This is
+ also the state after a text was finished.
+ \value Speaking Text is being spoken.
+ \value Synthesizing Text is being synthesized into PCM data. The synthesized()
+ signal will be emitted with chunks of data.
+ \value Paused The synthesis was paused and can be resumed with \l resume().
+ \value Error An error has occurred. Details are given by \l errorReason().
\sa QTextToSpeech::ErrorReason errorReason() errorString()
*/
@@ -382,6 +401,8 @@ QString QTextToSpeech::engine() const
\value Speak The engine can play audio output from text.
\value WordByWordProgress The engine emits the sayingWord() signal for
each word that gets spoken.
+ \value Synthesize The engine can \l{synthesize()}{synthesize} PCM
+ audio data from text.
\sa engineCapabilities()
*/
@@ -585,7 +606,7 @@ QString QTextToSpeech::errorString() const
*/
/*!
- Starts synthesizing the \a text.
+ Starts speaking the \a text.
This function starts sythesizing the speech asynchronously, and reads the text to the
default audio output device.
@@ -599,7 +620,7 @@ QString QTextToSpeech::errorString() const
set to \l Speaking once the reading starts. When the reading is done,
\l state will be set to \l Ready.
- \sa stop(), pause(), resume()
+ \sa stop(), pause(), resume(), synthesize()
*/
void QTextToSpeech::say(const QString &text)
{
@@ -609,6 +630,114 @@ void QTextToSpeech::say(const QString &text)
}
/*!
+ Synthesizes the \a text into raw audio data.
+ \since 6.6
+
+ This function synthesizes the speech asynchronously into raw audio data.
+ When data is available, the \l synthesized() signal is emitted with the
+ bytes, and the \l {QAudioFormat}{format} that the data is in.
+
+ The \l state property is set to \l Synthesizing when the synthesis starts,
+ and to \l Ready once the synthesis is finished. While synthesizing, the
+ synthesized() signal might be emitted multiple times, possibly with
+ changing values for \c format.
+
+ \sa say(), stop()
+*/
+void QTextToSpeech::synthesize(const QString &text)
+{
+ Q_D(QTextToSpeech);
+ if (d->m_engine)
+ d->m_engine->synthesize(text);
+}
+
+/*!
+ \fn template<typename Functor> void QTextToSpeech::synthesize(
+ const QString &text, Functor functor)
+ \fn template<typename Functor> void QTextToSpeech::synthesize(
+ const QString &text, const QObject *context, Functor functor)
+ \since 6.6
+
+ Synthesizes the \a text into raw audio data.
+
+ This function synthesizes the speech asynchronously into raw audio data.
+ When data is available, the \a functor will be called as
+ \c {functor(const QAudioFormat &format, const QByteArray &bytes)}, with
+ \c format describing the \l {QAudioFormat}{format} of the data in \c bytes.
+
+ The \l state property is set to \l Synthesizing when the synthesis starts,
+ and to \l Ready once the synthesis is finished. While synthesizing, the
+ \a functor might be called multiple times, possibly with changing values
+ for \c format.
+
+ The \a functor can be a callable, like a lambda or free function, with an
+ optional \a context object:
+
+ \code
+ tts.synthesize("Hello world", [](const QAudioFormat &format, const QByteArray &bytes){
+ // process data according to format
+ });
+ \endcode
+
+ or a slot in the \a context object:
+
+ \code
+ struct PCMProcessor : QObject
+ {
+ void processData(const QAudioFormat &format, const QByteArray &bytes)
+ {
+ // process data according to format
+ }
+ } processor;
+ tts.synthesize("Hello world", &processor, &PCMProcessor::processData);
+ \endcode
+
+ If \a context is destroyed, then the \a functor will no longer get called.
+
+ \note This API requires that the engine has the
+ \l {QTextToSpeech::Capability::}{Synthesize} capability.
+
+ \sa say(), stop()
+*/
+
+/*!
+ \internal
+
+ Handles the engine's synthesized() signal to call \a slotObj on the \a context
+ object. The slot object and the temporary connection are stored and released
+ in updateState() when the state of the engine transitions back to Ready.
+*/
+void QTextToSpeech::synthesizeImpl(const QString &text,
+ QtPrivate::QSlotObjectBase *slotObj, const QObject *context)
+{
+ Q_D(QTextToSpeech);
+ Q_ASSERT(slotObj);
+ d->m_slotObject = slotObj;
+ const auto receive = [d, context](const QAudioFormat &format, const QByteArray &bytes){
+ Q_ASSERT(d->m_slotObject);
+ void *args[] = {nullptr,
+ const_cast<QAudioFormat *>(&format),
+ const_cast<QByteArray *>(&bytes)};
+ d->m_slotObject->call(const_cast<QObject *>(context), args);
+ };
+ d->m_synthesizeConnection = connect(d->m_engine, &QTextToSpeechEngine::synthesized,
+ context ? context : this, receive);
+ synthesize(text);
+}
+
+/*!
+ \fn void QTextToSpeech::synthesized(const QAudioFormat &format, const QByteArray &data)
+
+ This signal is emitted when PCM \a data is available. The data is encoded in \a format.
+ A single call to \l synthesize() might result in several emissions of this signal.
+
+ \note This signal requires that the engine has the
+ \l {QTextToSpeech::Capability::}{Synthesize} capability.
+
+ \sa synthesize()
+*/
+
+/*!
\qmlmethod TextToSpeech::stop(BoundaryHint boundaryHint)
Stops the current reading at \a boundaryHint.
diff --git a/src/tts/qtexttospeech.h b/src/tts/qtexttospeech.h
index e98d615..3d78e0a 100644
--- a/src/tts/qtexttospeech.h
+++ b/src/tts/qtexttospeech.h
@@ -16,6 +16,8 @@
QT_BEGIN_NAMESPACE
+class QAudioFormat;
+
class QTextToSpeechPrivate;
class Q_TEXTTOSPEECH_EXPORT QTextToSpeech : public QObject
{
@@ -29,12 +31,14 @@ class Q_TEXTTOSPEECH_EXPORT QTextToSpeech : public QObject
Q_PROPERTY(QVoice voice READ voice WRITE setVoice NOTIFY voiceChanged)
Q_PROPERTY(Capabilities engineCapabilities READ engineCapabilities NOTIFY engineChanged)
Q_DECLARE_PRIVATE(QTextToSpeech)
+
public:
enum State {
Ready,
Speaking,
Paused,
- Error
+ Error,
+ Synthesizing
};
Q_ENUM(State)
@@ -59,6 +63,7 @@ public:
None = 0,
Speak = 1 << 0,
WordByWordProgress = 1 << 1,
+ Synthesize = 1 << 2,
};
Q_DECLARE_FLAGS(Capabilities, Capability)
Q_FLAG(Capabilities)
@@ -89,8 +94,71 @@ public:
Q_INVOKABLE static QStringList availableEngines();
+# ifdef Q_QDOC
+ template <typename Functor>
+ void synthesize(const QString &text, Functor functor);
+ template <typename Functor>
+ void synthesize(const QString &text, const QObject *context, Functor functor);
+# else
+ template <typename Slot> // synthesize to a QObject member function
+ void synthesize(const QString &text,
+ const typename QtPrivate::FunctionPointer<Slot>::Object *receiver, Slot slot)
+ {
+ using CallbackSignature = QtPrivate::FunctionPointer<void (*)(QAudioFormat, QByteArray)>;
+ using SlotSignature = QtPrivate::FunctionPointer<Slot>;
+
+ static_assert(int(SlotSignature::ArgumentCount) <= int(CallbackSignature::ArgumentCount),
+ "Slot requires more arguments than what can be provided.");
+ static_assert((QtPrivate::CheckCompatibleArguments<typename CallbackSignature::Arguments,
+ typename SlotSignature::Arguments>::value),
+ "Slot arguments are not compatible (must be QAudioFormat, QByteArray)");
+
+ auto slotObj = new QtPrivate::QSlotObject<Slot, typename SlotSignature::Arguments, void>(slot);
+ synthesizeImpl(text, slotObj, receiver);
+ }
+
+ // synthesize to a functor or function pointer (with context)
+ template <typename Func, std::enable_if_t<
+ !QtPrivate::FunctionPointer<Func>::IsPointerToMemberFunction
+ && !std::is_same<const char *, Func>::value, bool> = true>
+ void synthesize(const QString &text, const QObject *context, Func func)
+ {
+ using CallbackSignature = QtPrivate::FunctionPointer<void (*)(QAudioFormat, QByteArray)>;
+ constexpr int MatchingArgumentCount = QtPrivate::ComputeFunctorArgumentCount<
+ Func, CallbackSignature::Arguments>::Value;
+
+ static_assert(MatchingArgumentCount == 0
+ || MatchingArgumentCount == CallbackSignature::ArgumentCount,
+ "Functor arguments are not compatible (must be QAudioFormat, QByteArray)");
+
+ QtPrivate::QSlotObjectBase *slotObj = nullptr;
+ if constexpr (MatchingArgumentCount == CallbackSignature::ArgumentCount) {
+ slotObj = new QtPrivate::QFunctorSlotObject<Func, 2,
+ typename CallbackSignature::Arguments, void>(std::move(func));
+ } else if constexpr (MatchingArgumentCount == 1) {
+ slotObj = new QtPrivate::QFunctorSlotObject<Func, 1,
+ typename CallbackSignature::Arguments, void>(std::move(func));
+ } else {
+ slotObj = new QtPrivate::QFunctorSlotObject<Func, 0,
+ typename QtPrivate::List_Left<void, 0>::Value, void>(std::move(func));
+ }
+
+ synthesizeImpl(text, slotObj, context);
+ }
+
+ // synthesize to a functor or function pointer (without context)
+ template <typename Func, std::enable_if_t<
+ !QtPrivate::FunctionPointer<Func>::IsPointerToMemberFunction
+ && !std::is_same<const char *, Func>::value, bool> = true>
+ void synthesize(const QString &text, Func func)
+ {
+ synthesize(text, nullptr, std::move(func));
+ }
+# endif // Q_QDOC
+
public Q_SLOTS:
void say(const QString &text);
+ void synthesize(const QString &text);
void stop(QTextToSpeech::BoundaryHint boundaryHint = QTextToSpeech::BoundaryHint::Default);
void pause(QTextToSpeech::BoundaryHint boundaryHint = QTextToSpeech::BoundaryHint::Default);
void resume();
@@ -113,8 +181,12 @@ Q_SIGNALS:
void voiceChanged(const QVoice &voice);
void sayingWord(qsizetype start, qsizetype length);
+ void synthesized(const QAudioFormat &format, const QByteArray &data);
private:
+ void synthesizeImpl(const QString &text,
+ QtPrivate::QSlotObjectBase *slotObj, const QObject *context);
+
Q_DISABLE_COPY(QTextToSpeech)
};
Q_DECLARE_OPERATORS_FOR_FLAGS(QTextToSpeech::Capabilities)
diff --git a/src/tts/qtexttospeech_p.h b/src/tts/qtexttospeech_p.h
index cf5e657..6b6a93d 100644
--- a/src/tts/qtexttospeech_p.h
+++ b/src/tts/qtexttospeech_p.h
@@ -40,6 +40,7 @@ public:
private:
bool loadMeta();
void loadPlugin();
+ void updateState(QTextToSpeech::State newState);
static void loadPluginMetadata(QMultiHash<QString, QCborMap> &list);
QTextToSpeech *q_ptr;
QTextToSpeechPlugin *m_plugin = nullptr;
@@ -47,6 +48,8 @@ private:
QString m_providerName;
QCborMap m_metaData;
static QMutex m_mutex;
+ QMetaObject::Connection m_synthesizeConnection;
+ QtPrivate::QSlotObjectBase *m_slotObject = nullptr;
};
QT_END_NAMESPACE
diff --git a/src/tts/qtexttospeechengine.h b/src/tts/qtexttospeechengine.h
index 9fdb87b..2fc1825 100644
--- a/src/tts/qtexttospeechengine.h
+++ b/src/tts/qtexttospeechengine.h
@@ -9,9 +9,12 @@
#include <QtCore/QObject>
#include <QtCore/QLocale>
#include <QtCore/QDir>
+#include <QtMultimedia/QAudioFormat>
QT_BEGIN_NAMESPACE
+class QAudioFormat;
+
class Q_TEXTTOSPEECH_EXPORT QTextToSpeechEngine : public QObject
{
Q_OBJECT
@@ -24,6 +27,9 @@ public:
virtual QList<QVoice> availableVoices() const = 0;
virtual void say(const QString &text) = 0;
+ virtual void synthesize(const QString &text) {
+ Q_UNUSED(text);
+ };
virtual void stop(QTextToSpeech::BoundaryHint boundaryHint) = 0;
virtual void pause(QTextToSpeech::BoundaryHint boundaryHint) = 0;
virtual void resume() = 0;
@@ -52,6 +58,7 @@ Q_SIGNALS:
void errorOccurred(QTextToSpeech::ErrorReason error, const QString &errorString);
void sayingWord(qsizetype start, qsizetype length);
+ void synthesized(const QAudioFormat &format, const QByteArray &data);
};
QT_END_NAMESPACE
diff --git a/tests/auto/qtexttospeech/tst_qtexttospeech.cpp b/tests/auto/qtexttospeech/tst_qtexttospeech.cpp
index 09c4f22..fd355cf 100644
--- a/tests/auto/qtexttospeech/tst_qtexttospeech.cpp
+++ b/tests/auto/qtexttospeech/tst_qtexttospeech.cpp
@@ -6,6 +6,7 @@
#include <QTextToSpeech>
#include <QSignalSpy>
#include <QMediaDevices>
+#include <QAudioFormat>
#include <QAudioDevice>
#include <QOperatingSystemVersion>
#include <QRegularExpression>
@@ -53,6 +54,12 @@ private slots:
void sayingWordWithPause_data();
void sayingWordWithPause();
+ void synthesize_data();
+ void synthesize();
+
+ void synthesizeCallback_data();
+ void synthesizeCallback();
+
private:
static bool hasDefaultAudioOutput()
{
@@ -74,6 +81,13 @@ private:
}
}
}
+
+ void onError(QTextToSpeech::ErrorReason error, const QString &errorString) {
+ errorReason = error;
+ qCritical() << "Error:" << errorString;
+ }
+
+ QTextToSpeech::ErrorReason errorReason = QTextToSpeech::ErrorReason::NoError;
};
void tst_QTextToSpeech::initTestCase_data()
@@ -601,5 +615,175 @@ void tst_QTextToSpeech::sayingWordWithPause()
debugHelper.dismiss();
}
+void tst_QTextToSpeech::synthesize_data()
+{
+ QTest::addColumn<QString>("text");
+
+ QTest::addRow("text") << "Let's synthesize some text!";
+}
+
+void tst_QTextToSpeech::synthesize()
+{
+ QFETCH_GLOBAL(QString, engine);
+ if (engine != "mock" && !hasDefaultAudioOutput())
+ QSKIP("No audio device present");
+ if (engine == "android" && QOperatingSystemVersion::current() < QOperatingSystemVersion::Android10)
+ QSKIP("Only testing on recent Android versions");
+
+ QFETCH(QString, text);
+
+ QTextToSpeech tts(engine);
+ if (!(tts.engineCapabilities() & QTextToSpeech::Capability::Synthesize))
+ QSKIP("This engine doesn't support synthesize()");
+
+ connect(&tts, &QTextToSpeech::errorOccurred, this, &tst_QTextToSpeech::onError);
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ selectWorkingVoice(&tts);
+
+ QElapsedTimer speechTimer;
+ // We can't assume that synthesis isn't done before we can check, and that we only
+ // have a single change during an event loop cycle, so connect to the signal
+ // and keep track ourselves.
+ bool running = false;
+ bool finished = false;
+ qint64 speechTime = 0;
+ connect(&tts, &QTextToSpeech::stateChanged, [&running, &finished, &speechTimer, &speechTime](QTextToSpeech::State state) {
+ if (state == QTextToSpeech::Synthesizing || state == QTextToSpeech::Speaking) {
+ speechTimer.start();
+ running = true;
+ finished = false;
+ }
+ if (running && state == QTextToSpeech::Ready) {
+ if (!speechTime)
+ speechTime = speechTimer.elapsed();
+ finished = true;
+ }
+ });
+
+ // first, measure how long it takes to speak the text
+ tts.say(text);
+ QTRY_VERIFY(running);
+ QTRY_VERIFY(finished);
+
+ running = false;
+
+ QAudioFormat pcmFormat;
+ QByteArray pcmData;
+
+ connect(&tts, &QTextToSpeech::synthesized,
+ this, [&pcmFormat, &pcmData](const QAudioFormat &format, const QByteArray &bytes) {
+ pcmFormat = format;
+ pcmData += bytes;
+ });
+
+ QElapsedTimer notBlockingTimer;
+ notBlockingTimer.start();
+ tts.synthesize(text);
+ QCOMPARE_LT(notBlockingTimer.elapsed(), 250);
+ QTRY_VERIFY(running);
+ QTRY_VERIFY(finished);
+
+ QVERIFY(pcmFormat.isValid());
+ // bytesForDuration takes micro seconds, we measured in milliseconds.
+ const qint32 bytesExpected = pcmFormat.bytesForDuration(speechTime * 1000);
+
+ // We should have as much data as the format requires for the time it took
+ // to play the speech, +/- 10% as we can't measure the exact audio duration.
+ QCOMPARE_GE(pcmData.size(), double(bytesExpected) * 0.9);
+ if (engine == "flite") // flite is very unreliable
+ QCOMPARE_LT(pcmData.size(), double(bytesExpected) * 1.5);
+ else
+ QCOMPARE_LT(pcmData.size(), double(bytesExpected) * 1.1);
+}
+
+/*!
+ API test for the functor variants of synthesize(), using only the mock
+ engine as the engine implementation is identical to the non-functor
+ version tested above.
+*/
+void tst_QTextToSpeech::synthesizeCallback_data()
+{
+ QTest::addColumn<QString>("text");
+
+ QTest::addRow("one") << "test";
+ QTest::addRow("several") << "this will produce more than one chunk.";
+}
+
+void tst_QTextToSpeech::synthesizeCallback()
+{
+ QFETCH_GLOBAL(QString, engine);
+ if (engine != "mock")
+ QSKIP("Only testing with mock engine");
+
+ QTextToSpeech tts(engine);
+ QVERIFY(tts.engineCapabilities() & QTextToSpeech::Capability::Synthesize);
+
+ QFETCH(QString, text);
+
+ QAudioFormat expectedFormat;
+ QByteArray expectedBytes;
+
+ // record a reference using the already tested synthesized() signal
+ auto connection = connect(&tts, &QTextToSpeech::synthesized,
+ [&expectedFormat, &expectedBytes](const QAudioFormat &format, const QByteArray &bytes){
+ expectedFormat = format;
+ expectedBytes += bytes;
+ });
+ tts.synthesize(text);
+ QTRY_VERIFY(expectedFormat.isValid());
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ tts.disconnect(connection);
+
+ struct Processor : QObject {
+ void process(const QAudioFormat &format, const QByteArray &bytes)
+ {
+ m_format = format;
+ m_allBytes += bytes;
+ }
+ void audioFormatKnown(const QAudioFormat &format)
+ {
+ m_format = format;
+ }
+ void reset()
+ {
+ m_format = {};
+ m_allBytes = {};
+ }
+ QAudioFormat m_format;
+ QByteArray m_allBytes;
+ } processor;
+
+ // Functor without context
+ tts.synthesize(text, [&processor](const QAudioFormat &format, const QByteArray &bytes){
+ processor.m_format = format;
+ processor.m_allBytes += bytes;
+ });
+ QTRY_COMPARE(processor.m_format, expectedFormat);
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ QCOMPARE(processor.m_allBytes, expectedBytes);
+ processor.reset();
+ // Functor with context
+ tts.synthesize(text, &tts, [&processor](const QAudioFormat &format, const QByteArray &bytes){
+ processor.m_format = format;
+ processor.m_allBytes += bytes;
+ });
+ QTRY_COMPARE(processor.m_format, expectedFormat);
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ QCOMPARE(processor.m_allBytes, expectedBytes);
+ processor.reset();
+ // PMF
+ tts.synthesize(text, &processor, &Processor::process);
+ QTRY_COMPARE(processor.m_format, expectedFormat);
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ QCOMPARE(processor.m_allBytes, expectedBytes);
+ processor.reset();
+ // PMF with no QByteArray argument - not very useful, but Qt allows it
+ tts.synthesize(text, &processor, &Processor::audioFormatKnown);
+ QTRY_COMPARE(processor.m_format, expectedFormat);
+ QTRY_COMPARE(tts.state(), QTextToSpeech::Ready);
+ QCOMPARE(processor.m_allBytes, QByteArray());
+ processor.reset();
+}
+
QTEST_MAIN(tst_QTextToSpeech)
#include "tst_qtexttospeech.moc"