diff options
author | Volker Hilsheimer <volker.hilsheimer@qt.io> | 2023-01-18 15:04:41 +0100 |
---|---|---|
committer | Volker Hilsheimer <volker.hilsheimer@qt.io> | 2023-02-19 19:36:36 +0100 |
commit | c03afcc297bf250baff8d0693e4db0c8cc77eeed (patch) | |
tree | 257c98299d9c94a3c998b13a5ef19a7d5acd1b78 /tests | |
parent | ea5c48e518789c3387ed9c9d21978eda122e9782 (diff) |
Add QTextToSpeech::synthesize to produce PCM data rather than audio
The function starts the synthesis as an asynchronous process, and
emits a signal 'synthesized()' (or calls a functor) with a chunk of
PCM data as a QByteArray, and the QAudioFormat in which the data is
encoded.
This makes Qt Multimedia a dependency of Qt Speech on all
platforms; so far it has been required only by the flite and winrt
backends.
Implemented for all engines except the speechd and macos engines, where
it's not possible: these engines don't provide access to the data.
The test case verifies that the implementation is asynchronous, and
that it produces a reasonable amount of data. Since this involves
timer-based measurements, values need to be compared with some
appropriate margins.
The QML documentation of this API is omitted on purpose; the
QAudioFormat type is not available in QML, and we don't want to
encourage users to operate on raw bytes from QML anyway.
[ChangeLog][QtTextToSpeech][QTextToSpeech] Added the ability to
produce PCM data as a QByteArray. The QtTextToSpeech module now
depends on QtMultimedia on all platforms.
Fixes: QTBUG-109837
Change-Id: I308a3e18998827089c0f75789b720f1bd36e3c46
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/auto/qtexttospeech/tst_qtexttospeech.cpp | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/tests/auto/qtexttospeech/tst_qtexttospeech.cpp b/tests/auto/qtexttospeech/tst_qtexttospeech.cpp index 09c4f22..fd355cf 100644 --- a/tests/auto/qtexttospeech/tst_qtexttospeech.cpp +++ b/tests/auto/qtexttospeech/tst_qtexttospeech.cpp @@ -6,6 +6,7 @@ #include <QTextToSpeech> #include <QSignalSpy> #include <QMediaDevices> +#include <QAudioFormat> #include <QAudioDevice> #include <QOperatingSystemVersion> #include <QRegularExpression> @@ -53,6 +54,12 @@ private slots: void sayingWordWithPause_data(); void sayingWordWithPause(); + void synthesize_data(); + void synthesize(); + + void synthesizeCallback_data(); + void synthesizeCallback(); + private: static bool hasDefaultAudioOutput() { @@ -74,6 +81,13 @@ private: } } } + + void onError(QTextToSpeech::ErrorReason error, const QString &errorString) { + errorReason = error; + qCritical() << "Error:" << errorString; + } + + QTextToSpeech::ErrorReason errorReason = QTextToSpeech::ErrorReason::NoError; }; void tst_QTextToSpeech::initTestCase_data() @@ -601,5 +615,175 @@ void tst_QTextToSpeech::sayingWordWithPause() debugHelper.dismiss(); } +void tst_QTextToSpeech::synthesize_data() +{ + QTest::addColumn<QString>("text"); + + QTest::addRow("text") << "Let's synthesize some text!"; +} + +void tst_QTextToSpeech::synthesize() +{ + QFETCH_GLOBAL(QString, engine); + if (engine != "mock" && !hasDefaultAudioOutput()) + QSKIP("No audio device present"); + if (engine == "android" && QOperatingSystemVersion::current() < QOperatingSystemVersion::Android10) + QSKIP("Only testing on recent Android versions"); + + QFETCH(QString, text); + + QTextToSpeech tts(engine); + if (!(tts.engineCapabilities() & QTextToSpeech::Capability::Synthesize)) + QSKIP("This engine doesn't support synthesize()"); + + connect(&tts, &QTextToSpeech::errorOccurred, this, &tst_QTextToSpeech::onError); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); + selectWorkingVoice(&tts); + + QElapsedTimer speechTimer; + // We can't assume that synthesis isn't 
done before we can check, and that we only + // have a single change during an event loop cycle, so connect to the signal + // and keep track ourselves. + bool running = false; + bool finished = false; + qint64 speechTime = 0; + connect(&tts, &QTextToSpeech::stateChanged, [&running, &finished, &speechTimer, &speechTime](QTextToSpeech::State state) { + if (state == QTextToSpeech::Synthesizing || state == QTextToSpeech::Speaking) { + speechTimer.start(); + running = true; + finished = false; + } + if (running && state == QTextToSpeech::Ready) { + if (!speechTime) + speechTime = speechTimer.elapsed(); + finished = true; + } + }); + + // first, measure how long it takes to speak the text + tts.say(text); + QTRY_VERIFY(running); + QTRY_VERIFY(finished); + + running = false; + + QAudioFormat pcmFormat; + QByteArray pcmData; + + connect(&tts, &QTextToSpeech::synthesized, + this, [&pcmFormat, &pcmData](const QAudioFormat &format, const QByteArray &bytes) { + pcmFormat = format; + pcmData += bytes; + }); + + QElapsedTimer notBlockingTimer; + notBlockingTimer.start(); + tts.synthesize(text); + QCOMPARE_LT(notBlockingTimer.elapsed(), 250); + QTRY_VERIFY(running); + QTRY_VERIFY(finished); + + QVERIFY(pcmFormat.isValid()); + // bytesForDuration takes micro seconds, we measured in milliseconds. + const qint32 bytesExpected = pcmFormat.bytesForDuration(speechTime * 1000); + + // We should have as much data as the format requires for the time it took + // to play the speech, +/- 10% as we can't measure the exact audio duration. + QCOMPARE_GE(pcmData.size(), double(bytesExpected) * 0.9); + if (engine == "flite") // flite is very unreliable + QCOMPARE_LT(pcmData.size(), double(bytesExpected) * 1.5); + else + QCOMPARE_LT(pcmData.size(), double(bytesExpected) * 1.1); +} + +/*! + API test for the functor variants of synthesize(), using only the mock + engine as the engine implementation is identical to the non-functor + version tested above. 
+*/ +void tst_QTextToSpeech::synthesizeCallback_data() +{ + QTest::addColumn<QString>("text"); + + QTest::addRow("one") << "test"; + QTest::addRow("several") << "this will produce more than one chunk."; +} + +void tst_QTextToSpeech::synthesizeCallback() +{ + QFETCH_GLOBAL(QString, engine); + if (engine != "mock") + QSKIP("Only testing with mock engine"); + + QTextToSpeech tts(engine); + QVERIFY(tts.engineCapabilities() & QTextToSpeech::Capability::Synthesize); + + QFETCH(QString, text); + + QAudioFormat expectedFormat; + QByteArray expectedBytes; + + // record a reference using the already tested synthesized() signal + auto connection = connect(&tts, &QTextToSpeech::synthesized, + [&expectedFormat, &expectedBytes](const QAudioFormat &format, const QByteArray &bytes){ + expectedFormat = format; + expectedBytes += bytes; + }); + tts.synthesize(text); + QTRY_VERIFY(expectedFormat.isValid()); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); + tts.disconnect(connection); + + struct Processor : QObject { + void process(const QAudioFormat &format, const QByteArray &bytes) + { + m_format = format; + m_allBytes += bytes; + } + void audioFormatKnown(const QAudioFormat &format) + { + m_format = format; + } + void reset() + { + m_format = {}; + m_allBytes = {}; + } + QAudioFormat m_format; + QByteArray m_allBytes; + } processor; + + // Functor without context + tts.synthesize(text, [&processor](const QAudioFormat &format, const QByteArray &bytes){ + processor.m_format = format; + processor.m_allBytes += bytes; + }); + QTRY_COMPARE(processor.m_format, expectedFormat); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); + QCOMPARE(processor.m_allBytes, expectedBytes); + processor.reset(); + // Functor with context + tts.synthesize(text, &tts, [&processor](const QAudioFormat &format, const QByteArray &bytes){ + processor.m_format = format; + processor.m_allBytes += bytes; + }); + QTRY_COMPARE(processor.m_format, expectedFormat); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); 
+ QCOMPARE(processor.m_allBytes, expectedBytes); + processor.reset(); + // PMF + tts.synthesize(text, &processor, &Processor::process); + QTRY_COMPARE(processor.m_format, expectedFormat); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); + QCOMPARE(processor.m_allBytes, expectedBytes); + processor.reset(); + // PMF with no QByteArray argument - not very useful, but Qt allows it + tts.synthesize(text, &processor, &Processor::audioFormatKnown); + QTRY_COMPARE(processor.m_format, expectedFormat); + QTRY_COMPARE(tts.state(), QTextToSpeech::Ready); + QCOMPARE(processor.m_allBytes, QByteArray()); + processor.reset(); +} + QTEST_MAIN(tst_QTextToSpeech) #include "tst_qtexttospeech.moc" |