WinRT: implement boundary hint support in pause

Look for the next boundary of the matching type; each boundary stores its start time. Convert that time to the corresponding byte offset using QAudioFormat, then cut off the delivery of data once that byte is reached. For an immediate pause, suspend the sink right away. The default is unchanged: we search for silence in the current chunk of data. Implementing boundary support for stop() might come later. Change-Id: I2b4cf06dbf8e2c55321a5364d2366c255d9e9afe Reviewed-by: Axel Spoerl <axel.spoerl@qt.io> (cherry picked from commit 5e4bc89c7606bf958f3b55d09c872dcf360e9c56) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
author: Volker Hilsheimer <volker.hilsheimer@qt.io> 2023-11-10 00:39:03 +0200
committer: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org> 2023-11-10 07:35:50 +0000
commit: b1adf1831209e67fff696bcc8d6896687586afea (patch)
tree: b68262de539ec24469260bb32da8b240655dbf52
parent: 20fd7025cbf2f6372f4dbc3183652882ec75542c (diff)
3 files changed, 59 insertions, 5 deletions
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt.cpp b/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
index 9cb0b5d..d6357ac 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt.cpp
@@ -547,11 +547,41 @@ void QTextToSpeechEngineWinRT::stop(QTextToSpeech::BoundaryHint boundaryHint)
 
 void QTextToSpeechEngineWinRT::pause(QTextToSpeech::BoundaryHint boundaryHint)
 {
-    Q_UNUSED(boundaryHint);
     Q_D(QTextToSpeechEngineWinRT);
 
-    if (d->audioSource)
-        d->audioSource->pause();
+    if (!d->audioSource)
+        return;
+
+    auto pauseBoundaryType = AudioSource::Boundary::Unknown;
+    switch (boundaryHint) {
+    case QTextToSpeech::BoundaryHint::Default:
+        d->audioSource->pause(0);
+        return;
+    case QTextToSpeech::BoundaryHint::Immediate:
+        d->audioSource->pause(0);
+        if (d->audioSink)
+            d->audioSink->suspend();
+        return;
+    case QTextToSpeech::BoundaryHint::Word:
+        pauseBoundaryType = AudioSource::Boundary::Word;
+        break;
+    case QTextToSpeech::BoundaryHint::Sentence:
+        pauseBoundaryType = AudioSource::Boundary::Sentence;
+        break;
+    case QTextToSpeech::BoundaryHint::Utterance:
+        // taken care of by engine-independent implementation
+        return;
+    }
+
+    // find the next boundary of the matching type
+    const auto nextBoundary = std::find_if(d->currentBoundary + 1, d->boundaries.constEnd(),
+                                            [pauseBoundaryType](auto &&it){
+        return it.type == pauseBoundaryType;
+    });
+    if (nextBoundary != d->boundaries.constEnd()) {
+        d->audioSource->pause(d->audioSource->format()
+                                        .bytesForDuration(nextBoundary->startTime));
+    }
 }
 
 void QTextToSpeechEngineWinRT::resume()
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
index e71f112..28245c3 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.cpp
@@ -136,7 +136,23 @@ qint64 AudioSource::readData(char *data, qint64 maxlen)
         break;
     case PauseRequested: {
         Q_ASSERT(audioFormat.sampleFormat() == QAudioFormat::Int16);
-        // we are dealing with artificially created sound, so we don't have
+
+        if (m_pauseRequestedAt) {
+            if (m_pauseRequestedAt <= m_bytesRead) {
+                // we missed the window, pause immediately
+                maxlen = 0;
+            } else if (m_pauseRequestedAt <= m_bytesRead + maxlen) {
+                maxlen = qMax(quint64(0), m_pauseRequestedAt - m_bytesRead) + 44;
+            } else {
+                // wait for the next chunk
+                break;
+            }
+            m_pause = Paused;
+            m_pauseRequestedAt = 0;
+            break;
+        }
+        // If no byte to pause at is specified, look for silence in the current
+        // chunk. We are dealing with artificially created sound, so we don't have
         // to find a large enough window with overall low energy; we can just
         // look for a series (e.g. 1/20th of a second) of samples with value 0.
         const int silenceDuration = audioFormat.sampleRate() / 20;
@@ -162,6 +178,9 @@ qint64 AudioSource::readData(char *data, qint64 maxlen)
                 silenceCount = 0;
             }
         }
+        // no silence found - stop after this chunk
+        if (m_pause != Paused)
+            m_pause = Paused;
         break;
     }
     case Paused:
@@ -181,6 +200,7 @@ qint64 AudioSource::readData(char *data, qint64 maxlen)
     else
         m_bufferOffset += maxlen;
 
+    m_bytesRead += maxlen;
     return maxlen;
 }
 
diff --git a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
index 79fdf16..da6e901 100644
--- a/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
+++ b/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
@@ -51,14 +51,16 @@ public:
         Paused
     } m_pause = NoPause;
 
-    void pause()
+    void pause(quint64 atByte)
     {
         m_pause = PauseRequested;
+        m_pauseRequestedAt = atByte;
     }
 
     void resume()
     {
         m_pause = NoPause;
+        m_pauseRequestedAt = 0;
         if (bytesAvailable())
             emit readyRead();
     }
@@ -126,6 +128,8 @@ private:
     UINT32 m_bufferOffset = 0;
     // RIFF header has been checked at the beginning of the stream
     bool m_riffHeaderChecked = false;
+    quint64 m_bytesRead = 0;
+    quint64 m_pauseRequestedAt = 0;
 
     void populateBoundaries();
     QList<Boundary> boundaries;
author	Volker Hilsheimer <volker.hilsheimer@qt.io>	2023-11-10 00:39:03 +0200
committer	Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>	2023-11-10 07:35:50 +0000
commit	b1adf1831209e67fff696bcc8d6896687586afea (patch)
tree	b68262de539ec24469260bb32da8b240655dbf52
parent	20fd7025cbf2f6372f4dbc3183652882ec75542c (diff)