summaryrefslogtreecommitdiffstats
path: root/src/plugins/tts/winrt/qtexttospeech_winrt_audiosource.h
blob: 79fdf167a3d89ec369e35422b3e1ffd0a9924df1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Copyright (C) 2022 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only

#ifndef QTEXTTOSPEECHENGINE_WINRT_AUDIOSOURCE_H
#define QTEXTTOSPEECHENGINE_WINRT_AUDIOSOURCE_H

#include <QtCore/QIODevice>
#include <QtMultimedia/QAudioFormat>

#include <robuffer.h>
#include <winrt/base.h>
#include <QtCore/private/qfactorycacheregistration_p.h>
#include <windows.foundation.h>
#include <windows.media.speechsynthesis.h>
#include <windows.storage.streams.h>

#include <wrl.h>

using namespace ABI::Windows::Foundation;
using namespace ABI::Windows::Media::SpeechSynthesis;
using namespace ABI::Windows::Storage::Streams;
using namespace Microsoft::WRL;

QT_BEGIN_NAMESPACE

// Completion handler interface for the asynchronous operation that produces
// the SpeechSynthesisStream.
using StreamReadyHandler = IAsyncOperationCompletedHandler<SpeechSynthesisStream*>;
// Completion handler interface for an asynchronous operation with progress
// that produces an IBuffer (UINT32 is the progress type).
using BytesReadyHandler = IAsyncOperationWithProgressCompletedHandler<IBuffer*, UINT32>;

// Sequential QIODevice that gives access to the audio data of a WinRT speech
// synthesis stream.
//
// The object is at the same time the COM completion handler for the synthesis
// operation (StreamReadyHandler) and for read operations on the resulting
// stream (BytesReadyHandler). Its lifetime is governed by the IUnknown
// reference count, which starts at 1; the destructor is private so that the
// object is only destroyed through Release().
class AudioSource : public QIODevice,
                    public StreamReadyHandler,
                    public BytesReadyHandler
{
    Q_OBJECT
public:
    // synthOperation is the pending asynchronous synthesis operation
    // (presumably stored in the member of the same name - see the
    // implementation file).
    explicit AudioSource(ComPtr<IAsyncOperation<SpeechSynthesisStream*>> synthOperation);

    // The device is always reported as sequential.
    bool isSequential() const override { return true; }

    void close() override;
    qint64 readData(char *data, qint64 maxlen) override;
    // Writing is not supported: always reports zero bytes written. The
    // parameters are intentionally unnamed to avoid unused-parameter warnings.
    qint64 writeData(const char *, qint64) override { return 0; }

    bool atEnd() const override;
    qint64 bytesAvailable() const override;

    // Returns a copy of the audio format stored in this object.
    QAudioFormat format() const { return audioFormat; }

    // Pause handling. pause() only records a request; the transition to
    // Paused is not made in this header (presumably done by the read path in
    // the implementation file - TODO confirm).
    enum PauseState {
        NoPause,
        PauseRequested,
        Paused
    } m_pause = NoPause;

    void pause()
    {
        m_pause = PauseRequested;
    }

    // Clears any pause state and, if data is available, notifies readers via
    // readyRead().
    void resume()
    {
        m_pause = NoPause;
        if (bytesAvailable())
            emit readyRead();
    }

    // Describes one boundary (word or sentence) within the spoken text.
    struct Boundary {
        enum Type { Word, Sentence, Unknown } type;
        QString text;
        int beginIndex;
        int endIndex;
        qint64 startTime;
        // Boundaries are ordered by their start time only.
        friend inline bool operator<(const Boundary &lhs, const Boundary &rhs)
        {
            return lhs.startTime < rhs.startTime;
        }
    };

    // Returns a copy of the list of boundaries held by this object.
    QList<Boundary> boundaryData() const
    {
        return boundaries;
    }

    // IUnknown
    // NOTE(review): the counter is a plain ULONG, i.e. not atomic; confirm
    // that AddRef()/Release() are never called concurrently.
    ULONG STDMETHODCALLTYPE AddRef() override { return ++ref; }
    // Destroys the object when the last reference is released. The new count
    // is captured in a local so that no member is read after the decrement
    // (and in particular not after `delete this`).
    ULONG STDMETHODCALLTYPE Release() override
    {
        const ULONG remaining = --ref;
        if (remaining == 0)
            delete this;
        return remaining;
    }
    HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, VOID **ppvInterface) override;

    // completion handler for synthesising the stream
    HRESULT STDMETHODCALLTYPE Invoke(IAsyncOperation<SpeechSynthesisStream*> *operation,
                                     AsyncStatus status) override;
    // completion handler for reading from the stream
    HRESULT STDMETHODCALLTYPE Invoke(IAsyncOperationWithProgress<IBuffer*, unsigned int> *read,
                                     AsyncStatus status) override;

Q_SIGNALS:
    // Carries the audio format of the stream; presumably emitted once the
    // synthesis stream is available - see the implementation file.
    void streamReady(const QAudioFormat &format);
    // Presumably emitted when synthesising or reading the stream fails - see
    // the implementation file.
    void errorInStream();

private:
    // lifetime is controlled via IUnknown reference counting, make sure
    // we don't destroy by accident, polymorphically, or via a QObject parent
    ~AudioSource() override;

    qint64 bytesInBuffer() const;
    bool fetchMore();

    QAudioFormat audioFormat;

    // The input stream that gives access to the synthesis stream. We keep the
    // async operation so that we can cancel it if we get destroyed prematurely.
    ComPtr<IAsyncOperation<SpeechSynthesisStream*>> synthOperation;
    ComPtr<IInputStream> inputStream;
    ComPtr<IRandomAccessStream> randomAccessStream;
    // the current ReadAsync operation that yields an IBuffer
    ComPtr<IAsyncOperationWithProgress<IBuffer*, UINT32>> readOperation;
    ComPtr<IBuffer> m_buffer;
    // access to the raw pcm bytes in the IBuffer; this took much reading of Windows header files...
    ComPtr<::Windows::Storage::Streams::IBufferByteAccess> bufferByteAccess;
    // The data in the IBuffer might be partially consumed
    UINT32 m_bufferOffset = 0;
    // RIFF header has been checked at the beginning of the stream
    bool m_riffHeaderChecked = false;

    void populateBoundaries();
    QList<Boundary> boundaries;

    // IUnknown reference count; starts at 1.
    ULONG ref = 1;
};

QT_END_NAMESPACE

#endif