Add optional audio level processing

- It is much easier to interact with and configure the system if the input audio level is available. The audio level signal is provided by AlexaInterface. For performance reasons, audio level processing can be turned off by setting the boolean key "capture/process_input" to false in the QSettings file "Luxoft Sweden AB"/"AlexaApp". Change-Id: I49dfb64f6b385e9a14a135ab920babb9b4a8748d Reviewed-by: Bramastyo Harimukti Santoso <bramastyo.harimukti.santoso@pelagicore.com>
author: Egor Nemtsev <enemtsev@luxoft.com> 2019-08-20 14:23:52 +0300
committer: Egor Nemtsev <enemtsev@luxoft.com> 2019-08-22 12:01:17 +0000
commit: b41742c3f66c43c625d4ad26e3f2875f7960b12c (patch)
tree: fb67d2c4e54d1edbb80b68e3fdd16b828afbc004
parent: 8d26def6d46f2836d27c976537425b7978dc45bb (diff)
4 files changed, 275 insertions, 9 deletions
diff --git a/plugins/alexainterface/AlexaInterface.cpp b/plugins/alexainterface/AlexaInterface.cpp
index 6577177..ab4c95a 100644
--- a/plugins/alexainterface/AlexaInterface.cpp
+++ b/plugins/alexainterface/AlexaInterface.cpp
@@ -910,13 +910,19 @@ bool AlexaInterface::initialize(
     QSettings settings(QStringLiteral("Luxoft Sweden AB"), QStringLiteral("AlexaApp"));
     QString captureDeviceName = settings.value(QStringLiteral("capture/device_name"),
                                                QStringLiteral("default")).toString();
-    std::shared_ptr<QtMicrophoneWrapper> m_micWrapper = QtMicrophoneWrapper::create(sharedDataStream,
+    m_micWrapper = QtMicrophoneWrapper::create(sharedDataStream,
                                                                                   captureDeviceName);
     if (!m_micWrapper) {
         ACSDK_CRITICAL(LX("Failed to create QtMicrophoneWrapper!"));
         return false;
     }
 
+    // Turn on/off audio level processing, might be CPU consuming
+    bool processAudioLevel = settings.value(QStringLiteral("capture/process_input"), true).toBool();
+    m_micWrapper->setLevelProcess(processAudioLevel);
+    QObject::connect(m_micWrapper.get(), &QtMicrophoneWrapper::audioLevelChanged, this, &AlexaInterface::audioLevelChanged);
+
+
     // Creating wake word audio provider, if necessary
 #ifdef KWD
     bool wakeAlwaysReadable = true;
diff --git a/plugins/alexainterface/AlexaInterface.h b/plugins/alexainterface/AlexaInterface.h
index bcfa56e..492d8df 100644
--- a/plugins/alexainterface/AlexaInterface.h
+++ b/plugins/alexainterface/AlexaInterface.h
@@ -87,6 +87,7 @@ class AlexaInterface: public QObject {
     Q_PROPERTY(QString authCode READ authCode NOTIFY authCodeChanged)
     Q_PROPERTY(ConnectionManager::ConnectionStatus connectionStatus READ connectionStatus NOTIFY connectionStatusChanged)
     Q_PROPERTY(LogLevel logLevel READ logLevel WRITE setLogLevel NOTIFY logLevelChanged)
+    Q_PROPERTY(qreal audioLevel READ audioLevel NOTIFY audioLevelChanged)
 
 public:
 
@@ -117,6 +118,7 @@ public:
     QString authCode() const { return m_authCode; }
     ConnectionManager::ConnectionStatus connectionStatus() const { return m_connectionStatus; }
     LogLevel logLevel() const { return m_logLevel; }
+    qreal audioLevel() const { return m_micWrapper ? m_micWrapper->audioLevel() : 0.0; }
 
     explicit AlexaInterface(QObject* parent = nullptr);
     /// Destructor which manages the @c AlexaInterface shutdown sequence.
@@ -213,6 +215,7 @@ Q_SIGNALS:
     void connectionStatusChanged();
     void logLevelChanged();
     void cardReady(BaseCard *card);
+    void audioLevelChanged();
 
 private:
     static std::unique_ptr<AlexaInterface> instance;
diff --git a/plugins/alexainterface/QtMicrophoneWrapper.cpp b/plugins/alexainterface/QtMicrophoneWrapper.cpp
index c3fe0ef..86bfa6f 100644
--- a/plugins/alexainterface/QtMicrophoneWrapper.cpp
+++ b/plugins/alexainterface/QtMicrophoneWrapper.cpp
@@ -30,7 +30,7 @@
 ****************************************************************************/
 
 #include "QtMicrophoneWrapper.h"
-
+#include <QtEndian>
 #include <QDebug>
 
 using alexaClientSDK::avsCommon::avs::AudioInputStream;
@@ -121,7 +121,7 @@ void QtMicrophoneWrapper::setAudioDevice(const QString &deviceName) {
     format.setByteOrder(QAudioFormat::LittleEndian);
     format.setSampleType(QAudioFormat::SignedInt);
 
-    m_audioInfo = QAudioDeviceInfo::defaultInputDevice();
+    QAudioDeviceInfo audioInfo = QAudioDeviceInfo::defaultInputDevice();
 
     QList<QAudioDeviceInfo> devices = QAudioDeviceInfo::availableDevices(QAudio::AudioInput);
 
@@ -129,20 +129,23 @@ void QtMicrophoneWrapper::setAudioDevice(const QString &deviceName) {
     for (QAudioDeviceInfo &device : devices) {
         qDebug() << "     device name: " << device.deviceName();
         if (device.deviceName() == deviceName) {
-            m_audioInfo = device;
+            audioInfo = device;
         }
     }
 
-    qDebug() << "Selected capture device:" << m_audioInfo.deviceName();
+    qDebug() << "Selected capture device:" << audioInfo.deviceName();
     qDebug() << "Requested format" << format;
 
-    if (!m_audioInfo.isFormatSupported(format)) {
+    if (!audioInfo.isFormatSupported(format)) {
         qWarning() << "QtMicrophoneWrapper: Default format not supported, trying to use the nearest.";
-        format = m_audioInfo.nearestFormat(format);
+        format = audioInfo.nearestFormat(format);
         qWarning() << "QtMicrophoneWrapper: Nearest format" << format;
     }
 
-    m_audioInput = new QAudioInput(m_audioInfo, format, this);
+    m_audioInput = new QAudioInput(audioInfo, format, this);
+
+    m_audioLevelInfo.init(m_audioInput->format());
+
     QObject::connect(m_audioInput, &QAudioInput::notify, this, [this](){
 
         QByteArray readBytes = m_audioInputIODevice->readAll();
@@ -153,6 +156,12 @@ void QtMicrophoneWrapper::setAudioDevice(const QString &deviceName) {
                 static_cast<size_t>(m_readAudioDataBytes)/m_writer->getWordSize() :
                 static_cast<size_t>(m_readAudioDataBytes);
         m_writer->write(m_readAudioData.data(), nWords);
+
+        if (m_levelProcess) {
+            m_audioLevel = m_audioLevelInfo.processBuffer(m_readAudioData);
+            Q_EMIT audioLevelChanged();
+        }
+
         m_readAudioData.clear();
         m_readAudioDataBytes = 0;
     });
@@ -161,3 +170,200 @@ void QtMicrophoneWrapper::setAudioDevice(const QString &deviceName) {
     m_audioInput->setNotifyInterval(latency);
     qDebug("QtMicrophoneWrapper: Latency is configured to: %d ms", m_audioInput->notifyInterval());
 }
+
+AudioLevelInfo::AudioLevelInfo(const QAudioFormat &format)
+{
+    static_cast<void>(init(format)); // outcome is kept in m_valid; the bool result is redundant here
+}
+
+/// Configures the analyser for @p format: selects the full-scale amplitude and the
+/// per-sample decoder for its sample size/type/byte order and caches the frame layout.
+/// @return true if the format is supported and has at least one channel, false otherwise.
+bool AudioLevelInfo::init(const QAudioFormat &format)
+{
+    m_valid = true;
+    m_maxAmplitude = 0;
+
+    switch (format.sampleSize()) {
+    case 8:
+        switch (format.sampleType()) {
+        case QAudioFormat::UnSignedInt:
+            m_maxAmplitude = 255;
+            break;
+        case QAudioFormat::SignedInt:
+            m_maxAmplitude = 127;
+            break;
+        default:
+            break;
+        }
+        break;
+    case 16:
+        switch (format.sampleType()) {
+        case QAudioFormat::UnSignedInt:
+            m_maxAmplitude = 65535;
+            break;
+        case QAudioFormat::SignedInt:
+            m_maxAmplitude = 32767;
+            break;
+        default:
+            break;
+        }
+        break;
+    case 32:
+        switch (format.sampleType()) {
+        case QAudioFormat::UnSignedInt:
+            m_maxAmplitude = 0xffffffff;
+            break;
+        case QAudioFormat::SignedInt:
+            m_maxAmplitude = 0x7fffffff;
+            break;
+        case QAudioFormat::Float:
+            m_maxAmplitude = 0x7fffffff; // same factor processFloat() scales samples by
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        m_valid = false;
+        return false;
+    }
+
+    if (format.sampleSize() == 8 && format.sampleType() == QAudioFormat::UnSignedInt) {
+        m_getAudioLevelValue = &AudioLevelInfo::processUnSignedInt8;
+    } else if (format.sampleSize() == 8 && format.sampleType() == QAudioFormat::SignedInt) {
+        m_getAudioLevelValue = &AudioLevelInfo::processSignedInt8;
+    } else if (format.sampleSize() == 16 && format.sampleType() == QAudioFormat::UnSignedInt) {
+        if (format.byteOrder() == QAudioFormat::LittleEndian) {
+            m_getAudioLevelValue = &AudioLevelInfo::processUnSignedInt16LE;
+        } else {
+            m_getAudioLevelValue = &AudioLevelInfo::processUnSignedInt16BE;
+        }
+    } else if (format.sampleSize() == 16 && format.sampleType() == QAudioFormat::SignedInt) {
+        if (format.byteOrder() == QAudioFormat::LittleEndian) {
+            m_getAudioLevelValue = &AudioLevelInfo::processSignedInt16LE;
+        } else {
+            m_getAudioLevelValue = &AudioLevelInfo::processSignedInt16BE;
+        }
+    } else if (format.sampleSize() == 32 && format.sampleType() == QAudioFormat::UnSignedInt) {
+        if (format.byteOrder() == QAudioFormat::LittleEndian) {
+            m_getAudioLevelValue = &AudioLevelInfo::processUnSignedInt32LE;
+        } else {
+            m_getAudioLevelValue = &AudioLevelInfo::processUnSignedInt32BE;
+        }
+    } else if (format.sampleSize() == 32 && format.sampleType() == QAudioFormat::SignedInt) {
+        if (format.byteOrder() == QAudioFormat::LittleEndian) {
+            m_getAudioLevelValue = &AudioLevelInfo::processSignedInt32LE;
+        } else {
+            m_getAudioLevelValue = &AudioLevelInfo::processSignedInt32BE;
+        }
+    } else if (format.sampleSize() == 32 && format.sampleType() == QAudioFormat::Float) {
+        m_getAudioLevelValue = &AudioLevelInfo::processFloat;
+    } else {
+        m_getAudioLevelValue = &AudioLevelInfo::processDefault;
+        m_valid = false;
+    }
+
+    m_channelBytes = format.sampleSize() / 8;
+    m_sampleBytes = m_channelBytes * format.channelCount();
+    m_channelCount = format.channelCount();
+
+    // A format without channels has zero-sized frames; processBuffer() divides by the frame size.
+    if (m_channelCount <= 0) {
+        m_valid = false;
+    }
+
+    return m_valid;
+}
+
+/// Returns the peak level, in [0.0, 1.0], of a sparse subset of the frames in @p ba.
+/// Returns 0.0 for an empty or partial-frame buffer, or if init() rejected the format.
+qreal AudioLevelInfo::processBuffer(const QByteArray &ba) const
+{
+    // Guard the divisions below: a format reporting no channels leaves m_sampleBytes
+    // at 0, which would otherwise make the modulo/division undefined.
+    if (!m_valid || ba.size() == 0 || m_sampleBytes <= 0 || m_maxAmplitude == 0) {
+        return 0.0;
+    }
+
+    if (ba.size() % m_sampleBytes != 0) {
+        return 0.0;
+    }
+
+    const int numSamples = ba.size() / m_sampleBytes;
+    const int step = numSamples / 50 + 1; // inspect at most about 50 frames, skip the rest
+
+    quint32 maxValue = 0;
+    const char *base = ba.constData();
+
+    for (int i = 0; i < numSamples; i += step) {
+        // Index from the buffer start so no pointer is ever formed past the end.
+        const char *ptr = base + i * m_sampleBytes;
+        for (int j = 0; j < m_channelCount; ++j, ptr += m_channelBytes) {
+            maxValue = qMax((*m_getAudioLevelValue)(ptr), maxValue);
+        }
+    }
+
+    // Clamp: a decoded magnitude can exceed m_maxAmplitude (e.g. the most negative signed sample).
+    return qreal(qMin(maxValue, m_maxAmplitude)) / m_maxAmplitude;
+}
+
+// Sample decoders: each reads one sample at ptr and returns its magnitude (raw value if unsigned).
+quint32 AudioLevelInfo::processUnSignedInt8(const char *ptr)
+{
+    return *reinterpret_cast<const quint8*>(ptr);
+}
+
+quint32 AudioLevelInfo::processSignedInt8(const char *ptr)
+{
+    return static_cast<quint32>(qAbs(static_cast<qint32>(*reinterpret_cast<const qint8*>(ptr)))); // widened: |-128| does not fit qint8
+}
+
+quint32 AudioLevelInfo::processUnSignedInt16LE(const char *ptr)
+{
+    return qFromLittleEndian<quint16>(ptr);
+}
+
+quint32 AudioLevelInfo::processUnSignedInt16BE(const char *ptr)
+{
+    return qFromBigEndian<quint16>(ptr);
+}
+
+quint32 AudioLevelInfo::processSignedInt16LE(const char *ptr)
+{
+    return static_cast<quint32>(qAbs(static_cast<qint32>(qFromLittleEndian<qint16>(ptr)))); // widened: |-32768| does not fit qint16
+}
+
+quint32 AudioLevelInfo::processSignedInt16BE(const char *ptr)
+{
+    return static_cast<quint32>(qAbs(static_cast<qint32>(qFromBigEndian<qint16>(ptr)))); // widened: |-32768| does not fit qint16
+}
+
+quint32 AudioLevelInfo::processUnSignedInt32LE(const char *ptr)
+{
+    return qFromLittleEndian<quint32>(ptr);
+}
+
+quint32 AudioLevelInfo::processUnSignedInt32BE(const char *ptr)
+{
+    return qFromBigEndian<quint32>(ptr);
+}
+
+quint32 AudioLevelInfo::processSignedInt32LE(const char *ptr)
+{
+    return static_cast<quint32>(qAbs(static_cast<qint64>(qFromLittleEndian<qint32>(ptr)))); // widened: negating INT_MIN as qint32 overflows
+}
+
+quint32 AudioLevelInfo::processSignedInt32BE(const char *ptr)
+{
+    return static_cast<quint32>(qAbs(static_cast<qint64>(qFromBigEndian<qint32>(ptr)))); // widened: negating INT_MIN as qint32 overflows
+}
+
+quint32 AudioLevelInfo::processFloat(const char *ptr)
+{
+    return static_cast<quint32>(qMin(qAbs(*reinterpret_cast<const float*>(ptr)), 1.0f) * 0x7fffffff); // |sample| clamped to 1.0 so the float->quint32 conversion stays in range
+}
+quint32 AudioLevelInfo::processDefault(const char *)
+{
+    return 0; // fallback installed by init() for unsupported formats
+}
diff --git a/plugins/alexainterface/QtMicrophoneWrapper.h b/plugins/alexainterface/QtMicrophoneWrapper.h
index cf81596..bfeb5ae 100644
--- a/plugins/alexainterface/QtMicrophoneWrapper.h
+++ b/plugins/alexainterface/QtMicrophoneWrapper.h
@@ -40,6 +40,48 @@
 
 using namespace alexaClientSDK;
 
+/**
+ * @brief The AudioLevelInfo class
+ *
+ * Provides processBuffer(), which reports the peak (not average) level, normalised to
+ * 0.0..1.0, of a subset of the samples in a QByteArray. Must be initialised with a
+ * QAudioFormat, via the constructor or init(), before it reports non-zero levels.
+ */
+class AudioLevelInfo
+{
+public:
+    explicit AudioLevelInfo(const QAudioFormat &format); // calls init(format)
+    AudioLevelInfo() {} // leaves the object invalid until init() succeeds
+    bool init(const QAudioFormat &format); // returns false if the format is not supported
+
+    qreal processBuffer(const QByteArray &ba) const; // 0.0 when invalid, empty, or not a whole number of frames
+private:
+    bool m_valid = false; // set by init(); processBuffer() returns 0.0 while false
+
+    /// format-specific values cached by init() to speed up processing
+    quint32 m_maxAmplitude = 0; // full-scale sample value used for normalisation
+    int m_sampleBytes = 0; // bytes per frame (m_channelBytes * m_channelCount)
+    int m_channelBytes = 0; // bytes per single-channel sample
+    int m_channelCount = 0;
+
+    /// pointer to the static decoder chosen by init() for the format
+    quint32 (*m_getAudioLevelValue)(const char *) = &AudioLevelInfo::processDefault;
+
+    /// per-sample decoders: read one sample at ptr and return its magnitude
+    static quint32 processUnSignedInt8(const char *ptr);
+    static quint32 processSignedInt8(const char *ptr);
+    static quint32 processUnSignedInt16LE(const char *ptr);
+    static quint32 processUnSignedInt16BE(const char *ptr);
+    static quint32 processSignedInt16LE(const char *ptr);
+    static quint32 processSignedInt16BE(const char *ptr);
+    static quint32 processUnSignedInt32LE(const char *ptr);
+    static quint32 processUnSignedInt32BE(const char *ptr);
+    static quint32 processSignedInt32LE(const char *ptr);
+    static quint32 processSignedInt32BE(const char *ptr);
+    static quint32 processFloat(const char *ptr);
+    static quint32 processDefault(const char *ptr); // always returns 0; used for unsupported formats
+};
+
 class QtMicrophoneWrapper
       : public QObject
       , public applicationUtilities::resources::audio::MicrophoneInterface {
@@ -70,6 +112,12 @@ public:
 
     virtual ~QtMicrophoneWrapper() override;
 
+    qreal audioLevel() const { return m_audioLevel; }
+    void setLevelProcess(bool enable) { m_levelProcess = enable; }
+
+Q_SIGNALS:
+    void audioLevelChanged();
+
 private:
     /**
      * Constructor.
@@ -77,11 +125,14 @@ private:
      */
     QtMicrophoneWrapper(std::shared_ptr<avsCommon::avs::AudioInputStream> stream);
 
-    QAudioDeviceInfo m_audioInfo;
     QAudioInput *m_audioInput = nullptr;
     QIODevice *m_audioInputIODevice = nullptr;
     int m_readAudioDataBytes = 0;
     QByteArray m_readAudioData;
+    qreal m_audioLevel = 0.0; // 0.0 <= m_audioLevel <= 1.0
+    bool m_levelProcess = false;
+    AudioLevelInfo m_audioLevelInfo;
+
 
     /// Initializes Audio
     bool initialize(const QString &deviceName);
author	Egor Nemtsev <enemtsev@luxoft.com>	2019-08-20 14:23:52 +0300
committer	Egor Nemtsev <enemtsev@luxoft.com>	2019-08-22 12:01:17 +0000
commit	b41742c3f66c43c625d4ad26e3f2875f7960b12c (patch)
tree	fb67d2c4e54d1edbb80b68e3fdd16b828afbc004
parent	8d26def6d46f2836d27c976537425b7978dc45bb (diff)