Diffstat (limited to 'chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h')
-rw-r--r--  chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h | 164
1 file changed, 121 insertions(+), 43 deletions(-)
diff --git a/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h b/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
index b5c856de273..77c3f3add22 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
@@ -15,7 +15,6 @@
#include <stdio.h> // FILE
#include "webrtc/common.h"
-#include "webrtc/modules/interface/module.h"
#include "webrtc/typedefs.h"
struct AecCore;
@@ -50,11 +49,32 @@ class VoiceDetection;
// except when really necessary.
struct DelayCorrection {
DelayCorrection() : enabled(false) {}
- DelayCorrection(bool enabled) : enabled(enabled) {}
+ explicit DelayCorrection(bool enabled) : enabled(enabled) {}
+ bool enabled;
+};
+
+// Used to disable the reported system delays. When the reported system delays
+// are disabled, the echo cancellation algorithm assumes the process and
+// reverse streams are aligned. This configuration only applies to
+// EchoCancellation, not EchoControlMobile, and is set with
+// AudioProcessing::SetExtraOptions(). Note that disabling the reported system
+// delays may cause EchoCancellation to regress in performance.
+struct ReportedDelay {
+ ReportedDelay() : enabled(true) {}
+ explicit ReportedDelay(bool enabled) : enabled(enabled) {}
+ bool enabled;
+};
+// Must be provided through AudioProcessing::Create(Config&). It will have no
+// impact if used with AudioProcessing::SetExtraOptions().
+struct ExperimentalAgc {
+ ExperimentalAgc() : enabled(true) {}
+ explicit ExperimentalAgc(bool enabled) : enabled(enabled) {}
bool enabled;
};
+static const int kAudioProcMaxNativeSampleRateHz = 32000;
+
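A minimal sketch, not part of this patch, of how these structs would be applied, assuming the webrtc::Config::Set<>() mechanism from webrtc/common.h (which takes ownership of the passed pointer):

    // ExperimentalAgc is only honored at create-time (see comment above).
    webrtc::Config config;
    config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true));
    webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);

    // DelayCorrection and ReportedDelay are applied via SetExtraOptions().
    webrtc::Config extra;
    extra.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
    extra.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
    apm->SetExtraOptions(extra);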
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
@@ -84,16 +104,12 @@ struct DelayCorrection {
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
-// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
-// channels should be interleaved.
+// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
+// interfaces use interleaved data, while the float interfaces use deinterleaved
+// data.
//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create(0);
-// apm->set_sample_rate_hz(32000); // Super-wideband processing.
-//
-// // Mono capture and stereo render.
-// apm->set_num_channels(1, 1);
-// apm->set_num_reverse_channels(2);
//
// apm->high_pass_filter()->Enable(true);
//
@@ -132,13 +148,27 @@ struct DelayCorrection {
// // Close the application...
// delete apm;
//
-class AudioProcessing : public Module {
+class AudioProcessing {
public:
- // Creates a APM instance, with identifier |id|. Use one instance for every
- // primary audio stream requiring processing. On the client-side, this would
- // typically be one instance for the near-end stream, and additional instances
- // for each far-end stream which requires processing. On the server-side,
- // this would typically be one instance for every incoming stream.
+ enum ChannelLayout {
+ kMono,
+ // Left, right.
+ kStereo,
+ // Mono, keyboard mic.
+ kMonoAndKeyboard,
+ // Left, right, keyboard mic.
+ kStereoAndKeyboard
+ };
+
+ // Creates an APM instance. Use one instance for every primary audio stream
+ // requiring processing. On the client-side, this would typically be one
+ // instance for the near-end stream, and additional instances for each far-end
+ // stream which requires processing. On the server-side, this would typically
+ // be one instance for every incoming stream.
+ static AudioProcessing* Create();
+ // Allows passing in an optional configuration at create-time.
+ static AudioProcessing* Create(const Config& config);
+ // TODO(ajm): Deprecated; remove all calls to it.
static AudioProcessing* Create(int id);
virtual ~AudioProcessing() {}
@@ -147,11 +177,26 @@ class AudioProcessing : public Module {
// it is not necessary to call before processing the first stream after
// creation.
//
- // set_sample_rate_hz(), set_num_channels() and set_num_reverse_channels()
- // will trigger a full initialization if the settings are changed from their
- // existing values. Otherwise they are no-ops.
+ // It is also not necessary to call if the audio parameters (sample
+ // rate and number of channels) have changed. Passing updated parameters
+ // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
+ // If the parameters are known at init-time though, they may be provided.
virtual int Initialize() = 0;
+ // The int16 interfaces require:
+ // - only |NativeRate|s be used
+ // - that the input, output and reverse rates must match
+ // - that |output_layout| matches |input_layout|
+ //
+ // The float interfaces accept arbitrary rates and support differing input
+ // and output layouts, but the output may only remove channels, not add.
+ virtual int Initialize(int input_sample_rate_hz,
+ int output_sample_rate_hz,
+ int reverse_sample_rate_hz,
+ ChannelLayout input_layout,
+ ChannelLayout output_layout,
+ ChannelLayout reverse_layout) = 0;
+
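Purely illustrative (the rates, layouts, and apm pointer below are placeholders), a call to this Initialize() overload when the parameters are known up front might look like:

    // Stereo 48 kHz capture in, mono 48 kHz out; stereo 44.1 kHz render.
    apm->Initialize(48000, 48000, 44100,
                    webrtc::AudioProcessing::kStereo,   // input_layout
                    webrtc::AudioProcessing::kMono,     // output may only drop channels
                    webrtc::AudioProcessing::kStereo);  // reverse_layout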
// Pass down additional options which don't have explicit setters. This
// ensures the options are applied immediately.
virtual void SetExtraOptions(const Config& config) = 0;
@@ -159,23 +204,30 @@ class AudioProcessing : public Module {
virtual int EnableExperimentalNs(bool enable) = 0;
virtual bool experimental_ns_enabled() const = 0;
- // Sets the sample |rate| in Hz for both the primary and reverse audio
- // streams. 8000, 16000 or 32000 Hz are permitted.
+ // DEPRECATED.
+ // TODO(ajm): Remove after Chromium has upgraded to using Initialize().
virtual int set_sample_rate_hz(int rate) = 0;
+ // TODO(ajm): Remove after voice engine no longer requires it to resample
+ // the reverse stream to the forward rate.
+ virtual int input_sample_rate_hz() const = 0;
+ // TODO(ajm): Remove after Chromium no longer depends on it.
virtual int sample_rate_hz() const = 0;
- // Sets the number of channels for the primary audio stream. Input frames must
- // contain a number of channels given by |input_channels|, while output frames
- // will be returned with number of channels given by |output_channels|.
- virtual int set_num_channels(int input_channels, int output_channels) = 0;
+ // TODO(ajm): Only intended for internal use. Make private and friend the
+ // necessary classes?
+ virtual int proc_sample_rate_hz() const = 0;
+ virtual int proc_split_sample_rate_hz() const = 0;
virtual int num_input_channels() const = 0;
virtual int num_output_channels() const = 0;
-
- // Sets the number of channels for the reverse audio stream. Input frames must
- // contain a number of channels given by |channels|.
- virtual int set_num_reverse_channels(int channels) = 0;
virtual int num_reverse_channels() const = 0;
+ // Set to true when the output of AudioProcessing will be muted or in some
+ // other way not used. Ideally, the captured audio would still be processed,
+ // but some components may change behavior based on this information.
+ // Default false.
+ virtual void set_output_will_be_muted(bool muted) = 0;
+ virtual bool output_will_be_muted() const = 0;
+
// Processes a 10 ms |frame| of the primary audio stream. On the client-side,
// this is the near-end (or captured) audio.
//
@@ -184,10 +236,25 @@ class AudioProcessing : public Module {
// with the stream_ tag which is needed should be called after processing.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
- // members of |frame| must be valid, and correspond to settings supplied
- // to APM.
+ // members of |frame| must be valid. If changed from the previous call to this
+ // method, it will trigger an initialization.
virtual int ProcessStream(AudioFrame* frame) = 0;
+ // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+ // of |src| points to a channel buffer, arranged according to
+ // |input_layout|. At output, the channels will be arranged according to
+ // |output_layout| at |output_sample_rate_hz| in |dest|.
+ //
+ // The output layout may only remove channels, not add. |src| and |dest|
+ // may use the same memory, if desired.
+ virtual int ProcessStream(const float* const* src,
+ int samples_per_channel,
+ int input_sample_rate_hz,
+ ChannelLayout input_layout,
+ int output_sample_rate_hz,
+ ChannelLayout output_layout,
+ float* const* dest) = 0;
+
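A hedged example of this float capture-side interface (buffer names and sizes are illustrative; chunks are 10 ms per kChunkSizeMs):

    const int kRate = 48000;
    const int kSamples = kRate / 100;   // samples per channel in a 10 ms chunk
    float left[kSamples] = {0};         // deinterleaved audio in [-1, 1]
    float right[kSamples] = {0};
    float* src[] = {left, right};
    float* dest[] = {left};             // in-place; output may only drop channels
    apm->ProcessStream(src, kSamples, kRate, webrtc::AudioProcessing::kStereo,
                       kRate, webrtc::AudioProcessing::kMono, dest);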
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
// rendered) audio.
@@ -199,11 +266,19 @@ class AudioProcessing : public Module {
// chances are you don't need to use it.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
- // members of |frame| must be valid.
+ // members of |frame| must be valid. |sample_rate_hz_| must correspond to
+ // |input_sample_rate_hz()|.
//
// TODO(ajm): add const to input; requires an implementation fix.
virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
+ // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+ // of |data| points to a channel buffer, arranged according to |layout|.
+ virtual int AnalyzeReverseStream(const float* const* data,
+ int samples_per_channel,
+ int sample_rate_hz,
+ ChannelLayout layout) = 0;
+
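And the render-side counterpart for the float path, again as a sketch mirroring the capture example above:

    float rl[480] = {0};                // deinterleaved far-end audio in [-1, 1]
    float rr[480] = {0};
    const float* render[] = {rl, rr};
    apm->AnalyzeReverseStream(render, 480, 48000,
                              webrtc::AudioProcessing::kStereo);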
// This must be called if and only if echo processing is enabled.
//
// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@@ -219,6 +294,12 @@ class AudioProcessing : public Module {
// ProcessStream().
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
+ virtual bool was_stream_delay_set() const = 0;
+
+ // Call to signal that a key press occurred (true) or did not occur (false)
+ // with this chunk of audio.
+ virtual void set_stream_key_pressed(bool key_pressed) = 0;
+ virtual bool stream_key_pressed() const = 0;
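Taken together, the per-chunk sequence these comments describe looks roughly like the following; the frames, delay value, and key-press flag are placeholders:

    apm->AnalyzeReverseStream(&render_frame);      // far-end 10 ms chunk, not modified
    apm->set_stream_delay_ms(delay_ms);            // required when echo processing is enabled
    apm->set_stream_key_pressed(key_was_pressed);  // key-press hint for this chunk
    apm->ProcessStream(&capture_frame);            // near-end 10 ms chunk, processed in place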
// Sets a delay |offset| in ms to add to the values passed in through
// set_stream_delay_ms(). May be positive or negative.
@@ -283,9 +364,13 @@ class AudioProcessing : public Module {
kBadStreamParameterWarning = -13
};
- // Inherited from Module.
- virtual int32_t TimeUntilNextProcess() OVERRIDE;
- virtual int32_t Process() OVERRIDE;
+ enum NativeRate {
+ kSampleRate8kHz = 8000,
+ kSampleRate16kHz = 16000,
+ kSampleRate32kHz = 32000
+ };
+
+ static const int kChunkSizeMs = 10;
};
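For reference, the int16 chunk size follows directly from these constants; e.g. at the 32 kHz native rate:

    const int samples_per_channel =
        webrtc::AudioProcessing::kSampleRate32kHz *
        webrtc::AudioProcessing::kChunkSizeMs / 1000;  // 320 samples per 10 ms chunk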
// The acoustic echo cancellation (AEC) component provides better performance
@@ -306,16 +391,10 @@ class EchoCancellation {
// render and capture devices are used, particularly with webcams.
//
// This enables a compensation mechanism, and requires that
- // |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called.
+ // set_stream_drift_samples() be called.
virtual int enable_drift_compensation(bool enable) = 0;
virtual bool is_drift_compensation_enabled() const = 0;
- // Provides the sampling rate of the audio devices. It is assumed the render
- // and capture devices use the same nominal sample rate. Required if and only
- // if drift compensation is enabled.
- virtual int set_device_sample_rate_hz(int rate) = 0;
- virtual int device_sample_rate_hz() const = 0;
-
// Sets the difference between the number of samples rendered and captured by
// the audio devices since the last call to |ProcessStream()|. Must be called
// if drift compensation is enabled, prior to |ProcessStream()|.
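A sketch of the drift-compensation hookup described here; the drift value would come from the audio device layer and the names are illustrative:

    apm->echo_cancellation()->enable_drift_compensation(true);
    // Before each call to ProcessStream():
    apm->echo_cancellation()->set_stream_drift_samples(rendered_minus_captured);
    apm->ProcessStream(&capture_frame);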
@@ -555,8 +634,7 @@ class LevelEstimator {
// frames since the last call to RMS(). The returned value is positive but
// should be interpreted as negative. It is constrained to [0, 127].
//
- // The computation follows:
- // http://tools.ietf.org/html/draft-ietf-avtext-client-to-mixer-audio-level-05
+ // The computation follows: https://tools.ietf.org/html/rfc6465
// with the intent that it can provide the RTP audio level indication.
//
// Frames passed to ProcessStream() with an |_energy| of zero are considered