summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc')
-rw-r--r--src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc155
1 files changed, 155 insertions, 0 deletions
diff --git a/src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc b/src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc
new file mode 100644
index 000000000..547f5c1ab
--- /dev/null
+++ b/src/3rdparty/resonance-audio/resonance_audio/geometrical_acoustics/estimating_rt60.cc
@@ -0,0 +1,155 @@
+/*
+Copyright 2018 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS-IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <cmath>
+
+#include "base/logging.h"
+#include "base/misc_math.h"
+#include "geometrical_acoustics/estimating_rt60.h"
+
+namespace vraudio {
+
+// The RT60 is estimated by fitting a line to the log of energy. We collect
+// energy from individual impulse responses into bins and use each bin's
+// midpoint and height (average energy inside the bin) as a data point (x, y)
+// for the line fitting algorithm. For example, for a bin that collects 100.0
+// units of energy from 0.0s to 2.0s has a midpoint of 1.0s and height of
+// log10(100.0 / 2.0).
+//
+// In general we have some liberty in how we divide the whole reverb tail as
+// bins. For example, for a reverb tail of 3.5 seconds long:
+// ----------------------------------------------------------------------->
+//
+// we may divide it into 7 bins of widths 0.5 seconds:
+// |---------|---------|---------|---------|---------|---------|---------|>
+// 0.0s 0.5s 1.0s 1.5s 2.0s 2.5s 3.0s 3.5s
+//
+// or we may divide it into 3 bins of widths {0.5s, 1.0s, 2.0s}:
+// |---------|-------------------|---------------------------------------|>
+// 0.0s 0.5s 1.5s 3.5s
+//
+// In this implementation, we adopted a scheme that lets the accumulated energy
+// determine the width. The first bin would contain 0.5 of the total energy of
+// the whole reverb tail, the second bin would contain 0.5 of the remaining
+// energy after the first bin, and so on. The number 0.5 can be replaced by any
+// value in [0.0, 1.0), which is defined as |kBinEnergyRatio| in our
+// implementation.
+//
+// The advantage of this scheme is that there will not be any empty bin (of
+// height zero), which might occur because the impulse responses can be sparse
+// at certain regions. An empty bin is a particularly bad data point and
+// will greatly reduce the efficacy of the linear fitting. (For a more
+// detailed discussion of the sparsity of impulse responses, see [internal ref]
+float EstimateRT60(const std::vector<float>& energy_impulse_responses,
+ const float sampling_rate) {
+ // First initialize the remaining energy to be the total energy.
+ float remaining_energy = 0.0f;
+ for (const float energy : energy_impulse_responses) {
+ remaining_energy += energy;
+ }
+
+ // The collected data points: the bin's midpoints and heights.
+ std::vector<float> bin_midpoints;
+ std::vector<float> bin_heights;
+
+ // The target energy (as a ratio of the remaining energy) that a bin should
+ // collect before closing.
+ const float kBinEnergyRatio = 0.5f;
+ float target_accumulated_energy = remaining_energy * kBinEnergyRatio;
+
+ // Stop when the remaining energy is below this threshold.
+ const float kRemainingThreshold = remaining_energy * 1e-3f;
+
+ // The iteration goes as follows:
+ // 1. A bin is opened.
+ // 2. Energy is accumulated from impulse responses.
+ // 3. When the accumulated energy exceeds the target, the bin is closed and
+ // its midpoint and height recorded.
+ // 4. Repeat 1-3 until the remaining energy is below a threshold.
+ float accumulated_energy = 0.0f;
+ float bin_begin = -1.0f;
+ for (size_t i = 0; i < energy_impulse_responses.size(); ++i) {
+ if (energy_impulse_responses[i] <= 0.0f) {
+ continue;
+ }
+
+ // The first non-zero response is found; set the |bin_begin|.
+ if (bin_begin < 0.0f) {
+ bin_begin = static_cast<float>(i) / sampling_rate;
+ }
+
+ accumulated_energy += energy_impulse_responses[i];
+
+ // Close the bin if the accumulated energy meets the target.
+ if (accumulated_energy > target_accumulated_energy) {
+ // Compute the bin's midpoint, in the unit of second.
+ const float bin_end = static_cast<float>(i + 1) / sampling_rate;
+ const float bin_midpoint = 0.5f * (bin_begin + bin_end);
+
+ // Compute the bin's height as the average energy inside the bin, in the
+ // unit of dB.
+ const float bin_width = bin_end - bin_begin;
+ const float bin_height =
+ 10.0f * std::log10(accumulated_energy / bin_width);
+
+ // Collect the data point.
+ bin_midpoints.push_back(bin_midpoint);
+ bin_heights.push_back(bin_height);
+
+ // Terminate the data point collection when the remaining energy is below
+ // threshold.
+ remaining_energy -= accumulated_energy;
+ if (remaining_energy < kRemainingThreshold) {
+ break;
+ }
+
+ // Start a new bin and update the remaining energy.
+ target_accumulated_energy = remaining_energy * kBinEnergyRatio;
+ bin_begin = bin_end;
+ accumulated_energy = 0.0f;
+ }
+ }
+
+ // Require at least some data points to perform linear fitting.
+ const size_t kMinNumDataPointsForFitting = 3;
+ if (bin_midpoints.size() < kMinNumDataPointsForFitting) {
+ LOG(WARNING) << "Insufficient number of data points for fitting";
+ return 0.0f;
+ }
+
+ // Perform linear fitting.
+ float slope = 0.0f;
+ float intercept = 0.0f;
+ float r_square = 0.0f;
+ if (!LinearLeastSquareFitting(bin_midpoints, bin_heights, &slope, &intercept,
+ &r_square)) {
+ LOG(WARNING) << "Linear least square fitting failed";
+ return 0.0f;
+ }
+ LOG(INFO) << "Fitted slope= " << slope << "; intercept= " << intercept
+ << "; R^2= " << r_square;
+
+ // RT60 is defined as how long it takes for the energy to decay 60 dB.
+ // Note that |slope| should be negative.
+ if (slope < 0.0f) {
+ return -60.0f / slope;
+ } else {
+ LOG(WARNING) << "Invalid RT60 from non-negative slope";
+ return 0.0f;
+ }
+}
+
+} // namespace vraudio