/*
 * Copyright (C) 2011, Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "third_party/blink/renderer/modules/webaudio/wave_shaper_dsp_kernel.h"

#include <algorithm>
#include <memory>

#include "build/build_config.h"
#include "third_party/blink/renderer/platform/audio/audio_utilities.h"
#include "third_party/blink/renderer/platform/audio/vector_math.h"
#include "third_party/blink/renderer/platform/wtf/threading.h"

namespace blink {

namespace {

// Copies |frames_to_process| samples from |source| to |destination| without
// modifying them ("straight wire").
//
// ProcessCurve2x() and ProcessCurve4x() run ProcessCurve() in place, i.e. with
// |source| == |destination|. memcpy() on overlapping ranges is undefined, and
// the copy would be a no-op anyway, so it is skipped when the buffers are the
// same.
void PassThrough(const float* source,
                 float* destination,
                 uint32_t frames_to_process) {
  if (destination == source)
    return;
  memcpy(destination, source, sizeof(float) * frames_to_process);
}

}  // namespace

// Sizes every scratch array for the worst case (4x oversampling) and creates
// the resamplers right away if the processor already asks for oversampling.
WaveShaperDSPKernel::WaveShaperDSPKernel(WaveShaperProcessor* processor)
    : AudioDSPKernel(processor),
      tail_time_(0),
      // 4 times render size to handle 4x oversampling.
      virtual_index_(4 * audio_utilities::kRenderQuantumFrames),
      index_(4 * audio_utilities::kRenderQuantumFrames),
      v1_(4 * audio_utilities::kRenderQuantumFrames),
      v2_(4 * audio_utilities::kRenderQuantumFrames),
      f_(4 * audio_utilities::kRenderQuantumFrames) {
  if (processor->Oversample() != WaveShaperProcessor::kOverSampleNone)
    LazyInitializeOversampling();
}

// Creates the temporary buffers and the up/down samplers used by the 2x and 4x
// paths. Does nothing if they already exist (|temp_buffer_| is used as the
// "already initialized" marker).
void WaveShaperDSPKernel::LazyInitializeOversampling() {
  if (temp_buffer_)
    return;

  // NOTE(review): the element types below must match the member declarations
  // in wave_shaper_dsp_kernel.h -- confirm against the header.
  temp_buffer_ = std::make_unique<AudioFloatArray>(
      audio_utilities::kRenderQuantumFrames * 2);
  temp_buffer2_ = std::make_unique<AudioFloatArray>(
      audio_utilities::kRenderQuantumFrames * 4);
  up_sampler_ =
      std::make_unique<UpSampler>(audio_utilities::kRenderQuantumFrames);
  down_sampler_ = std::make_unique<DownSampler>(
      audio_utilities::kRenderQuantumFrames * 2);
  up_sampler2_ =
      std::make_unique<UpSampler>(audio_utilities::kRenderQuantumFrames * 2);
  down_sampler2_ = std::make_unique<DownSampler>(
      audio_utilities::kRenderQuantumFrames * 4);
}

// Shapes |frames_to_process| samples from |source| into |destination|,
// dispatching on the processor's current oversampling mode.
void WaveShaperDSPKernel::Process(const float* source,
                                  float* destination,
                                  uint32_t frames_to_process) {
  switch (GetWaveShaperProcessor()->Oversample()) {
    case WaveShaperProcessor::kOverSampleNone:
      ProcessCurve(source, destination, frames_to_process);
      break;
    case WaveShaperProcessor::kOverSample2x:
      ProcessCurve2x(source, destination, frames_to_process);
      break;
    case WaveShaperProcessor::kOverSample4x:
      ProcessCurve4x(source, destination, frames_to_process);
      break;

    default:
      NOTREACHED();
  }
}

// Scalar reference version of the curve lookup: returns the curve value for a
// single |input| sample using linear interpolation between the two nearest
// entries of |curve_data| (which has |curve_length| entries).
double WaveShaperDSPKernel::WaveShaperCurveValue(float input,
                                                 const float* curve_data,
                                                 int curve_length) const {
  // Calculate a virtual index based on input -1 -> +1 with -1 being curve[0],
  // +1 being curve[curveLength - 1], and 0 being at the center of the curve
  // data. Then linearly interpolate between the two points in the curve.
  double virtual_index = 0.5 * (input + 1) * (curve_length - 1);
  double output;
  if (virtual_index < 0) {
    // input < -1, so use curve[0]
    output = curve_data[0];
  } else if (virtual_index >= curve_length - 1) {
    // input >= 1, so use last curve value
    output = curve_data[curve_length - 1];
  } else {
    // The general case where -1 <= input < 1, where 0 <= virtualIndex <
    // curveLength - 1, so interpolate between the nearest samples on the
    // curve.
    unsigned index1 = static_cast<unsigned>(virtual_index);
    unsigned index2 = index1 + 1;
    double interpolation_factor = virtual_index - index1;

    double value1 = curve_data[index1];
    double value2 = curve_data[index2];

    output =
        (1.0 - interpolation_factor) * value1 + interpolation_factor * value2;
  }

  return output;
}

// Vectorized curve lookup. For each of the |frames_to_process| samples in
// |source|, computes the linearly interpolated value of |curve_data|
// (|curve_length| entries) and writes it to |destination|.
//
// |source| is fully consumed into the |virtual_index_| scratch array before
// |destination| is written, and |destination| is only written by the final
// Vadd, so the two may refer to the same buffer.
void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
                                                const float* source,
                                                uint32_t frames_to_process,
                                                const float* curve_data,
                                                int curve_length) const {
  DCHECK_LE(frames_to_process, virtual_index_.size());
  // Index into the array computed from the source value.
  float* virtual_index = virtual_index_.Data();

  // virtual_index[k] =
  //   clampTo(0.5 * (source[k] + 1) * (curve_length - 1),
  //           0.0f,
  //           static_cast<float>(curve_length - 1))

  // Add 1 to source, putting the result in virtual_index.
  vector_math::Vsadd(source, 1, 1, virtual_index, 1, frames_to_process);

  // Scale virtual_index in place by (curve_length - 1) / 2.
  vector_math::Vsmul(virtual_index, 1, 0.5 * (curve_length - 1), virtual_index,
                     1, frames_to_process);

  // Clip virtual_index, in place.
  vector_math::Vclip(virtual_index, 1, 0, curve_length - 1, virtual_index, 1,
                     frames_to_process);

  // index = floor(virtual_index)
  DCHECK_LE(frames_to_process, index_.size());
  float* index = index_.Data();

  // v1 and v2 hold the curve_data corresponding to the closest curve
  // values to the source sample.
  DCHECK_LE(frames_to_process, v1_.size());
  DCHECK_LE(frames_to_process, v2_.size());
  float* v1 = v1_.Data();
  float* v2 = v2_.Data();

  // Interpolation factor: virtual_index - index.
  DCHECK_LE(frames_to_process, f_.size());
  float* f = f_.Data();

  int max_index = curve_length - 1;
  unsigned k = 0;
#if defined(ARCH_CPU_X86_FAMILY)
  {
    int loop_limit = frames_to_process / 4;

    // one = 1
    __m128i one = _mm_set1_epi32(1);

    // Do 4 elements at a time
    for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
      // v = virtual_index[k]
      __m128 v = _mm_loadu_ps(virtual_index + k);

      // index1 = static_cast<int>(v);
      __m128i index1 = _mm_cvttps_epi32(v);

      // v = static_cast<float>(index1) and save result to index[k:k+3]
      v = _mm_cvtepi32_ps(index1);
      _mm_storeu_ps(&index[k], v);

      // index2 = index1 + 1;
      __m128i index2 = _mm_add_epi32(index1, one);

      // Convert index1/index2 to arrays of 32-bit int values that are our
      // array indices to use to get the curve data.
      int32_t* i1 = reinterpret_cast<int32_t*>(&index1);
      int32_t* i2 = reinterpret_cast<int32_t*>(&index2);

      // Get the curve_data values and save them in v1 and v2,
      // carefully clamping the values.  If the input is NaN, index1
      // could be 0x80000000.
      v1[k] = curve_data[clampTo(i1[0], 0, max_index)];
      v2[k] = curve_data[clampTo(i2[0], 0, max_index)];
      v1[k + 1] = curve_data[clampTo(i1[1], 0, max_index)];
      v2[k + 1] = curve_data[clampTo(i2[1], 0, max_index)];
      v1[k + 2] = curve_data[clampTo(i1[2], 0, max_index)];
      v2[k + 2] = curve_data[clampTo(i2[2], 0, max_index)];
      v1[k + 3] = curve_data[clampTo(i1[3], 0, max_index)];
      v2[k + 3] = curve_data[clampTo(i2[3], 0, max_index)];
    }
  }
#elif defined(CPU_ARM_NEON)
  {
    int loop_limit = frames_to_process / 4;

    // Neon constants:
    //   zero = 0
    //   one  = 1
    //   max  = max_index
    int32x4_t zero = vdupq_n_s32(0);
    int32x4_t one = vdupq_n_s32(1);
    int32x4_t max = vdupq_n_s32(max_index);

    for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
      // v = virtual_index
      float32x4_t v = vld1q_f32(virtual_index + k);

      // index1 = static_cast<int32_t>(v), then clamp to a valid index range
      // for curve_data
      int32x4_t index1 = vcvtq_s32_f32(v);
      index1 = vmaxq_s32(vminq_s32(index1, max), zero);

      // v = static_cast<float>(index1) and save it away for later use.
      v = vcvtq_f32_s32(index1);
      vst1q_f32(&index[k], v);

      // index2 = index1 + 1, then clamp to a valid range for curve_data.
      int32x4_t index2 = vaddq_s32(index1, one);
      index2 = vmaxq_s32(vminq_s32(index2, max), zero);

      // Save index1/2 so we can get the individual parts.  Aligned to
      // 16 bytes for vst1q instruction.
      int32_t i1[4] __attribute__((aligned(16)));
      int32_t i2[4] __attribute__((aligned(16)));
      vst1q_s32(i1, index1);
      vst1q_s32(i2, index2);

      // Get curve elements corresponding to the indices.
      v1[k] = curve_data[i1[0]];
      v2[k] = curve_data[i2[0]];
      v1[k + 1] = curve_data[i1[1]];
      v2[k + 1] = curve_data[i2[1]];
      v1[k + 2] = curve_data[i1[2]];
      v2[k + 2] = curve_data[i2[2]];
      v1[k + 3] = curve_data[i1[3]];
      v2[k + 3] = curve_data[i2[3]];
    }
  }
#endif

  // Scalar path: handles whatever the SIMD loops above left over (or every
  // frame when no SIMD path is compiled in). Computes index1/index2 and loads
  // the corresponding curve_data values.
  //
  // NOTE(review): converting a NaN float to unsigned is undefined behavior;
  // this relies on virtual_index[k] being a number after the clip above --
  // confirm how Vclip treats NaN.
  for (; k < frames_to_process; ++k) {
    unsigned index1 =
        clampTo(static_cast<unsigned>(virtual_index[k]), 0, max_index);
    unsigned index2 = clampTo(index1 + 1, 0, max_index);
    index[k] = index1;
    v1[k] = curve_data[index1];
    v2[k] = curve_data[index2];
  }

  // f[k] = virtual_index[k] - index[k]
  vector_math::Vsub(virtual_index, 1, index, 1, f, 1, frames_to_process);

  // Do the linear interpolation of the curve data:
  // destination[k] = v1[k] + f[k]*(v2[k] - v1[k])
  //
  // 1. v2[k] = v2[k] - v1[k]
  // 2. v2[k] = f[k]*v2[k] = f[k]*(v2[k] - v1[k])
  // 3. destination[k] = v1[k] + v2[k]
  //                   = v1[k] + f[k]*(v2[k] - v1[k])
  vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
  vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
  vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
}

// Applies the processor's curve to |frames_to_process| samples at the current
// rate. If no usable curve is set, the input is passed through unchanged.
// |source| and |destination| may be the same buffer.
void WaveShaperDSPKernel::ProcessCurve(const float* source,
                                       float* destination,
                                       uint32_t frames_to_process) {
  DCHECK(source);
  DCHECK(destination);
  DCHECK(GetWaveShaperProcessor());

  Vector<float>* curve = GetWaveShaperProcessor()->Curve();
  if (!curve) {
    // Act as "straight wire" pass-through if no curve is set.
    PassThrough(source, destination, frames_to_process);
    return;
  }

  float* curve_data = curve->data();
  int curve_length = curve->size();

  DCHECK(curve_data);

  if (!curve_data || !curve_length) {
    // An empty curve is treated the same as no curve at all.
    PassThrough(source, destination, frames_to_process);
    return;
  }

  // Apply waveshaping curve.
  WaveShaperCurveValues(destination, source, frames_to_process, curve_data,
                        curve_length);
}

// 2x oversampled shaping: up-sample by 2 into |temp_buffer_|, shape in place,
// then down-sample by 2 into |destination|. Expects exactly one render
// quantum of input.
void WaveShaperDSPKernel::ProcessCurve2x(const float* source,
                                         float* destination,
                                         uint32_t frames_to_process) {
  DCHECK_EQ(frames_to_process, audio_utilities::kRenderQuantumFrames);

  float* temp_p = temp_buffer_->Data();

  up_sampler_->Process(source, temp_p, frames_to_process);

  // Process at 2x up-sampled rate.
  ProcessCurve(temp_p, temp_p, frames_to_process * 2);

  down_sampler_->Process(temp_p, destination, frames_to_process * 2);
}

// 4x oversampled shaping: two cascaded 2x up-sampling stages, shape in place
// at 4x, then two cascaded 2x down-sampling stages into |destination|.
// Expects exactly one render quantum of input.
void WaveShaperDSPKernel::ProcessCurve4x(const float* source,
                                         float* destination,
                                         uint32_t frames_to_process) {
  DCHECK_EQ(frames_to_process, audio_utilities::kRenderQuantumFrames);

  float* temp_p = temp_buffer_->Data();
  float* temp_p2 = temp_buffer2_->Data();

  up_sampler_->Process(source, temp_p, frames_to_process);
  up_sampler2_->Process(temp_p, temp_p2, frames_to_process * 2);

  // Process at 4x up-sampled rate.
  ProcessCurve(temp_p2, temp_p2, frames_to_process * 4);

  down_sampler2_->Process(temp_p2, temp_p, frames_to_process * 4);
  down_sampler_->Process(temp_p, destination, frames_to_process * 2);
}

// Resets all four resamplers, if they have been created. They are always
// created together by LazyInitializeOversampling(), so checking one suffices.
void WaveShaperDSPKernel::Reset() {
  if (up_sampler_) {
    up_sampler_->Reset();
    down_sampler_->Reset();
    up_sampler2_->Reset();
    down_sampler2_->Reset();
  }
}

bool WaveShaperDSPKernel::RequiresTailProcessing() const {
  // Always return true even if the tail time and latency might both be zero.
  return true;
}

// Returns the tail time stored at construction.
double WaveShaperDSPKernel::TailTime() const {
  return tail_time_;
}

// Returns the latency, in seconds, introduced by the resamplers for the
// current oversampling mode (zero when oversampling is off).
double WaveShaperDSPKernel::LatencyTime() const {
  size_t latency_frames = 0;
  WaveShaperDSPKernel* kernel = const_cast<WaveShaperDSPKernel*>(this);

  switch (kernel->GetWaveShaperProcessor()->Oversample()) {
    case WaveShaperProcessor::kOverSampleNone:
      break;
    case WaveShaperProcessor::kOverSample2x:
      latency_frames += up_sampler_->LatencyFrames();
      latency_frames += down_sampler_->LatencyFrames();
      break;
    case WaveShaperProcessor::kOverSample4x: {
      // Account for first stage upsampling.
      latency_frames += up_sampler_->LatencyFrames();
      latency_frames += down_sampler_->LatencyFrames();

      // Account for second stage upsampling, and divide by 2 to get back
      // down to the regular sample-rate.
      size_t latency_frames2 =
          (up_sampler2_->LatencyFrames() + down_sampler2_->LatencyFrames()) / 2;
      latency_frames += latency_frames2;
      break;
    }
    default:
      NOTREACHED();
  }

  return static_cast<double>(latency_frames) / SampleRate();
}

}  // namespace blink