webrtc_m130/modules/audio_processing/gain_controller2.cc

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

290 lines
11 KiB
C++
Raw Normal View History

/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/gain_controller2.h"
#include <memory>
#include <utility>
#include "api/audio/audio_frame.h"
#include "api/environment/environment.h"
#include "api/field_trials_view.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc {
namespace {
// Short aliases for the AGC2 and the input volume controller config types.
using Agc2Config = AudioProcessing::Config::GainController2;
using InputVolumeControllerConfig = InputVolumeController::Config;
// Time, in milliseconds, between two consecutive limiter stats log lines.
constexpr int kLogLimiterStatsPeriodMs = 30'000;
// Frame duration, in milliseconds, used to convert the period above into a
// frame count (presumably one `Process()` call per frame - confirm).
constexpr int kFrameLengthMs = 10;
// Number of `GainController2::Process()` calls between two limiter stats log
// lines (30'000 / 10 = 3'000).
constexpr int kLogLimiterStatsPeriodNumFrames =
kLogLimiterStatsPeriodMs / kFrameLengthMs;
// Returns the CPU features reported by `GetAvailableCpuFeatures()` after
// disabling every SIMD flavor whose kill-switch field trial is enabled in
// `field_trials`.
AvailableCpuFeatures GetAllowedCpuFeatures(
    const FieldTrialsView& field_trials) {
  // Query the kill-switches first (SSE2, AVX2, NEON - same order as the
  // features are cleared below).
  const bool sse2_killed =
      field_trials.IsEnabled("WebRTC-Agc2SimdSse2KillSwitch");
  const bool avx2_killed =
      field_trials.IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch");
  const bool neon_killed =
      field_trials.IsEnabled("WebRTC-Agc2SimdNeonKillSwitch");

  AvailableCpuFeatures allowed = GetAvailableCpuFeatures();
  if (sse2_killed) {
    allowed.sse2 = false;
  }
  if (avx2_killed) {
    allowed.avx2 = false;
  }
  if (neon_killed) {
    allowed.neon = false;
  }
  return allowed;
}
// Peak and RMS audio levels in dBFS.
// NOTE: the field order matters - `ComputeAudioLevels()` initializes this
// struct positionally (peak first, RMS second).
struct AudioLevels {
// Level of the largest absolute sample value, in dBFS.
float peak_dbfs;
// Root-mean-square level, in dBFS.
float rms_dbfs;
};
// Speech level info, filled in `GainController2::Process()` from the speech
// level estimator.
struct SpeechLevel {
// Value of `SpeechLevelEstimator::is_confident()` for the current estimate.
bool is_confident;
// Value of `SpeechLevelEstimator::level_dbfs()` for the current estimate.
float rms_dbfs;
};
// Measures the peak and RMS levels of the first channel of `frame`, converts
// both to dBFS and records them via `data_dumper` before returning them.
// Only channel 0 is inspected; other channels are ignored.
AudioLevels ComputeAudioLevels(DeinterleavedView<float> frame,
                               ApmDataDumper& data_dumper) {
  float max_abs_sample = 0.0f;
  float sum_of_squares = 0.0f;
  for (const auto& sample : frame[0]) {
    // Argument order is kept as (candidate, running max) so that the result
    // for non-finite samples is unchanged.
    max_abs_sample = std::max(std::fabs(sample), max_abs_sample);
    sum_of_squares += sample * sample;
  }
  const float mean_square = sum_of_squares / frame.samples_per_channel();

  AudioLevels measured{FloatS16ToDbfs(max_abs_sample),
                       FloatS16ToDbfs(std::sqrt(mean_square))};
  data_dumper.DumpRaw("agc2_input_rms_dbfs", measured.rms_dbfs);
  data_dumper.DumpRaw("agc2_input_peak_dbfs", measured.peak_dbfs);
  return measured;
}
} // namespace
// Definition of the static instance counter, starting at zero. The constructor
// atomically increments it and uses the new value as the id passed to its
// data dumper.
std::atomic<int> GainController2::instance_count_(0);
GainController2::GainController2(
const Environment& env,
const Agc2Config& config,
const InputVolumeControllerConfig& input_volume_controller_config,
int sample_rate_hz,
int num_channels,
bool use_internal_vad)
: cpu_features_(GetAllowedCpuFeatures(env.field_trials())),
data_dumper_(instance_count_.fetch_add(1) + 1),
fixed_gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
limiter_(&data_dumper_,
SampleRateToDefaultChannelSize(sample_rate_hz),
/*histogram_name_prefix=*/"Agc2"),
APM: add AudioProcessingImpl::capture_::applied_input_volume(_changed) The `recommended_stream_analog_level()` getter is used to retrieve both the applied and the recommended input volume. This behavior is error-prone since the caller must know what is returned based on the point in the code (namely, before/after the AGC has changed the last applied input volume into a recommended level). This CL is a first step to make clarity on which input volume is handled in different parts of APM. Next in the pipeline: make `recommended_stream_analog_level()` a trivial getter that always returns the recommended level. Main changes: - When `recommended_stream_analog_level()` is called but `set_stream_analog_level()` is not called, APM logs an error and returns a fall-back volume (which should not be applied since, when `set_stream_analog_level()` is not called, no external input volume is expected to be present - When APM is used without calling the `*_stream_analog_level()` methods (e.g., when the caller does not provide any input volume), the recorded AEC dumps won't store `Stream::applied_input_level` Other changes: - Removed `AudioProcessingImpl::capture_::prev_analog_mic_level` - Removed redundant code in `GainController2` around detecting input volume changes (already done by APM) - Adapted the `audioproc_f` and `unpack_aecdump` tools - Data dumps clean-up: the applied and the recommended input volumes are now recorded in an AGC implementation agnostic way Bug: webrtc:7494, b/241923537 Change-Id: I3cb4a731fd9f3dc19bf6ac679b7ed8c969ea283b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/271544 Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Hanna Silen <silen@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38054}
2022-09-07 17:14:26 +02:00
calls_since_last_limiter_log_(0) {
RTC_DCHECK(Validate(config));
data_dumper_.InitiateNewSetOfRecordings();
if (config.input_volume_controller.enabled ||
config.adaptive_digital.enabled) {
// Create dependencies.
speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
&data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
if (use_internal_vad)
vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
kVadResetPeriodMs, cpu_features_, sample_rate_hz);
}
if (config.input_volume_controller.enabled) {
// Create controller.
input_volume_controller_ = std::make_unique<InputVolumeController>(
num_channels, input_volume_controller_config);
// TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
input_volume_controller_->Initialize();
}
if (config.adaptive_digital.enabled) {
// Create dependencies.
noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
saturation_protector_ = CreateSaturationProtector(
kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
&data_dumper_);
// Create controller.
adaptive_digital_controller_ =
std::make_unique<AdaptiveDigitalGainController>(
&data_dumper_, config.adaptive_digital,
kAdjacentSpeechFramesThreshold);
}
}
// Defaulted destructor, defined out of line in this translation unit
// (presumably so that the types owned through `std::unique_ptr` members are
// complete here - confirm against the header).
GainController2::~GainController2() = default;
// Forwards `capture_output_used` to the input volume controller, if one was
// created. No other component is notified.
// TODO(webrtc:7494): Pass the flag also to the other components.
void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
  if (!input_volume_controller_) {
    return;
  }
  input_volume_controller_->HandleCaptureOutputUsedChange(capture_output_used);
}
// Sets the fixed digital gain to `gain_db` (converted to a linear factor via
// `DbToRatio()`). If the factor differs from the one currently applied, the
// limiter is reset before the new factor is installed.
void GainController2::SetFixedGainDb(float gain_db) {
  const float new_gain_factor = DbToRatio(gain_db);
  const bool gain_changed =
      fixed_gain_applier_.GetGainFactor() != new_gain_factor;
  if (gain_changed) {
    // A large change of the fixed gain causes an abrupt level change; reset
    // the limiter so that it reacts quickly.
    limiter_.Reset();
  }
  fixed_gain_applier_.SetGainFactor(new_gain_factor);
}
// Clears the recommended input volume and, when the input volume controller
// exists, lets it analyze `audio_buffer` together with the currently applied
// input volume. `applied_input_volume` must be in [0, 255] (checked in debug
// builds only).
void GainController2::Analyze(int applied_input_volume,
                              const AudioBuffer& audio_buffer) {
  recommended_input_volume_ = std::nullopt;

  RTC_DCHECK_GE(applied_input_volume, 0);
  RTC_DCHECK_LE(applied_input_volume, 255);

  if (!input_volume_controller_) {
    return;
  }
  input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
                                              audio_buffer);
}
void GainController2::Process(std::optional<float> speech_probability,
APM: add AudioProcessingImpl::capture_::applied_input_volume(_changed) The `recommended_stream_analog_level()` getter is used to retrieve both the applied and the recommended input volume. This behavior is error-prone since the caller must know what is returned based on the point in the code (namely, before/after the AGC has changed the last applied input volume into a recommended level). This CL is a first step to make clarity on which input volume is handled in different parts of APM. Next in the pipeline: make `recommended_stream_analog_level()` a trivial getter that always returns the recommended level. Main changes: - When `recommended_stream_analog_level()` is called but `set_stream_analog_level()` is not called, APM logs an error and returns a fall-back volume (which should not be applied since, when `set_stream_analog_level()` is not called, no external input volume is expected to be present - When APM is used without calling the `*_stream_analog_level()` methods (e.g., when the caller does not provide any input volume), the recorded AEC dumps won't store `Stream::applied_input_level` Other changes: - Removed `AudioProcessingImpl::capture_::prev_analog_mic_level` - Removed redundant code in `GainController2` around detecting input volume changes (already done by APM) - Adapted the `audioproc_f` and `unpack_aecdump` tools - Data dumps clean-up: the applied and the recommended input volumes are now recorded in an AGC implementation agnostic way Bug: webrtc:7494, b/241923537 Change-Id: I3cb4a731fd9f3dc19bf6ac679b7ed8c969ea283b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/271544 Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Hanna Silen <silen@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38054}
2022-09-07 17:14:26 +02:00
bool input_volume_changed,
AudioBuffer* audio) {
recommended_input_volume_ = std::nullopt;
APM: add AudioProcessingImpl::capture_::applied_input_volume(_changed) The `recommended_stream_analog_level()` getter is used to retrieve both the applied and the recommended input volume. This behavior is error-prone since the caller must know what is returned based on the point in the code (namely, before/after the AGC has changed the last applied input volume into a recommended level). This CL is a first step to make clarity on which input volume is handled in different parts of APM. Next in the pipeline: make `recommended_stream_analog_level()` a trivial getter that always returns the recommended level. Main changes: - When `recommended_stream_analog_level()` is called but `set_stream_analog_level()` is not called, APM logs an error and returns a fall-back volume (which should not be applied since, when `set_stream_analog_level()` is not called, no external input volume is expected to be present - When APM is used without calling the `*_stream_analog_level()` methods (e.g., when the caller does not provide any input volume), the recorded AEC dumps won't store `Stream::applied_input_level` Other changes: - Removed `AudioProcessingImpl::capture_::prev_analog_mic_level` - Removed redundant code in `GainController2` around detecting input volume changes (already done by APM) - Adapted the `audioproc_f` and `unpack_aecdump` tools - Data dumps clean-up: the applied and the recommended input volumes are now recorded in an AGC implementation agnostic way Bug: webrtc:7494, b/241923537 Change-Id: I3cb4a731fd9f3dc19bf6ac679b7ed8c969ea283b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/271544 Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Hanna Silen <silen@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38054}
2022-09-07 17:14:26 +02:00
data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
input_volume_changed);
if (input_volume_changed) {
// Handle input volume changes.
if (speech_level_estimator_)
speech_level_estimator_->Reset();
if (saturation_protector_)
saturation_protector_->Reset();
APM: add AudioProcessingImpl::capture_::applied_input_volume(_changed) The `recommended_stream_analog_level()` getter is used to retrieve both the applied and the recommended input volume. This behavior is error-prone since the caller must know what is returned based on the point in the code (namely, before/after the AGC has changed the last applied input volume into a recommended level). This CL is a first step to make clarity on which input volume is handled in different parts of APM. Next in the pipeline: make `recommended_stream_analog_level()` a trivial getter that always returns the recommended level. Main changes: - When `recommended_stream_analog_level()` is called but `set_stream_analog_level()` is not called, APM logs an error and returns a fall-back volume (which should not be applied since, when `set_stream_analog_level()` is not called, no external input volume is expected to be present - When APM is used without calling the `*_stream_analog_level()` methods (e.g., when the caller does not provide any input volume), the recorded AEC dumps won't store `Stream::applied_input_level` Other changes: - Removed `AudioProcessingImpl::capture_::prev_analog_mic_level` - Removed redundant code in `GainController2` around detecting input volume changes (already done by APM) - Adapted the `audioproc_f` and `unpack_aecdump` tools - Data dumps clean-up: the applied and the recommended input volumes are now recorded in an AGC implementation agnostic way Bug: webrtc:7494, b/241923537 Change-Id: I3cb4a731fd9f3dc19bf6ac679b7ed8c969ea283b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/271544 Reviewed-by: Per Åhgren <peah@webrtc.org> Reviewed-by: Hanna Silen <silen@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38054}
2022-09-07 17:14:26 +02:00
}
DeinterleavedView<float> float_frame = audio->view();
// Compute speech probability.
if (vad_) {
// When the VAD component runs, `speech_probability` should not be specified
// because APM should not run the same VAD twice (as an APM sub-module and
// internally in AGC2).
RTC_DCHECK(!speech_probability.has_value());
speech_probability = vad_->Analyze(float_frame);
}
if (speech_probability.has_value()) {
RTC_DCHECK_GE(*speech_probability, 0.0f);
RTC_DCHECK_LE(*speech_probability, 1.0f);
}
// The speech probability may not be defined at this step (e.g., when the
// fixed digital controller alone is enabled).
if (speech_probability.has_value())
data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
// Compute audio, noise and speech levels.
AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
std::optional<float> noise_rms_dbfs;
if (noise_level_estimator_) {
// TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
// computation in `noise_level_estimator_`.
noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
}
std::optional<SpeechLevel> speech_level;
if (speech_level_estimator_) {
RTC_DCHECK(speech_probability.has_value());
speech_level_estimator_->Update(
audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
speech_level =
SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
.rms_dbfs = speech_level_estimator_->level_dbfs()};
}
// Update the recommended input volume.
if (input_volume_controller_) {
RTC_DCHECK(speech_level.has_value());
RTC_DCHECK(speech_probability.has_value());
if (speech_probability.has_value()) {
recommended_input_volume_ =
input_volume_controller_->RecommendInputVolume(
*speech_probability,
speech_level->is_confident
? std::optional<float>(speech_level->rms_dbfs)
: std::nullopt);
}
}
if (adaptive_digital_controller_) {
RTC_DCHECK(saturation_protector_);
RTC_DCHECK(speech_probability.has_value());
RTC_DCHECK(speech_level.has_value());
saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
speech_level->rms_dbfs);
float headroom_db = saturation_protector_->HeadroomDb();
data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
RTC_DCHECK(noise_rms_dbfs.has_value());
adaptive_digital_controller_->Process(
/*info=*/{.speech_probability = *speech_probability,
.speech_level_dbfs = speech_level->rms_dbfs,
.speech_level_reliable = speech_level->is_confident,
.noise_rms_dbfs = *noise_rms_dbfs,
.headroom_db = headroom_db,
.limiter_envelope_dbfs = limiter_envelope_dbfs},
float_frame);
}
// TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
// computation in `limiter_`.
fixed_gain_applier_.ApplyGain(float_frame);
limiter_.Process(float_frame);
// Periodically log limiter stats.
if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
calls_since_last_limiter_log_ = 0;
InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
<< " | identity: " << stats.look_ups_identity_region
<< " | knee: " << stats.look_ups_knee_region
<< " | limiter: " << stats.look_ups_limiter_region
<< " | saturation: " << stats.look_ups_saturation_region;
}
}
// Returns true if every numeric constraint on `config` holds:
// - fixed digital gain in [0, 50) dB;
// - adaptive digital headroom >= 0 dB;
// - adaptive digital max gain > 0 dB;
// - adaptive digital initial gain >= 0 dB;
// - adaptive digital max gain change rate > 0 dB/s;
// - adaptive digital max output noise level <= 0 dBFS.
bool GainController2::Validate(
    const AudioProcessing::Config::GainController2& config) {
  const auto& fixed_cfg = config.fixed_digital;
  const auto& adaptive_cfg = config.adaptive_digital;

  const bool fixed_gain_ok =
      fixed_cfg.gain_db >= 0.0f && fixed_cfg.gain_db < 50.0f;
  const bool headroom_ok = adaptive_cfg.headroom_db >= 0.0f;
  const bool max_gain_ok = adaptive_cfg.max_gain_db > 0.0f;
  const bool initial_gain_ok = adaptive_cfg.initial_gain_db >= 0.0f;
  const bool gain_change_rate_ok =
      adaptive_cfg.max_gain_change_db_per_second > 0.0f;
  const bool output_noise_ok =
      adaptive_cfg.max_output_noise_level_dbfs <= 0.0f;

  return fixed_gain_ok && headroom_ok && max_gain_ok && initial_gain_ok &&
         gain_change_rate_ok && output_noise_ok;
}
} // namespace webrtc