AEC3: Multichannel suppressor

This change adds multichannel support to the AEC3 suppressor. Processing of mono capture is bit-exact to the previous code.

Bug: webrtc:10913
Change-Id: I89affe3e066021bc34e4b525edf44dd3bea68365
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/158882
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29692}
2019-11-05 15:19:02 +01:00 · 2019-11-05 15:19:02 +01:00 · 5ea5749a86
commit 5ea5749a86
parent 3ee47de99b
7 changed files with 323 additions and 278 deletions
--- a/modules/audio_processing/aec3/BUILD.gn
+++ b/modules/audio_processing/aec3/BUILD.gn
@ -41,6 +41,8 @@ rtc_library("aec3") {
    "decimator.cc",
    "decimator.h",
    "delay_estimate.h",
+    "dominant_nearend_detector.cc",
+    "dominant_nearend_detector.h",
    "downsampled_render_buffer.cc",
    "downsampled_render_buffer.h",
    "echo_audibility.cc",
--- a/modules/audio_processing/aec3/dominant_nearend_detector.cc
+++ b/modules/audio_processing/aec3/dominant_nearend_detector.cc
@ -0,0 +1,76 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
+
+#include <algorithm>
+#include <numeric>
+
+namespace webrtc {
+DominantNearendDetector::DominantNearendDetector(
+    const EchoCanceller3Config::Suppressor::DominantNearendDetection config,
+    size_t num_capture_channels)  // Number of per-channel counter pairs to keep.
+    : enr_threshold_(config.enr_threshold),
+      enr_exit_threshold_(config.enr_exit_threshold),
+      snr_threshold_(config.snr_threshold),
+      hold_duration_(config.hold_duration),
+      trigger_threshold_(config.trigger_threshold),
+      use_during_initial_phase_(config.use_during_initial_phase),
+      num_capture_channels_(num_capture_channels),  // Declared before the counters, so usable below.
+      trigger_counters_(num_capture_channels_),  // One zero-initialized counter per channel.
+      hold_counters_(num_capture_channels_) {}  // One zero-initialized counter per channel.
+
+void DominantNearendDetector::Update(
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        nearend_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        residual_echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
+    bool initial_state) {
+  nearend_state_ = false;  // Rebuilt below as the OR over all channels.
+  // Sums spectrum bins 1..15; bin 0 and bins from 16 upwards are left out.
+  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
+    RTC_DCHECK_LE(16, spectrum.size());
+    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
+  };
+  // Every capture channel has its own trigger counter and hold counter.
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    const float ne_sum = low_frequency_energy(nearend_spectrum[ch]);
+    const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]);
+    const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);
+
+    // Detect strong active nearend if the nearend is sufficiently stronger than
+    // the echo and the noise; in the initial state only when config allows it.
+    if ((!initial_state || use_during_initial_phase_) &&
+        echo_sum < enr_threshold_ * ne_sum &&
+        ne_sum > snr_threshold_ * noise_sum) {
+      if (++trigger_counters_[ch] >= trigger_threshold_) {
+        // Enough detections: reload the hold counter and cap the trigger counter.
+        hold_counters_[ch] = hold_duration_;
+        trigger_counters_[ch] = trigger_threshold_;
+      }
+    } else {
+      // Forget previous detections gradually: one step down per call, floor 0.
+      trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1);
+    }
+
+    // Exit nearend-state early when the echo exceeds both scaled nearend and noise.
+    if (echo_sum > enr_exit_threshold_ * ne_sum &&
+        echo_sum > snr_threshold_ * noise_sum) {
+      hold_counters_[ch] = 0;
+    }
+    // Remain in any nearend mode for a certain duration. The decrement also runs
+    // in the call that reloads the counter, so a fresh reload is already reduced.
+    hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1);
+    nearend_state_ = nearend_state_ || hold_counters_[ch] > 0;
+  }
+}
+}  // namespace webrtc
--- a/modules/audio_processing/aec3/dominant_nearend_detector.h
+++ b/modules/audio_processing/aec3/dominant_nearend_detector.h
@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+// Class for selecting whether the suppressor is in the nearend or echo state.
+class DominantNearendDetector {
+ public:
+  DominantNearendDetector(
+      const EchoCanceller3Config::Suppressor::DominantNearendDetection config,
+      size_t num_capture_channels);  // Number of per-channel counters to keep.
+
+  // Returns the state set by the last Update(); false before the first Update().
+  bool IsNearendState() const { return nearend_state_; }
+  // Updates the state selection based on latest spectral estimates. Every view
+  // is indexed for each capture channel, so each must hold that many spectra.
+  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  nearend_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  residual_echo_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  comfort_noise_spectrum,
+              bool initial_state);  // If true, triggering needs use_during_initial_phase_.
+
+ private:
+  const float enr_threshold_;  // Entry requires echo < enr_threshold_ * nearend.
+  const float enr_exit_threshold_;  // Early exit requires echo > this * nearend.
+  const float snr_threshold_;  // Scales the noise energy in the entry and exit tests.
+  const int hold_duration_;  // Value a hold counter is reloaded with on a trigger.
+  const int trigger_threshold_;  // Trigger-counter level that starts a hold.
+  const bool use_during_initial_phase_;  // Permits triggering while initial_state is true.
+  const size_t num_capture_channels_;  // Number of channels examined by Update().
+
+  bool nearend_state_ = false;  // True if any hold counter was > 0 at the last Update().
+  std::vector<int> trigger_counters_;  // Per channel: up on detection, down otherwise.
+  std::vector<int> hold_counters_;  // Per channel: counts down to 0 after a trigger.
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@ -148,7 +148,7 @@ class EchoRemoverImpl final : public EchoRemover {
  const size_t num_capture_channels_;
  const bool use_shadow_filter_output_;
  Subtractor subtractor_;
-  std::vector<std::unique_ptr<SuppressionGain>> suppression_gains_;
+  SuppressionGain suppression_gain_;
  ComfortNoiseGenerator cng_;
  SuppressionFilter suppression_filter_;
  RenderSignalAnalyzer render_signal_analyzer_;
@ -195,7 +195,10 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
                  num_capture_channels_,
                  data_dumper_.get(),
                  optimization_),
-      suppression_gains_(num_capture_channels_),
+      suppression_gain_(config_,
+                        optimization_,
+                        sample_rate_hz,
+                        num_capture_channels),
      cng_(optimization_, num_capture_channels_),
      suppression_filter_(optimization_,
                          sample_rate_hz_,
@ -203,9 +206,9 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
      render_signal_analyzer_(config_),
      residual_echo_estimator_(config_, num_render_channels),
      aec_state_(config_, num_capture_channels_),
-      e_old_(num_capture_channels_),
-      y_old_(num_capture_channels_),
-      e_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      e_old_(num_capture_channels_, {0.f}),
+      y_old_(num_capture_channels_, {0.f}),
+      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
      Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
@ -216,16 +219,6 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
      high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
      subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
-  for (auto& e_k : e_heap_) {
-    e_k.fill(0.f);
-  }
-
-  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-    suppression_gains_[ch] = std::make_unique<SuppressionGain>(
-        config_, optimization_, sample_rate_hz);
-    e_old_[ch].fill(0.f);
-    y_old_[ch].fill(0.f);
-  }
 }

 EchoRemoverImpl::~EchoRemoverImpl() = default;
@ -343,9 +336,7 @@ void EchoRemoverImpl::ProcessCapture(

    if (echo_path_variability.delay_change !=
        EchoPathVariability::DelayAdjustment::kNone) {
-      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-        suppression_gains_[ch]->SetInitialState(true);
-      }
+      suppression_gain_.SetInitialState(true);
    }
  }
  if (gain_change_hangover_ > 0) {
@ -359,9 +350,7 @@ void EchoRemoverImpl::ProcessCapture(
  // State transition.
  if (aec_state_.TransitionTriggered()) {
    subtractor_.ExitInitialState();
-    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-      suppression_gains_[ch]->SetInitialState(false);
-    }
+    suppression_gain_.SetInitialState(false);
  }

  // Perform linear echo cancellation.
@ -390,10 +379,6 @@ void EchoRemoverImpl::ProcessCapture(
                        1);
  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);

-  float high_bands_gain = 1.f;
-  std::array<float, kFftLengthBy2Plus1> G;
-  G.fill(1.f);
-
  // Estimate the residual echo power.
  residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
                                    R2);
@ -402,34 +387,27 @@ void EchoRemoverImpl::ProcessCapture(
  cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
               high_band_comfort_noise);

-  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-    // Suppressor echo estimate.
-    const auto& echo_spectrum =
-        aec_state_.UsableLinearEstimate() ? S2_linear[ch] : R2[ch];
-
-    // Suppressor nearend estimate.
-    std::array<float, kFftLengthBy2Plus1> nearend_spectrum_bounded;
-    if (aec_state_.UsableLinearEstimate()) {
+  // Suppressor nearend estimate.
+  if (aec_state_.UsableLinearEstimate()) {
+    // E2 is bound by Y2.
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
-                     nearend_spectrum_bounded.begin(),
+                     E2[ch].begin(),
                     [](float a, float b) { return std::min(a, b); });
    }
-    const auto& nearend_spectrum =
-        aec_state_.UsableLinearEstimate() ? nearend_spectrum_bounded : Y2[ch];
-
-    // Compute preferred gains for each channel. The minimum gain determines the
-    // final gain.
-    float high_bands_gain_channel;
-    std::array<float, kFftLengthBy2Plus1> G_channel;
-    suppression_gains_[ch]->GetGain(nearend_spectrum, echo_spectrum, R2[ch],
-                                    cng_.NoiseSpectrum()[ch],
-                                    render_signal_analyzer_, aec_state_, x,
-                                    &high_bands_gain_channel, &G_channel);
-
-    high_bands_gain = std::min(high_bands_gain, high_bands_gain_channel);
-    std::transform(G.begin(), G.end(), G_channel.begin(), G.begin(),
-                   [](float a, float b) { return std::min(a, b); });
  }
+  const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
+
+  // Suppressor echo estimate.
+  const auto& echo_spectrum =
+      aec_state_.UsableLinearEstimate() ? S2_linear : R2;
+
+  // Compute preferred gains.
+  float high_bands_gain;
+  std::array<float, kFftLengthBy2Plus1> G;
+  suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
+                            cng_.NoiseSpectrum(), render_signal_analyzer_,
+                            aec_state_, x, &high_bands_gain, &G);

  suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
                                high_bands_gain, Y_fft, y);
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@ -25,8 +25,10 @@
 namespace webrtc {
 namespace {

-// Adjust the gains according to the presence of known external filters.
-void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
+void PostprocessGains(std::array<float, kFftLengthBy2Plus1>* gain) {
+  // TODO(gustaf): Investigate if this can be relaxed to achieve higher
+  // transparency above 2 kHz.
+
  // Limit the low frequency gains to avoid the impact of the high-pass filter
  // on the lower-frequency gain influencing the overall achieved gain.
  (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]);
@ -41,6 +43,21 @@ void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
      gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1,
      [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); });
  (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1];
+
+  // Limits the gain in the frequencies for which the adaptive filter has not
+  // converged.
+  // TODO(peah): Make adaptive to take the actual filter error into account.
+  constexpr size_t kUpperAccurateBandPlus1 = 29;
+
+  constexpr float oneByBandsInSum =
+      1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
+  const float hf_gain_bound =
+      std::accumulate(gain->begin() + 20,
+                      gain->begin() + kUpperAccurateBandPlus1, 0.f) *
+      oneByBandsInSum;
+
+  std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
+                [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
 }

 // Scales the echo according to assessed audibility at the other end.
@ -79,33 +96,14 @@ void WeightEchoForAudibility(const EchoCanceller3Config& config,
  weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo);
 }

-// TODO(peah): Make adaptive to take the actual filter error into account.
-constexpr size_t kUpperAccurateBandPlus1 = 29;
-
-// Limits the gain in the frequencies for which the adaptive filter has not
-// converged. Currently, these frequencies are not hardcoded to the frequencies
-// which are typically not excited by speech.
-// TODO(peah): Make adaptive to take the actual filter error into account.
-void AdjustNonConvergedFrequencies(
-    std::array<float, kFftLengthBy2Plus1>* gain) {
-  constexpr float oneByBandsInSum =
-      1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
-  const float hf_gain_bound =
-      std::accumulate(gain->begin() + 20,
-                      gain->begin() + kUpperAccurateBandPlus1, 0.f) *
-      oneByBandsInSum;
-
-  std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
-                [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
-}
-
 }  // namespace

 int SuppressionGain::instance_count_ = 0;

 float SuppressionGain::UpperBandsGain(
-    const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
    const absl::optional<int>& narrow_peak_band,
    bool saturated_echo,
    const std::vector<std::vector<std::vector<float>>>& render,
@ -161,18 +159,22 @@ float SuppressionGain::UpperBandsGain(
    anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy);
  }

-  // Bound the upper gain during significant echo activity.
-  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
-    RTC_DCHECK_LE(16, spectrum.size());
-    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
-  };
-  const float echo_sum = low_frequency_energy(echo_spectrum);
-  const float noise_sum = low_frequency_energy(comfort_noise_spectrum);
-  const auto& cfg = config_.suppressor.high_bands_suppression;
  float gain_bound = 1.f;
-  if (echo_sum > cfg.enr_threshold * noise_sum &&
-      !dominant_nearend_detector_.IsNearendState()) {
-    gain_bound = cfg.max_gain_during_echo;
+  if (!dominant_nearend_detector_.IsNearendState()) {
+    // Bound the upper gain during significant echo activity.
+    const auto& cfg = config_.suppressor.high_bands_suppression;
+    auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
+      RTC_DCHECK_LE(16, spectrum.size());
+      return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
+    };
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+      const float echo_sum = low_frequency_energy(echo_spectrum[ch]);
+      const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);
+      if (echo_sum > cfg.enr_threshold * noise_sum) {
+        gain_bound = cfg.max_gain_during_echo;
+        break;
+      }
+    }
  }

  // Choose the gain as the minimum of the lower and upper gains.
@ -184,8 +186,6 @@ void SuppressionGain::GainToNoAudibleEcho(
    const std::array<float, kFftLengthBy2Plus1>& nearend,
    const std::array<float, kFftLengthBy2Plus1>& echo,
    const std::array<float, kFftLengthBy2Plus1>& masker,
-    const std::array<float, kFftLengthBy2Plus1>& min_gain,
-    const std::array<float, kFftLengthBy2Plus1>& max_gain,
    std::array<float, kFftLengthBy2Plus1>* gain) const {
  const auto& p = dominant_nearend_detector_.IsNearendState() ? nearend_params_
                                                              : normal_params_;
@ -198,7 +198,7 @@ void SuppressionGain::GainToNoAudibleEcho(
          (p.enr_suppress_[k] - p.enr_transparent_[k]);
      g = std::max(g, p.emr_transparent_[k] / emr);
    }
-    (*gain)[k] = std::max(std::min(g, max_gain[k]), min_gain[k]);
+    (*gain)[k] = g;
  }
 }

@ -206,6 +206,8 @@ void SuppressionGain::GainToNoAudibleEcho(
 // above the zero sample values.
 void SuppressionGain::GetMinGain(
    rtc::ArrayView<const float> weighted_residual_echo,
+    rtc::ArrayView<const float> last_nearend,
+    rtc::ArrayView<const float> last_echo,
    bool low_noise_render,
    bool saturated_echo,
    rtc::ArrayView<float> min_gain) const {
@ -227,7 +229,7 @@ void SuppressionGain::GetMinGain(

      // Make sure the gains of the low frequencies do not decrease too
      // quickly after strong nearend.
-      if (last_nearend_[k] > last_echo_[k]) {
+      if (last_nearend[k] > last_echo[k]) {
        min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
        min_gain[k] = std::min(min_gain[k], 1.f);
      }
@ -249,79 +251,91 @@ void SuppressionGain::GetMaxGain(rtc::ArrayView<float> max_gain) const {
  }
 }

-// TODO(peah): Add further optimizations, in particular for the divisions.
 void SuppressionGain::LowerBandGain(
    bool low_noise_render,
    const AecState& aec_state,
-    const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
-    const std::array<float, kFftLengthBy2Plus1>& nearend,
-    const std::array<float, kFftLengthBy2Plus1>& residual_echo,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        suppressor_input,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
    std::array<float, kFftLengthBy2Plus1>* gain) {
+  gain->fill(1.f);  // Start at unity; the per-channel minimum is folded in below.
  const bool saturated_echo = aec_state.SaturatedEcho();
-
-  // Weight echo power in terms of audibility. // Precompute 1/weighted echo
-  // (note that when the echo is zero, the precomputed value is never used).
-  std::array<float, kFftLengthBy2Plus1> weighted_residual_echo;
-  WeightEchoForAudibility(config_, residual_echo, weighted_residual_echo);
-
-  std::array<float, kFftLengthBy2Plus1> min_gain;
-  GetMinGain(weighted_residual_echo, low_noise_render, saturated_echo,
-             min_gain);
-
  std::array<float, kFftLengthBy2Plus1> max_gain;
  GetMaxGain(max_gain);

-  GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise, min_gain,
-                      max_gain, gain);
-  AdjustForExternalFilters(gain);
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    std::array<float, kFftLengthBy2Plus1> G;
+    std::array<float, kFftLengthBy2Plus1> nearend;
+    nearend_smoothers_[ch].Average(suppressor_input[ch], nearend);

-  // Adjust the gain for frequencies which have not yet converged.
-  AdjustNonConvergedFrequencies(gain);
+    // Weight echo power in terms of audibility.
+    std::array<float, kFftLengthBy2Plus1> weighted_residual_echo;
+    WeightEchoForAudibility(config_, residual_echo[ch], weighted_residual_echo);

-  // Store data required for the gain computation of the next block.
-  std::copy(nearend.begin(), nearend.end(), last_nearend_.begin());
-  std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(),
-            last_echo_.begin());
+    std::array<float, kFftLengthBy2Plus1> min_gain;
+    GetMinGain(weighted_residual_echo, last_nearend_[ch], last_echo_[ch],
+               low_noise_render, saturated_echo, min_gain);
+    // NOTE(review): masker is comfort_noise[0] for every ch, not [ch]; confirm.
+    GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise[0], &G);
+
+    // Clamp to [min_gain, max_gain], then keep the minimum over all channels.
+    for (size_t k = 0; k < gain->size(); ++k) {
+      G[k] = std::max(std::min(G[k], max_gain[k]), min_gain[k]);
+      (*gain)[k] = std::min((*gain)[k], G[k]);
+    }
+
+    // Store data required for the gain computation of the next block.
+    std::copy(nearend.begin(), nearend.end(), last_nearend_[ch].begin());
+    std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(),
+              last_echo_[ch].begin());
+  }
+
+  // Limit the gains at the lowest and the highest frequencies.
+  PostprocessGains(gain);
+
+  // Store the gains, before the square root, for use by the next GetMinGain().
  std::copy(gain->begin(), gain->end(), last_gain_.begin());
-  aec3::VectorMath(optimization_).Sqrt(*gain);

-  // Debug outputs for the purpose of development and analysis.
-  data_dumper_->DumpRaw("aec3_suppressor_min_gain", min_gain);
-  data_dumper_->DumpRaw("aec3_suppressor_max_gain", max_gain);
-  data_dumper_->DumpRaw("aec3_dominant_nearend",
-                        dominant_nearend_detector_.IsNearendState());
+  // Transform gains to amplitude domain.
+  aec3::VectorMath(optimization_).Sqrt(*gain);
 }

 SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
                                 Aec3Optimization optimization,
-                                 int sample_rate_hz)
+                                 int sample_rate_hz,
+                                 size_t num_capture_channels)  // Sizes all per-channel state.
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      optimization_(optimization),
      config_(config),
+      num_capture_channels_(num_capture_channels),
      state_change_duration_blocks_(
          static_cast<int>(config_.filter.config_change_duration_blocks)),
-      moving_average_(kFftLengthBy2Plus1,
-                      config.suppressor.nearend_average_blocks),
+      last_nearend_(num_capture_channels_, {0}),  // One all-zero spectrum per channel.
+      last_echo_(num_capture_channels_, {0}),  // One all-zero spectrum per channel.
+      nearend_smoothers_(  // Count plus prototype: one averager copy per channel.
+          num_capture_channels_,
+          aec3::MovingAverage(kFftLengthBy2Plus1,
+                              config.suppressor.nearend_average_blocks)),
      nearend_params_(config_.suppressor.nearend_tuning),
      normal_params_(config_.suppressor.normal_tuning),
-      dominant_nearend_detector_(
-          config_.suppressor.dominant_nearend_detection) {
+      dominant_nearend_detector_(config_.suppressor.dominant_nearend_detection,
+                                 num_capture_channels_) {  // Shared detector, per-channel counters.
  RTC_DCHECK_LT(0, state_change_duration_blocks_);
-  one_by_state_change_duration_blocks_ = 1.f / state_change_duration_blocks_;
  last_gain_.fill(1.f);
-  last_nearend_.fill(0.f);
-  last_echo_.fill(0.f);
 }

 SuppressionGain::~SuppressionGain() = default;

 void SuppressionGain::GetGain(
-    const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        nearend_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        residual_echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
    const RenderSignalAnalyzer& render_signal_analyzer,
    const AecState& aec_state,
    const std::vector<std::vector<std::vector<float>>>& render,
@ -337,18 +351,20 @@ void SuppressionGain::GetGain(
    return;
  }

-  std::array<float, kFftLengthBy2Plus1> nearend_average;
-  moving_average_.Average(nearend_spectrum, nearend_average);
-
-  // Update the state selection.
+  // Update the nearend state selection.
  dominant_nearend_detector_.Update(nearend_spectrum, residual_echo_spectrum,
                                    comfort_noise_spectrum, initial_state_);

  // Compute gain for the lower band.
  bool low_noise_render = low_render_detector_.Detect(render);
-  LowerBandGain(low_noise_render, aec_state, nearend_spectrum, nearend_average,
+  LowerBandGain(low_noise_render, aec_state, nearend_spectrum,
                residual_echo_spectrum, comfort_noise_spectrum, low_band_gain);

+  if (cfg.enforce_empty_higher_bands) {
+    *high_bands_gain = 0.f;
+    return;
+  }
+
  // Compute the gain for the upper bands.
  const absl::optional<int> narrow_peak_band =
      render_signal_analyzer.NarrowPeakBand();
@ -356,9 +372,6 @@ void SuppressionGain::GetGain(
  *high_bands_gain =
      UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
                     aec_state.SaturatedEcho(), render, *low_band_gain);
-  if (cfg.enforce_empty_higher_bands) {
-    *high_bands_gain = 0.f;
-  }
 }

 void SuppressionGain::SetInitialState(bool state) {
@ -394,54 +407,6 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect(
  return low_noise_render;
 }

-SuppressionGain::DominantNearendDetector::DominantNearendDetector(
-    const EchoCanceller3Config::Suppressor::DominantNearendDetection config)
-    : enr_threshold_(config.enr_threshold),
-      enr_exit_threshold_(config.enr_exit_threshold),
-      snr_threshold_(config.snr_threshold),
-      hold_duration_(config.hold_duration),
-      trigger_threshold_(config.trigger_threshold),
-      use_during_initial_phase_(config.use_during_initial_phase) {}
-
-void SuppressionGain::DominantNearendDetector::Update(
-    rtc::ArrayView<const float> nearend_spectrum,
-    rtc::ArrayView<const float> residual_echo_spectrum,
-    rtc::ArrayView<const float> comfort_noise_spectrum,
-    bool initial_state) {
-  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
-    RTC_DCHECK_LE(16, spectrum.size());
-    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
-  };
-  const float ne_sum = low_frequency_energy(nearend_spectrum);
-  const float echo_sum = low_frequency_energy(residual_echo_spectrum);
-  const float noise_sum = low_frequency_energy(comfort_noise_spectrum);
-
-  // Detect strong active nearend if the nearend is sufficiently stronger than
-  // the echo and the nearend noise.
-  if ((!initial_state || use_during_initial_phase_) &&
-      echo_sum < enr_threshold_ * ne_sum &&
-      ne_sum > snr_threshold_ * noise_sum) {
-    if (++trigger_counter_ >= trigger_threshold_) {
-      // After a period of strong active nearend activity, flag nearend mode.
-      hold_counter_ = hold_duration_;
-      trigger_counter_ = trigger_threshold_;
-    }
-  } else {
-    // Forget previously detected strong active nearend activity.
-    trigger_counter_ = std::max(0, trigger_counter_ - 1);
-  }
-
-  // Exit nearend-state early at strong echo.
-  if (echo_sum > enr_exit_threshold_ * ne_sum &&
-      echo_sum > snr_threshold_ * noise_sum) {
-    hold_counter_ = 0;
-  }
-
-  // Remain in any nearend mode for a certain duration.
-  hold_counter_ = std::max(0, hold_counter_ - 1);
-  nearend_state_ = hold_counter_ > 0;
-}
-
 SuppressionGain::GainParameters::GainParameters(
    const EchoCanceller3Config::Suppressor::Tuning& tuning)
    : max_inc_factor(tuning.max_inc_factor),
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@ -20,6 +20,7 @@
 #include "api/audio/echo_canceller3_config.h"
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
 #include "modules/audio_processing/aec3/fft_data.h"
 #include "modules/audio_processing/aec3/moving_average.h"
 #include "modules/audio_processing/aec3/render_signal_analyzer.h"
@ -32,13 +33,17 @@ class SuppressionGain {
 public:
  SuppressionGain(const EchoCanceller3Config& config,
                  Aec3Optimization optimization,
-                  int sample_rate_hz);
+                  int sample_rate_hz,
+                  size_t num_capture_channels);
  ~SuppressionGain();
  void GetGain(
-      const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          nearend_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          residual_echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
      const RenderSignalAnalyzer& render_signal_analyzer,
      const AecState& aec_state,
      const std::vector<std::vector<std::vector<float>>>& render,
@ -51,31 +56,31 @@ class SuppressionGain {
 private:
  // Computes the gain to apply for the bands beyond the first band.
  float UpperBandsGain(
-      const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
      const absl::optional<int>& narrow_peak_band,
      bool saturated_echo,
      const std::vector<std::vector<std::vector<float>>>& render,
      const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const;

-  void GainToNoAudibleEcho(
-      const std::array<float, kFftLengthBy2Plus1>& nearend,
-      const std::array<float, kFftLengthBy2Plus1>& echo,
-      const std::array<float, kFftLengthBy2Plus1>& masker,
-      const std::array<float, kFftLengthBy2Plus1>& min_gain,
-      const std::array<float, kFftLengthBy2Plus1>& max_gain,
-      std::array<float, kFftLengthBy2Plus1>* gain) const;
+  void GainToNoAudibleEcho(const std::array<float, kFftLengthBy2Plus1>& nearend,
+                           const std::array<float, kFftLengthBy2Plus1>& echo,
+                           const std::array<float, kFftLengthBy2Plus1>& masker,
+                           std::array<float, kFftLengthBy2Plus1>* gain) const;

  void LowerBandGain(
      bool stationary_with_low_power,
      const AecState& aec_state,
-      const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
-      const std::array<float, kFftLengthBy2Plus1>& nearend,
-      const std::array<float, kFftLengthBy2Plus1>& residual_echo,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          suppressor_input,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
      std::array<float, kFftLengthBy2Plus1>* gain);

  void GetMinGain(rtc::ArrayView<const float> weighted_residual_echo,
+                  rtc::ArrayView<const float> last_nearend,
+                  rtc::ArrayView<const float> last_echo,
                  bool low_noise_render,
                  bool saturated_echo,
                  rtc::ArrayView<float> min_gain) const;
@ -90,35 +95,6 @@ class SuppressionGain {
    float average_power_ = 32768.f * 32768.f;
  };

-  // Class for selecting whether the suppressor is in the nearend or echo state.
-  class DominantNearendDetector {
-   public:
-    explicit DominantNearendDetector(
-        const EchoCanceller3Config::Suppressor::DominantNearendDetection
-            config);
-
-    // Returns whether the current state is the nearend state.
-    bool IsNearendState() const { return nearend_state_; }
-
-    // Updates the state selection based on latest spectral estimates.
-    void Update(rtc::ArrayView<const float> nearend_spectrum,
-                rtc::ArrayView<const float> residual_echo_spectrum,
-                rtc::ArrayView<const float> comfort_noise_spectrum,
-                bool initial_state);
-
-   private:
-    const float enr_threshold_;
-    const float enr_exit_threshold_;
-    const float snr_threshold_;
-    const int hold_duration_;
-    const int trigger_threshold_;
-    const bool use_during_initial_phase_;
-
-    bool nearend_state_ = false;
-    int trigger_counter_ = 0;
-    int hold_counter_ = 0;
-  };
-
  struct GainParameters {
    explicit GainParameters(
        const EchoCanceller3Config::Suppressor::Tuning& tuning);
@ -133,15 +109,15 @@ class SuppressionGain {
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const Aec3Optimization optimization_;
  const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
  const int state_change_duration_blocks_;
-  float one_by_state_change_duration_blocks_;
  std::array<float, kFftLengthBy2Plus1> last_gain_;
-  std::array<float, kFftLengthBy2Plus1> last_nearend_;
-  std::array<float, kFftLengthBy2Plus1> last_echo_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_nearend_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_echo_;
  LowNoiseRenderDetector low_render_detector_;
  bool initial_state_ = true;
  int initial_state_change_counter_ = 0;
-  aec3::MovingAverage moving_average_;
+  std::vector<aec3::MovingAverage> nearend_smoothers_;
  const GainParameters nearend_params_;
  const GainParameters normal_params_;
  DominantNearendDetector dominant_nearend_detector_;
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@ -26,16 +26,15 @@ namespace aec3 {

 // Verifies that the check for non-null output gains works.
 TEST(SuppressionGain, NullOutputGains) {
-  std::array<float, kFftLengthBy2Plus1> E2;
-  std::array<float, kFftLengthBy2Plus1> R2;
-  std::array<float, kFftLengthBy2Plus1> S2;
-  std::array<float, kFftLengthBy2Plus1> N2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(1);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.f});
+  for (auto& S2_k : S2) {
+    S2_k.fill(.1f);
+  }
  FftData E;
  FftData Y;
-  E2.fill(0.f);
-  R2.fill(0.f);
-  S2.fill(0.1f);
-  N2.fill(0.f);
  E.re.fill(0.f);
  E.im.fill(0.f);
  Y.re.fill(0.f);
@ -44,7 +43,7 @@ TEST(SuppressionGain, NullOutputGains) {
  float high_bands_gain;
  AecState aec_state(EchoCanceller3Config{}, 1);
  EXPECT_DEATH(
-      SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000)
+      SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1)
          .GetGain(E2, S2, R2, N2,
                   RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
                   std::vector<std::vector<std::vector<float>>>(
@ -59,46 +58,43 @@ TEST(SuppressionGain, NullOutputGains) {
 // Does a sanity check that the gains are correctly computed.
 TEST(SuppressionGain, BasicGainComputation) {
  constexpr size_t kNumRenderChannels = 1;
-  constexpr size_t kNumCaptureChannels = 1;
+  constexpr size_t kNumCaptureChannels = 2;
  constexpr int kSampleRateHz = 16000;
  constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
  SuppressionGain suppression_gain(EchoCanceller3Config(), DetectOptimization(),
-                                   kSampleRateHz);
+                                   kSampleRateHz, kNumCaptureChannels);
  RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
  float high_bands_gain;
  std::vector<std::array<float, kFftLengthBy2Plus1>> E2(kNumCaptureChannels);
-  std::array<float, kFftLengthBy2Plus1> S2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(kNumCaptureChannels,
+                                                        {0.f});
  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels);
-  std::array<float, kFftLengthBy2Plus1> R2;
-  std::array<float, kFftLengthBy2Plus1> N2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumCaptureChannels);
  std::array<float, kFftLengthBy2Plus1> g;
  std::vector<SubtractorOutput> output(kNumCaptureChannels);
-  std::array<float, kBlockSize> y;
  std::vector<std::vector<std::vector<float>>> x(
      kNumBands, std::vector<std::vector<float>>(
                     kNumRenderChannels, std::vector<float>(kBlockSize, 0.f)));
  EchoCanceller3Config config;
  AecState aec_state(config, kNumCaptureChannels);
  ApmDataDumper data_dumper(42);
-  Subtractor subtractor(config, 1, 1, &data_dumper, DetectOptimization());
+  Subtractor subtractor(config, kNumRenderChannels, kNumCaptureChannels,
+                        &data_dumper, DetectOptimization());
  std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
      RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
  absl::optional<DelayEstimate> delay_estimate;

  // Ensure that a strong noise is detected to mask any echoes.
-  for (auto& E2_k : E2) {
-    E2_k.fill(10.f);
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(10.f);
+    Y2[ch].fill(10.f);
+    R2[ch].fill(.1f);
+    N2[ch].fill(100.f);
  }
-  for (auto& Y2_k : Y2) {
-    Y2_k.fill(10.f);
-  }
-  R2.fill(0.1f);
-  S2.fill(0.1f);
-  N2.fill(100.f);
  for (auto& subtractor_output : output) {
    subtractor_output.Reset();
  }
-  y.fill(0.f);

  // Ensure that the gain is no longer forced to zero.
  for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
@ -111,41 +107,37 @@ TEST(SuppressionGain, BasicGainComputation) {
    aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
                     subtractor.FilterImpulseResponses(),
                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                             &high_bands_gain, &g);
  }
  std::for_each(g.begin(), g.end(),
                [](float a) { EXPECT_NEAR(1.f, a, 0.001); });

  // Ensure that a strong nearend is detected to mask any echoes.
-  for (auto& E2_k : E2) {
-    E2_k.fill(100.f);
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(100.f);
+    Y2[ch].fill(100.f);
+    R2[ch].fill(0.1f);
+    S2[ch].fill(0.1f);
+    N2[ch].fill(0.f);
  }
-  for (auto& Y2_k : Y2) {
-    Y2_k.fill(100.f);
-  }
-  R2.fill(0.1f);
-  S2.fill(0.1f);
-  N2.fill(0.f);

  for (int k = 0; k < 100; ++k) {
    aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
                     subtractor.FilterImpulseResponses(),
                     *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                             &high_bands_gain, &g);
  }
  std::for_each(g.begin(), g.end(),
                [](float a) { EXPECT_NEAR(1.f, a, 0.001); });

-  // Ensure that a strong echo is suppressed.
-  for (auto& E2_k : E2) {
-    E2_k.fill(1000000000.f);
-  }
-  R2.fill(10000000000000.f);
+  // Add a strong echo to one of the channels and ensure that it is suppressed.
+  E2[1].fill(1000000000.f);
+  R2[1].fill(10000000000000.f);

  for (int k = 0; k < 10; ++k) {
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                             &high_bands_gain, &g);
  }
  std::for_each(g.begin(), g.end(),