/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_mixer/frame_combiner.h"

#include <algorithm>
#include <array>
#include <functional>
#include <memory>

#include "api/array_view.h"
#include "audio/utility/audio_frame_operations.h"
#include "modules/audio_mixer/audio_frame_manipulator.h"
#include "modules/audio_mixer/audio_mixer_impl.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"

namespace webrtc {
namespace {

// Stereo, 48 kHz, 10 ms.
constexpr int kMaximalFrameSize = 2 * 48 * 10;

void CombineZeroFrames(bool use_limiter,
                       AudioProcessing* limiter,
                       AudioFrame* audio_frame_for_mixing) {
  audio_frame_for_mixing->elapsed_time_ms_ = -1;
  AudioFrameOperations::Mute(audio_frame_for_mixing);
  // The limiter should still process a zero frame to avoid jumps in
  // its gain curve.
  if (use_limiter) {
    RTC_DCHECK(limiter);
    // The limiter smoothly increases frames with half gain to full
    // volume. Here there's no need to apply half gain, since the frame
    // is zero anyway.
    limiter->ProcessStream(audio_frame_for_mixing);
  }
}

void CombineOneFrame(const AudioFrame* input_frame,
                     bool use_limiter,
                     AudioProcessing* limiter,
                     AudioFrame* audio_frame_for_mixing) {
  audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
  audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
  // TODO(yujo): can we optimize muted frames?
  std::copy(input_frame->data(),
            input_frame->data() +
                input_frame->num_channels_ * input_frame->samples_per_channel_,
            audio_frame_for_mixing->mutable_data());
  if (use_limiter) {
    AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
    RTC_DCHECK(limiter);
    limiter->ProcessStream(audio_frame_for_mixing);
    AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
  }
}

// Lower-level helper function called from Combine(...) when there
// are several input frames.
//
// TODO(aleloi): change interface to ArrayView<int16_t> output_frame
// once we have gotten rid of the APM limiter.
//
// Only the 'data' field of output_frame should be modified. The
// rest are used for potentially sending the output to the APM
// limiter.
void CombineMultipleFrames(
    const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
    bool use_limiter,
    AudioProcessing* limiter,
    AudioFrame* audio_frame_for_mixing) {
  RTC_DCHECK(!input_frames.empty());
  RTC_DCHECK(audio_frame_for_mixing);

  const size_t frame_length = input_frames.front().size();
  for (const auto& frame : input_frames) {
    RTC_DCHECK_EQ(frame_length, frame.size());
  }

  // Algorithm: int16 frames are added to a sufficiently large
  // statically allocated int32 buffer. For > 2 participants this is
  // more efficient than addition in place in the int16 audio
  // frame. The audio quality loss due to halving the samples is
  // smaller than 16-bit addition in place.
  RTC_DCHECK_GE(kMaximalFrameSize, frame_length);
  std::array<int32_t, kMaximalFrameSize> add_buffer;

  add_buffer.fill(0);

  for (const auto& frame : input_frames) {
    // TODO(yujo): skip this for muted frames.
    std::transform(frame.begin(), frame.end(), add_buffer.begin(),
                   add_buffer.begin(), std::plus<int32_t>());
  }

  if (use_limiter) {
    // Halve all samples to avoid saturation before limiting.
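    // Worked example of why halving helps (illustrative numbers, not from
    // the original source): two full-scale samples 32767 + 32767 sum to
    // 65534 in the int32 buffer. Halved, that is 32767 and fits int16_t
    // exactly; cast directly, rtc::saturated_cast<int16_t> would clamp it
    // to 32767 and the limiter would see a hard-clipped signal instead of
    // a smoothly attenuated one.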
    std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
                   audio_frame_for_mixing->mutable_data(), [](int32_t a) {
                     return rtc::saturated_cast<int16_t>(a / 2);
                   });

    // Smoothly limit the audio.
    RTC_DCHECK(limiter);
    const int error = limiter->ProcessStream(audio_frame_for_mixing);
    if (error != limiter->kNoError) {
      RTC_LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
      RTC_NOTREACHED();
    }

    // And now we can safely restore the level. This procedure results in
    // some loss of resolution, deemed acceptable.
    //
    // It's possible to apply the gain in the AGC (with a target level of
    // 0 dBFS and compression gain of 6 dB). However, in the transition frame
    // when this is enabled (moving from one to two audio sources) it has the
    // potential to create discontinuities in the mixed frame.
    //
    // Instead we double the frame (with addition, since left-shifting a
    // negative value is undefined).
    AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
  } else {
    std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
                   audio_frame_for_mixing->mutable_data(),
                   [](int32_t a) { return rtc::saturated_cast<int16_t>(a); });
  }
}

std::unique_ptr<AudioProcessing> CreateLimiter() {
  Config config;
  config.Set<ExperimentalAgc>(new ExperimentalAgc(false));

  std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
  RTC_DCHECK(limiter);

  webrtc::AudioProcessing::Config apm_config;
  apm_config.residual_echo_detector.enabled = false;
  limiter->ApplyConfig(apm_config);

  const auto check_no_error = [](int x) {
    RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
  };
  auto* const gain_control = limiter->gain_control();
  check_no_error(gain_control->set_mode(GainControl::kFixedDigital));

  // We smoothly limit the mixed frame to -7 dBFS. -6 would correspond to the
  // divide-by-2, but -7 is used instead to give a bit of headroom, since the
  // AGC is not a hard limiter.
  check_no_error(gain_control->set_target_level_dbfs(7));

  check_no_error(gain_control->set_compression_gain_db(0));
  check_no_error(gain_control->enable_limiter(true));
  check_no_error(gain_control->Enable(true));
  return limiter;
}

}  // namespace

FrameCombiner::FrameCombiner(bool use_apm_limiter)
    : use_apm_limiter_(use_apm_limiter),
      limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {}

FrameCombiner::~FrameCombiner() = default;

void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
                            size_t number_of_channels,
                            int sample_rate,
                            size_t number_of_streams,
                            AudioFrame* audio_frame_for_mixing) const {
  RTC_DCHECK(audio_frame_for_mixing);
  const size_t samples_per_channel = static_cast<size_t>(
      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

  for (const auto* frame : mix_list) {
    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
  }

  // Frames could be both stereo and mono.
  for (auto* frame : mix_list) {
    RemixFrame(number_of_channels, frame);
  }

  // TODO(aleloi): Issue bugs.webrtc.org/3390.
  // Audio frame timestamp. The 'timestamp_' field is set to dummy
  // value '0', because it is only valid in the single-stream case and
  // is then updated in the helper functions.
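  // Descriptive note (not in the original source): passing nullptr as the
  // data pointer below marks the frame as muted instead of copying samples;
  // the Combine*Frames() helpers then fill in the actual payload.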
  audio_frame_for_mixing->UpdateFrame(
      0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
      AudioFrame::kVadUnknown, number_of_channels);

  const bool use_limiter_this_round =
      use_apm_limiter_ && number_of_streams > 1;

  if (mix_list.empty()) {
    CombineZeroFrames(use_limiter_this_round, limiter_.get(),
                      audio_frame_for_mixing);
  } else if (mix_list.size() == 1) {
    CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(),
                    audio_frame_for_mixing);
  } else {
    std::vector<rtc::ArrayView<const int16_t>> input_frames;
    for (size_t i = 0; i < mix_list.size(); ++i) {
      input_frames.push_back(rtc::ArrayView<const int16_t>(
          mix_list[i]->data(), samples_per_channel * number_of_channels));
    }
    CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
                          audio_frame_for_mixing);
  }
}

}  // namespace webrtc
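// Illustrative usage sketch (not part of the original file; frame_a and
// frame_b are hypothetical 10 ms, 48 kHz stereo AudioFrames already filled
// in by the caller):
//
//   webrtc::FrameCombiner combiner(/*use_apm_limiter=*/true);
//   std::vector<webrtc::AudioFrame*> mix_list = {&frame_a, &frame_b};
//   webrtc::AudioFrame mixed;
//   combiner.Combine(mix_list, /*number_of_channels=*/2,
//                    /*sample_rate=*/48000,
//                    /*number_of_streams=*/mix_list.size(), &mixed);
//
// With more than one stream and the APM limiter enabled, this routes through
// CombineMultipleFrames() above: sum into the int32 buffer, halve, limit,
// then double back to the original level.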