Add unit tests for AudioDecoderOpusImpl for stereo

- With mono encoding and stereo decoding check that the decoded
  signal is trivial stereo
- DTX tests
  - With mono encoding and stereo decoding check that the comfort
    noise generated by Opus is NOT(*) trivially stereo
  - With stereo encoding and stereo decoding check that the comfort
    noise generated by Opus is not trivially stereo

(*) This test documents the current behavior described in [1], which is
a bug that still needs to be fixed.

[1] https://issues.webrtc.org/376493209

Bug: webrtc:376493209
Change-Id: I34aacd4bd7c79be9df05c242e912c9981896a73d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367206
Reviewed-by: Jakob Ivarsson‎ <jakobi@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43363}
This commit is contained in:
Alessio Bazzica 2024-11-06 10:03:29 +01:00 committed by WebRTC LUCI CQ
parent b444820216
commit a287ffa681
4 changed files with 310 additions and 0 deletions

View File

@ -160,6 +160,7 @@ if (rtc_include_tests && !build_with_chromium) {
"../resources/near22_stereo.pcm",
"../resources/near32_stereo.pcm",
"../resources/near44_stereo.pcm",
"../resources/near48_mono.pcm",
"../resources/near48_stereo.pcm",
"../resources/near88_stereo.pcm",
"../resources/near8_stereo.pcm",

View File

@ -1373,6 +1373,7 @@ if (rtc_include_tests) {
"codecs/cng/cng_unittest.cc",
"codecs/legacy_encoded_audio_frame_unittest.cc",
"codecs/opus/audio_decoder_multi_channel_opus_unittest.cc",
"codecs/opus/audio_decoder_opus_unittest.cc",
"codecs/opus/audio_encoder_multi_channel_opus_unittest.cc",
"codecs/opus/audio_encoder_opus_unittest.cc",
"codecs/opus/opus_bandwidth_unittest.cc",
@ -1459,6 +1460,7 @@ if (rtc_include_tests) {
"../../api/audio_codecs/opus:audio_decoder_opus",
"../../api/audio_codecs/opus:audio_encoder_multiopus",
"../../api/audio_codecs/opus:audio_encoder_opus",
"../../api/audio_codecs/opus:audio_encoder_opus_config",
"../../api/environment",
"../../api/environment:environment_factory",
"../../api/neteq:default_neteq_controller_factory",
@ -1475,10 +1477,12 @@ if (rtc_include_tests) {
"../../logging:mocks",
"../../logging:rtc_event_audio",
"../../modules/rtp_rtcp:rtp_rtcp_format",
"../../rtc_base:buffer",
"../../rtc_base:checks",
"../../rtc_base:digest",
"../../rtc_base:macromagic",
"../../rtc_base:platform_thread",
"../../rtc_base:random",
"../../rtc_base:refcount",
"../../rtc_base:rtc_base_tests_utils",
"../../rtc_base:rtc_event",

View File

@ -0,0 +1,304 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"

#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <utility>
#include <vector>

#include "api/array_view.h"
#include "api/audio/audio_frame.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/opus/audio_encoder_opus_config.h"
#include "api/environment/environment.h"
#include "api/environment/environment_factory.h"
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
#include "modules/audio_coding/test/PCMFile.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/random.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
namespace webrtc {
namespace {
// Short aliases for the nested `AudioDecoder` types that the helpers and tests
// in this file name repeatedly when parsing and decoding payloads.
using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult;
using ParseResult = ::webrtc::AudioDecoder::ParseResult;
// Sample rate used for the encoder, the decoder and the test signals.
constexpr int kSampleRateHz = 48000;

// Converts a frame duration in milliseconds into a per-channel sample count at
// `kSampleRateHz`.
constexpr int FrameLengthForDurationMs(int duration_ms) {
  return duration_ms * kSampleRateHz / 1000;
}

// Audio is fed to the encoder in 10 ms chunks...
constexpr int kInputFrameDurationMs = 10;
constexpr int kInputFrameLength =
    FrameLengthForDurationMs(kInputFrameDurationMs);

// ...while the encoder is configured to emit 20 ms packets.
constexpr int kEncoderFrameDurationMs = 20;
constexpr int kEncoderFrameLength =
    FrameLengthForDurationMs(kEncoderFrameDurationMs);

// Payload type handed to the encoder constructor.
constexpr int kPayloadType = 123;
// Builds the Opus encoder configuration shared by all the tests in this file.
//
// `num_channels` selects the number of encoded channels and `dtx_enabled`
// toggles discontinuous transmission; every other setting is fixed.
AudioEncoderOpusConfig GetEncoderConfig(int num_channels, bool dtx_enabled) {
  AudioEncoderOpusConfig opus_config;

  // Settings that vary between tests.
  opus_config.num_channels = num_channels;
  opus_config.dtx_enabled = dtx_enabled;

  // Signal format and packetization.
  opus_config.sample_rate_hz = kSampleRateHz;
  opus_config.max_playback_rate_hz = kSampleRateHz;
  opus_config.frame_size_ms = kEncoderFrameDurationMs;

  // Fixed coding parameters: VoIP mode at 32 kbps, with FEC and CBR both
  // turned off and complexity set to 10.
  opus_config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
  opus_config.bitrate_bps = 32000;
  opus_config.fec_enabled = false;
  opus_config.cbr_enabled = false;
  opus_config.complexity = 10;

  return opus_config;
}
// Produces frames of uniformly distributed noise with a bounded peak amplitude.
//
// The underlying `Random` instance is always constructed with the same value
// (42), so two generators built with the same amplitude are expected to emit
// the same sequence.
class WhiteNoiseGenerator {
 public:
  // `amplitude_dbfs` is the peak level in dB relative to the largest int16_t
  // value; it is converted to a linear amplitude once, at construction.
  explicit WhiteNoiseGenerator(double amplitude_dbfs)
      : amplitude_(rtc::saturated_cast<int16_t>(
            std::numeric_limits<int16_t>::max() *
            std::pow(10, amplitude_dbfs / 20))),
        random_generator_(42) {}

  // Overwrites every sample of `frame` with a fresh random value drawn from
  // the range bounded by -amplitude and +amplitude.
  void GenerateNextFrame(rtc::ArrayView<int16_t> frame) {
    for (int16_t& sample : frame) {
      sample = rtc::saturated_cast<int16_t>(
          random_generator_.Rand(-amplitude_, amplitude_));
    }
  }

 private:
  // Linear peak amplitude, stored as 32 bits so that it can be negated and
  // passed to `Random::Rand()` without conversion.
  const int32_t amplitude_;
  Random random_generator_;
};
// Returns true if every sample in `audio` equals zero (which is also the
// result for an empty view), false as soon as a non-zero sample is found.
bool IsZeroedFrame(rtc::ArrayView<const int16_t> audio) {
  for (size_t index = 0; index < audio.size(); ++index) {
    if (audio[index] != 0) {
      return false;
    }
  }
  return true;
}
// Returns true if, in every consecutive pair of samples in `audio`, the two
// samples are identical - i.e. (presumably interleaved) stereo audio whose two
// channels carry the same signal.
//
// The number of samples must be even; `rtc::CheckedDivExact` is relied upon to
// enforce that.
bool IsTrivialStereo(rtc::ArrayView<const int16_t> audio) {
  const int num_pairs =
      rtc::CheckedDivExact(audio.size(), static_cast<size_t>(2));
  for (int pair = 0; pair < num_pairs; ++pair) {
    const int first = 2 * pair;
    const int second = first + 1;
    if (audio[first] != audio[second]) {
      return false;
    }
  }
  return true;
}
// Runs recorded speech through `encoder` and then `decoder`.
//
// Up to `max_frames` 10 ms frames are read from the "near48_mono" or
// "near48_stereo" PCM resource, chosen to match the encoder channel count
// (which must be 1 or 2). Every non-empty payload produced by the encoder is
// parsed and decoded, and the decoded size is checked against one full encoder
// frame for all decoder channels. Reading stops early at the end of the file.
//
// `rtp_timestamp` is post-incremented for every encoded input frame and
// `timestamp` for every parsed payload, so that callers can keep using them
// for subsequent encode/decode calls.
void EncodeDecodeSpeech(AudioEncoderOpusImpl& encoder,
                        AudioDecoderOpusImpl& decoder,
                        uint32_t& rtp_timestamp,
                        uint32_t& timestamp,
                        int max_frames) {
  const auto num_encoder_channels = encoder.NumChannels();
  RTC_CHECK(num_encoder_channels == 1 || num_encoder_channels == 2);
  const bool is_stereo_input = num_encoder_channels == 2;

  // Output buffer sized for one encoder frame on every decoder channel.
  std::vector<int16_t> decoded_audio(kEncoderFrameLength * decoder.Channels());

  // Open the speech recording that matches the encoder layout.
  const char* const resource_name =
      is_stereo_input ? "near48_stereo" : "near48_mono";
  PCMFile speech_file;
  speech_file.Open(test::ResourcePath(resource_name, "pcm"), kSampleRateHz,
                   "rb");
  speech_file.ReadStereo(is_stereo_input);

  AudioFrame speech_frame;
  for (int frame_index = 0;
       frame_index < max_frames && !speech_file.EndOfFile(); ++frame_index) {
    speech_file.Read10MsData(speech_frame);

    rtc::Buffer encoded;
    encoder.Encode(rtp_timestamp++, speech_frame.data_view().data(), &encoded);
    if (encoded.size() == 0) {
      // Nothing to decode yet: the encoder needs more audio to produce a
      // packet.
      continue;
    }

    // Parse the packet and decode its single frame.
    std::vector<ParseResult> parsed =
        decoder.ParsePayload(std::move(encoded), timestamp++);
    RTC_CHECK_EQ(parsed.size(), 1);
    std::optional<DecodeResult> decoded = parsed[0].frame->Decode(decoded_audio);
    RTC_CHECK(decoded);
    RTC_CHECK_EQ(decoded->num_decoded_samples, decoded_audio.size());
  }
}
// Feeds low-level white noise (-70 dBFS) through `encoder` and `decoder` until
// the decoder receives a DTX packet.
//
// At most `kMaxNumFrames` 10 ms input frames are encoded. Every non-empty
// payload is parsed and decoded, and the decoded size is checked against one
// full encoder frame for all decoder channels. The function returns right
// after the first DTX packet has been decoded and crashes (RTC_CHECK) if none
// is observed within the frame budget.
//
// `rtp_timestamp` is post-incremented for every encoded input frame and
// `timestamp` for every parsed payload.
void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder,
                                            AudioDecoderOpusImpl& decoder,
                                            uint32_t& rtp_timestamp,
                                            uint32_t& timestamp) {
  // Upper bound on the number of 10 ms noise frames fed to the encoder.
  constexpr int kMaxNumFrames = 50;

  WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0);
  std::vector<int16_t> input_frame(kInputFrameLength * encoder.NumChannels());
  const size_t decoder_num_channels = decoder.Channels();
  std::vector<int16_t> decoded_frame(kEncoderFrameLength *
                                     decoder_num_channels);
  bool dtx_packet_found = false;
  for (int i = 0; i < kMaxNumFrames; ++i) {
    generator.GenerateNextFrame(input_frame);
    rtc::Buffer payload;
    // Only the payload is needed; the returned encoding info is not used.
    encoder.Encode(rtp_timestamp++, input_frame, &payload);
    // Ignore empty payloads: the encoder needs more audio to produce a packet.
    if (payload.size() == 0) {
      continue;
    }
    // Decode `payload`. If not a DTX packet, decoding it may update the
    // internal decoder parameters for comfort noise generation.
    std::vector<ParseResult> parse_results =
        decoder.ParsePayload(std::move(payload), timestamp++);
    RTC_CHECK_EQ(parse_results.size(), 1);
    std::optional<DecodeResult> decode_results =
        parse_results[0].frame->Decode(decoded_frame);
    RTC_CHECK(decode_results);
    RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size());
    if (parse_results[0].frame->IsDtxPacket()) {
      // The decoder is now in DTX mode.
      dtx_packet_found = true;
      break;
    }
  }
  RTC_CHECK(dtx_packet_found)
      << "No DTX packet observed after " << kMaxNumFrames << " noise frames.";
}
} // namespace
// Checks that decoding a mono-encoded stream with a stereo decoder yields
// frames in which each pair of samples is identical ("trivial stereo").
//
// Failures are reported through gtest assertions so that a broken decoder
// fails this test instead of aborting the whole test binary.
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) {
  const Environment env = EnvironmentFactory().Create();
  WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0);
  std::array<int16_t, kInputFrameLength> input_frame;
  // Create a mono encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  int num_decoded_packets = 0;
  for (int i = 0; i < 30; ++i) {
    generator.GenerateNextFrame(input_frame);
    rtc::Buffer payload;
    encoder.Encode(rtp_timestamp++, input_frame, &payload);
    // Ignore empty payloads: the encoder needs more audio to produce a packet.
    if (payload.size() == 0) {
      continue;
    }
    // Decode.
    std::vector<ParseResult> parse_results =
        decoder.ParsePayload(std::move(payload), timestamp++);
    ASSERT_EQ(parse_results.size(), 1u);
    std::optional<DecodeResult> decode_results =
        parse_results[0].frame->Decode(decoded_frame);
    ASSERT_TRUE(decode_results.has_value());
    ASSERT_EQ(decode_results->num_decoded_samples, decoded_frame.size());
    EXPECT_TRUE(IsTrivialStereo(decoded_frame));
    ++num_decoded_packets;
  }
  // Guard against a vacuous pass: at least one packet must have been decoded
  // and checked above.
  EXPECT_GT(num_decoded_packets, 0);
}
// Documents the current comfort noise behavior for a mono-encoded stream
// decoded as stereo: once the decoder is in DTX mode, the comfort noise it
// generates for an empty packet is NOT trivially stereo (see the TODO at the
// end of the test).
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
  const Environment env = EnvironmentFactory().Create();
  // Create a mono encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/true);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  // Feed the encoder with speech, otherwise DTX will never kick in.
  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
                     /*max_frames=*/100);
  // Feed the encoder with noise until the decoder is in DTX mode.
  EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp,
                                         timestamp);

  // Decode an empty packet so that Opus generates comfort noise.
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;
  // Initialized so that it is never read while indeterminate, even if the
  // decoder fails without writing to it.
  AudioDecoder::SpeechType speech_type = AudioDecoder::SpeechType::kSpeech;
  // `AudioDecoder::Decode()` expects the output capacity in bytes, not in
  // samples.
  const int num_decoded_samples = decoder.Decode(
      /*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz,
      /*max_decoded_bytes=*/decoded_frame.size() * sizeof(int16_t),
      decoded_frame.data(), &speech_type);
  // Validate the decode result before looking at anything the decoder wrote.
  ASSERT_GT(num_decoded_samples, 0);
  ASSERT_LE(static_cast<size_t>(num_decoded_samples), decoded_frame.size());
  ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise);

  rtc::ArrayView<const int16_t> decoded_view(decoded_frame.data(),
                                             num_decoded_samples);
  // Make sure that comfort noise is not a muted frame.
  ASSERT_FALSE(IsZeroedFrame(decoded_view));
  // TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below.
  EXPECT_FALSE(IsTrivialStereo(decoded_view));
}
// Checks that, for a stereo-encoded stream decoded as stereo, the comfort
// noise generated for an empty packet once the decoder is in DTX mode is
// neither muted nor trivially stereo.
TEST(AudioDecoderOpusTest,
     StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
  const Environment env = EnvironmentFactory().Create();
  // Create a stereo encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/true);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  // Feed the encoder with speech, otherwise DTX will never kick in.
  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
                     /*max_frames=*/100);
  // Feed the encoder with noise and decode until the decoder is in DTX mode.
  EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp,
                                         timestamp);

  // Decode an empty packet so that Opus generates comfort noise.
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;
  // Initialized so that it is never read while indeterminate, even if the
  // decoder fails without writing to it.
  AudioDecoder::SpeechType speech_type = AudioDecoder::SpeechType::kSpeech;
  // `AudioDecoder::Decode()` expects the output capacity in bytes, not in
  // samples.
  const int num_decoded_samples = decoder.Decode(
      /*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz,
      /*max_decoded_bytes=*/decoded_frame.size() * sizeof(int16_t),
      decoded_frame.data(), &speech_type);
  // Validate the decode result before looking at anything the decoder wrote.
  ASSERT_GT(num_decoded_samples, 0);
  ASSERT_LE(static_cast<size_t>(num_decoded_samples), decoded_frame.size());
  ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise);

  rtc::ArrayView<const int16_t> decoded_view(decoded_frame.data(),
                                             num_decoded_samples);
  // Make sure that comfort noise is not a muted frame.
  ASSERT_FALSE(IsZeroedFrame(decoded_view));
  EXPECT_FALSE(IsTrivialStereo(decoded_view));
}
} // namespace webrtc

View File

@ -0,0 +1 @@
2b752cdcb86095a0c405724aa1ce4ef910e06d10