Add unit tests for AudioDecoderOpusImpl for stereo
- With mono encoding and stereo decoding check that the decoded
signal is trivial stereo
- DTX tests
- With mono encoding and stereo decoding check that the comfort
noise generated by Opus is NOT(*) trivially stereo
- With stereo encoding and stereo decoding check that the comfort
noise generated by Opus is not trivially stereo
(*) The test documents the current behavior described in [1], which needs to
be fixed.
[1] https://issues.webrtc.org/376493209
Bug: webrtc:376493209
Change-Id: I34aacd4bd7c79be9df05c242e912c9981896a73d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367206
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43363}
This commit is contained in:
parent
b444820216
commit
a287ffa681
@ -160,6 +160,7 @@ if (rtc_include_tests && !build_with_chromium) {
|
||||
"../resources/near22_stereo.pcm",
|
||||
"../resources/near32_stereo.pcm",
|
||||
"../resources/near44_stereo.pcm",
|
||||
"../resources/near48_mono.pcm",
|
||||
"../resources/near48_stereo.pcm",
|
||||
"../resources/near88_stereo.pcm",
|
||||
"../resources/near8_stereo.pcm",
|
||||
|
||||
@ -1373,6 +1373,7 @@ if (rtc_include_tests) {
|
||||
"codecs/cng/cng_unittest.cc",
|
||||
"codecs/legacy_encoded_audio_frame_unittest.cc",
|
||||
"codecs/opus/audio_decoder_multi_channel_opus_unittest.cc",
|
||||
"codecs/opus/audio_decoder_opus_unittest.cc",
|
||||
"codecs/opus/audio_encoder_multi_channel_opus_unittest.cc",
|
||||
"codecs/opus/audio_encoder_opus_unittest.cc",
|
||||
"codecs/opus/opus_bandwidth_unittest.cc",
|
||||
@ -1459,6 +1460,7 @@ if (rtc_include_tests) {
|
||||
"../../api/audio_codecs/opus:audio_decoder_opus",
|
||||
"../../api/audio_codecs/opus:audio_encoder_multiopus",
|
||||
"../../api/audio_codecs/opus:audio_encoder_opus",
|
||||
"../../api/audio_codecs/opus:audio_encoder_opus_config",
|
||||
"../../api/environment",
|
||||
"../../api/environment:environment_factory",
|
||||
"../../api/neteq:default_neteq_controller_factory",
|
||||
@ -1475,10 +1477,12 @@ if (rtc_include_tests) {
|
||||
"../../logging:mocks",
|
||||
"../../logging:rtc_event_audio",
|
||||
"../../modules/rtp_rtcp:rtp_rtcp_format",
|
||||
"../../rtc_base:buffer",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:digest",
|
||||
"../../rtc_base:macromagic",
|
||||
"../../rtc_base:platform_thread",
|
||||
"../../rtc_base:random",
|
||||
"../../rtc_base:refcount",
|
||||
"../../rtc_base:rtc_base_tests_utils",
|
||||
"../../rtc_base:rtc_event",
|
||||
|
||||
304
modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc
Normal file
304
modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc
Normal file
@ -0,0 +1,304 @@
|
||||
/*
|
||||
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h"

#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <utility>
#include <vector>

#include "api/array_view.h"
#include "api/audio/audio_frame.h"
#include "api/audio_codecs/audio_decoder.h"
#include "api/audio_codecs/opus/audio_encoder_opus_config.h"
#include "api/environment/environment.h"
#include "api/environment/environment_factory.h"
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
#include "modules/audio_coding/test/PCMFile.h"
#include "rtc_base/buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/random.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult;
|
||||
using ParseResult = ::webrtc::AudioDecoder::ParseResult;
|
||||
|
||||
// Sample rate shared by the encoder, the decoder and the input audio.
constexpr int kSampleRateHz = 48000;
static_assert(kSampleRateHz % 1000 == 0,
              "Frame lengths below assume a whole number of samples per ms.");

// Duration and per-channel length of each frame fed to the encoder.
constexpr int kInputFrameDurationMs = 10;
constexpr int kInputFrameLength = kInputFrameDurationMs * kSampleRateHz / 1000;

// Duration and per-channel length of each frame produced by the encoder and,
// consequently, of each decoded frame.
constexpr int kEncoderFrameDurationMs = 20;
static_assert(kEncoderFrameDurationMs % kInputFrameDurationMs == 0,
              "The encoder frame must be a whole number of input frames.");
constexpr int kEncoderFrameLength =
    kEncoderFrameDurationMs * kSampleRateHz / 1000;

// Payload type passed to the encoder constructor.
constexpr int kPayloadType = 123;
|
||||
|
||||
// Returns the Opus encoder configuration used by the tests in this file. Only
// the number of channels and the DTX switch vary between tests; every other
// field is set to a fixed value.
AudioEncoderOpusConfig GetEncoderConfig(int num_channels, bool dtx_enabled) {
  AudioEncoderOpusConfig opus_config;

  // Per-test settings.
  opus_config.num_channels = num_channels;
  opus_config.dtx_enabled = dtx_enabled;

  // Fixed settings.
  opus_config.sample_rate_hz = kSampleRateHz;
  opus_config.max_playback_rate_hz = kSampleRateHz;
  opus_config.frame_size_ms = kEncoderFrameDurationMs;
  opus_config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
  opus_config.bitrate_bps = 32000;
  opus_config.complexity = 10;
  opus_config.cbr_enabled = false;
  opus_config.fec_enabled = false;

  return opus_config;
}
|
||||
|
||||
class WhiteNoiseGenerator {
|
||||
public:
|
||||
explicit WhiteNoiseGenerator(double amplitude_dbfs)
|
||||
: amplitude_(
|
||||
rtc::saturated_cast<int16_t>(std::pow(10, amplitude_dbfs / 20) *
|
||||
std::numeric_limits<int16_t>::max())),
|
||||
random_generator_(42) {}
|
||||
|
||||
void GenerateNextFrame(rtc::ArrayView<int16_t> frame) {
|
||||
for (size_t i = 0; i < frame.size(); ++i) {
|
||||
frame[i] = rtc::saturated_cast<int16_t>(
|
||||
random_generator_.Rand(-amplitude_, amplitude_));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const int32_t amplitude_;
|
||||
Random random_generator_;
|
||||
};
|
||||
|
||||
// Returns true if every sample in `audio` is zero (also true for an empty
// view).
bool IsZeroedFrame(rtc::ArrayView<const int16_t> audio) {
  // Skip the leading run of zeros; the frame is zeroed iff that run covers
  // the whole view.
  size_t index = 0;
  while (index < audio.size() && audio[index] == 0) {
    ++index;
  }
  return index == audio.size();
}
|
||||
|
||||
// Returns true if `audio`, interpreted as interleaved stereo, carries the same
// sample on both channels for every sample pair. The size of `audio` must be
// even; `rtc::CheckedDivExact` is used to enforce that.
bool IsTrivialStereo(rtc::ArrayView<const int16_t> audio) {
  const int num_pairs =
      rtc::CheckedDivExact(audio.size(), static_cast<size_t>(2));
  for (int pair = 0; pair < num_pairs; ++pair) {
    const int left = 2 * pair;
    const int right = left + 1;
    if (audio[left] != audio[right]) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Reads up to `max_frames` 10 ms frames from a speech recording (mono or
// stereo, matching the number of channels of `encoder`), encodes them with
// `encoder` and decodes every produced packet with `decoder`. Stops early at
// the end of the file. `rtp_timestamp` is incremented once per encoded input
// frame and `timestamp` once per parsed packet.
void EncodeDecodeSpeech(AudioEncoderOpusImpl& encoder,
                        AudioDecoderOpusImpl& decoder,
                        uint32_t& rtp_timestamp,
                        uint32_t& timestamp,
                        int max_frames) {
  const size_t encoder_num_channels = encoder.NumChannels();
  RTC_CHECK(encoder_num_channels == 1 || encoder_num_channels == 2);
  const bool stereo_encoding = encoder_num_channels == 2;

  // Open the recording matching the encoder layout.
  PCMFile pcm_file;
  pcm_file.Open(test::ResourcePath(
                    stereo_encoding ? "near48_stereo" : "near48_mono", "pcm"),
                kSampleRateHz, "rb");
  pcm_file.ReadStereo(stereo_encoding);

  // Output buffer holding one decoded frame for all the decoder channels.
  std::vector<int16_t> decoded_frame(kEncoderFrameLength * decoder.Channels());

  AudioFrame audio_frame;
  for (int frame = 0; frame < max_frames && !pcm_file.EndOfFile(); ++frame) {
    pcm_file.Read10MsData(audio_frame);
    rtc::Buffer payload;
    encoder.Encode(rtp_timestamp++, audio_frame.data_view().data(), &payload);

    // An empty payload means that the encoder needs more audio to produce a
    // packet; only non-empty payloads are decoded.
    if (payload.size() != 0) {
      std::vector<ParseResult> parse_results =
          decoder.ParsePayload(std::move(payload), timestamp++);
      RTC_CHECK_EQ(parse_results.size(), 1);
      std::optional<DecodeResult> decode_results =
          parse_results[0].frame->Decode(decoded_frame);
      RTC_CHECK(decode_results);
      RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size());
    }
  }
}
|
||||
|
||||
// Encodes low-level (-70 dBFS) noise with `encoder` and decodes each produced
// packet with `decoder` until a packet flagged as DTX has been decoded. Crashes
// (RTC_CHECK) if no DTX packet is observed within a bounded number of input
// frames. `rtp_timestamp` is incremented once per encoded input frame and
// `timestamp` once per parsed packet.
void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder,
                                            AudioDecoderOpusImpl& decoder,
                                            uint32_t& rtp_timestamp,
                                            uint32_t& timestamp) {
  // Upper bound on the number of 10 ms noise frames fed to the encoder.
  constexpr int kMaxNumInputFrames = 50;

  WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0);
  std::vector<int16_t> input_frame(kInputFrameLength * encoder.NumChannels());
  const size_t decoder_num_channels = decoder.Channels();
  std::vector<int16_t> decoded_frame(kEncoderFrameLength *
                                     decoder_num_channels);

  bool dtx_packet_found = false;
  for (int i = 0; i < kMaxNumInputFrames; ++i) {
    generator.GenerateNextFrame(input_frame);
    rtc::Buffer payload;
    encoder.Encode(rtp_timestamp++, input_frame, &payload);

    // Ignore empty payloads: the encoder needs more audio to produce a packet.
    if (payload.size() == 0) {
      continue;
    }

    // Decode `payload`. If not a DTX packet, decoding it may update the
    // internal decoder parameters for comfort noise generation.
    std::vector<ParseResult> parse_results =
        decoder.ParsePayload(std::move(payload), timestamp++);
    RTC_CHECK_EQ(parse_results.size(), 1);
    std::optional<DecodeResult> decode_results =
        parse_results[0].frame->Decode(decoded_frame);
    RTC_CHECK(decode_results);
    RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size());

    if (parse_results[0].frame->IsDtxPacket()) {
      // The decoder is now in DTX mode.
      dtx_packet_found = true;
      break;
    }
  }
  RTC_CHECK(dtx_packet_found);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Checks that, when a mono stream is decoded by a stereo decoder, every decoded
// frame has identical left and right channels.
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) {
  const Environment env = EnvironmentFactory().Create();
  WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0);
  std::array<int16_t, kInputFrameLength> input_frame;
  // Create a mono encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  // Counts the decoded packets so that the test cannot pass without having
  // checked at least one decoded frame.
  int num_decoded_packets = 0;
  for (int i = 0; i < 30; ++i) {
    generator.GenerateNextFrame(input_frame);
    rtc::Buffer payload;
    encoder.Encode(rtp_timestamp++, input_frame, &payload);
    // Ignore empty payloads: the encoder needs more audio to produce a packet.
    if (payload.size() == 0) {
      continue;
    }

    // Decode.
    std::vector<ParseResult> parse_results =
        decoder.ParsePayload(std::move(payload), timestamp++);
    ASSERT_EQ(parse_results.size(), 1u);
    std::optional<DecodeResult> decode_results =
        parse_results[0].frame->Decode(decoded_frame);
    ASSERT_TRUE(decode_results.has_value());
    ASSERT_EQ(decode_results->num_decoded_samples, decoded_frame.size());

    EXPECT_TRUE(IsTrivialStereo(decoded_frame));
    ++num_decoded_packets;
  }
  EXPECT_GT(num_decoded_packets, 0);
}
|
||||
|
||||
// Documents the current behavior of a stereo decoder generating comfort noise
// for a mono DTX stream: the comfort noise is not trivially stereo (see the
// TODO below).
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
  const Environment env = EnvironmentFactory().Create();
  // Create a mono encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/true);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  // Feed the encoder with speech, otherwise DTX will never kick in.
  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
                     /*max_frames=*/100);
  // Feed the encoder with noise until the decoder is in DTX mode.
  EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp,
                                         timestamp);

  // Decode an empty packet so that Opus generates comfort noise.
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;
  // Initialized so that it is never read uninitialized if decoding fails.
  AudioDecoder::SpeechType speech_type = AudioDecoder::SpeechType::kSpeech;
  // `AudioDecoder::Decode()` expects the capacity of the output buffer in
  // bytes, not in samples.
  const int num_decoded_samples = decoder.Decode(
      /*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz,
      /*max_decoded_bytes=*/decoded_frame.size() * sizeof(int16_t),
      decoded_frame.data(), &speech_type);
  // Validate the decoding result before inspecting its outputs.
  ASSERT_GT(num_decoded_samples, 0);
  ASSERT_LE(static_cast<size_t>(num_decoded_samples), decoded_frame.size());
  ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise);
  rtc::ArrayView<const int16_t> decoded_view(decoded_frame.data(),
                                             num_decoded_samples);
  // Make sure that comfort noise is not a muted frame.
  ASSERT_FALSE(IsZeroedFrame(decoded_view));

  // TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below.
  EXPECT_FALSE(IsTrivialStereo(decoded_view));
}
|
||||
|
||||
// Checks that the comfort noise generated by a stereo decoder for a stereo DTX
// stream is neither muted nor trivially stereo.
TEST(AudioDecoderOpusTest,
     StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
  const Environment env = EnvironmentFactory().Create();
  // Create a stereo encoder.
  const AudioEncoderOpusConfig encoder_config =
      GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/true);
  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
  // Create a stereo decoder.
  constexpr size_t kDecoderNumChannels = 2;
  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
                               kSampleRateHz);

  uint32_t rtp_timestamp = 0xFFFu;
  uint32_t timestamp = 0;
  // Feed the encoder with speech, otherwise DTX will never kick in.
  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
                     /*max_frames=*/100);
  // Feed the encoder with noise and decode until the decoder is in DTX mode.
  EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp,
                                         timestamp);

  // Decode an empty packet so that Opus generates comfort noise.
  std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;
  // Initialized so that it is never read uninitialized if decoding fails.
  AudioDecoder::SpeechType speech_type = AudioDecoder::SpeechType::kSpeech;
  // `AudioDecoder::Decode()` expects the capacity of the output buffer in
  // bytes, not in samples.
  const int num_decoded_samples = decoder.Decode(
      /*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz,
      /*max_decoded_bytes=*/decoded_frame.size() * sizeof(int16_t),
      decoded_frame.data(), &speech_type);
  // Validate the decoding result before inspecting its outputs.
  ASSERT_GT(num_decoded_samples, 0);
  ASSERT_LE(static_cast<size_t>(num_decoded_samples), decoded_frame.size());
  ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise);
  rtc::ArrayView<const int16_t> decoded_view(decoded_frame.data(),
                                             num_decoded_samples);
  // Make sure that comfort noise is not a muted frame.
  ASSERT_FALSE(IsZeroedFrame(decoded_view));

  EXPECT_FALSE(IsTrivialStereo(decoded_view));
}
|
||||
|
||||
} // namespace webrtc
|
||||
1
resources/near48_mono.pcm.sha1
Normal file
1
resources/near48_mono.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
2b752cdcb86095a0c405724aa1ce4ef910e06d10
|
||||
Loading…
x
Reference in New Issue
Block a user