2015-06-25 12:28:48 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
|
|
|
|
*
|
|
|
|
|
* Use of this source code is governed by a BSD-style license
|
|
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
|
|
|
|
* in the file PATENTS. All contributing project authors may
|
|
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
|
|
|
*/
|
|
|
|
|
|
2017-09-15 06:47:31 +02:00
|
|
|
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
2015-06-25 12:28:48 -07:00
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
2017-09-15 06:47:31 +02:00
|
|
|
#include "rtc_base/checks.h"
|
2015-06-25 12:28:48 -07:00
|
|
|
|
|
|
|
|
namespace webrtc {
|
|
|
|
|
namespace {

// Channel count handed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Value |last_voice_probability_| holds before any chunk has been processed.
constexpr double kDefaultVoiceValue = 1.0;

// Starting value written to every per-frame probability before the VADs
// refine it.
constexpr double kNeutralProbability = 0.5;

// Arbitrary low probability reported for frames flagged as silence.
constexpr double kLowProbability = 0.01;

}  // namespace
|
|
|
|
|
|
|
|
|
|
// Seeds |last_voice_probability_| with kDefaultVoiceValue and takes the
// standalone VAD instance returned by StandaloneVad::Create().
VoiceActivityDetector::VoiceActivityDetector()
    : last_voice_probability_(kDefaultVoiceValue),
      standalone_vad_(StandaloneVad::Create()) {}
|
|
|
|
|
|
2016-08-26 14:50:38 -07:00
|
|
|
// Defaulted here, out of line, rather than inside the class definition.
VoiceActivityDetector::~VoiceActivityDetector() = default;
|
|
|
|
|
|
2015-06-25 12:28:48 -07:00
|
|
|
// Because ISAC has a different chunk length, it updates
|
|
|
|
|
// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
|
|
|
|
|
// Otherwise it clears them.
|
|
|
|
|
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
|
Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.
This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.
This was be reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002
The change is being landed as TBR to all the folks who reviewed the above.
BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher
Review URL: https://codereview.webrtc.org/1230503003 .
Cr-Commit-Position: refs/heads/master@{#9768}
2015-08-24 14:52:23 -07:00
|
|
|
size_t length,
|
2015-06-25 12:28:48 -07:00
|
|
|
int sample_rate_hz) {
|
2016-11-28 15:58:53 -08:00
|
|
|
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
|
2015-06-25 12:28:48 -07:00
|
|
|
// Resample to the required rate.
|
|
|
|
|
const int16_t* resampled_ptr = audio;
|
|
|
|
|
if (sample_rate_hz != kSampleRateHz) {
|
2015-09-17 00:24:34 -07:00
|
|
|
RTC_CHECK_EQ(
|
2015-06-25 12:28:48 -07:00
|
|
|
resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
|
|
|
|
|
0);
|
|
|
|
|
resampler_.Push(audio, length, resampled_, kLength10Ms, length);
|
|
|
|
|
resampled_ptr = resampled_;
|
|
|
|
|
}
|
2015-09-17 00:24:34 -07:00
|
|
|
RTC_DCHECK_EQ(length, kLength10Ms);
|
2015-06-25 12:28:48 -07:00
|
|
|
|
|
|
|
|
// Each chunk needs to be passed into |standalone_vad_|, because internally it
|
|
|
|
|
// buffers the audio and processes it all at once when GetActivity() is
|
|
|
|
|
// called.
|
2015-09-17 00:24:34 -07:00
|
|
|
RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
|
2015-06-25 12:28:48 -07:00
|
|
|
|
|
|
|
|
audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
|
|
|
|
|
|
|
|
|
|
chunkwise_voice_probabilities_.resize(features_.num_frames);
|
|
|
|
|
chunkwise_rms_.resize(features_.num_frames);
|
|
|
|
|
std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
|
|
|
|
|
chunkwise_rms_.begin());
|
|
|
|
|
if (features_.num_frames > 0) {
|
|
|
|
|
if (features_.silence) {
|
|
|
|
|
// The other features are invalid, so set the voice probabilities to an
|
|
|
|
|
// arbitrary low value.
|
|
|
|
|
std::fill(chunkwise_voice_probabilities_.begin(),
|
|
|
|
|
chunkwise_voice_probabilities_.end(), kLowProbability);
|
|
|
|
|
} else {
|
|
|
|
|
std::fill(chunkwise_voice_probabilities_.begin(),
|
|
|
|
|
chunkwise_voice_probabilities_.end(), kNeutralProbability);
|
2015-09-17 00:24:34 -07:00
|
|
|
RTC_CHECK_GE(
|
2015-06-25 12:28:48 -07:00
|
|
|
standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
|
|
|
|
|
chunkwise_voice_probabilities_.size()),
|
|
|
|
|
0);
|
2015-09-17 00:24:34 -07:00
|
|
|
RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
|
|
|
|
|
features_, &chunkwise_voice_probabilities_[0]),
|
|
|
|
|
0);
|
2015-06-25 12:28:48 -07:00
|
|
|
}
|
|
|
|
|
last_voice_probability_ = chunkwise_voice_probabilities_.back();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace webrtc
|