webrtc_m130/modules/audio_processing/vad/voice_activity_detector.cc

/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/vad/voice_activity_detector.h"

#include <algorithm>

#include "rtc_base/checks.h"

namespace webrtc {
namespace {

// Channel count handed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Value `last_voice_probability_` starts out with.
constexpr double kDefaultVoiceValue = 1.0;
// Probability written to every frame before the VADs are queried.
constexpr double kNeutralProbability = 0.5;
// Probability written to every frame when the features are flagged as
// silence.
constexpr double kLowProbability = 0.01;

}  // namespace

// Starts with the default voice probability and creates the standalone VAD
// via its factory function.
VoiceActivityDetector::VoiceActivityDetector()
    : last_voice_probability_(kDefaultVoiceValue),
      standalone_vad_(StandaloneVad::Create()) {}

// Defaulted destructor, defined out of line in this translation unit.
VoiceActivityDetector::~VoiceActivityDetector() = default;

// Processes one 10 ms chunk of single-channel audio.
//
// `audio` points to `length` samples at `sample_rate_hz`; `length` must equal
// `sample_rate_hz / 100`. Input that is not already at `kSampleRateHz` is
// resampled first.
//
// Because ISAC has a different chunk length, the feature extractor does not
// necessarily produce frames on every call. `chunkwise_voice_probabilities_`
// and `chunkwise_rms_` are resized to `features_.num_frames` each time, so
// they hold fresh values when there is new data and are cleared otherwise.
// `last_voice_probability_` is only updated when at least one frame was
// produced.
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
                                         size_t length,
                                         int sample_rate_hz) {
  RTC_DCHECK_EQ(length, sample_rate_hz / 100);
  // Resample to the required rate.
  const int16_t* resampled_ptr = audio;
  if (sample_rate_hz != kSampleRateHz) {
    RTC_CHECK_EQ(
        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
        0);
    // `length` is passed both as the input length and as the output-length
    // argument, so after this call it describes the resampled data. The
    // result is checked like every other fallible call in this function, so a
    // resampling failure is reported here rather than as a length mismatch
    // further down.
    RTC_CHECK_EQ(
        resampler_.Push(audio, length, resampled_, kLength10Ms, length), 0);
    resampled_ptr = resampled_;
  }
  RTC_DCHECK_EQ(length, kLength10Ms);

  // Each chunk needs to be passed into `standalone_vad_`, because internally it
  // buffers the audio and processes it all at once when GetActivity() is
  // called.
  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);

  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);

  // Size both outputs to the number of frames produced by this chunk (possibly
  // zero, which empties them).
  chunkwise_voice_probabilities_.resize(features_.num_frames);
  chunkwise_rms_.resize(features_.num_frames);
  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
            chunkwise_rms_.begin());
  if (features_.num_frames > 0) {
    if (features_.silence) {
      // The other features are invalid, so set the voice probabilities to an
      // arbitrary low value.
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kLowProbability);
    } else {
      // Start from a neutral value, then let the standalone VAD and the
      // pitch-based VAD update the probabilities in place, in that order.
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kNeutralProbability);
      RTC_CHECK_GE(
          standalone_vad_->GetActivity(chunkwise_voice_probabilities_.data(),
                                       chunkwise_voice_probabilities_.size()),
          0);
      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
                       features_, chunkwise_voice_probabilities_.data()),
                   0);
    }
    last_voice_probability_ = chunkwise_voice_probabilities_.back();
  }
}

}  // namespace webrtc
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`/*`
			`* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.`
			`*`
			`* Use of this source code is governed by a BSD-style license`
			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
			`* in the file PATENTS. All contributing project authors may`
			`* be found in the AUTHORS file in the root of the source tree.`
			`*/`

Fixing WebRTC after moving from src/webrtc to src/ In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC from src/webrtc to src/ (in order to preserve an healthy git history). This CL takes care of fixing header guards, #include paths, etc... NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578 Reviewed-on: https://webrtc-review.googlesource.com/1561 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19846} 2017-09-15 06:47:31 +02:00			`#include "modules/audio_processing/vad/voice_activity_detector.h"`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`#include <algorithm>`

Fixing WebRTC after moving from src/webrtc to src/ In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC from src/webrtc to src/ (in order to preserve an healthy git history). This CL takes care of fixing header guards, #include paths, etc... NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578 Reviewed-on: https://webrtc-review.googlesource.com/1561 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19846} 2017-09-15 06:47:31 +02:00			`#include "rtc_base/checks.h"`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`namespace webrtc {`
			`namespace {`

Convert channel counts to size_t. IIRC, this was originally requested by ajm during review of the other size_t conversions I did over the past year, and I agreed it made sense, but wanted to do it separately since those changes were already gargantuan. BUG=chromium:81439 TEST=none R=henrik.lundin@webrtc.org, henrika@webrtc.org, kjellander@webrtc.org, minyue@webrtc.org, perkj@webrtc.org, solenberg@webrtc.org, stefan@webrtc.org, tina.legrand@webrtc.org Review URL: https://codereview.webrtc.org/1316523002 . Cr-Commit-Position: refs/heads/master@{#11229} 2016-01-12 16:26:35 -08:00			`const size_t kNumChannels = 1;`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`const double kDefaultVoiceValue = 1.0;`
			`const double kNeutralProbability = 0.5;`
			`const double kLowProbability = 0.01;`

			`} // namespace`

			`VoiceActivityDetector::VoiceActivityDetector()`
			`: last_voice_probability_(kDefaultVoiceValue),`
			`standalone_vad_(StandaloneVad::Create()) {`
			`}`

Fix Chromium clang plugin warnings BUG=webrtc:163 Review-Url: https://codereview.webrtc.org/2285713002 Cr-Commit-Position: refs/heads/master@{#13943} 2016-08-26 14:50:38 -07:00			`VoiceActivityDetector::~VoiceActivityDetector() = default;`

Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`// Because ISAC has a different chunk length, it updates`
			`// \|chunkwise_voice_probabilities_\| and \|chunkwise_rms_\| when there is new data.`
			`// Otherwise it clears them.`
			`void VoiceActivityDetector::ProcessChunk(const int16_t* audio,`
Update a ton of audio code to use size_t more correctly and in general reduce use of int16_t/uint16_t. This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects. This was be reviewed and approved in pieces: https://codereview.webrtc.org/1224093003 https://codereview.webrtc.org/1224123002 https://codereview.webrtc.org/1224163002 https://codereview.webrtc.org/1225133003 https://codereview.webrtc.org/1225173002 https://codereview.webrtc.org/1227163003 https://codereview.webrtc.org/1227203003 https://codereview.webrtc.org/1227213002 https://codereview.webrtc.org/1227893002 https://codereview.webrtc.org/1228793004 https://codereview.webrtc.org/1228803003 https://codereview.webrtc.org/1228823002 https://codereview.webrtc.org/1228823003 https://codereview.webrtc.org/1228843002 https://codereview.webrtc.org/1230693002 https://codereview.webrtc.org/1231713002 The change is being landed as TBR to all the folks who reviewed the above. BUG=chromium:81439 TEST=none R=andrew@webrtc.org, pbos@webrtc.org TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher Review URL: https://codereview.webrtc.org/1230503003 . Cr-Commit-Position: refs/heads/master@{#9768} 2015-08-24 14:52:23 -07:00			`size_t length,`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`int sample_rate_hz) {`
RTC_[D]CHECK_op: Remove superfluous casts There's no longer any need to make the two arguments have the same signedness, so we can remove a bunch of superfluous (and sometimes dangerous) casts. It turned out I also had to fix the safe_cmp functions to properly handle enums that are implicitly convertible to integers. NOPRESUBMIT=true BUG=webrtc:6645 Review-Url: https://codereview.webrtc.org/2534683002 Cr-Commit-Position: refs/heads/master@{#15281} 2016-11-28 15:58:53 -08:00			`RTC_DCHECK_EQ(length, sample_rate_hz / 100);`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`// Resample to the required rate.`
			`const int16_t* resampled_ptr = audio;`
			`if (sample_rate_hz != kSampleRateHz) {`
Add RTC_ prefix to (D)CHECKs and related macros. We must remove dependency on Chromium, i.e. we can't use Chromium's base/logging.h. That means we need to define these macros in WebRTC also when doing Chromium builds. And this causes redefinition. Alternative solutions: * Check if we already have defined e.g. CHECK, and don't define them in that case. This makes us depend on include order in Chromium, which is not acceptable. * Don't allow using the macros in WebRTC headers. Error prone since if someone adds it there by mistake it may compile fine, but later break if a header in added or order is changed in Chromium. That will be confusing and hard to enforce. * Ensure that headers that are included by an embedder don't include our macros. This would require some heavy refactoring to be maintainable and enforcable. * Changes in Chromium for this is obviously not an option. BUG=chromium:468375 NOTRY=true Review URL: https://codereview.webrtc.org/1335923002 Cr-Commit-Position: refs/heads/master@{#9964} 2015-09-17 00:24:34 -07:00			`RTC_CHECK_EQ(`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),`
			`0);`
			`resampler_.Push(audio, length, resampled_, kLength10Ms, length);`
			`resampled_ptr = resampled_;`
			`}`
Add RTC_ prefix to (D)CHECKs and related macros. We must remove dependency on Chromium, i.e. we can't use Chromium's base/logging.h. That means we need to define these macros in WebRTC also when doing Chromium builds. And this causes redefinition. Alternative solutions: * Check if we already have defined e.g. CHECK, and don't define them in that case. This makes us depend on include order in Chromium, which is not acceptable. * Don't allow using the macros in WebRTC headers. Error prone since if someone adds it there by mistake it may compile fine, but later break if a header in added or order is changed in Chromium. That will be confusing and hard to enforce. * Ensure that headers that are included by an embedder don't include our macros. This would require some heavy refactoring to be maintainable and enforcable. * Changes in Chromium for this is obviously not an option. BUG=chromium:468375 NOTRY=true Review URL: https://codereview.webrtc.org/1335923002 Cr-Commit-Position: refs/heads/master@{#9964} 2015-09-17 00:24:34 -07:00			`RTC_DCHECK_EQ(length, kLength10Ms);`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`// Each chunk needs to be passed into \|standalone_vad_\|, because internally it`
			`// buffers the audio and processes it all at once when GetActivity() is`
			`// called.`
Add RTC_ prefix to (D)CHECKs and related macros. We must remove dependency on Chromium, i.e. we can't use Chromium's base/logging.h. That means we need to define these macros in WebRTC also when doing Chromium builds. And this causes redefinition. Alternative solutions: * Check if we already have defined e.g. CHECK, and don't define them in that case. This makes us depend on include order in Chromium, which is not acceptable. * Don't allow using the macros in WebRTC headers. Error prone since if someone adds it there by mistake it may compile fine, but later break if a header in added or order is changed in Chromium. That will be confusing and hard to enforce. * Ensure that headers that are included by an embedder don't include our macros. This would require some heavy refactoring to be maintainable and enforcable. * Changes in Chromium for this is obviously not an option. BUG=chromium:468375 NOTRY=true Review URL: https://codereview.webrtc.org/1335923002 Cr-Commit-Position: refs/heads/master@{#9964} 2015-09-17 00:24:34 -07:00			`RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);`

			`chunkwise_voice_probabilities_.resize(features_.num_frames);`
			`chunkwise_rms_.resize(features_.num_frames);`
			`std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),`
			`chunkwise_rms_.begin());`
			`if (features_.num_frames > 0) {`
			`if (features_.silence) {`
			`// The other features are invalid, so set the voice probabilities to an`
			`// arbitrary low value.`
			`std::fill(chunkwise_voice_probabilities_.begin(),`
			`chunkwise_voice_probabilities_.end(), kLowProbability);`
			`} else {`
			`std::fill(chunkwise_voice_probabilities_.begin(),`
			`chunkwise_voice_probabilities_.end(), kNeutralProbability);`
Add RTC_ prefix to (D)CHECKs and related macros. We must remove dependency on Chromium, i.e. we can't use Chromium's base/logging.h. That means we need to define these macros in WebRTC also when doing Chromium builds. And this causes redefinition. Alternative solutions: * Check if we already have defined e.g. CHECK, and don't define them in that case. This makes us depend on include order in Chromium, which is not acceptable. * Don't allow using the macros in WebRTC headers. Error prone since if someone adds it there by mistake it may compile fine, but later break if a header in added or order is changed in Chromium. That will be confusing and hard to enforce. * Ensure that headers that are included by an embedder don't include our macros. This would require some heavy refactoring to be maintainable and enforcable. * Changes in Chromium for this is obviously not an option. BUG=chromium:468375 NOTRY=true Review URL: https://codereview.webrtc.org/1335923002 Cr-Commit-Position: refs/heads/master@{#9964} 2015-09-17 00:24:34 -07:00			`RTC_CHECK_GE(`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],`
			`chunkwise_voice_probabilities_.size()),`
			`0);`
Add RTC_ prefix to (D)CHECKs and related macros. We must remove dependency on Chromium, i.e. we can't use Chromium's base/logging.h. That means we need to define these macros in WebRTC also when doing Chromium builds. And this causes redefinition. Alternative solutions: * Check if we already have defined e.g. CHECK, and don't define them in that case. This makes us depend on include order in Chromium, which is not acceptable. * Don't allow using the macros in WebRTC headers. Error prone since if someone adds it there by mistake it may compile fine, but later break if a header in added or order is changed in Chromium. That will be confusing and hard to enforce. * Ensure that headers that are included by an embedder don't include our macros. This would require some heavy refactoring to be maintainable and enforcable. * Changes in Chromium for this is obviously not an option. BUG=chromium:468375 NOTRY=true Review URL: https://codereview.webrtc.org/1335923002 Cr-Commit-Position: refs/heads/master@{#9964} 2015-09-17 00:24:34 -07:00			`RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(`
			`features_, &chunkwise_voice_probabilities_[0]),`
			`0);`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`}`
			`last_voice_probability_ = chunkwise_voice_probabilities_.back();`
			`}`
			`}`

			`} // namespace webrtc`