webrtc_m130/modules/audio_processing/vad/voice_activity_detector.h

/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_

#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <vector>

#include "common_audio/resampler/include/resampler.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"

namespace webrtc {

// A Voice Activity Detector (VAD) that combines the voice probability from the
// StandaloneVad and PitchBasedVad to get a more robust estimation.
//
// Typical use, as far as this header shows: feed audio through ProcessChunk()
// and then read the results through the accessors below.
//
// The class holds a std::unique_ptr member and declares its own destructor, so
// it is neither copyable nor implicitly movable.
class VoiceActivityDetector {
 public:
  // Constructor and destructor are only declared here; their definitions are
  // not part of this header.
  VoiceActivityDetector();
  ~VoiceActivityDetector();

  // Processes each audio chunk and estimates the voice probability.
  //
  // `audio`          - pointer to 16-bit PCM samples.
  // `length`         - presumably the number of samples available at `audio`;
  //                    confirm against the implementation.
  // `sample_rate_hz` - sample rate of `audio`, in Hz.
  //
  // Returns nothing; results are read back through the accessors below.
  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);

  // Returns a vector of voice probabilities for each chunk. It can be empty for
  // some chunks, but it catches up afterwards returning multiple values at
  // once.
  //
  // The returned reference aliases a member of this object, so it is only
  // valid while this object is alive. NOTE(review): presumably its contents
  // are replaced by the next ProcessChunk() call; confirm in the .cc file.
  const std::vector<double>& chunkwise_voice_probabilities() const {
    return chunkwise_voice_probabilities_;
  }

  // Returns a vector of RMS values for each chunk. It has the same length as
  // chunkwise_voice_probabilities().
  //
  // Like chunkwise_voice_probabilities(), this returns a reference to a member
  // of this object.
  const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }

  // Returns the last voice probability, regardless of the internal
  // implementation, although it has a few chunks of delay.
  //
  // Returned by value as a float, whereas the chunkwise vectors hold doubles
  // (see the TODO below).
  float last_voice_probability() const { return last_voice_probability_; }

 private:
  // Backing storage for chunkwise_voice_probabilities() and chunkwise_rms().
  // TODO(aluebs): Change these to float.
  std::vector<double> chunkwise_voice_probabilities_;
  std::vector<double> chunkwise_rms_;

  // Backing storage for last_voice_probability(). It has no in-class
  // initializer, so its initial value must come from the constructor.
  float last_voice_probability_;

  // Processing stages owned by value. NOTE(review): presumably the resampler
  // converts the input to the rate expected by the feature extraction stage;
  // confirm in the .cc file.
  Resampler resampler_;
  VadAudioProc audio_processing_;

  // The two detectors whose outputs are combined (see the class comment).
  // StandaloneVad is held through a unique_ptr while PitchBasedVad is held by
  // value.
  std::unique_ptr<StandaloneVad> standalone_vad_;
  PitchBasedVad pitch_based_vad_;

  // Scratch buffer of kLength10Ms 16-bit samples; judging by the name, it holds
  // resampled audio (confirm in the .cc file). Like the float above, it has no
  // in-class initializer.
  int16_t resampled_[kLength10Ms];
  // NOTE(review): presumably the features extracted from the audio and handed
  // to the detectors; confirm in the .cc file.
  AudioFeatures features_;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`/*`
			`* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.`
			`*`
			`* Use of this source code is governed by a BSD-style license`
			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
			`* in the file PATENTS. All contributing project authors may`
			`* be found in the AUTHORS file in the root of the source tree.`
			`*/`

Fixing WebRTC after moving from src/webrtc to src/ In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC from src/webrtc to src/ (in order to preserve an healthy git history). This CL takes care of fixing header guards, #include paths, etc... NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578 Reviewed-on: https://webrtc-review.googlesource.com/1561 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19846} 2017-09-15 06:47:31 +02:00			`#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_`
			`#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
Replace scoped_ptr with unique_ptr in webrtc/modules/audio_processing/vad/ BUG=webrtc:5520 Review URL: https://codereview.webrtc.org/1699003002 Cr-Commit-Position: refs/heads/master@{#11657} 2016-02-17 07:59:48 -08:00			`#include <memory>`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`#include <vector>`

Fixing WebRTC after moving from src/webrtc to src/ In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC from src/webrtc to src/ (in order to preserve an healthy git history). This CL takes care of fixing header guards, #include paths, etc... NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578 Reviewed-on: https://webrtc-review.googlesource.com/1561 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19846} 2017-09-15 06:47:31 +02:00			`#include "common_audio/resampler/include/resampler.h"`
			`#include "modules/audio_processing/vad/vad_audio_proc.h"`
			`#include "modules/audio_processing/vad/common.h"`
			`#include "modules/audio_processing/vad/pitch_based_vad.h"`
			`#include "modules/audio_processing/vad/standalone_vad.h"`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`namespace webrtc {`

			`// A Voice Activity Detector (VAD) that combines the voice probability from the`
			`// StandaloneVad and PitchBasedVad to get a more robust estimation.`
			`class VoiceActivityDetector {`
			`public:`
			`VoiceActivityDetector();`
Fix Chromium clang plugin warnings BUG=webrtc:163 Review-Url: https://codereview.webrtc.org/2285713002 Cr-Commit-Position: refs/heads/master@{#13943} 2016-08-26 14:50:38 -07:00			`~VoiceActivityDetector();`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
audio_processing VAD annotations in APM-qa. Added possibility to extract audio_processing VAD annotations in the Quality Assessment tool. Annotations are extracted into compressed Numpy 'annotations.npz' files. Annotations contain information about VAD, speech level, speech probabilities etc. TBR=alessiob@webrtc.org Bug: webrtc:7494 Change-Id: I0e54bb67132ae4e180f89959b8bca3ea7f259458 Reviewed-on: https://webrtc-review.googlesource.com/17840 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Alex Loiko <aleloi@webrtc.org> Cr-Commit-Position: refs/heads/master@{#20581} 2017-11-07 10:51:20 +01:00			`// Processes each audio chunk and estimates the voice probability.`
Update a ton of audio code to use size_t more correctly and in general reduce use of int16_t/uint16_t. This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects. This was be reviewed and approved in pieces: https://codereview.webrtc.org/1224093003 https://codereview.webrtc.org/1224123002 https://codereview.webrtc.org/1224163002 https://codereview.webrtc.org/1225133003 https://codereview.webrtc.org/1225173002 https://codereview.webrtc.org/1227163003 https://codereview.webrtc.org/1227203003 https://codereview.webrtc.org/1227213002 https://codereview.webrtc.org/1227893002 https://codereview.webrtc.org/1228793004 https://codereview.webrtc.org/1228803003 https://codereview.webrtc.org/1228823002 https://codereview.webrtc.org/1228823003 https://codereview.webrtc.org/1228843002 https://codereview.webrtc.org/1230693002 https://codereview.webrtc.org/1231713002 The change is being landed as TBR to all the folks who reviewed the above. BUG=chromium:81439 TEST=none R=andrew@webrtc.org, pbos@webrtc.org TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher Review URL: https://codereview.webrtc.org/1230503003 . Cr-Commit-Position: refs/heads/master@{#9768} 2015-08-24 14:52:23 -07:00			`void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00
			`// Returns a vector of voice probabilities for each chunk. It can be empty for`
			`// some chunks, but it catches up afterwards returning multiple values at`
			`// once.`
			`const std::vector<double>& chunkwise_voice_probabilities() const {`
			`return chunkwise_voice_probabilities_;`
			`}`

			`// Returns a vector of RMS values for each chunk. It has the same length as`
			`// chunkwise_voice_probabilities().`
			`const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }`

			`// Returns the last voice probability, regardless of the internal`
			`// implementation, although it has a few chunks of delay.`
			`float last_voice_probability() const { return last_voice_probability_; }`

			`private:`
			`// TODO(aluebs): Change these to float.`
			`std::vector<double> chunkwise_voice_probabilities_;`
			`std::vector<double> chunkwise_rms_;`

			`float last_voice_probability_;`

			`Resampler resampler_;`
			`VadAudioProc audio_processing_;`

Replace scoped_ptr with unique_ptr in webrtc/modules/audio_processing/vad/ BUG=webrtc:5520 Review URL: https://codereview.webrtc.org/1699003002 Cr-Commit-Position: refs/heads/master@{#11657} 2016-02-17 07:59:48 -08:00			`std::unique_ptr<StandaloneVad> standalone_vad_;`
Pull the Voice Activity Detector out from the AGC This change generates bit-exact values when running through audioproc_f than before. This change was originally uploaded here: * https://codereview.webrtc.org/1181933002/ * https://codereview.webrtc.org/1177043017/ And reverted because of an ASAN problem in Chrome here: * https://codereview.webrtc.org/1192863006/ * https://codereview.webrtc.org/1194963003/ TBR=andrew@webrtc.org Review URL: https://codereview.webrtc.org/1212543002 Cr-Commit-Position: refs/heads/master@{#9505} 2015-06-25 12:28:48 -07:00			`PitchBasedVad pitch_based_vad_;`

			`int16_t resampled_[kLength10Ms];`
			`AudioFeatures features_;`
			`};`

			`} // namespace webrtc`

Fixing WebRTC after moving from src/webrtc to src/ In https://webrtc-review.googlesource.com/c/src/+/1560 we moved WebRTC from src/webrtc to src/ (in order to preserve an healthy git history). This CL takes care of fixing header guards, #include paths, etc... NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iea91618212bee0af16aa3f05071eab8f93706578 Reviewed-on: https://webrtc-review.googlesource.com/1561 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19846} 2017-09-15 06:47:31 +02:00			`#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_`