2011-07-07 08:21:25 +00:00
|
|
|
/*
|
2012-01-25 12:18:12 +00:00
|
|
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
2011-07-07 08:21:25 +00:00
|
|
|
*
|
|
|
|
|
* Use of this source code is governed by a BSD-style license
|
|
|
|
|
* that can be found in the LICENSE file in the root of the source
|
|
|
|
|
* tree. An additional intellectual property rights grant can be found
|
|
|
|
|
* in the file PATENTS. All contributing project authors may
|
|
|
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* This header file includes the descriptions of the core VAD calls.
|
|
|
|
|
*/
|
|
|
|
|
|
2017-09-15 06:47:31 +02:00
|
|
|
#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
|
|
|
|
|
#define COMMON_AUDIO_VAD_VAD_CORE_H_
|
2011-07-07 08:21:25 +00:00
|
|
|
|
2017-09-15 06:47:31 +02:00
|
|
|
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
2012-03-20 12:53:06 +00:00
|
|
|
|
|
|
|
|
// Compile-time constants shared by the core VAD. Anonymous enums are used so
// the values are integer constant expressions usable as array sizes in C.
enum { kNumChannels = 6 };   // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// Total number of Gaussians over all channels; size of the per-Gaussian
// tables in VadInstT.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
|
2011-07-07 08:21:25 +00:00
|
|
|
|
2017-03-09 06:25:06 -08:00
|
|
|
typedef struct VadInstT_ {
|
2012-06-19 11:03:32 +00:00
|
|
|
int vad;
|
2012-03-29 12:09:44 +00:00
|
|
|
int32_t downsampling_filter_states[4];
|
2012-10-15 17:46:19 +00:00
|
|
|
WebRtcSpl_State48khzTo8khz state_48_to_8;
|
2012-03-29 12:09:44 +00:00
|
|
|
int16_t noise_means[kTableSize];
|
|
|
|
|
int16_t speech_means[kTableSize];
|
|
|
|
|
int16_t noise_stds[kTableSize];
|
|
|
|
|
int16_t speech_stds[kTableSize];
|
2021-07-26 12:15:29 +02:00
|
|
|
// TODO(bjornv): Change to `frame_count`.
|
2012-03-29 12:09:44 +00:00
|
|
|
int32_t frame_counter;
|
2017-03-09 06:25:06 -08:00
|
|
|
int16_t over_hang; // Over Hang
|
2012-03-29 12:09:44 +00:00
|
|
|
int16_t num_of_speech;
|
2021-07-26 12:15:29 +02:00
|
|
|
// TODO(bjornv): Change to `age_vector`.
|
2012-03-29 12:09:44 +00:00
|
|
|
int16_t index_vector[16 * kNumChannels];
|
|
|
|
|
int16_t low_value_vector[16 * kNumChannels];
|
2021-07-26 12:15:29 +02:00
|
|
|
// TODO(bjornv): Change to `median`.
|
2012-03-29 12:09:44 +00:00
|
|
|
int16_t mean_value[kNumChannels];
|
|
|
|
|
int16_t upper_state[5];
|
|
|
|
|
int16_t lower_state[5];
|
|
|
|
|
int16_t hp_filter_state[4];
|
|
|
|
|
int16_t over_hang_max_1[3];
|
|
|
|
|
int16_t over_hang_max_2[3];
|
|
|
|
|
int16_t individual[3];
|
|
|
|
|
int16_t total[3];
|
2011-07-07 08:21:25 +00:00
|
|
|
|
2012-01-25 12:18:12 +00:00
|
|
|
int init_flag;
|
2011-07-07 08:21:25 +00:00
|
|
|
} VadInstT;
|
|
|
|
|
|
2012-01-25 12:18:12 +00:00
|
|
|
// Initializes the core VAD component. The default aggressiveness mode is
|
2021-07-26 12:15:29 +02:00
|
|
|
// controlled by `kDefaultMode` in vad_core.c.
|
2012-01-25 12:18:12 +00:00
|
|
|
//
|
|
|
|
|
// - self [i/o] : Instance that should be initialized
|
|
|
|
|
//
|
2017-02-26 04:18:12 -08:00
|
|
|
// returns : 0 (OK), -1 (null pointer in or if the default mode can't be
|
2012-01-25 12:18:12 +00:00
|
|
|
// set)
|
|
|
|
|
int WebRtcVad_InitCore(VadInstT* self);
|
2011-07-07 08:21:25 +00:00
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
|
* WebRtcVad_set_mode_core(...)
|
|
|
|
|
*
|
|
|
|
|
* This function changes the VAD settings
|
|
|
|
|
*
|
|
|
|
|
* Input:
|
|
|
|
|
* - inst : VAD instance
|
|
|
|
|
* - mode : Aggressiveness degree
|
|
|
|
|
* 0 (High quality) - 3 (Highly aggressive)
|
|
|
|
|
*
|
|
|
|
|
* Output:
|
|
|
|
|
* - inst : Changed instance
|
|
|
|
|
*
|
|
|
|
|
* Return value : 0 - Ok
|
|
|
|
|
* -1 - Error
|
|
|
|
|
*/
|
|
|
|
|
|
2012-03-20 12:53:06 +00:00
|
|
|
int WebRtcVad_set_mode_core(VadInstT* self, int mode);
|
2011-07-07 08:21:25 +00:00
|
|
|
|
|
|
|
|
/****************************************************************************
|
2012-10-15 17:46:19 +00:00
|
|
|
* WebRtcVad_CalcVad48khz(...)
|
2014-04-30 16:44:13 +00:00
|
|
|
* WebRtcVad_CalcVad32khz(...)
|
|
|
|
|
* WebRtcVad_CalcVad16khz(...)
|
|
|
|
|
* WebRtcVad_CalcVad8khz(...)
|
2011-07-07 08:21:25 +00:00
|
|
|
*
|
|
|
|
|
* Calculate probability for active speech and make VAD decision.
|
|
|
|
|
*
|
|
|
|
|
* Input:
|
|
|
|
|
* - inst : Instance that should be initialized
|
|
|
|
|
* - speech_frame : Input speech frame
|
|
|
|
|
* - frame_length : Number of input samples
|
|
|
|
|
*
|
|
|
|
|
* Output:
|
|
|
|
|
* - inst : Updated filter states etc.
|
|
|
|
|
*
|
|
|
|
|
* Return value : VAD decision
|
|
|
|
|
* 0 - No active speech
|
|
|
|
|
* 1-6 - Active speech
|
|
|
|
|
*/
|
2014-04-30 16:44:13 +00:00
|
|
|
int WebRtcVad_CalcVad48khz(VadInstT* inst,
|
|
|
|
|
const int16_t* speech_frame,
|
Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.
This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.
This was be reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002
The change is being landed as TBR to all the folks who reviewed the above.
BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher
Review URL: https://codereview.webrtc.org/1230503003 .
Cr-Commit-Position: refs/heads/master@{#9768}
2015-08-24 14:52:23 -07:00
|
|
|
size_t frame_length);
|
2014-04-30 16:44:13 +00:00
|
|
|
int WebRtcVad_CalcVad32khz(VadInstT* inst,
|
|
|
|
|
const int16_t* speech_frame,
|
Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.
This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.
This was be reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002
The change is being landed as TBR to all the folks who reviewed the above.
BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher
Review URL: https://codereview.webrtc.org/1230503003 .
Cr-Commit-Position: refs/heads/master@{#9768}
2015-08-24 14:52:23 -07:00
|
|
|
size_t frame_length);
|
2014-04-30 16:44:13 +00:00
|
|
|
int WebRtcVad_CalcVad16khz(VadInstT* inst,
|
|
|
|
|
const int16_t* speech_frame,
|
Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.
This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.
This was be reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002
The change is being landed as TBR to all the folks who reviewed the above.
BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher
Review URL: https://codereview.webrtc.org/1230503003 .
Cr-Commit-Position: refs/heads/master@{#9768}
2015-08-24 14:52:23 -07:00
|
|
|
size_t frame_length);
|
2014-04-30 16:44:13 +00:00
|
|
|
int WebRtcVad_CalcVad8khz(VadInstT* inst,
|
|
|
|
|
const int16_t* speech_frame,
|
Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.
This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.
This was be reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002
The change is being landed as TBR to all the folks who reviewed the above.
BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher
Review URL: https://codereview.webrtc.org/1230503003 .
Cr-Commit-Position: refs/heads/master@{#9768}
2015-08-24 14:52:23 -07:00
|
|
|
size_t frame_length);
|
2011-07-07 08:21:25 +00:00
|
|
|
|
2017-09-15 06:47:31 +02:00
|
|
|
#endif // COMMON_AUDIO_VAD_VAD_CORE_H_
|