-
Notifications
You must be signed in to change notification settings - Fork 1
/
vad.c
127 lines (108 loc) · 3.26 KB
/
vad.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
// PROGRAM CONSTANTS

// program settings
const char *input_path = "inputaudio1.data"; // file main() reads the input samples from
const char *output_path = "output.data";     // file main() writes the filtered samples to

// Number of samples in one packet.
// This is an enum constant rather than a `const size_t` object because C
// requires an integer constant expression for the length of a file-scope
// array, and a const-qualified variable is not one.
enum { PACKET_LENGTH = 160 };

// sample buffer (one packet of signed 8-bit samples)
int8_t buffer[PACKET_LENGTH];

// vad settings
const double MIN_ENERGY = 0.9;         // value emin is initialised with and reset to
const double INCREASE_FACTOR = 0.0008; // amount added to delta on every frame
const double FRAME_MARGIN = 5;         // below-threshold frames still reported as voice after the last voice frame

// vad algorithm support variables
// TODO struct
size_t n_frame = 0;          // index of the next frame passed to vad()
double emin, emax;           // running minimum / maximum frame energy tracked by vad()
double delta;                // growth rate vad() applies to emin, increased every frame
size_t margin_frame_counter; // consecutive below-threshold frames counted by vad()
/*
 * Energy-based voice activity detection for a single packet.
 *
 * The packet's RMS energy is compared against a threshold derived from the
 * running minimum (*emin) and maximum (*emax) energies. After a packet that
 * exceeds the threshold, up to FRAME_MARGIN following packets are still
 * reported as voice even if they fall below it.
 *
 * Parameters:
 *   buffer        - samples of the current packet (read only)
 *   packet_length - number of samples in buffer; 0 is treated as zero energy
 *   n_frame       - frame index; 0 (re)initialises all state behind the pointers
 *   mf_counter    - in/out: consecutive below-threshold frames seen so far
 *   emin, emax    - in/out: running minimum / maximum energy
 *   delta         - in/out: growth rate applied to *emin, increased by
 *                   INCREASE_FACTOR on every call
 *
 * Returns 1 if the packet should be treated as voice, 0 otherwise.
 * Prints diagnostic values to stdout on every call.
 *
 * Note: the parameter names intentionally shadow the file-scope variables of
 * the same name; only the arguments are used inside this function.
 */
int vad(int8_t *buffer, size_t packet_length, size_t n_frame, size_t *mf_counter, double *emin, double *emax, double *delta)
{
    // calculate energy of current frame (RMS)
    double sum_of_squares = 0;
    for (size_t i = 0; i < packet_length; i++)
    {
        const double s = (double)buffer[i];
        sum_of_squares += s * s;
    }
    // an empty packet would otherwise evaluate 0/0 and yield NaN
    const double current_energy =
        packet_length > 0 ? sqrt(sum_of_squares / (double)packet_length) : 0.0;

    // if current frame is the first, current energy is emax
    if (n_frame == 0)
    {
        printf("first frame\n");
        *emax = current_energy;
        *emin = MIN_ENERGY;
        *delta = 0;
        *mf_counter = 0;
    }

    if (current_energy > *emax)
    {
        *emax = current_energy;
    }

    // if current energy is less than emin...
    if (current_energy < *emin)
    {
        // ...and it is zero, set emin back to its default value;
        // otherwise set emin to current energy
        *emin = (current_energy == 0) ? MIN_ENERGY : current_energy;
    }

    printf("\ncurrent energy: %f\n", current_energy);

    // calculate threshold; with emax == 0 (only silence seen so far) the
    // ratio is undefined, so fall back to lambda = 0, i.e. threshold = emax
    const double lambda = (*emax > 0) ? (*emax - *emin) / (*emax) : 0.0;
    const double threshold = (1 - lambda) * (*emax) + lambda * (*emin);

    printf("threshold: %f\n", threshold);
    printf("lambda: %f\n", lambda);
    printf("emin: %f\n", *emin);
    printf("emax: %f\n", *emax);
    printf("mf_count: %zu\n", *mf_counter); // size_t requires %zu, not %d

    // let emin drift upwards, a little faster on every frame
    *delta += INCREASE_FACTOR;
    *emin += (*emin) * (*delta);

    if (current_energy > threshold)
    {
        // voice detected: restart the margin countdown
        *mf_counter = 0;
        return 1;
    }

    // below threshold: still report voice until the margin is used up
    if ((double)*mf_counter >= FRAME_MARGIN)
    {
        return 0;
    }
    *mf_counter += 1;
    return 1;
}
/*
 * Reads signed 8-bit samples from input_path in packets of up to
 * PACKET_LENGTH samples, classifies every packet with vad(), and writes to
 * output_path either the packet unchanged (voice) or the same number of zero
 * bytes (no voice). The last packet may be shorter than PACKET_LENGTH.
 *
 * Returns 0 on success and 1 if a file cannot be opened, read or written.
 */
int main(void)
{
    // the data is raw bytes, so open both streams in binary mode
    FILE *input_file = fopen(input_path, "rb");
    if (input_file == NULL)
    {
        perror(input_path);
        return 1;
    }
    FILE *output_file = fopen(output_path, "wb");
    if (output_file == NULL)
    {
        perror(output_path);
        fclose(input_file);
        return 1;
    }

    int status = 0;
    int sample = 0; // current sample, kept as int so EOF stays distinguishable
    do
    {
        // fill the buffer with one packet; the EOF marker itself is never stored
        size_t count = 0;
        while (count < (size_t)PACKET_LENGTH && (sample = fgetc(input_file)) != EOF)
        {
            buffer[count++] = (int8_t)sample; // current sample is cast to the size of a byte
        }
        if (count == 0)
        {
            break; // input ended exactly on a packet boundary (or was empty)
        }

        const int vad_val = vad(buffer, count, n_frame++, &margin_frame_counter, &emin, &emax, &delta);
        for (size_t i = 0; i < count; i++)
        {
            // voice packets are copied through, anything else is replaced by silence
            fputc(vad_val ? (int)buffer[i] : 0, output_file);
        }
        if (ferror(output_file))
        {
            perror(output_path);
            status = 1;
            break;
        }
        printf("sent: %d\n", vad_val);
    } while (sample != EOF);

    if (ferror(input_file))
    {
        perror(input_path);
        status = 1;
    }
    fclose(input_file);
    // fclose flushes buffered output, so a failure here means lost data
    if (fclose(output_file) != 0)
    {
        perror(output_path);
        status = 1;
    }
    return status;
}