Spaces:
Sleeping
Sleeping
Fix bug in FFT
Browse files
The FFT routine does not work for odd N.
The solution is to add a naive DFT and use it when N is odd.
main.cpp
CHANGED
|
@@ -1909,8 +1909,31 @@ whisper_vocab::id whisper_sample_timestamp(
|
|
| 1909 |
return probs_id[0].second;
|
| 1910 |
}
|
| 1911 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1912 |
// Cooley-Tukey FFT
|
| 1913 |
-
// poor man's
|
| 1914 |
// input is real-valued
|
| 1915 |
// output is complex-valued
|
| 1916 |
void fft(const std::vector<float> & in, std::vector<float> & out) {
|
|
@@ -1924,6 +1947,11 @@ void fft(const std::vector<float> & in, std::vector<float> & out) {
|
|
| 1924 |
return;
|
| 1925 |
}
|
| 1926 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1927 |
std::vector<float> even;
|
| 1928 |
std::vector<float> odd;
|
| 1929 |
|
|
@@ -2014,9 +2042,20 @@ bool log_mel_spectrogram(
|
|
| 2014 |
// FFT -> mag^2
|
| 2015 |
fft(fft_in, fft_out);
|
| 2016 |
|
| 2017 |
-
for (int j = 0; j <
|
| 2018 |
fft_out[j] = (fft_out[2*j + 0]*fft_out[2*j + 0] + fft_out[2*j + 1]*fft_out[2*j + 1]);
|
| 2019 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2020 |
|
| 2021 |
// mel spectrogram
|
| 2022 |
for (int j = 0; j < mel.n_mel; j++) {
|
|
@@ -2048,6 +2087,7 @@ bool log_mel_spectrogram(
|
|
| 2048 |
mmax = mel.data[i];
|
| 2049 |
}
|
| 2050 |
}
|
|
|
|
| 2051 |
|
| 2052 |
mmax -= 8.0;
|
| 2053 |
|
|
|
|
| 1909 |
return probs_id[0].second;
|
| 1910 |
}
|
| 1911 |
|
| 1912 |
+
// naive Discrete Fourier Transform
// input is real-valued
// output is complex-valued
//
// in  - N real samples
// out - resized to 2*N floats; bin k is stored interleaved as
//       out[2*k + 0] = Re(X[k]), out[2*k + 1] = Im(X[k]), where
//       X[k] = sum_n in[n] * exp(-2*pi*i*k*n/N)
//
// Runs in O(N^2) multiply-adds, but only evaluates N sine/cosine pairs.
// An empty input yields an empty output.
void dft(const std::vector<float> & in, std::vector<float> & out) {
    const size_t N = in.size();

    out.resize(N*2);
    if (N == 0) {
        return;
    }

    // M_PI is not guaranteed by the C++ standard, so keep a local constant
    constexpr double kPi = 3.14159265358979323846;

    // exp(-2*pi*i*k*n/N) depends only on (k*n) mod N, so N twiddle factors are
    // enough. Computing them in double from a reduced index avoids the
    // precision loss of evaluating sin/cos on a large float angle.
    std::vector<double> cos_tab(N);
    std::vector<double> sin_tab(N);
    for (size_t t = 0; t < N; t++) {
        const double angle = 2.0*kPi*static_cast<double>(t)/static_cast<double>(N);
        cos_tab[t] = std::cos(angle);
        sin_tab[t] = std::sin(angle);
    }

    for (size_t k = 0; k < N; k++) {
        double re = 0.0;
        double im = 0.0;

        // idx tracks (k*n) mod N incrementally; since k < N and idx < N, a
        // single subtraction is enough and the product k*n is never formed
        size_t idx = 0;
        for (size_t n = 0; n < N; n++) {
            re += in[n]*cos_tab[idx];
            im -= in[n]*sin_tab[idx];

            idx += k;
            if (idx >= N) {
                idx -= N;
            }
        }

        out[k*2 + 0] = static_cast<float>(re);
        out[k*2 + 1] = static_cast<float>(im);
    }
}
|
| 1934 |
+
|
| 1935 |
// Cooley-Tukey FFT
|
| 1936 |
+
// poor man's implementation - use something better
|
| 1937 |
// input is real-valued
|
| 1938 |
// output is complex-valued
|
| 1939 |
void fft(const std::vector<float> & in, std::vector<float> & out) {
|
|
|
|
| 1947 |
return;
|
| 1948 |
}
|
| 1949 |
|
| 1950 |
+
if (N%2 == 1) {
|
| 1951 |
+
dft(in, out);
|
| 1952 |
+
return;
|
| 1953 |
+
}
|
| 1954 |
+
|
| 1955 |
std::vector<float> even;
|
| 1956 |
std::vector<float> odd;
|
| 1957 |
|
|
|
|
| 2042 |
// FFT -> mag^2
|
| 2043 |
fft(fft_in, fft_out);
|
| 2044 |
|
| 2045 |
+
for (int j = 0; j < fft_size; j++) {
|
| 2046 |
fft_out[j] = (fft_out[2*j + 0]*fft_out[2*j + 0] + fft_out[2*j + 1]*fft_out[2*j + 1]);
|
| 2047 |
}
|
| 2048 |
+
for (int j = 1; j < fft_size/2; j++) {
|
| 2049 |
+
//if (i == 0) {
|
| 2050 |
+
// printf("%d: %f %f\n", j, fft_out[j], fft_out[fft_size - j]);
|
| 2051 |
+
//}
|
| 2052 |
+
fft_out[j] += fft_out[fft_size - j];
|
| 2053 |
+
}
|
| 2054 |
+
if (i == 0) {
|
| 2055 |
+
//for (int j = 0; j < fft_size; j++) {
|
| 2056 |
+
// printf("%d: %e\n", j, fft_out[j]);
|
| 2057 |
+
//}
|
| 2058 |
+
}
|
| 2059 |
|
| 2060 |
// mel spectrogram
|
| 2061 |
for (int j = 0; j < mel.n_mel; j++) {
|
|
|
|
| 2087 |
mmax = mel.data[i];
|
| 2088 |
}
|
| 2089 |
}
|
| 2090 |
+
//printf("%s: max = %f\n", __func__, mmax);
|
| 2091 |
|
| 2092 |
mmax -= 8.0;
|
| 2093 |
|