Spaces:
Sleeping
Sleeping
main : make whisper_print_segment_callback() more readable (close #371)
Browse files
- examples/main/main.cpp +53 -61
examples/main/main.cpp
CHANGED
|
@@ -176,90 +176,82 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi
|
|
| 176 |
|
| 177 |
const int n_segments = whisper_full_n_segments(ctx);
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
// print the last n_new segments
|
| 180 |
const int s0 = n_segments - n_new;
|
|
|
|
| 181 |
if (s0 == 0) {
|
| 182 |
printf("\n");
|
| 183 |
}
|
| 184 |
|
| 185 |
for (int i = s0; i < n_segments; i++) {
|
| 186 |
-
if (params.no_timestamps) {
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
|
| 191 |
-
if (id >= whisper_token_eot(ctx)) {
|
| 192 |
-
continue;
|
| 193 |
-
}
|
| 194 |
-
}
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
|
|
|
| 198 |
|
| 199 |
-
|
| 200 |
|
| 201 |
-
|
| 202 |
-
}
|
| 203 |
-
} else {
|
| 204 |
-
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 205 |
-
printf("%s", text);
|
| 206 |
-
}
|
| 207 |
-
fflush(stdout);
|
| 208 |
-
} else {
|
| 209 |
-
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 210 |
-
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 211 |
|
| 212 |
-
|
|
|
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
energy1 += fabs(pcmf32s[1][j]);
|
| 226 |
-
}
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
| 234 |
}
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
if (params.print_colors) {
|
| 240 |
-
printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
|
| 241 |
-
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
|
| 242 |
-
if (params.print_special == false) {
|
| 243 |
-
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
|
| 244 |
-
if (id >= whisper_token_eot(ctx)) {
|
| 245 |
-
continue;
|
| 246 |
-
}
|
| 247 |
-
}
|
| 248 |
|
| 249 |
-
|
| 250 |
-
const float p = whisper_full_get_token_p (ctx, i, j);
|
| 251 |
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
printf("\n");
|
| 257 |
-
} else {
|
| 258 |
-
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 259 |
|
| 260 |
-
|
| 261 |
-
|
|
|
|
| 262 |
}
|
|
|
|
|
|
|
| 263 |
}
|
| 264 |
}
|
| 265 |
|
|
|
|
| 176 |
|
| 177 |
const int n_segments = whisper_full_n_segments(ctx);
|
| 178 |
|
| 179 |
+
std::string speaker = "";
|
| 180 |
+
|
| 181 |
+
int64_t t0;
|
| 182 |
+
int64_t t1;
|
| 183 |
+
|
| 184 |
// print the last n_new segments
|
| 185 |
const int s0 = n_segments - n_new;
|
| 186 |
+
|
| 187 |
if (s0 == 0) {
|
| 188 |
printf("\n");
|
| 189 |
}
|
| 190 |
|
| 191 |
for (int i = s0; i < n_segments; i++) {
|
| 192 |
+
if (!params.no_timestamps || params.diarize) {
|
| 193 |
+
t0 = whisper_full_get_segment_t0(ctx, i);
|
| 194 |
+
t1 = whisper_full_get_segment_t1(ctx, i);
|
| 195 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
if (!params.no_timestamps) {
|
| 198 |
+
printf("[%s --> %s] ", to_timestamp(t0).c_str(), to_timestamp(t1).c_str());
|
| 199 |
+
}
|
| 200 |
|
| 201 |
+
if (params.diarize && pcmf32s.size() == 2) {
|
| 202 |
|
| 203 |
+
const int64_t n_samples = pcmf32s[0].size();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
+
const int64_t is0 = timestamp_to_sample(t0, n_samples);
|
| 206 |
+
const int64_t is1 = timestamp_to_sample(t1, n_samples);
|
| 207 |
|
| 208 |
+
double energy0 = 0.0f;
|
| 209 |
+
double energy1 = 0.0f;
|
| 210 |
|
| 211 |
+
for (int64_t j = is0; j < is1; j++) {
|
| 212 |
+
energy0 += fabs(pcmf32s[0][j]);
|
| 213 |
+
energy1 += fabs(pcmf32s[1][j]);
|
| 214 |
+
}
|
| 215 |
|
| 216 |
+
if (energy0 > 1.1*energy1) {
|
| 217 |
+
speaker = "(speaker 0)";
|
| 218 |
+
} else if (energy1 > 1.1*energy0) {
|
| 219 |
+
speaker = "(speaker 1)";
|
| 220 |
+
} else {
|
| 221 |
+
speaker = "(speaker ?)";
|
| 222 |
+
}
|
| 223 |
|
| 224 |
+
//printf("is0 = %lld, is1 = %lld, energy0 = %f, energy1 = %f, %s\n", is0, is1, energy0, energy1, speaker.c_str());
|
| 225 |
+
}
|
|
|
|
|
|
|
| 226 |
|
| 227 |
+
if (params.print_colors) {
|
| 228 |
+
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
|
| 229 |
+
if (params.print_special == false) {
|
| 230 |
+
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
|
| 231 |
+
if (id >= whisper_token_eot(ctx)) {
|
| 232 |
+
continue;
|
| 233 |
+
}
|
| 234 |
}
|
| 235 |
|
| 236 |
+
const char * text = whisper_full_get_token_text(ctx, i, j);
|
| 237 |
+
const float p = whisper_full_get_token_p (ctx, i, j);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
+
const int col = std::max(0, std::min((int) k_colors.size(), (int) (std::pow(p, 3)*float(k_colors.size()))));
|
|
|
|
| 240 |
|
| 241 |
+
printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
|
| 242 |
+
}
|
| 243 |
+
} else {
|
| 244 |
+
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 245 |
|
| 246 |
+
printf("%s%s", speaker.c_str(), text);
|
| 247 |
+
}
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
+
// with timestamps or speakers: each segment on new line
|
| 250 |
+
if (!params.no_timestamps || params.diarize) {
|
| 251 |
+
printf("\n");
|
| 252 |
}
|
| 253 |
+
|
| 254 |
+
fflush(stdout);
|
| 255 |
}
|
| 256 |
}
|
| 257 |
|