Spaces:
Running
Running
main : add -ocsv, aka --output-csv to output a CSV file
Browse files
Adds the -ocsv (aka --output-csv) option to examples/main, which outputs a CSV file containing lines formatted as follows: <startTime-in-integer-milliseconds>, <endTime-in-integer-milliseconds>, "<transcript-line-including-commas>".
- examples/main/main.cpp +36 -0
examples/main/main.cpp
CHANGED
|
@@ -69,6 +69,7 @@ struct whisper_params {
|
|
| 69 |
bool output_vtt = false;
|
| 70 |
bool output_srt = false;
|
| 71 |
bool output_wts = false;
|
|
|
|
| 72 |
bool print_special = false;
|
| 73 |
bool print_colors = false;
|
| 74 |
bool print_progress = false;
|
|
@@ -111,6 +112,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 111 |
else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
|
| 112 |
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
|
| 113 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
|
|
|
| 114 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 115 |
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
| 116 |
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
|
|
@@ -150,6 +152,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 150 |
fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
|
| 151 |
fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
|
| 152 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
|
|
|
| 153 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 154 |
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
| 155 |
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
|
|
@@ -325,6 +328,32 @@ bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_
|
|
| 325 |
return true;
|
| 326 |
}
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
// karaoke video generation
|
| 329 |
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
| 330 |
// TODO: font parameter adjustments
|
|
@@ -674,6 +703,13 @@ int main(int argc, char ** argv) {
|
|
| 674 |
const auto fname_wts = fname_inp + ".wts";
|
| 675 |
output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
|
| 676 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
}
|
| 678 |
}
|
| 679 |
|
|
|
|
| 69 |
bool output_vtt = false;
|
| 70 |
bool output_srt = false;
|
| 71 |
bool output_wts = false;
|
| 72 |
+
bool output_csv = false;
|
| 73 |
bool print_special = false;
|
| 74 |
bool print_colors = false;
|
| 75 |
bool print_progress = false;
|
|
|
|
| 112 |
else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
|
| 113 |
else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
|
| 114 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
| 115 |
+
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
| 116 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 117 |
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
| 118 |
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
|
|
|
|
| 152 |
fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
|
| 153 |
fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
|
| 154 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
| 155 |
+
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
| 156 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 157 |
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
| 158 |
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
|
|
|
|
| 328 |
return true;
|
| 329 |
}
|
| 330 |
|
| 331 |
+
bool output_csv(struct whisper_context * ctx, const char * fname) {
|
| 332 |
+
std::ofstream fout(fname);
|
| 333 |
+
if (!fout.is_open()) {
|
| 334 |
+
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
| 335 |
+
return false;
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
| 339 |
+
|
| 340 |
+
const int n_segments = whisper_full_n_segments(ctx);
|
| 341 |
+
for (int i = 0; i < n_segments; ++i) {
|
| 342 |
+
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 343 |
+
if (text[0] == ' ')
|
| 344 |
+
text = text + sizeof(char); //whisper_full_get_segment_text() returns a string with leading space, point to the next character.
|
| 345 |
+
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 346 |
+
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 347 |
+
//need to multiply times returned from whisper_full_get_segment_t{0,1}() by 10 to get milliseconds.
|
| 348 |
+
fout << 10 * t0 << ", "
|
| 349 |
+
<< 10 * t1 << ", \""
|
| 350 |
+
<< text << "\"\n";
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
return true;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
|
| 357 |
// karaoke video generation
|
| 358 |
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
| 359 |
// TODO: font parameter adjustments
|
|
|
|
| 703 |
const auto fname_wts = fname_inp + ".wts";
|
| 704 |
output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE);
|
| 705 |
}
|
| 706 |
+
|
| 707 |
+
// output to CSV file
|
| 708 |
+
if (params.output_csv) {
|
| 709 |
+
const auto fname_csv = fname_inp + ".csv";
|
| 710 |
+
output_csv(ctx, fname_csv.c_str());
|
| 711 |
+
}
|
| 712 |
+
|
| 713 |
}
|
| 714 |
}
|
| 715 |
|