Spaces:
Running
Running
main : provide option for creating JSON output (#615)
Browse files
* examples : provide option for also exporting as a JSON file (ggerganov/whisper.cpp#614)
* main : remove leftovers
---------
Co-authored-by: Georgi Gerganov <[email protected]>
- examples/main/README.md +1 -0
- examples/main/main.cpp +132 -0
- whisper.cpp +66 -1
- whisper.h +15 -0
examples/main/README.md
CHANGED
|
@@ -31,6 +31,7 @@ options:
|
|
| 31 |
-osrt, --output-srt [false ] output result in a srt file
|
| 32 |
-owts, --output-words [false ] output script for generating karaoke video
|
| 33 |
-ocsv, --output-csv [false ] output result in a CSV file
|
|
|
|
| 34 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 35 |
-ps, --print-special [false ] print special tokens
|
| 36 |
-pc, --print-colors [false ] print colors
|
|
|
|
| 31 |
-osrt, --output-srt [false ] output result in a srt file
|
| 32 |
-owts, --output-words [false ] output script for generating karaoke video
|
| 33 |
-ocsv, --output-csv [false ] output result in a CSV file
|
| 34 |
+
-oj, --output-json [false ] output result in a JSON file
|
| 35 |
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
| 36 |
-ps, --print-special [false ] print special tokens
|
| 37 |
-pc, --print-colors [false ] print colors
|
examples/main/main.cpp
CHANGED
|
@@ -73,6 +73,7 @@ struct whisper_params {
|
|
| 73 |
bool output_srt = false;
|
| 74 |
bool output_wts = false;
|
| 75 |
bool output_csv = false;
|
|
|
|
| 76 |
bool print_special = false;
|
| 77 |
bool print_colors = false;
|
| 78 |
bool print_progress = false;
|
|
@@ -130,6 +131,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 130 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
| 131 |
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
|
| 132 |
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
|
|
|
| 133 |
else if (arg == "-of" || arg == "--output-file") { params.fname_out.emplace_back(argv[++i]); }
|
| 134 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 135 |
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
|
@@ -178,6 +180,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
|
|
| 178 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
| 179 |
fprintf(stderr, " -fp, --font-path [%-7s] path to a monospace font for karaoke video\n", params.font_path.c_str());
|
| 180 |
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
|
|
|
| 181 |
fprintf(stderr, " -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n", "");
|
| 182 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 183 |
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
|
@@ -368,6 +371,129 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
|
|
| 368 |
return true;
|
| 369 |
}
|
| 370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
// karaoke video generation
|
| 372 |
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
| 373 |
// TODO: font parameter adjustments
|
|
@@ -662,6 +788,12 @@ int main(int argc, char ** argv) {
|
|
| 662 |
const auto fname_csv = fname_out + ".csv";
|
| 663 |
output_csv(ctx, fname_csv.c_str());
|
| 664 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
}
|
| 666 |
}
|
| 667 |
|
|
|
|
| 73 |
bool output_srt = false;
|
| 74 |
bool output_wts = false;
|
| 75 |
bool output_csv = false;
|
| 76 |
+
bool output_jsn = false;
|
| 77 |
bool print_special = false;
|
| 78 |
bool print_colors = false;
|
| 79 |
bool print_progress = false;
|
|
|
|
| 131 |
else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
|
| 132 |
else if (arg == "-fp" || arg == "--font-path") { params.font_path = argv[++i]; }
|
| 133 |
else if (arg == "-ocsv" || arg == "--output-csv") { params.output_csv = true; }
|
| 134 |
+
else if (arg == "-oj" || arg == "--output-json") { params.output_jsn = true; }
|
| 135 |
else if (arg == "-of" || arg == "--output-file") { params.fname_out.emplace_back(argv[++i]); }
|
| 136 |
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
|
| 137 |
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
|
|
|
|
| 180 |
fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
|
| 181 |
fprintf(stderr, " -fp, --font-path [%-7s] path to a monospace font for karaoke video\n", params.font_path.c_str());
|
| 182 |
fprintf(stderr, " -ocsv, --output-csv [%-7s] output result in a CSV file\n", params.output_csv ? "true" : "false");
|
| 183 |
+
fprintf(stderr, " -oj, --output-json [%-7s] output result in a JSON file\n", params.output_jsn ? "true" : "false");
|
| 184 |
fprintf(stderr, " -of FNAME, --output-file FNAME [%-7s] output file path (without file extension)\n", "");
|
| 185 |
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
|
| 186 |
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
|
|
|
|
| 371 |
return true;
|
| 372 |
}
|
| 373 |
|
| 374 |
+
bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
|
| 375 |
+
std::ofstream fout(fname);
|
| 376 |
+
int indent = 0;
|
| 377 |
+
|
| 378 |
+
auto doindent = [&]() {
|
| 379 |
+
for (int i = 0; i < indent; i++) fout << "\t";
|
| 380 |
+
};
|
| 381 |
+
|
| 382 |
+
auto start_arr = [&](const char *name) {
|
| 383 |
+
doindent();
|
| 384 |
+
fout << "\"" << name << "\": [\n";
|
| 385 |
+
indent++;
|
| 386 |
+
};
|
| 387 |
+
|
| 388 |
+
auto end_arr = [&](bool end = false) {
|
| 389 |
+
indent--;
|
| 390 |
+
doindent();
|
| 391 |
+
fout << (end ? "]\n" : "},\n");
|
| 392 |
+
};
|
| 393 |
+
|
| 394 |
+
auto start_obj = [&](const char *name = nullptr) {
|
| 395 |
+
doindent();
|
| 396 |
+
if (name) {
|
| 397 |
+
fout << "\"" << name << "\": {\n";
|
| 398 |
+
} else {
|
| 399 |
+
fout << "{\n";
|
| 400 |
+
}
|
| 401 |
+
indent++;
|
| 402 |
+
};
|
| 403 |
+
|
| 404 |
+
auto end_obj = [&](bool end = false) {
|
| 405 |
+
indent--;
|
| 406 |
+
doindent();
|
| 407 |
+
fout << (end ? "}\n" : "},\n");
|
| 408 |
+
};
|
| 409 |
+
|
| 410 |
+
auto start_value = [&](const char *name) {
|
| 411 |
+
doindent();
|
| 412 |
+
fout << "\"" << name << "\": ";
|
| 413 |
+
};
|
| 414 |
+
|
| 415 |
+
auto value_s = [&](const char *name, const char *val, bool end = false) {
|
| 416 |
+
start_value(name);
|
| 417 |
+
fout << "\"" << val << (end ? "\"\n" : "\",\n");
|
| 418 |
+
};
|
| 419 |
+
|
| 420 |
+
auto end_value = [&](bool end = false) {
|
| 421 |
+
fout << (end ? "\n" : ",\n");
|
| 422 |
+
};
|
| 423 |
+
|
| 424 |
+
auto value_i = [&](const char *name, const int64_t val, bool end = false) {
|
| 425 |
+
start_value(name);
|
| 426 |
+
fout << val;
|
| 427 |
+
end_value(end);
|
| 428 |
+
};
|
| 429 |
+
|
| 430 |
+
auto value_b = [&](const char *name, const bool val, bool end = false) {
|
| 431 |
+
start_value(name);
|
| 432 |
+
fout << (val ? "true" : "false");
|
| 433 |
+
end_value(end);
|
| 434 |
+
};
|
| 435 |
+
|
| 436 |
+
if (!fout.is_open()) {
|
| 437 |
+
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
|
| 438 |
+
return false;
|
| 439 |
+
}
|
| 440 |
+
|
| 441 |
+
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
|
| 442 |
+
start_obj();
|
| 443 |
+
value_s("systeminfo", whisper_print_system_info());
|
| 444 |
+
start_obj("model");
|
| 445 |
+
value_s("type", whisper_model_type_readable(ctx));
|
| 446 |
+
value_b("multilingual", whisper_is_multilingual(ctx));
|
| 447 |
+
value_i("vocab", whisper_model_n_vocab(ctx));
|
| 448 |
+
start_obj("audio");
|
| 449 |
+
value_i("ctx", whisper_model_n_audio_ctx(ctx));
|
| 450 |
+
value_i("state", whisper_model_n_audio_state(ctx));
|
| 451 |
+
value_i("head", whisper_model_n_audio_head(ctx));
|
| 452 |
+
value_i("layer", whisper_model_n_audio_layer(ctx), true);
|
| 453 |
+
end_obj();
|
| 454 |
+
start_obj("text");
|
| 455 |
+
value_i("ctx", whisper_model_n_text_ctx(ctx));
|
| 456 |
+
value_i("state", whisper_model_n_text_state(ctx));
|
| 457 |
+
value_i("head", whisper_model_n_text_head(ctx));
|
| 458 |
+
value_i("leyer", whisper_model_n_text_layer(ctx), true);
|
| 459 |
+
end_obj();
|
| 460 |
+
value_i("mels", whisper_model_n_mels(ctx));
|
| 461 |
+
value_i("f16", whisper_model_f16(ctx), true);
|
| 462 |
+
end_obj();
|
| 463 |
+
start_obj("params");
|
| 464 |
+
value_s("model", params.model.c_str());
|
| 465 |
+
value_s("language", params.language.c_str());
|
| 466 |
+
value_b("translate", params.translate, true);
|
| 467 |
+
end_obj();
|
| 468 |
+
start_obj("result");
|
| 469 |
+
value_s("language", whisper_lang_str(whisper_full_lang_id(ctx)), true);
|
| 470 |
+
end_obj();
|
| 471 |
+
start_arr("transcription");
|
| 472 |
+
|
| 473 |
+
const int n_segments = whisper_full_n_segments(ctx);
|
| 474 |
+
for (int i = 0; i < n_segments; ++i) {
|
| 475 |
+
const char * text = whisper_full_get_segment_text(ctx, i);
|
| 476 |
+
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
|
| 477 |
+
const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
|
| 478 |
+
|
| 479 |
+
start_obj();
|
| 480 |
+
start_obj("timestanps");
|
| 481 |
+
value_s("from", to_timestamp(t0, true).c_str());
|
| 482 |
+
value_s("to", to_timestamp(t1, true).c_str(), true);
|
| 483 |
+
end_obj();
|
| 484 |
+
start_obj("offsets");
|
| 485 |
+
value_i("from", t0 * 10);
|
| 486 |
+
value_i("to", t1 * 10, true);
|
| 487 |
+
end_obj();
|
| 488 |
+
value_s("text", text, true);
|
| 489 |
+
end_obj(i == (n_segments - 1));
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
end_arr(true);
|
| 493 |
+
end_obj(true);
|
| 494 |
+
return true;
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
// karaoke video generation
|
| 498 |
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
|
| 499 |
// TODO: font parameter adjustments
|
|
|
|
| 788 |
const auto fname_csv = fname_out + ".csv";
|
| 789 |
output_csv(ctx, fname_csv.c_str());
|
| 790 |
}
|
| 791 |
+
|
| 792 |
+
// output to JSON file
|
| 793 |
+
if (params.output_jsn) {
|
| 794 |
+
const auto fname_jsn = fname_out + ".json";
|
| 795 |
+
output_json(ctx, fname_jsn.c_str(), params);
|
| 796 |
+
}
|
| 797 |
}
|
| 798 |
}
|
| 799 |
|
whisper.cpp
CHANGED
|
@@ -1408,7 +1408,7 @@ static bool whisper_encode_internal(
|
|
| 1408 |
//}
|
| 1409 |
|
| 1410 |
static int iter = 0;
|
| 1411 |
-
|
| 1412 |
const size_t e_pe_stride = model.e_pe->ne[0]*ggml_element_size(model.e_pe);
|
| 1413 |
const size_t e_pe_offset = model.e_pe->ne[0]*ggml_element_size(model.e_pe)*n_ctx*iter;
|
| 1414 |
|
|
@@ -2919,6 +2919,71 @@ int whisper_lang_auto_detect(
|
|
| 2919 |
return whisper_lang_auto_detect_with_state(ctx, ctx->state, offset_ms, n_threads, lang_probs);
|
| 2920 |
}
|
| 2921 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2922 |
int whisper_n_len_from_state(struct whisper_state * state) {
|
| 2923 |
return state->mel.n_len;
|
| 2924 |
}
|
|
|
|
| 1408 |
//}
|
| 1409 |
|
| 1410 |
static int iter = 0;
|
| 1411 |
+
|
| 1412 |
const size_t e_pe_stride = model.e_pe->ne[0]*ggml_element_size(model.e_pe);
|
| 1413 |
const size_t e_pe_offset = model.e_pe->ne[0]*ggml_element_size(model.e_pe)*n_ctx*iter;
|
| 1414 |
|
|
|
|
| 2919 |
return whisper_lang_auto_detect_with_state(ctx, ctx->state, offset_ms, n_threads, lang_probs);
|
| 2920 |
}
|
| 2921 |
|
| 2922 |
+
int whisper_model_n_vocab(struct whisper_context * ctx) {
|
| 2923 |
+
return ctx->model.hparams.n_vocab;
|
| 2924 |
+
}
|
| 2925 |
+
|
| 2926 |
+
int whisper_model_n_audio_ctx(struct whisper_context * ctx) {
|
| 2927 |
+
return ctx->model.hparams.n_audio_ctx;
|
| 2928 |
+
}
|
| 2929 |
+
|
| 2930 |
+
int whisper_model_n_audio_state(struct whisper_context * ctx) {
|
| 2931 |
+
return ctx->model.hparams.n_audio_state;
|
| 2932 |
+
}
|
| 2933 |
+
|
| 2934 |
+
int whisper_model_n_audio_head(struct whisper_context * ctx) {
|
| 2935 |
+
return ctx->model.hparams.n_audio_head;
|
| 2936 |
+
}
|
| 2937 |
+
|
| 2938 |
+
int whisper_model_n_audio_layer(struct whisper_context * ctx) {
|
| 2939 |
+
return ctx->model.hparams.n_audio_layer;
|
| 2940 |
+
}
|
| 2941 |
+
|
| 2942 |
+
int whisper_model_n_text_ctx(struct whisper_context * ctx) {
|
| 2943 |
+
return ctx->model.hparams.n_text_ctx;
|
| 2944 |
+
}
|
| 2945 |
+
|
| 2946 |
+
int whisper_model_n_text_state(struct whisper_context * ctx) {
|
| 2947 |
+
return ctx->model.hparams.n_text_state;
|
| 2948 |
+
}
|
| 2949 |
+
|
| 2950 |
+
int whisper_model_n_text_head(struct whisper_context * ctx) {
|
| 2951 |
+
return ctx->model.hparams.n_text_head;
|
| 2952 |
+
}
|
| 2953 |
+
|
| 2954 |
+
int whisper_model_n_text_layer(struct whisper_context * ctx) {
|
| 2955 |
+
return ctx->model.hparams.n_text_layer;
|
| 2956 |
+
}
|
| 2957 |
+
|
| 2958 |
+
int whisper_model_n_mels(struct whisper_context * ctx) {
|
| 2959 |
+
return ctx->model.hparams.n_mels;
|
| 2960 |
+
}
|
| 2961 |
+
|
| 2962 |
+
int whisper_model_f16(struct whisper_context * ctx) {
|
| 2963 |
+
return ctx->model.hparams.f16;
|
| 2964 |
+
}
|
| 2965 |
+
|
| 2966 |
+
int whisper_model_type(struct whisper_context * ctx) {
|
| 2967 |
+
return ctx->model.type;
|
| 2968 |
+
}
|
| 2969 |
+
|
| 2970 |
+
const char *whisper_model_type_readable(struct whisper_context * ctx) {
|
| 2971 |
+
switch (ctx->model.type) {
|
| 2972 |
+
case e_model::MODEL_TINY:
|
| 2973 |
+
return "tiny";
|
| 2974 |
+
case e_model::MODEL_BASE:
|
| 2975 |
+
return "base";
|
| 2976 |
+
case e_model::MODEL_SMALL:
|
| 2977 |
+
return "small";
|
| 2978 |
+
case e_model::MODEL_MEDIUM:
|
| 2979 |
+
return "medium";
|
| 2980 |
+
case e_model::MODEL_LARGE:
|
| 2981 |
+
return "large";
|
| 2982 |
+
default:
|
| 2983 |
+
return "unknown";
|
| 2984 |
+
}
|
| 2985 |
+
}
|
| 2986 |
+
|
| 2987 |
int whisper_n_len_from_state(struct whisper_state * state) {
|
| 2988 |
return state->mel.n_len;
|
| 2989 |
}
|
whisper.h
CHANGED
|
@@ -248,6 +248,19 @@ extern "C" {
|
|
| 248 |
WHISPER_API int whisper_n_audio_ctx (struct whisper_context * ctx);
|
| 249 |
WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
// Token logits obtained from the last call to whisper_decode()
|
| 252 |
// The logits for the last token are stored in the last row
|
| 253 |
// Rows: n_tokens
|
|
@@ -257,6 +270,8 @@ extern "C" {
|
|
| 257 |
|
| 258 |
// Token Id -> String. Uses the vocabulary in the provided context
|
| 259 |
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
|
|
|
|
|
|
| 260 |
|
| 261 |
// Special tokens
|
| 262 |
WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
|
|
|
|
| 248 |
WHISPER_API int whisper_n_audio_ctx (struct whisper_context * ctx);
|
| 249 |
WHISPER_API int whisper_is_multilingual (struct whisper_context * ctx);
|
| 250 |
|
| 251 |
+
WHISPER_API int whisper_model_n_vocab (struct whisper_context * ctx);
|
| 252 |
+
WHISPER_API int whisper_model_n_audio_ctx (struct whisper_context * ctx);
|
| 253 |
+
WHISPER_API int whisper_model_n_audio_state(struct whisper_context * ctx);
|
| 254 |
+
WHISPER_API int whisper_model_n_audio_head (struct whisper_context * ctx);
|
| 255 |
+
WHISPER_API int whisper_model_n_audio_layer(struct whisper_context * ctx);
|
| 256 |
+
WHISPER_API int whisper_model_n_text_ctx (struct whisper_context * ctx);
|
| 257 |
+
WHISPER_API int whisper_model_n_text_state (struct whisper_context * ctx);
|
| 258 |
+
WHISPER_API int whisper_model_n_text_head (struct whisper_context * ctx);
|
| 259 |
+
WHISPER_API int whisper_model_n_text_layer (struct whisper_context * ctx);
|
| 260 |
+
WHISPER_API int whisper_model_n_mels (struct whisper_context * ctx);
|
| 261 |
+
WHISPER_API int whisper_model_f16 (struct whisper_context * ctx);
|
| 262 |
+
WHISPER_API int whisper_model_type (struct whisper_context * ctx);
|
| 263 |
+
|
| 264 |
// Token logits obtained from the last call to whisper_decode()
|
| 265 |
// The logits for the last token are stored in the last row
|
| 266 |
// Rows: n_tokens
|
|
|
|
| 270 |
|
| 271 |
// Token Id -> String. Uses the vocabulary in the provided context
|
| 272 |
WHISPER_API const char * whisper_token_to_str(struct whisper_context * ctx, whisper_token token);
|
| 273 |
+
WHISPER_API const char * whisper_model_type_readable(struct whisper_context * ctx);
|
| 274 |
+
|
| 275 |
|
| 276 |
// Special tokens
|
| 277 |
WHISPER_API whisper_token whisper_token_eot (struct whisper_context * ctx);
|