Spaces:
Sleeping
Sleeping
stream : add "max_tokens" cli arg
Browse files
Controls the max tokens per segment for the stream example
examples/stream/stream.cpp
CHANGED
|
@@ -40,6 +40,7 @@ struct whisper_params {
|
|
| 40 |
int32_t step_ms = 3000;
|
| 41 |
int32_t length_ms = 10000;
|
| 42 |
int32_t capture_id = -1;
|
|
|
|
| 43 |
int32_t audio_ctx = 0;
|
| 44 |
|
| 45 |
bool speed_up = false;
|
|
@@ -70,6 +71,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
|
| 70 |
params.length_ms = std::stoi(argv[++i]);
|
| 71 |
} else if (arg == "-c" || arg == "--capture") {
|
| 72 |
params.capture_id = std::stoi(argv[++i]);
|
|
|
|
|
|
|
| 73 |
} else if (arg == "-ac" || arg == "--audio_ctx") {
|
| 74 |
params.audio_ctx = std::stoi(argv[++i]);
|
| 75 |
} else if (arg == "-su" || arg == "--speed-up") {
|
|
@@ -119,6 +122,7 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
|
|
| 119 |
fprintf(stderr, " --step N audio step size in milliseconds (default: %d)\n", params.step_ms);
|
| 120 |
fprintf(stderr, " --length N audio length in milliseconds (default: %d)\n", params.length_ms);
|
| 121 |
fprintf(stderr, " -c ID, --capture ID capture device ID (default: -1)\n");
|
|
|
|
| 122 |
fprintf(stderr, " -ac N, --audio_ctx N audio context size (default: %d, 0 - all)\n", params.audio_ctx);
|
| 123 |
fprintf(stderr, " -su, --speed-up speed up audio by factor of 2 (faster processing, reduced accuracy, default: %s)\n", params.speed_up ? "true" : "false");
|
| 124 |
fprintf(stderr, " -v, --verbose verbose output\n");
|
|
@@ -333,7 +337,7 @@ int main(int argc, char ** argv) {
|
|
| 333 |
wparams.translate = params.translate;
|
| 334 |
wparams.no_context = params.no_context;
|
| 335 |
wparams.single_segment = true;
|
| 336 |
-
wparams.max_tokens = 32;
|
| 337 |
wparams.language = params.language.c_str();
|
| 338 |
wparams.n_threads = params.n_threads;
|
| 339 |
|
|
|
|
| 40 |
int32_t step_ms = 3000;
|
| 41 |
int32_t length_ms = 10000;
|
| 42 |
int32_t capture_id = -1;
|
| 43 |
+
int32_t max_tokens = 32;
|
| 44 |
int32_t audio_ctx = 0;
|
| 45 |
|
| 46 |
bool speed_up = false;
|
|
|
|
| 71 |
params.length_ms = std::stoi(argv[++i]);
|
| 72 |
} else if (arg == "-c" || arg == "--capture") {
|
| 73 |
params.capture_id = std::stoi(argv[++i]);
|
| 74 |
+
} else if (arg == "-mt" || arg == "--max_tokens") {
|
| 75 |
+
params.max_tokens = std::stoi(argv[++i]);
|
| 76 |
} else if (arg == "-ac" || arg == "--audio_ctx") {
|
| 77 |
params.audio_ctx = std::stoi(argv[++i]);
|
| 78 |
} else if (arg == "-su" || arg == "--speed-up") {
|
|
|
|
| 122 |
fprintf(stderr, " --step N audio step size in milliseconds (default: %d)\n", params.step_ms);
|
| 123 |
fprintf(stderr, " --length N audio length in milliseconds (default: %d)\n", params.length_ms);
|
| 124 |
fprintf(stderr, " -c ID, --capture ID capture device ID (default: -1)\n");
|
| 125 |
+
fprintf(stderr, " -mt N, --max_tokens N maximum number of tokens per audio chunk (default: %d)\n", params.max_tokens);
|
| 126 |
fprintf(stderr, " -ac N, --audio_ctx N audio context size (default: %d, 0 - all)\n", params.audio_ctx);
|
| 127 |
fprintf(stderr, " -su, --speed-up speed up audio by factor of 2 (faster processing, reduced accuracy, default: %s)\n", params.speed_up ? "true" : "false");
|
| 128 |
fprintf(stderr, " -v, --verbose verbose output\n");
|
|
|
|
| 337 |
wparams.translate = params.translate;
|
| 338 |
wparams.no_context = params.no_context;
|
| 339 |
wparams.single_segment = true;
|
| 340 |
+
wparams.max_tokens = params.max_tokens;
|
| 341 |
wparams.language = params.language.c_str();
|
| 342 |
wparams.n_threads = params.n_threads;
|
| 343 |
|