ggerganov committed on
Commit
57a7bac
·
1 Parent(s): 6adc1fe

stream : add "max_tokens" cli arg

Browse files

Controls the max tokens per segment for the stream example

Files changed (1) hide show
  1. examples/stream/stream.cpp +5 -1
examples/stream/stream.cpp CHANGED
@@ -40,6 +40,7 @@ struct whisper_params {
40
  int32_t step_ms = 3000;
41
  int32_t length_ms = 10000;
42
  int32_t capture_id = -1;
 
43
  int32_t audio_ctx = 0;
44
 
45
  bool speed_up = false;
@@ -70,6 +71,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
70
  params.length_ms = std::stoi(argv[++i]);
71
  } else if (arg == "-c" || arg == "--capture") {
72
  params.capture_id = std::stoi(argv[++i]);
 
 
73
  } else if (arg == "-ac" || arg == "--audio_ctx") {
74
  params.audio_ctx = std::stoi(argv[++i]);
75
  } else if (arg == "-su" || arg == "--speed-up") {
@@ -119,6 +122,7 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
119
  fprintf(stderr, " --step N audio step size in milliseconds (default: %d)\n", params.step_ms);
120
  fprintf(stderr, " --length N audio length in milliseconds (default: %d)\n", params.length_ms);
121
  fprintf(stderr, " -c ID, --capture ID capture device ID (default: -1)\n");
 
122
  fprintf(stderr, " -ac N, --audio_ctx N audio context size (default: %d, 0 - all)\n", params.audio_ctx);
123
  fprintf(stderr, " -su, --speed-up speed up audio by factor of 2 (faster processing, reduced accuracy, default: %s)\n", params.speed_up ? "true" : "false");
124
  fprintf(stderr, " -v, --verbose verbose output\n");
@@ -333,7 +337,7 @@ int main(int argc, char ** argv) {
333
  wparams.translate = params.translate;
334
  wparams.no_context = params.no_context;
335
  wparams.single_segment = true;
336
- wparams.max_tokens = 32;
337
  wparams.language = params.language.c_str();
338
  wparams.n_threads = params.n_threads;
339
 
 
40
  int32_t step_ms = 3000;
41
  int32_t length_ms = 10000;
42
  int32_t capture_id = -1;
43
+ int32_t max_tokens = 32;
44
  int32_t audio_ctx = 0;
45
 
46
  bool speed_up = false;
 
71
  params.length_ms = std::stoi(argv[++i]);
72
  } else if (arg == "-c" || arg == "--capture") {
73
  params.capture_id = std::stoi(argv[++i]);
74
+ } else if (arg == "-mt" || arg == "--max_tokens") {
75
+ params.max_tokens = std::stoi(argv[++i]);
76
  } else if (arg == "-ac" || arg == "--audio_ctx") {
77
  params.audio_ctx = std::stoi(argv[++i]);
78
  } else if (arg == "-su" || arg == "--speed-up") {
 
122
  fprintf(stderr, " --step N audio step size in milliseconds (default: %d)\n", params.step_ms);
123
  fprintf(stderr, " --length N audio length in milliseconds (default: %d)\n", params.length_ms);
124
  fprintf(stderr, " -c ID, --capture ID capture device ID (default: -1)\n");
125
+ fprintf(stderr, " -mt N, --max_tokens N maximum number of tokens per audio chunk (default: %d)\n", params.max_tokens);
126
  fprintf(stderr, " -ac N, --audio_ctx N audio context size (default: %d, 0 - all)\n", params.audio_ctx);
127
  fprintf(stderr, " -su, --speed-up speed up audio by factor of 2 (faster processing, reduced accuracy, default: %s)\n", params.speed_up ? "true" : "false");
128
  fprintf(stderr, " -v, --verbose verbose output\n");
 
337
  wparams.translate = params.translate;
338
  wparams.no_context = params.no_context;
339
  wparams.single_segment = true;
340
+ wparams.max_tokens = params.max_tokens;
341
  wparams.language = params.language.c_str();
342
  wparams.n_threads = params.n_threads;
343