ggerganov committed on
Commit
068424c
·
unverified ·
1 Parent(s): a488eb7

main : add option to print the progress (#276)

Browse files
Files changed (2) hide show
  1. Makefile +2 -2
  2. examples/main/main.cpp +59 -56
Makefile CHANGED
@@ -27,8 +27,8 @@ endif
27
  # Compile flags
28
  #
29
 
30
- CFLAGS = -I. -O3 -std=c11 -fPIC
31
- CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
32
  LDFLAGS =
33
 
34
  # OS specific
 
27
  # Compile flags
28
  #
29
 
30
+ CFLAGS = -I. -Ofast -std=c11 -fPIC
31
+ CXXFLAGS = -I. -I./examples -Ofast -std=c++11 -fPIC
32
  LDFLAGS =
33
 
34
  # OS specific
examples/main/main.cpp CHANGED
@@ -62,16 +62,17 @@ struct whisper_params {
62
 
63
  float word_thold = 0.01f;
64
 
65
- bool speed_up = false;
66
- bool translate = false;
67
- bool diarize = false;
68
- bool output_txt = false;
69
- bool output_vtt = false;
70
- bool output_srt = false;
71
- bool output_wts = false;
72
- bool print_special = false;
73
- bool print_colors = false;
74
- bool no_timestamps = false;
 
75
 
76
  std::string language = "en";
77
  std::string prompt = "";
@@ -95,28 +96,29 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
95
  whisper_print_usage(argc, argv, params);
96
  exit(0);
97
  }
98
- else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
99
- else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
100
- else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
101
- else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
102
- else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
103
- else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
104
- else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
105
- else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
106
- else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
107
- else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
108
- else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
109
- else if (arg == "-otxt" || arg == "--output-txt") { params.output_txt = true; }
110
- else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
111
- else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
112
- else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
113
- else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
114
- else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
115
- else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
116
- else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
117
- else if ( arg == "--prompt") { params.prompt = argv[++i]; }
118
- else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
119
- else if (arg == "-f" || arg == "--file") { params.fname_inp.push_back(argv[++i]); }
 
120
  else {
121
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
122
  whisper_print_usage(argc, argv, params);
@@ -132,29 +134,30 @@ void whisper_print_usage(int argc, char ** argv, const whisper_params & params)
132
  fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
133
  fprintf(stderr, "\n");
134
  fprintf(stderr, "options:\n");
135
- fprintf(stderr, " -h, --help [default] show this help message and exit\n");
136
- fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
137
- fprintf(stderr, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors);
138
- fprintf(stderr, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms);
139
- fprintf(stderr, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n);
140
- fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
141
- fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
142
- fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
143
- fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
144
- fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
145
- fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
146
- fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
147
- fprintf(stderr, " -otxt, --output-txt [%-7s] output result in a text file\n", params.output_txt ? "true" : "false");
148
- fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
149
- fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
150
- fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
151
- fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
152
- fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
153
- fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "false" : "true");
154
- fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
155
- fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
156
- fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
157
- fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
 
158
  fprintf(stderr, "\n");
159
  }
160
 
@@ -601,7 +604,7 @@ int main(int argc, char ** argv) {
601
  whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
602
 
603
  wparams.print_realtime = false;
604
- wparams.print_progress = false;
605
  wparams.print_timestamps = !params.no_timestamps;
606
  wparams.print_special = params.print_special;
607
  wparams.translate = params.translate;
 
62
 
63
  float word_thold = 0.01f;
64
 
65
+ bool speed_up = false;
66
+ bool translate = false;
67
+ bool diarize = false;
68
+ bool output_txt = false;
69
+ bool output_vtt = false;
70
+ bool output_srt = false;
71
+ bool output_wts = false;
72
+ bool print_special = false;
73
+ bool print_colors = false;
74
+ bool print_progress = false;
75
+ bool no_timestamps = false;
76
 
77
  std::string language = "en";
78
  std::string prompt = "";
 
96
  whisper_print_usage(argc, argv, params);
97
  exit(0);
98
  }
99
+ else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
100
+ else if (arg == "-p" || arg == "--processors") { params.n_processors = std::stoi(argv[++i]); }
101
+ else if (arg == "-ot" || arg == "--offset-t") { params.offset_t_ms = std::stoi(argv[++i]); }
102
+ else if (arg == "-on" || arg == "--offset-n") { params.offset_n = std::stoi(argv[++i]); }
103
+ else if (arg == "-d" || arg == "--duration") { params.duration_ms = std::stoi(argv[++i]); }
104
+ else if (arg == "-mc" || arg == "--max-context") { params.max_context = std::stoi(argv[++i]); }
105
+ else if (arg == "-ml" || arg == "--max-len") { params.max_len = std::stoi(argv[++i]); }
106
+ else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
107
+ else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
108
+ else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
109
+ else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
110
+ else if (arg == "-otxt" || arg == "--output-txt") { params.output_txt = true; }
111
+ else if (arg == "-ovtt" || arg == "--output-vtt") { params.output_vtt = true; }
112
+ else if (arg == "-osrt" || arg == "--output-srt") { params.output_srt = true; }
113
+ else if (arg == "-owts" || arg == "--output-words") { params.output_wts = true; }
114
+ else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
115
+ else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
116
+ else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
117
+ else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
118
+ else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
119
+ else if ( arg == "--prompt") { params.prompt = argv[++i]; }
120
+ else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
121
+ else if (arg == "-f" || arg == "--file") { params.fname_inp.push_back(argv[++i]); }
122
  else {
123
  fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
124
  whisper_print_usage(argc, argv, params);
 
134
  fprintf(stderr, "usage: %s [options] file0.wav file1.wav ...\n", argv[0]);
135
  fprintf(stderr, "\n");
136
  fprintf(stderr, "options:\n");
137
+ fprintf(stderr, " -h, --help [default] show this help message and exit\n");
138
+ fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
139
+ fprintf(stderr, " -p N, --processors N [%-7d] number of processors to use during computation\n", params.n_processors);
140
+ fprintf(stderr, " -ot N, --offset-t N [%-7d] time offset in milliseconds\n", params.offset_t_ms);
141
+ fprintf(stderr, " -on N, --offset-n N [%-7d] segment index offset\n", params.offset_n);
142
+ fprintf(stderr, " -d N, --duration N [%-7d] duration of audio to process in milliseconds\n", params.duration_ms);
143
+ fprintf(stderr, " -mc N, --max-context N [%-7d] maximum number of text context tokens to store\n", params.max_context);
144
+ fprintf(stderr, " -ml N, --max-len N [%-7d] maximum segment length in characters\n", params.max_len);
145
+ fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
146
+ fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
147
+ fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
148
+ fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
149
+ fprintf(stderr, " -otxt, --output-txt [%-7s] output result in a text file\n", params.output_txt ? "true" : "false");
150
+ fprintf(stderr, " -ovtt, --output-vtt [%-7s] output result in a vtt file\n", params.output_vtt ? "true" : "false");
151
+ fprintf(stderr, " -osrt, --output-srt [%-7s] output result in a srt file\n", params.output_srt ? "true" : "false");
152
+ fprintf(stderr, " -owts, --output-words [%-7s] output script for generating karaoke video\n", params.output_wts ? "true" : "false");
153
+ fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
154
+ fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
155
+ fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
156
+ fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "false" : "true");
157
+ fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
158
+ fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt\n", params.prompt.c_str());
159
+ fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
160
+ fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
161
  fprintf(stderr, "\n");
162
  }
163
 
 
604
  whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
605
 
606
  wparams.print_realtime = false;
607
+ wparams.print_progress = params.print_progress;
608
  wparams.print_timestamps = !params.no_timestamps;
609
  wparams.print_special = params.print_special;
610
  wparams.translate = params.translate;