Ryan Johnson committed on
Commit
168712d
·
unverified ·
1 Parent(s): 494fb84

go : improve model download (#2756)

Browse files

* Updated models download URL

* Updated list of models available

All of the high efficiency quantized models are rejected when trying to download. They exist on the server. Let's allow them.

* added path prefix for whisper-cli in message to user. The message is misleading if this script is called from another script in a different folder. So the message has to be fixed.

* undid download URL change I made earlier. Fixed filepath.Join(urlPath, model) bug.

* Undid download URL change I made earlier.

Seems that the old URL works but only when provided a model to download. Still doesn't explain why there's a different download URL that also works. Please elucidate in docs.

* Fixed URLForModel Function's bug

filepath.Join is designed for filesystem paths, and it uses backslashes (\) on Windows. URLs, however, require forward slashes (/), so the use of filepath.Join is inappropriate for constructing URLs.

The fmt.Sprintf function ensures that forward slashes are used.

* Fixed URL trailing / double slash bug

Ensure no double slash by trimming trailing '/' from srcUrl if present

* Fixed bad download URL, missing ggml prefix

Not sure if that was a bug I introduced but it was trying to download without the prefix.

* Added question before downloading all models. Added download size estimate

HEAD Requests:
Efficiently fetches file sizes without downloading the content.
Interactive Workflow:
Allows the user to make informed decisions about downloading all models.
Safe Defaults:
Aborts if the user does not explicitly confirm.

* Fixed Unbuffered channel warning.

Warning in context.go: misuse of unbuffered os.Signal channel as argument to signal.Notify.

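The warning indicates that the unbuffered channel passed to signal.Notify in context.go may be misused. In Go, the signal package never blocks when delivering to the channel, so with an unbuffered channel a signal is silently dropped if the receiving goroutine is not already waiting at the moment it arrives. Giving the channel a buffer of 1 guarantees that the first signal is kept until it is read.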

* Fixed download size calculation, download URL prefix bug, added link to models URL for user.

The URL formatter was prepending the model name to the formatted model name in the URL

* Added logs and exes to gitignore

* Delete bindings/go/examples/go-model-download/go-model-download.exe

* Delete whisper_build.log

.gitignore CHANGED
@@ -58,3 +58,5 @@ cmake-build-debug/
58
  .cxx/
59
  .gradle/
60
  local.properties
 
 
 
58
  .cxx/
59
  .gradle/
60
  local.properties
61
# Build logs and compiled Windows executables (e.g. whisper_build.log,
# go-model-download.exe). The leading "*" is required: a bare ".log" would
# only match a file whose entire name is ".log".
*.log
*.exe
bindings/go/examples/go-model-download/context.go CHANGED
@@ -9,22 +9,23 @@ import (
9
  // ContextForSignal returns a context object which is cancelled when a signal
10
  // is received. It returns nil if no signal parameter is provided
11
  func ContextForSignal(signals ...os.Signal) context.Context {
12
- if len(signals) == 0 {
13
- return nil
14
- }
15
 
16
- ch := make(chan os.Signal)
17
- ctx, cancel := context.WithCancel(context.Background())
18
 
19
- // Send message on channel when signal received
20
- signal.Notify(ch, signals...)
21
 
22
- // When any signal received, call cancel
23
- go func() {
24
- <-ch
25
- cancel()
26
- }()
27
 
28
- // Return success
29
- return ctx
30
  }
 
 
9
  // ContextForSignal returns a context object which is cancelled when a signal
10
  // is received. It returns nil if no signal parameter is provided
11
  func ContextForSignal(signals ...os.Signal) context.Context {
12
+ if len(signals) == 0 {
13
+ return nil
14
+ }
15
 
16
+ ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
17
+ ctx, cancel := context.WithCancel(context.Background())
18
 
19
+ // Send message on channel when signal received
20
+ signal.Notify(ch, signals...)
21
 
22
+ // When any signal is received, call cancel
23
+ go func() {
24
+ <-ch
25
+ cancel()
26
+ }()
27
 
28
+ // Return success
29
+ return ctx
30
  }
31
+
bindings/go/examples/go-model-download/main.go CHANGED
@@ -9,6 +9,7 @@ import (
9
  "net/url"
10
  "os"
11
  "path/filepath"
 
12
  "syscall"
13
  "time"
14
  )
@@ -17,14 +18,27 @@ import (
17
  // CONSTANTS
18
 
19
  const (
20
- srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
21
- srcExt = ".bin" // Filename extension
22
- bufSize = 1024 * 64 // Size of the buffer used for downloading the model
23
  )
24
 
25
  var (
26
  // The models which will be downloaded, if no model is specified as an argument
27
- modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3", "large-v3-turbo"}
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  )
29
 
30
  var (
@@ -44,7 +58,25 @@ var (
44
  func main() {
45
  flag.Usage = func() {
46
  name := filepath.Base(flag.CommandLine.Name())
47
- fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  flag.PrintDefaults()
49
  }
50
  flag.Parse()
@@ -114,23 +146,87 @@ func GetOut() (string, error) {
114
  // GetModels returns the list of models to download
115
  func GetModels() []string {
116
  if flag.NArg() == 0 {
117
- return modelNames
118
- } else {
119
- return flag.Args()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  }
122
 
123
  // URLForModel returns the URL for the given model on huggingface.co
124
  func URLForModel(model string) (string, error) {
 
 
 
 
 
 
125
  if filepath.Ext(model) != srcExt {
126
  model += srcExt
127
  }
 
 
128
  url, err := url.Parse(srcUrl)
129
  if err != nil {
130
  return "", err
131
- } else {
132
- url.Path = filepath.Join(url.Path, model)
133
  }
 
 
 
134
  return url.String(), nil
135
  }
136
 
 
9
  "net/url"
10
  "os"
11
  "path/filepath"
12
+ "strings"
13
  "syscall"
14
  "time"
15
  )
 
18
  // CONSTANTS
19
 
20
  const (
21
+ srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
22
+ srcExt = ".bin" // Filename extension
23
+ bufSize = 1024 * 64 // Size of the buffer used for downloading the model
24
  )
25
 
26
  var (
27
  // The models which will be downloaded, if no model is specified as an argument
28
+ modelNames = []string{
29
+ "tiny", "tiny-q5_1", "tiny-q8_0",
30
+ "tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
31
+ "base", "base-q5_1", "base-q8_0",
32
+ "base.en", "base.en-q5_1", "base.en-q8_0",
33
+ "small", "small-q5_1", "small-q8_0",
34
+ "small.en", "small.en-q5_1", "small.en-q8_0",
35
+ "medium", "medium-q5_0", "medium-q8_0",
36
+ "medium.en", "medium.en-q5_0", "medium.en-q8_0",
37
+ "large-v1",
38
+ "large-v2", "large-v2-q5_0", "large-v2-q8_0",
39
+ "large-v3", "large-v3-q5_0",
40
+ "large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
41
+ }
42
  )
43
 
44
  var (
 
58
  func main() {
59
  flag.Usage = func() {
60
  name := filepath.Base(flag.CommandLine.Name())
61
+ fmt.Fprintf(flag.CommandLine.Output(), `
62
+ Usage: %s [options] [<model>...]
63
+
64
+ Options:
65
+ -out string Specify the output folder where models will be saved.
66
+ Default: Current working directory.
67
+ -timeout duration Set the maximum duration for downloading a model.
68
+ Example: 10m, 1h (default: 30m0s).
69
+ -quiet Suppress all output except errors.
70
+
71
+ Examples:
72
+ 1. Download a specific model:
73
+ %s -out ./models tiny-q8_0
74
+
75
+ 2. Download all models:
76
+ %s -out ./models
77
+
78
+ `, name, name, name)
79
+
80
  flag.PrintDefaults()
81
  }
82
  flag.Parse()
 
146
  // GetModels returns the list of models to download
147
  func GetModels() []string {
148
  if flag.NArg() == 0 {
149
+ fmt.Println("No model specified.")
150
+ fmt.Println("Preparing to download all models...")
151
+
152
+ // Calculate total download size
153
+ fmt.Println("Calculating total download size...")
154
+ totalSize, err := CalculateTotalDownloadSize(modelNames)
155
+ if err != nil {
156
+ fmt.Println("Error calculating download sizes:", err)
157
+ os.Exit(1)
158
+ }
159
+
160
+ fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
161
+ fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
162
+ fmt.Println("Would you like to download all models? (y/N)")
163
+
164
+ // Prompt for user input
165
+ var response string
166
+ fmt.Scanln(&response)
167
+ if response != "y" && response != "Y" {
168
+ fmt.Println("Aborting. Specify a model to download.")
169
+ os.Exit(0)
170
+ }
171
+
172
+ return modelNames // Return all models if confirmed
173
  }
174
+ return flag.Args() // Return specific models if arguments are provided
175
+ }
176
+
177
+ func CalculateTotalDownloadSize(models []string) (int64, error) {
178
+ var totalSize int64
179
+ client := http.Client{}
180
+
181
+ for _, model := range models {
182
+ modelURL, err := URLForModel(model)
183
+ if err != nil {
184
+ return 0, err
185
+ }
186
+
187
+ // Issue a HEAD request to get the file size
188
+ req, err := http.NewRequest("HEAD", modelURL, nil)
189
+ if err != nil {
190
+ return 0, err
191
+ }
192
+
193
+ resp, err := client.Do(req)
194
+ if err != nil {
195
+ return 0, err
196
+ }
197
+ resp.Body.Close()
198
+
199
+ if resp.StatusCode != http.StatusOK {
200
+ fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
201
+ continue
202
+ }
203
+
204
+ size := resp.ContentLength
205
+ totalSize += size
206
+ }
207
+ return totalSize, nil
208
  }
209
 
210
  // URLForModel returns the URL for the given model on huggingface.co
211
  func URLForModel(model string) (string, error) {
212
+ // Ensure "ggml-" prefix is added only once
213
+ if !strings.HasPrefix(model, "ggml-") {
214
+ model = "ggml-" + model
215
+ }
216
+
217
+ // Ensure ".bin" extension is added only once
218
  if filepath.Ext(model) != srcExt {
219
  model += srcExt
220
  }
221
+
222
+ // Parse the base URL
223
  url, err := url.Parse(srcUrl)
224
  if err != nil {
225
  return "", err
 
 
226
  }
227
+
228
+ // Ensure no trailing slash in the base URL
229
+ url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
230
  return url.String(), nil
231
  }
232
 
models/download-ggml-model.cmd CHANGED
@@ -8,7 +8,18 @@ popd
8
  set argc=0
9
  for %%x in (%*) do set /A argc+=1
10
 
11
- set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  if %argc% neq 1 (
14
  echo.
@@ -50,7 +61,7 @@ if %ERRORLEVEL% neq 0 (
50
 
51
  echo Done! Model %model% saved in %root_path%\models\ggml-%model%.bin
52
  echo You can now use it like this:
53
- echo build\bin\Release\whisper-cli.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav
54
 
55
  goto :eof
56
 
 
8
  set argc=0
9
  for %%x in (%*) do set /A argc+=1
10
 
11
+ set models=tiny tiny-q5_1 tiny-q8_0 ^
12
+ tiny.en tiny.en-q5_1 tiny.en-q8_0 ^
13
+ base base-q5_1 base-q8_0 ^
14
+ base.en base.en-q5_1 base.en-q8_0 ^
15
+ small small-q5_1 small-q8_0 ^
16
+ small.en small.en-q5_1 small.en-q8_0 ^
17
+ medium medium-q5_0 medium-q8_0 ^
18
+ medium.en medium.en-q5_0 medium.en-q8_0 ^
19
+ large-v1 ^
20
+ large-v2 large-v2-q5_0 large-v2-q8_0 ^
21
+ large-v3 large-v3-q5_0 ^
22
+ large-v3-turbo large-v3-turbo-q5_0 large-v3-turbo-q8_0
23
 
24
  if %argc% neq 1 (
25
  echo.
 
61
 
62
  echo Done! Model %model% saved in %root_path%\models\ggml-%model%.bin
63
  echo You can now use it like this:
64
+ echo %~dp0build\bin\Release\whisper-cli.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav
65
 
66
  goto :eof
67