Spaces:
Sleeping
Sleeping
whisper : fix UB when reading buffer of length 0 bytes (#265)
Browse files: whisper.cpp +10 -3
whisper.cpp
CHANGED
|
@@ -549,13 +549,20 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
|
|
| 549 |
//}
|
| 550 |
|
| 551 |
std::string word;
|
|
|
|
| 552 |
for (int i = 0; i < n_vocab; i++) {
|
| 553 |
uint32_t len;
|
| 554 |
read_safe(fin, len);
|
| 555 |
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 559 |
|
| 560 |
vocab.token_to_id[word] = i;
|
| 561 |
vocab.id_to_token[i] = word;
|
|
|
|
| 549 |
//}
|
| 550 |
|
| 551 |
std::string word;
|
| 552 |
+
std::vector<char> tmp;
|
| 553 |
for (int i = 0; i < n_vocab; i++) {
|
| 554 |
uint32_t len;
|
| 555 |
read_safe(fin, len);
|
| 556 |
|
| 557 |
+
if (len > 0) {
|
| 558 |
+
tmp.resize(len);
|
| 559 |
+
fin.read(&tmp[0], tmp.size()); // read to buffer
|
| 560 |
+
word.assign(&tmp[0], tmp.size());
|
| 561 |
+
} else {
|
| 562 |
+
// seems like we have an empty-string token in multi-language models (i = 50256)
|
| 563 |
+
//fprintf(stderr, "%s: warning: empty-string token in vocab, i = %d\n", __func__, i);
|
| 564 |
+
word = "";
|
| 565 |
+
}
|
| 566 |
|
| 567 |
vocab.token_to_id[word] = i;
|
| 568 |
vocab.id_to_token[i] = word;
|