Spaces:
Running
Running
whisper : use backend registry (#0)
Browse files- src/whisper.cpp +51 -127
src/whisper.cpp
CHANGED
|
@@ -1,43 +1,19 @@
|
|
| 1 |
#include "whisper.h"
|
| 2 |
|
| 3 |
-
#ifdef WHISPER_USE_COREML
|
| 4 |
-
#include "coreml/whisper-encoder.h"
|
| 5 |
-
#endif
|
| 6 |
-
|
| 7 |
#include "ggml-cpu.h"
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
#include "ggml-
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
#ifdef GGML_USE_CUDA
|
| 14 |
-
#include "ggml-cuda.h"
|
| 15 |
-
#endif
|
| 16 |
-
|
| 17 |
-
#ifdef GGML_USE_SYCL
|
| 18 |
-
#include "ggml-sycl.h"
|
| 19 |
-
#endif
|
| 20 |
-
|
| 21 |
-
#ifdef GGML_USE_VULKAN
|
| 22 |
-
#include "ggml-vulkan.h"
|
| 23 |
-
#endif
|
| 24 |
|
| 25 |
-
#ifdef
|
| 26 |
-
#include "
|
| 27 |
#endif
|
| 28 |
|
| 29 |
#ifdef WHISPER_USE_OPENVINO
|
| 30 |
#include "openvino/whisper-openvino-encoder.h"
|
| 31 |
#endif
|
| 32 |
|
| 33 |
-
#ifdef GGML_USE_CANN
|
| 34 |
-
#include "ggml-cann.h"
|
| 35 |
-
#endif
|
| 36 |
-
|
| 37 |
-
#include "ggml.h"
|
| 38 |
-
#include "ggml-alloc.h"
|
| 39 |
-
#include "ggml-backend.h"
|
| 40 |
-
|
| 41 |
#include <atomic>
|
| 42 |
#include <algorithm>
|
| 43 |
#include <cassert>
|
|
@@ -195,14 +171,13 @@ static bool ggml_graph_compute_helper(
|
|
| 195 |
|
| 196 |
for (int i = 0; i < ggml_backend_sched_get_n_backends(sched); ++i) {
|
| 197 |
ggml_backend_t backend = ggml_backend_sched_get_backend(sched, i);
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
if (
|
| 203 |
-
|
| 204 |
}
|
| 205 |
-
#endif
|
| 206 |
}
|
| 207 |
|
| 208 |
bool t = ggml_backend_sched_graph_compute(sched, graph) == GGML_STATUS_SUCCESS;
|
|
@@ -1256,67 +1231,23 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
|
|
| 1256 |
}
|
| 1257 |
|
| 1258 |
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
|
| 1259 |
-
ggml_backend_t result = NULL;
|
| 1260 |
-
|
| 1261 |
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
|
| 1262 |
|
| 1263 |
-
#ifdef GGML_USE_CUDA
|
| 1264 |
if (params.use_gpu) {
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
WHISPER_LOG_INFO("%s: using Metal backend\n", __func__);
|
| 1276 |
-
result = ggml_backend_metal_init();
|
| 1277 |
-
if (!result) {
|
| 1278 |
-
WHISPER_LOG_ERROR("%s: ggml_backend_metal_init() failed\n", __func__);
|
| 1279 |
-
} else if (!ggml_backend_metal_supports_family(result, 7)) {
|
| 1280 |
-
WHISPER_LOG_ERROR("%s: Metal GPU does not support family 7 - falling back to CPU\n", __func__);
|
| 1281 |
-
ggml_backend_free(result);
|
| 1282 |
-
result = NULL;
|
| 1283 |
-
}
|
| 1284 |
-
}
|
| 1285 |
-
#endif
|
| 1286 |
-
|
| 1287 |
-
#ifdef GGML_USE_SYCL
|
| 1288 |
-
if (params.use_gpu) {
|
| 1289 |
-
WHISPER_LOG_INFO("%s: using SYCL backend\n", __func__);
|
| 1290 |
-
result = ggml_backend_sycl_init(params.gpu_device);
|
| 1291 |
-
if (!result) {
|
| 1292 |
-
WHISPER_LOG_ERROR("%s: ggml_backend_sycl_init() failed\n", __func__);
|
| 1293 |
-
}
|
| 1294 |
-
}
|
| 1295 |
-
#endif
|
| 1296 |
-
|
| 1297 |
-
#ifdef GGML_USE_VULKAN
|
| 1298 |
-
if (params.use_gpu) {
|
| 1299 |
-
WHISPER_LOG_INFO("%s: using Vulkan backend\n", __func__);
|
| 1300 |
-
result = ggml_backend_vk_init(params.gpu_device);
|
| 1301 |
-
if (!result) {
|
| 1302 |
-
WHISPER_LOG_ERROR("%s: ggml_backend_vk_init() failed\n", __func__);
|
| 1303 |
-
}
|
| 1304 |
-
}
|
| 1305 |
-
#endif
|
| 1306 |
-
|
| 1307 |
-
#ifdef GGML_USE_CANN
|
| 1308 |
-
if (params.use_gpu) {
|
| 1309 |
-
WHISPER_LOG_INFO("%s: using CANN backend\n", __func__);
|
| 1310 |
-
result = ggml_backend_cann_init(params.gpu_device);
|
| 1311 |
-
if (!result) {
|
| 1312 |
-
WHISPER_LOG_ERROR("%s: ggml_backend_cann_init() failed\n", __func__);
|
| 1313 |
}
|
| 1314 |
}
|
| 1315 |
-
#endif
|
| 1316 |
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
return result;
|
| 1320 |
}
|
| 1321 |
|
| 1322 |
static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_params & params) {
|
|
@@ -1328,17 +1259,19 @@ static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_pa
|
|
| 1328 |
result.push_back(backend_gpu);
|
| 1329 |
}
|
| 1330 |
|
| 1331 |
-
|
| 1332 |
-
{
|
| 1333 |
-
|
| 1334 |
-
|
| 1335 |
-
|
| 1336 |
-
|
| 1337 |
-
|
| 1338 |
-
|
|
|
|
|
|
|
|
|
|
| 1339 |
}
|
| 1340 |
}
|
| 1341 |
-
#endif
|
| 1342 |
|
| 1343 |
GGML_UNUSED(params);
|
| 1344 |
|
|
@@ -1348,33 +1281,20 @@ static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_pa
|
|
| 1348 |
}
|
| 1349 |
|
| 1350 |
static ggml_backend_buffer_type_t whisper_default_buffer_type(const whisper_context_params & params) {
|
| 1351 |
-
|
| 1352 |
-
|
| 1353 |
-
|
| 1354 |
-
|
| 1355 |
-
#ifdef GGML_USE_CUDA
|
| 1356 |
-
result || (result = ggml_backend_cuda_buffer_type(params.gpu_device));
|
| 1357 |
-
#endif
|
| 1358 |
-
|
| 1359 |
-
#ifdef GGML_USE_METAL
|
| 1360 |
-
result || (result = ggml_backend_metal_buffer_type());
|
| 1361 |
-
#endif
|
| 1362 |
-
|
| 1363 |
-
#ifdef GGML_USE_SYCL
|
| 1364 |
-
result || (result = ggml_backend_sycl_buffer_type(params.gpu_device));
|
| 1365 |
-
#endif
|
| 1366 |
-
|
| 1367 |
-
#ifdef GGML_USE_VULKAN
|
| 1368 |
-
result || (result = ggml_backend_vk_buffer_type(params.gpu_device));
|
| 1369 |
-
#endif
|
| 1370 |
-
|
| 1371 |
-
#ifdef GGML_USE_CANN
|
| 1372 |
-
result || (result == ggml_backend_cann_buffer_type(params.gpu_device));
|
| 1373 |
-
#endif
|
| 1374 |
|
| 1375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1376 |
|
| 1377 |
-
return
|
| 1378 |
}
|
| 1379 |
|
| 1380 |
// load the model from a ggml file
|
|
@@ -3668,8 +3588,7 @@ struct whisper_context * whisper_init_with_params_no_state(struct whisper_model_
|
|
| 3668 |
WHISPER_LOG_INFO("%s: flash attn = %d\n", __func__, params.flash_attn);
|
| 3669 |
WHISPER_LOG_INFO("%s: gpu_device = %d\n", __func__, params.gpu_device);
|
| 3670 |
WHISPER_LOG_INFO("%s: dtw = %d\n", __func__, params.dtw_token_timestamps);
|
| 3671 |
-
|
| 3672 |
-
// TODO: temporary call to force backend registry initialization
|
| 3673 |
WHISPER_LOG_INFO("%s: backends = %zu\n", __func__, ggml_backend_reg_count());
|
| 3674 |
|
| 3675 |
whisper_context * ctx = new whisper_context;
|
|
@@ -7427,6 +7346,11 @@ static void whisper_log_internal(ggml_log_level level, const char * format, ...)
|
|
| 7427 |
static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
|
| 7428 |
(void) level;
|
| 7429 |
(void) user_data;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7430 |
fputs(text, stderr);
|
| 7431 |
fflush(stderr);
|
| 7432 |
}
|
|
|
|
| 1 |
#include "whisper.h"
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
#include "ggml-cpu.h"
|
| 4 |
|
| 5 |
+
#include "ggml.h"
|
| 6 |
+
#include "ggml-alloc.h"
|
| 7 |
+
#include "ggml-backend.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
#ifdef WHISPER_USE_COREML
|
| 10 |
+
#include "coreml/whisper-encoder.h"
|
| 11 |
#endif
|
| 12 |
|
| 13 |
#ifdef WHISPER_USE_OPENVINO
|
| 14 |
#include "openvino/whisper-openvino-encoder.h"
|
| 15 |
#endif
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
#include <atomic>
|
| 18 |
#include <algorithm>
|
| 19 |
#include <cassert>
|
|
|
|
| 171 |
|
| 172 |
for (int i = 0; i < ggml_backend_sched_get_n_backends(sched); ++i) {
|
| 173 |
ggml_backend_t backend = ggml_backend_sched_get_backend(sched, i);
|
| 174 |
+
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
| 175 |
+
ggml_backend_reg_t reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
|
| 176 |
+
|
| 177 |
+
auto * fn_set_n_threads = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
| 178 |
+
if (fn_set_n_threads) {
|
| 179 |
+
fn_set_n_threads(backend, n_threads);
|
| 180 |
}
|
|
|
|
| 181 |
}
|
| 182 |
|
| 183 |
bool t = ggml_backend_sched_graph_compute(sched, graph) == GGML_STATUS_SUCCESS;
|
|
|
|
| 1231 |
}
|
| 1232 |
|
| 1233 |
// Initialize the GPU backend selected by `params`.
//
// Forwards the whisper log callback to ggml, then - if params.use_gpu is set -
// walks the ggml device registry and picks the params.gpu_device-th device of
// type GPU (0-based, counting GPU devices only). If that index is out of range
// (or negative), the first GPU device found is used instead.
//
// Returns the initialized backend, or nullptr when GPU use is disabled, no GPU
// device is registered, or the selected device fails to initialize.
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
    ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);

    if (!params.use_gpu) {
        return nullptr;
    }

    ggml_backend_dev_t dev = nullptr;

    // number of GPU-type devices seen so far
    int n_gpu = 0;

    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev_cur = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev_cur) != GGML_BACKEND_DEVICE_TYPE_GPU) {
            continue;
        }

        // remember the first GPU as a fallback, and override it with the requested one
        if (n_gpu == 0 || n_gpu == params.gpu_device) {
            dev = dev_cur;
        }

        // stop once the requested index has been visited
        if (++n_gpu > params.gpu_device) {
            break;
        }
    }

    if (dev == nullptr) {
        return nullptr;
    }

    WHISPER_LOG_INFO("%s: using %s backend\n", __func__, ggml_backend_dev_name(dev));

    ggml_backend_t result = ggml_backend_dev_init(dev, nullptr);
    if (!result) {
        WHISPER_LOG_ERROR("%s: failed to initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
    }

    return result;
}
|
| 1252 |
|
| 1253 |
static std::vector<ggml_backend_t> whisper_backend_init(const whisper_context_params & params) {
|
|
|
|
| 1259 |
result.push_back(backend_gpu);
|
| 1260 |
}
|
| 1261 |
|
| 1262 |
+
// ACCEL backends
|
| 1263 |
+
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
|
| 1264 |
+
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
| 1265 |
+
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_ACCEL) {
|
| 1266 |
+
WHISPER_LOG_INFO("%s: using %s backend\n", __func__, ggml_backend_dev_name(dev));
|
| 1267 |
+
ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
|
| 1268 |
+
if (!backend) {
|
| 1269 |
+
WHISPER_LOG_ERROR("%s: failed to initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
|
| 1270 |
+
continue;
|
| 1271 |
+
}
|
| 1272 |
+
result.push_back(backend);
|
| 1273 |
}
|
| 1274 |
}
|
|
|
|
| 1275 |
|
| 1276 |
GGML_UNUSED(params);
|
| 1277 |
|
|
|
|
| 1281 |
}
|
| 1282 |
|
| 1283 |
// Choose the buffer type used by default for the given context parameters.
//
// When params.use_gpu is false, or no GPU-type device is registered, the CPU
// buffer type is returned. Otherwise the buffer type of the
// params.gpu_device-th GPU device is returned (0-based, counting GPU devices
// only); if that index is out of range (or negative), the first GPU device
// found is used instead.
static ggml_backend_buffer_type_t whisper_default_buffer_type(const whisper_context_params & params) {
    if (!params.use_gpu) {
        return ggml_backend_cpu_buffer_type();
    }

    ggml_backend_dev_t dev = nullptr;

    // number of GPU-type devices seen so far
    int n_gpu = 0;

    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev_cur = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev_cur) != GGML_BACKEND_DEVICE_TYPE_GPU) {
            continue;
        }

        // remember the first GPU as a fallback, and override it with the requested one
        if (n_gpu == 0 || n_gpu == params.gpu_device) {
            dev = dev_cur;
        }

        // stop once the requested index has been visited
        if (++n_gpu > params.gpu_device) {
            break;
        }
    }

    // no GPU device available - fall back to the CPU buffer type
    if (dev == nullptr) {
        return ggml_backend_cpu_buffer_type();
    }

    WHISPER_LOG_INFO("%s: using device %s (%s)\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));

    return ggml_backend_dev_buffer_type(dev);
}
|
| 1299 |
|
| 1300 |
// load the model from a ggml file
|
|
|
|
| 3588 |
WHISPER_LOG_INFO("%s: flash attn = %d\n", __func__, params.flash_attn);
|
| 3589 |
WHISPER_LOG_INFO("%s: gpu_device = %d\n", __func__, params.gpu_device);
|
| 3590 |
WHISPER_LOG_INFO("%s: dtw = %d\n", __func__, params.dtw_token_timestamps);
|
| 3591 |
+
WHISPER_LOG_INFO("%s: devices = %zu\n", __func__, ggml_backend_dev_count());
|
|
|
|
| 3592 |
WHISPER_LOG_INFO("%s: backends = %zu\n", __func__, ggml_backend_reg_count());
|
| 3593 |
|
| 3594 |
whisper_context * ctx = new whisper_context;
|
|
|
|
| 7346 |
// Default log sink: writes `text` to stderr and flushes it immediately.
// Unless WHISPER_DEBUG is defined, messages with level GGML_LOG_LEVEL_DEBUG are dropped.
static void whisper_log_callback_default(ggml_log_level level, const char * text, void * user_data) {
    // `user_data` is never read; `level` is only read when WHISPER_DEBUG is not defined
    (void) user_data;
    (void) level;

#ifdef WHISPER_DEBUG
    const bool skip_message = false;
#else
    const bool skip_message = (level == GGML_LOG_LEVEL_DEBUG);
#endif

    if (!skip_message) {
        fputs(text, stderr);
        fflush(stderr);
    }
}
|