Spaces:
Running
Running
Przemysław Pawełczyk
committed on
talk-llama : fix build after ggml sync (#1049)
Browse files
sed -i 's,GGML_BACKEND_CUDA,GGML_BACKEND_GPU,g' examples/talk-llama/llama.cpp
examples/talk-llama/llama.cpp
CHANGED
|
@@ -1002,7 +1002,7 @@ static void llama_model_load_internal(
|
|
| 1002 |
}
|
| 1003 |
|
| 1004 |
#ifdef GGML_USE_CUBLAS
|
| 1005 |
-
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CUDA
|
| 1006 |
#else
|
| 1007 |
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
|
| 1008 |
#endif
|
|
@@ -1054,7 +1054,7 @@ static void llama_model_load_internal(
|
|
| 1054 |
layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend);
|
| 1055 |
layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
|
| 1056 |
|
| 1057 |
-
if (backend == GGML_BACKEND_CUDA) {
|
| 1058 |
vram_total +=
|
| 1059 |
ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
|
| 1060 |
ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
|
|
@@ -1115,7 +1115,7 @@ static void llama_model_load_internal(
|
|
| 1115 |
}
|
| 1116 |
}
|
| 1117 |
for (llama_load_tensor & lt : ml->tensors_map.tensors) {
|
| 1118 |
-
if (lt.ggml_tensor->backend != GGML_BACKEND_CUDA) {
|
| 1119 |
continue;
|
| 1120 |
}
|
| 1121 |
if (progress_callback) {
|
|
|
|
| 1002 |
}
|
| 1003 |
|
| 1004 |
#ifdef GGML_USE_CUBLAS
|
| 1005 |
+
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
|
| 1006 |
#else
|
| 1007 |
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_CPU
|
| 1008 |
#endif
|
|
|
|
| 1054 |
layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend);
|
| 1055 |
layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);
|
| 1056 |
|
| 1057 |
+
if (backend == GGML_BACKEND_GPU) {
|
| 1058 |
vram_total +=
|
| 1059 |
ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
|
| 1060 |
ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
|
|
|
|
| 1115 |
}
|
| 1116 |
}
|
| 1117 |
for (llama_load_tensor & lt : ml->tensors_map.tensors) {
|
| 1118 |
+
if (lt.ggml_tensor->backend != GGML_BACKEND_GPU) {
|
| 1119 |
continue;
|
| 1120 |
}
|
| 1121 |
if (progress_callback) {
|