examples : fix n_gpu_layers usage in talk-llama (#1441)
examples/talk-llama/talk-llama.cpp
CHANGED
@@ -266,6 +266,9 @@ int main(int argc, char ** argv) {
     llama_backend_init(true);
 
     auto lmparams = llama_model_default_params();
+    if (!params.use_gpu) {
+        lmparams.n_gpu_layers = 0;
+    }
 
     struct llama_model * model_llama = llama_load_model_from_file(params.model_llama.c_str(), lmparams);
 
@@ -276,9 +279,6 @@
     lcparams.seed = 1;
     lcparams.f16_kv = true;
     lcparams.n_threads = params.n_threads;
-    if (!params.use_gpu) {
-        lcparams.n_gpu_layers = 0;
-    }
 
     struct llama_context * ctx_llama = llama_new_context_with_model(model_llama, lcparams);
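The underlying bug: n_gpu_layers is a field of llama_model_params, not llama_context_params, so assigning it to the context parameters after the model had already been loaded had no effect. Below is a minimal sketch of the corrected initialization order, assuming the llama.cpp C API of this period; the helper name init_llama and the parameters model_path, use_gpu, and n_threads are illustrative, not part of the commit.

#include <string>

#include "llama.h"

// Hypothetical helper showing the corrected order: GPU offload is a *model*
// parameter and must be set before llama_load_model_from_file(); the context
// parameters only carry per-context settings such as the thread count.
static struct llama_context * init_llama(const std::string & model_path, bool use_gpu, int n_threads) {
    llama_backend_init(true);

    auto lmparams = llama_model_default_params();
    if (!use_gpu) {
        lmparams.n_gpu_layers = 0; // keep every layer on the CPU
    }

    struct llama_model * model = llama_load_model_from_file(model_path.c_str(), lmparams);
    if (model == nullptr) {
        return nullptr;
    }

    auto lcparams = llama_context_default_params();
    lcparams.seed      = 1;
    lcparams.f16_kv    = true;
    lcparams.n_threads = n_threads;

    return llama_new_context_with_model(model, lcparams);
}

With this ordering, --use-gpu false (or its equivalent flag) actually prevents layer offload, whereas before the fix the model was loaded with the default n_gpu_layers regardless of the flag.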