Diego Devesa ggerganov committed on
Commit
b73266f
·
1 Parent(s): 385a521

ggml : add support for dynamic loading of backends (llama/10469)

Browse files

* ggml : add support for dynamic loading of backends

---------

Co-authored-by: Georgi Gerganov <[email protected]>

ggml/CMakeLists.txt CHANGED
@@ -33,6 +33,7 @@ else()
33
  endif()
34
 
35
  option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
 
36
 
37
  #
38
  # option list
 
33
  endif()
34
 
35
  option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
36
+ option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
37
 
38
  #
39
  # option list
ggml/include/ggml-backend.h CHANGED
@@ -190,6 +190,14 @@ extern "C" {
190
  typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
191
  // Get additional buffer types provided by the device (returns a NULL-terminated array)
192
  typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
 
 
 
 
 
 
 
 
193
 
194
  //
195
  // Backend registry
@@ -214,6 +222,13 @@ extern "C" {
214
  // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
215
  GGML_API ggml_backend_t ggml_backend_init_best(void);
216
 
 
 
 
 
 
 
 
217
  //
218
  // Backend scheduler
219
  //
 
190
  typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
191
  // Get additional buffer types provided by the device (returns a NULL-terminated array)
192
  typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
193
+ // Set the abort callback for the backend
194
+ typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
195
+ // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
196
+ struct ggml_backend_feature {
197
+ const char * name;
198
+ const char * value;
199
+ };
200
+ typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
201
 
202
  //
203
  // Backend registry
 
222
  // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
223
  GGML_API ggml_backend_t ggml_backend_init_best(void);
224
 
225
+ // Load a backend from a dynamic library and register it
226
+ GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
227
+ // Unload a backend if loaded dynamically and unregister it
228
+ GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
229
+ // Load all known backends from dynamic libraries
230
+ GGML_API void ggml_backend_load_all(void);
231
+
232
  //
233
  // Backend scheduler
234
  //
ggml/include/ggml-cpu.h CHANGED
@@ -7,29 +7,6 @@
7
  extern "C" {
8
  #endif
9
 
10
- // Scheduling priorities
11
- enum ggml_sched_priority {
12
- GGML_SCHED_PRIO_NORMAL,
13
- GGML_SCHED_PRIO_MEDIUM,
14
- GGML_SCHED_PRIO_HIGH,
15
- GGML_SCHED_PRIO_REALTIME
16
- };
17
-
18
- // Threadpool params
19
- // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
20
- struct ggml_threadpool_params {
21
- bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
22
- int n_threads; // number of threads
23
- enum ggml_sched_priority prio; // thread priority
24
- uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
25
- bool strict_cpu; // strict cpu placement
26
- bool paused; // start in paused state
27
- };
28
-
29
- struct ggml_threadpool; // forward declaration, see ggml.c
30
-
31
- typedef struct ggml_threadpool * ggml_threadpool_t;
32
-
33
  // the compute plan that needs to be prepared for ggml_graph_compute()
34
  // since https://github.com/ggerganov/ggml/issues/287
35
  struct ggml_cplan {
@@ -75,14 +52,11 @@ extern "C" {
75
  GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
76
  GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
77
 
78
- GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
79
- GGML_BACKEND_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
80
- GGML_BACKEND_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
81
- GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
82
- GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
83
- GGML_BACKEND_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
84
- GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
85
- GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
86
 
87
  // ggml_graph_plan() has to be called before ggml_graph_compute()
88
  // when plan.work_size > 0, caller must allocate memory for plan.work_data
@@ -104,10 +78,10 @@ extern "C" {
104
  GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
105
  GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
106
  GGML_BACKEND_API int ggml_cpu_has_avx (void);
 
107
  GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
108
  GGML_BACKEND_API int ggml_cpu_has_f16c (void);
109
  GGML_BACKEND_API int ggml_cpu_has_fma (void);
110
- GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
111
  GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
112
  GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
113
  GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
 
7
  extern "C" {
8
  #endif
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  // the compute plan that needs to be prepared for ggml_graph_compute()
11
  // since https://github.com/ggerganov/ggml/issues/287
12
  struct ggml_cplan {
 
52
  GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
53
  GGML_BACKEND_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
54
 
55
+ GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new (struct ggml_threadpool_params * params);
56
+ GGML_BACKEND_API void ggml_threadpool_free (struct ggml_threadpool * threadpool);
57
+ GGML_BACKEND_API int ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
58
+ GGML_BACKEND_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
59
+ GGML_BACKEND_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
 
 
 
60
 
61
  // ggml_graph_plan() has to be called before ggml_graph_compute()
62
  // when plan.work_size > 0, caller must allocate memory for plan.work_data
 
78
  GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
79
  GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
80
  GGML_BACKEND_API int ggml_cpu_has_avx (void);
81
+ GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
82
  GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
83
  GGML_BACKEND_API int ggml_cpu_has_f16c (void);
84
  GGML_BACKEND_API int ggml_cpu_has_fma (void);
 
85
  GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
86
  GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
87
  GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
ggml/include/ggml.h CHANGED
@@ -2215,6 +2215,37 @@ extern "C" {
2215
 
2216
  GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
2217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2218
  #ifdef __cplusplus
2219
  }
2220
  #endif
 
2215
 
2216
  GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
2217
 
2218
+ // ggml threadpool
2219
+ // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
2220
+ // the goal should be to create an API that other backends can use move everything to the ggml base
2221
+
2222
+ // scheduling priorities
2223
+ enum ggml_sched_priority {
2224
+ GGML_SCHED_PRIO_NORMAL,
2225
+ GGML_SCHED_PRIO_MEDIUM,
2226
+ GGML_SCHED_PRIO_HIGH,
2227
+ GGML_SCHED_PRIO_REALTIME
2228
+ };
2229
+
2230
+ // threadpool params
2231
+ // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
2232
+ struct ggml_threadpool_params {
2233
+ bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
2234
+ int n_threads; // number of threads
2235
+ enum ggml_sched_priority prio; // thread priority
2236
+ uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
2237
+ bool strict_cpu; // strict cpu placement
2238
+ bool paused; // start in paused state
2239
+ };
2240
+
2241
+ struct ggml_threadpool; // forward declaration, see ggml.c
2242
+
2243
+ typedef struct ggml_threadpool * ggml_threadpool_t;
2244
+
2245
+ GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
2246
+ GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
2247
+ GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
2248
+
2249
  #ifdef __cplusplus
2250
  }
2251
  #endif
ggml/src/CMakeLists.txt CHANGED
@@ -202,6 +202,10 @@ endif()
202
 
203
  # ggml
204
 
 
 
 
 
205
  add_library(ggml-base
206
  ../include/ggml.h
207
  ../include/ggml-alloc.h
@@ -226,6 +230,31 @@ add_library(ggml
226
 
227
  target_link_libraries(ggml PUBLIC ggml-base)
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  function(ggml_add_backend backend)
230
  string(TOUPPER "GGML_${backend}" backend_id)
231
  if (${backend_id})
@@ -236,14 +265,10 @@ function(ggml_add_backend backend)
236
  # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
237
  if (${backend_id})
238
  message(STATUS "Including ${backend} backend")
239
- if (${BUILD_SHARED_LIBS})
240
- target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
241
- target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED)
242
  endif()
243
- install(TARGETS ${backend_target} LIBRARY)
244
- target_link_libraries(ggml PUBLIC ${backend_target})
245
- string(TOUPPER "GGML_USE_${backend}" backend_use)
246
- target_compile_definitions(ggml PUBLIC ${backend_use})
247
  endif()
248
  endif()
249
  endfunction()
@@ -256,10 +281,10 @@ ggml_add_backend(CUDA)
256
  ggml_add_backend(HIP)
257
  ggml_add_backend(Kompute)
258
  ggml_add_backend(METAL)
 
259
  ggml_add_backend(RPC)
260
  ggml_add_backend(SYCL)
261
  ggml_add_backend(Vulkan)
262
- ggml_add_backend(MUSA)
263
 
264
  foreach (target ggml-base ggml)
265
  target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
 
202
 
203
  # ggml
204
 
205
+ if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
206
+ message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
207
+ endif()
208
+
209
  add_library(ggml-base
210
  ../include/ggml.h
211
  ../include/ggml-alloc.h
 
230
 
231
  target_link_libraries(ggml PUBLIC ggml-base)
232
 
233
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
234
+ target_link_libraries(ggml PRIVATE dl)
235
+ endif()
236
+
237
+ function(ggml_add_backend_library backend)
238
+ if (GGML_BACKEND_DL)
239
+ add_library(${backend} MODULE ${ARGN})
240
+ # write the shared library to the output directory
241
+ set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
242
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
243
+ else()
244
+ add_library(${backend} ${ARGN})
245
+ target_link_libraries(ggml PUBLIC ${backend})
246
+ install(TARGETS ${backend} LIBRARY)
247
+ endif()
248
+
249
+ target_link_libraries(${backend} PRIVATE ggml-base)
250
+ target_include_directories(${backend} PRIVATE ..)
251
+
252
+ if (${BUILD_SHARED_LIBS})
253
+ target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
254
+ target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
255
+ endif()
256
+ endfunction()
257
+
258
  function(ggml_add_backend backend)
259
  string(TOUPPER "GGML_${backend}" backend_id)
260
  if (${backend_id})
 
265
  # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
266
  if (${backend_id})
267
  message(STATUS "Including ${backend} backend")
268
+ if (NOT GGML_BACKEND_DL)
269
+ string(TOUPPER "GGML_USE_${backend}" backend_use)
270
+ target_compile_definitions(ggml PUBLIC ${backend_use})
271
  endif()
 
 
 
 
272
  endif()
273
  endif()
274
  endfunction()
 
281
  ggml_add_backend(HIP)
282
  ggml_add_backend(Kompute)
283
  ggml_add_backend(METAL)
284
+ ggml_add_backend(MUSA)
285
  ggml_add_backend(RPC)
286
  ggml_add_backend(SYCL)
287
  ggml_add_backend(Vulkan)
 
288
 
289
  foreach (target ggml-base ggml)
290
  target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
ggml/src/ggml-backend-impl.h CHANGED
@@ -8,6 +8,8 @@
8
  extern "C" {
9
  #endif
10
 
 
 
11
  //
12
  // Backend buffer type
13
  //
@@ -63,20 +65,20 @@ extern "C" {
63
  enum ggml_backend_buffer_usage usage;
64
  };
65
 
66
- ggml_backend_buffer_t ggml_backend_buffer_init(
67
  ggml_backend_buffer_type_t buft,
68
  struct ggml_backend_buffer_i iface,
69
  void * context,
70
  size_t size);
71
 
72
  // do not use directly, use ggml_backend_tensor_copy instead
73
- bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
74
 
75
  // multi-buffer
76
  // buffer that contains a collection of buffers
77
- ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
78
- bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
79
- void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
80
 
81
  //
82
  // Backend (stream)
@@ -199,17 +201,37 @@ extern "C" {
199
  };
200
 
201
  struct ggml_backend_reg {
202
- // int api_version; // TODO: for dynamic loading
203
  struct ggml_backend_reg_i iface;
204
  void * context;
205
  };
206
 
207
-
208
  // Internal backend registry API
209
- void ggml_backend_register(ggml_backend_reg_t reg);
210
- void ggml_backend_device_register(ggml_backend_dev_t device);
211
- // TODO: backends can be loaded as a dynamic library, in which case it needs to export this function
212
- // typedef ggml_backend_register_t * (*ggml_backend_init)(void);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  #ifdef __cplusplus
215
  }
 
8
  extern "C" {
9
  #endif
10
 
11
+ #define GGML_BACKEND_API_VERSION 1
12
+
13
  //
14
  // Backend buffer type
15
  //
 
65
  enum ggml_backend_buffer_usage usage;
66
  };
67
 
68
+ GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
69
  ggml_backend_buffer_type_t buft,
70
  struct ggml_backend_buffer_i iface,
71
  void * context,
72
  size_t size);
73
 
74
  // do not use directly, use ggml_backend_tensor_copy instead
75
+ GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
76
 
77
  // multi-buffer
78
  // buffer that contains a collection of buffers
79
+ GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
80
+ GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
81
+ GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
82
 
83
  //
84
  // Backend (stream)
 
201
  };
202
 
203
  struct ggml_backend_reg {
204
+ int api_version; // initialize to GGML_BACKEND_API_VERSION
205
  struct ggml_backend_reg_i iface;
206
  void * context;
207
  };
208
 
 
209
  // Internal backend registry API
210
+ GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
211
+ GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
212
+
213
+ // Add backend dynamic loading support to the backend
214
+ typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
215
+
216
+ #ifdef GGML_BACKEND_DL
217
+ #ifdef __cplusplus
218
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
219
+ extern "C" { \
220
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
221
+ } \
222
+ ggml_backend_reg_t ggml_backend_init(void) { \
223
+ return reg_fn(); \
224
+ }
225
+ #else
226
+ # define GGML_BACKEND_DL_IMPL(reg_fn) \
227
+ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
228
+ ggml_backend_reg_t ggml_backend_init(void) { \
229
+ return reg_fn(); \
230
+ }
231
+ #endif
232
+ #else
233
+ # define GGML_BACKEND_DL_IMPL(reg_fn)
234
+ #endif
235
 
236
  #ifdef __cplusplus
237
  }
ggml/src/ggml-backend-reg.cpp CHANGED
@@ -1,11 +1,29 @@
1
  #include "ggml-backend-impl.h"
2
  #include "ggml-backend.h"
3
- #include "ggml-cpu.h"
4
  #include "ggml-impl.h"
 
5
  #include <cstring>
 
6
  #include <vector>
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  // Backend registry
 
 
 
9
 
10
  #ifdef GGML_USE_CUDA
11
  #include "ggml-cuda.h"
@@ -43,8 +61,13 @@
43
  #include "ggml-kompute.h"
44
  #endif
45
 
 
 
 
 
 
46
  struct ggml_backend_registry {
47
- std::vector<ggml_backend_reg_t> backends;
48
  std::vector<ggml_backend_dev_t> devices;
49
 
50
  ggml_backend_registry() {
@@ -75,11 +98,19 @@ struct ggml_backend_registry {
75
  #ifdef GGML_USE_KOMPUTE
76
  register_backend(ggml_backend_kompute_reg());
77
  #endif
78
-
79
  register_backend(ggml_backend_cpu_reg());
 
80
  }
81
 
82
- void register_backend(ggml_backend_reg_t reg) {
 
 
 
 
 
 
 
83
  if (!reg) {
84
  return;
85
  }
@@ -88,7 +119,7 @@ struct ggml_backend_registry {
88
  GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
89
  __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
90
  #endif
91
- backends.push_back(reg);
92
  for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
93
  register_device(ggml_backend_reg_dev_get(reg, i));
94
  }
@@ -100,6 +131,111 @@ struct ggml_backend_registry {
100
  #endif
101
  devices.push_back(device);
102
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  };
104
 
105
  static ggml_backend_registry & get_reg() {
@@ -123,7 +259,7 @@ size_t ggml_backend_reg_count() {
123
 
124
  ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
125
  GGML_ASSERT(index < ggml_backend_reg_count());
126
- return get_reg().backends[index];
127
  }
128
 
129
  ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
@@ -133,7 +269,7 @@ ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
133
  return reg;
134
  }
135
  }
136
- return NULL;
137
  }
138
 
139
  // Device enumeration
@@ -153,7 +289,7 @@ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
153
  return dev;
154
  }
155
  }
156
- return NULL;
157
  }
158
 
159
  ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
@@ -163,14 +299,14 @@ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
163
  return dev;
164
  }
165
  }
166
- return NULL;
167
  }
168
 
169
  // Convenience functions
170
  ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
171
  ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
172
  if (!dev) {
173
- return NULL;
174
  }
175
  return ggml_backend_dev_init(dev, params);
176
  }
@@ -178,7 +314,7 @@ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params)
178
  ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
179
  ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
180
  if (!dev) {
181
- return NULL;
182
  }
183
  return ggml_backend_dev_init(dev, params);
184
  }
@@ -189,7 +325,97 @@ ggml_backend_t ggml_backend_init_best(void) {
189
  dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
190
  }
191
  if (!dev) {
192
- return NULL;
193
  }
194
- return ggml_backend_dev_init(dev, NULL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
 
1
  #include "ggml-backend-impl.h"
2
  #include "ggml-backend.h"
 
3
  #include "ggml-impl.h"
4
+ #include <algorithm>
5
  #include <cstring>
6
+ #include <string>
7
  #include <vector>
8
 
9
+ #ifdef _WIN32
10
+ # define WIN32_LEAN_AND_MEAN
11
+ # ifndef NOMINMAX
12
+ # define NOMINMAX
13
+ # endif
14
+ # include <windows.h>
15
+ #elif defined(__APPLE__)
16
+ # include <mach-o/dyld.h>
17
+ # include <dlfcn.h>
18
+ #else
19
+ # include <dlfcn.h>
20
+ # include <unistd.h>
21
+ #endif
22
+
23
  // Backend registry
24
+ #ifdef GGML_USE_CPU
25
+ #include "ggml-cpu.h"
26
+ #endif
27
 
28
  #ifdef GGML_USE_CUDA
29
  #include "ggml-cuda.h"
 
61
  #include "ggml-kompute.h"
62
  #endif
63
 
64
+ struct ggml_backend_reg_entry {
65
+ ggml_backend_reg_t reg;
66
+ void * handle;
67
+ };
68
+
69
  struct ggml_backend_registry {
70
+ std::vector<ggml_backend_reg_entry> backends;
71
  std::vector<ggml_backend_dev_t> devices;
72
 
73
  ggml_backend_registry() {
 
98
  #ifdef GGML_USE_KOMPUTE
99
  register_backend(ggml_backend_kompute_reg());
100
  #endif
101
+ #ifdef GGML_USE_CPU
102
  register_backend(ggml_backend_cpu_reg());
103
+ #endif
104
  }
105
 
106
+ ~ggml_backend_registry() {
107
+ while (!backends.empty()) {
108
+ // use silent since the log system may have been destroyed at this point
109
+ unload_backend(backends.back().reg, true);
110
+ }
111
+ }
112
+
113
+ void register_backend(ggml_backend_reg_t reg, void * handle = nullptr) {
114
  if (!reg) {
115
  return;
116
  }
 
119
  GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
120
  __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
121
  #endif
122
+ backends.push_back({ reg, handle });
123
  for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
124
  register_device(ggml_backend_reg_dev_get(reg, i));
125
  }
 
131
  #endif
132
  devices.push_back(device);
133
  }
134
+
135
+ ggml_backend_reg_t load_backend(const char * path, bool silent) {
136
+ #ifdef _WIN32
137
+ // suppress error dialogs for missing DLLs
138
+ DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
139
+ SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
140
+
141
+ HMODULE handle = LoadLibraryA(path);
142
+
143
+ if (!handle) {
144
+ if (!silent) {
145
+ GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
146
+ }
147
+ SetErrorMode(old_mode);
148
+ return nullptr;
149
+ }
150
+
151
+ ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
152
+
153
+ SetErrorMode(old_mode);
154
+
155
+ if (!backend_init) {
156
+ if (!silent) {
157
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
158
+ }
159
+ FreeLibrary(handle);
160
+ return nullptr;
161
+ }
162
+ #else
163
+ void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
164
+
165
+ if (!handle) {
166
+ if (!silent) {
167
+ GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
168
+ }
169
+ return nullptr;
170
+ }
171
+
172
+ auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
173
+
174
+ if (!backend_init) {
175
+ if (!silent) {
176
+ GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
177
+ }
178
+ dlclose(handle);
179
+ return nullptr;
180
+ }
181
+ #endif
182
+ ggml_backend_reg_t reg = backend_init();
183
+
184
+ if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
185
+ if (!silent) {
186
+ if (!reg) {
187
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
188
+ } else {
189
+ GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
190
+ __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
191
+ }
192
+ }
193
+ #ifdef _WIN32
194
+ FreeLibrary(handle);
195
+ #else
196
+ dlclose(handle);
197
+ #endif
198
+ return nullptr;
199
+ }
200
+
201
+ GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
202
+ register_backend(reg, handle);
203
+ return reg;
204
+ }
205
+
206
+ void unload_backend(ggml_backend_reg_t reg, bool silent) {
207
+ auto it = std::find_if(backends.begin(), backends.end(),
208
+ [reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
209
+
210
+ if (it == backends.end()) {
211
+ if (!silent) {
212
+ GGML_LOG_ERROR("%s: backend not found\n", __func__);
213
+ }
214
+ return;
215
+ }
216
+
217
+ if (!silent) {
218
+ GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
219
+ }
220
+
221
+ // remove devices
222
+ devices.erase(
223
+ std::remove_if(devices.begin(), devices.end(),
224
+ [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
225
+ devices.end());
226
+
227
+ // unload library
228
+ if (it->handle) {
229
+ #ifdef _WIN32
230
+ FreeLibrary((HMODULE) it->handle);
231
+ #else
232
+ dlclose(it->handle);
233
+ #endif
234
+ }
235
+
236
+ // remove backend
237
+ backends.erase(it);
238
+ }
239
  };
240
 
241
  static ggml_backend_registry & get_reg() {
 
259
 
260
  ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
261
  GGML_ASSERT(index < ggml_backend_reg_count());
262
+ return get_reg().backends[index].reg;
263
  }
264
 
265
  ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
 
269
  return reg;
270
  }
271
  }
272
+ return nullptr;
273
  }
274
 
275
  // Device enumeration
 
289
  return dev;
290
  }
291
  }
292
+ return nullptr;
293
  }
294
 
295
  ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
 
299
  return dev;
300
  }
301
  }
302
+ return nullptr;
303
  }
304
 
305
  // Convenience functions
306
  ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
307
  ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
308
  if (!dev) {
309
+ return nullptr;
310
  }
311
  return ggml_backend_dev_init(dev, params);
312
  }
 
314
  ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
315
  ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
316
  if (!dev) {
317
+ return nullptr;
318
  }
319
  return ggml_backend_dev_init(dev, params);
320
  }
 
325
  dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
326
  }
327
  if (!dev) {
328
+ return nullptr;
329
  }
330
+ return ggml_backend_dev_init(dev, nullptr);
331
+ }
332
+
333
+ // Dynamic loading
334
+ ggml_backend_reg_t ggml_backend_load(const char * path) {
335
+ return get_reg().load_backend(path, false);
336
+ }
337
+
338
+ void ggml_backend_unload(ggml_backend_reg_t reg) {
339
+ get_reg().unload_backend(reg, true);
340
+ }
341
+
342
+ void ggml_backend_load_all() {
343
+ std::vector<std::string> search_prefix;
344
+
345
+ // add the executable directory to the search path
346
+ // FIXME: this is convenient for development, but it should probably be disabled in production
347
+
348
+ #if defined(__APPLE__)
349
+ // get executable path
350
+ std::vector<char> path;
351
+ uint32_t size;
352
+ while (true) {
353
+ size = path.size();
354
+ if (_NSGetExecutablePath(path.data(), &size) == 0) {
355
+ break;
356
+ }
357
+ path.resize(size);
358
+ }
359
+ std::string base_path(path.data(), size);
360
+ // remove executable name
361
+ auto last_slash = base_path.find_last_of('/');
362
+ if (last_slash != std::string::npos) {
363
+ base_path = base_path.substr(0, last_slash);
364
+ }
365
+ search_prefix.push_back(base_path + "/");
366
+ #elif defined(__linux__)
367
+ std::string base_path = ".";
368
+ std::vector<char> path(1024);
369
+ while (true) {
370
+ // get executable path
371
+ ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
372
+ if (len == -1) {
373
+ break;
374
+ }
375
+ if (len < (ssize_t) path.size()) {
376
+ base_path = std::string(path.data(), len);
377
+ // remove executable name
378
+ auto last_slash = base_path.find_last_of('/');
379
+ if (last_slash != std::string::npos) {
380
+ base_path = base_path.substr(0, last_slash);
381
+ }
382
+ break;
383
+ }
384
+ path.resize(path.size() * 2);
385
+ }
386
+
387
+ search_prefix.push_back(base_path + "/");
388
+ #endif
389
+
390
+ auto & reg = get_reg();
391
+
392
+ auto try_load = [&](const std::string & name) {
393
+ std::string os_name;
394
+ #ifdef _WIN32
395
+ os_name = "ggml-" + name + ".dll";
396
+ #else
397
+ os_name = "libggml-" + name + ".so";
398
+ #endif
399
+ if (reg.load_backend(os_name.c_str(), true)) {
400
+ return;
401
+ }
402
+ for (const auto & prefix : search_prefix) {
403
+ if (reg.load_backend((prefix + os_name).c_str(), true)) {
404
+ return;
405
+ }
406
+ }
407
+ };
408
+
409
+ try_load("amx");
410
+ try_load("blas");
411
+ try_load("cann");
412
+ try_load("cuda");
413
+ try_load("hip");
414
+ try_load("kompute");
415
+ try_load("metal");
416
+ try_load("rpc");
417
+ try_load("sycl");
418
+ try_load("vulkan");
419
+ try_load("musa");
420
+ try_load("cpu");
421
  }
ggml/src/ggml-blas/CMakeLists.txt CHANGED
@@ -11,12 +11,9 @@ find_package(BLAS)
11
  if (BLAS_FOUND)
12
  message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
13
 
14
- add_library(ggml-blas
15
- ggml-blas.cpp
16
- )
17
-
18
- target_link_libraries(ggml-blas PRIVATE ggml-base)
19
- target_include_directories(ggml-blas PRIVATE . ..)
20
 
21
  if (${GGML_BLAS_VENDOR} MATCHES "Apple")
22
  add_compile_definitions(ACCELERATE_NEW_LAPACK)
 
11
  if (BLAS_FOUND)
12
  message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
13
 
14
+ ggml_add_backend_library(ggml-blas
15
+ ggml-blas.cpp
16
+ )
 
 
 
17
 
18
  if (${GGML_BLAS_VENDOR} MATCHES "Apple")
19
  add_compile_definitions(ACCELERATE_NEW_LAPACK)
ggml/src/ggml-blas/ggml-blas.cpp CHANGED
@@ -506,9 +506,12 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
506
 
507
  ggml_backend_reg_t ggml_backend_blas_reg(void) {
508
  static struct ggml_backend_reg ggml_backend_blas_reg = {
509
- /* .iface = */ ggml_backend_blas_reg_i,
510
- /* .context = */ NULL,
 
511
  };
512
 
513
  return &ggml_backend_blas_reg;
514
  }
 
 
 
506
 
507
  ggml_backend_reg_t ggml_backend_blas_reg(void) {
508
  static struct ggml_backend_reg ggml_backend_blas_reg = {
509
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
510
+ /* .iface = */ ggml_backend_blas_reg_i,
511
+ /* .context = */ NULL,
512
  };
513
 
514
  return &ggml_backend_blas_reg;
515
  }
516
+
517
+ GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
ggml/src/ggml-cann/CMakeLists.txt CHANGED
@@ -61,9 +61,9 @@ if (CANN_INSTALL_DIR)
61
 
62
  file(GLOB GGML_SOURCES_CANN "*.cpp")
63
 
64
- add_library(ggml-cann ${GGML_SOURCES_CANN})
65
- target_link_libraries(ggml-cann PRIVATE ggml-base ${CANN_LIBRARIES})
66
- target_include_directories(ggml-cann PRIVATE . .. ${CANN_INCLUDE_DIRS})
67
  target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
68
 
69
  target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
 
61
 
62
  file(GLOB GGML_SOURCES_CANN "*.cpp")
63
 
64
+ ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
65
+ target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
66
+ target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
67
  target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
68
 
69
  target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
ggml/src/ggml-cann/ggml-cann.cpp CHANGED
@@ -2064,16 +2064,17 @@ ggml_backend_reg_t ggml_backend_cann_reg() {
2064
  dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
2065
  ggml_cann_set_device(i);
2066
  ggml_backend_dev_t dev = new ggml_backend_device {
2067
- /* .interface = */ ggml_backend_cann_device_interface,
2068
- /* .reg = */ &reg,
2069
- /* .context = */ dev_ctx
2070
  };
2071
  ctx->devices.push_back(dev);
2072
  }
2073
 
2074
  reg = ggml_backend_reg {
2075
- /* .interface = */ ggml_backend_cann_reg_interface,
2076
- /* .context = */ ctx
 
2077
  };
2078
  }
2079
 
@@ -2126,3 +2127,5 @@ void ggml_backend_cann_get_device_memory(int32_t device, size_t* free,
2126
  ggml_cann_set_device(device);
2127
  ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
2128
  }
 
 
 
2064
  dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
2065
  ggml_cann_set_device(i);
2066
  ggml_backend_dev_t dev = new ggml_backend_device {
2067
+ /* .iface = */ ggml_backend_cann_device_interface,
2068
+ /* .reg = */ &reg,
2069
+ /* .context = */ dev_ctx
2070
  };
2071
  ctx->devices.push_back(dev);
2072
  }
2073
 
2074
  reg = ggml_backend_reg {
2075
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
2076
+ /* .iface = */ ggml_backend_cann_reg_interface,
2077
+ /* .context = */ ctx
2078
  };
2079
  }
2080
 
 
2127
  ggml_cann_set_device(device);
2128
  ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
2129
  }
2130
+
2131
+ GGML_BACKEND_DL_IMPL(ggml_backend_cann_reg)
ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -1,14 +1,13 @@
1
- add_library(ggml-cpu
2
- ggml-cpu.c
3
- ggml-cpu.cpp
4
- ggml-cpu-aarch64.c
5
- ggml-cpu-aarch64.h
6
- ggml-cpu-quants.c
7
- ggml-cpu-quants.h
8
- )
9
 
10
- target_link_libraries(ggml-cpu PRIVATE ggml-base)
11
- target_include_directories(ggml-cpu PRIVATE . ..)
12
 
13
  if (APPLE AND GGML_ACCELERATE)
14
  find_library(ACCELERATE_FRAMEWORK Accelerate)
 
1
+ ggml_add_backend_library(ggml-cpu
2
+ ggml-cpu.c
3
+ ggml-cpu.cpp
4
+ ggml-cpu-aarch64.c
5
+ ggml-cpu-aarch64.h
6
+ ggml-cpu-quants.c
7
+ ggml-cpu-quants.h
8
+ )
9
 
10
+ target_include_directories(ggml-cpu PRIVATE .)
 
11
 
12
  if (APPLE AND GGML_ACCELERATE)
13
  find_library(ACCELERATE_FRAMEWORK Accelerate)
ggml/src/ggml-cpu/ggml-cpu.c CHANGED
@@ -13578,29 +13578,6 @@ static void ggml_graph_compute_kickoff(struct ggml_threadpool * threadpool, int
13578
 
13579
  #endif // GGML_USE_OPENMP
13580
 
13581
- void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
13582
- p->n_threads = n_threads;
13583
- p->prio = 0; // default priority (usually means normal or inherited)
13584
- p->poll = 50; // hybrid-polling enabled
13585
- p->strict_cpu = false; // no strict placement (all threads share same cpumask)
13586
- p->paused = false; // threads are ready to go
13587
- memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
13588
- }
13589
-
13590
- struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
13591
- struct ggml_threadpool_params p;
13592
- ggml_threadpool_params_init(&p, n_threads);
13593
- return p;
13594
- }
13595
-
13596
- bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
13597
- if (p0->n_threads != p1->n_threads ) return false;
13598
- if (p0->prio != p1->prio ) return false;
13599
- if (p0->poll != p1->poll ) return false;
13600
- if (p0->strict_cpu != p1->strict_cpu ) return false;
13601
- return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
13602
- }
13603
-
13604
  static struct ggml_threadpool * ggml_threadpool_new_impl(
13605
  struct ggml_threadpool_params * tpp,
13606
  struct ggml_cgraph * cgraph,
 
13578
 
13579
  #endif // GGML_USE_OPENMP
13580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13581
  static struct ggml_threadpool * ggml_threadpool_new_impl(
13582
  struct ggml_threadpool_params * tpp,
13583
  struct ggml_cgraph * cgraph,
ggml/src/ggml-cpu/ggml-cpu.cpp CHANGED
@@ -541,16 +541,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
541
  return &ggml_backend_cpu_device;
542
  }
543
 
544
- struct ggml_backend_feature {
545
- const char * name;
546
- const char * value;
547
- };
548
-
549
- // Not used yet
550
  // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
551
- // and additionally to allow other backends to expose their own list of features that applications can query using the same API.
552
  static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
553
  static std::vector<ggml_backend_feature> features = []() {
 
 
554
  std::vector<ggml_backend_feature> features;
555
  if (ggml_cpu_has_sse3()) {
556
  features.push_back({ "SSE3", "1" });
@@ -561,6 +557,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
561
  if (ggml_cpu_has_avx()) {
562
  features.push_back({ "AVX", "1" });
563
  }
 
 
 
564
  if (ggml_cpu_has_avx2()) {
565
  features.push_back({ "AVX2", "1" });
566
  }
@@ -570,9 +569,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
570
  if (ggml_cpu_has_fma()) {
571
  features.push_back({ "FMA", "1" });
572
  }
573
- if (ggml_cpu_has_avx_vnni()) {
574
- features.push_back({ "AVX_VNNI", "1" });
575
- }
576
  if (ggml_cpu_has_avx512()) {
577
  features.push_back({ "AVX512", "1" });
578
  }
@@ -619,6 +615,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
619
  if (ggml_cpu_has_llamafile()) {
620
  features.push_back({ "LLAMAFILE", "1" });
621
  }
 
 
 
 
622
 
623
  features.push_back({ nullptr, nullptr });
624
 
@@ -637,6 +637,29 @@ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const ch
637
  if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
638
  return (void *)ggml_backend_cpu_get_extra_bufts;
639
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
 
641
  return NULL;
642
 
@@ -655,9 +678,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
655
  ggml_cpu_init();
656
 
657
  static struct ggml_backend_reg ggml_backend_cpu_reg = {
658
- /* .iface = */ ggml_backend_cpu_reg_i,
659
- /* .context = */ NULL,
 
660
  };
661
 
662
  return &ggml_backend_cpu_reg;
663
  }
 
 
 
541
  return &ggml_backend_cpu_device;
542
  }
543
 
 
 
 
 
 
 
544
  // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
545
+ // and additionally to allow other backends to expose their own list of features that applications can query using the same API
546
  static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
547
  static std::vector<ggml_backend_feature> features = []() {
548
+ ggml_cpu_init();
549
+
550
  std::vector<ggml_backend_feature> features;
551
  if (ggml_cpu_has_sse3()) {
552
  features.push_back({ "SSE3", "1" });
 
557
  if (ggml_cpu_has_avx()) {
558
  features.push_back({ "AVX", "1" });
559
  }
560
+ if (ggml_cpu_has_avx_vnni()) {
561
+ features.push_back({ "AVX_VNNI", "1" });
562
+ }
563
  if (ggml_cpu_has_avx2()) {
564
  features.push_back({ "AVX2", "1" });
565
  }
 
569
  if (ggml_cpu_has_fma()) {
570
  features.push_back({ "FMA", "1" });
571
  }
 
 
 
572
  if (ggml_cpu_has_avx512()) {
573
  features.push_back({ "AVX512", "1" });
574
  }
 
615
  if (ggml_cpu_has_llamafile()) {
616
  features.push_back({ "LLAMAFILE", "1" });
617
  }
618
+ // TODO: rename this
619
+ #ifdef GGML_USE_CPU_AARCH64
620
+ features.push_back({ "AARCH64_REPACK", "1" });
621
+ #endif
622
 
623
  features.push_back({ nullptr, nullptr });
624
 
 
637
  if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
638
  return (void *)ggml_backend_cpu_get_extra_bufts;
639
  }
640
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
641
+ return (void *)ggml_backend_cpu_get_features;
642
+ }
643
+ if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
644
+ return (void *)ggml_backend_cpu_set_abort_callback;
645
+ }
646
+ if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
647
+ return (void *)ggml_numa_init;
648
+ }
649
+ if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
650
+ return (void *)ggml_is_numa;
651
+ }
652
+
653
+ // threadpool - TODO: move to ggml-base
654
+ if (strcmp(name, "ggml_threadpool_new") == 0) {
655
+ return (void *)ggml_threadpool_new;
656
+ }
657
+ if (strcmp(name, "ggml_threadpool_free") == 0) {
658
+ return (void *)ggml_threadpool_free;
659
+ }
660
+ if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
661
+ return (void *)ggml_backend_cpu_set_threadpool;
662
+ }
663
 
664
  return NULL;
665
 
 
678
  ggml_cpu_init();
679
 
680
  static struct ggml_backend_reg ggml_backend_cpu_reg = {
681
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
682
+ /* .iface = */ ggml_backend_cpu_reg_i,
683
+ /* .context = */ NULL,
684
  };
685
 
686
  return &ggml_backend_cpu_reg;
687
  }
688
+
689
+ GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -3126,6 +3126,61 @@ static ggml_backend_dev_t ggml_backend_cuda_reg_get_device(ggml_backend_reg_t re
3126
  return ctx->devices[index];
3127
  }
3128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3129
  static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
3130
  GGML_UNUSED(reg);
3131
  if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
@@ -3137,6 +3192,9 @@ static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, con
3137
  if (strcmp(name, "ggml_backend_unregister_host_buffer") == 0) {
3138
  return (void *)ggml_backend_cuda_unregister_host_buffer;
3139
  }
 
 
 
3140
  return nullptr;
3141
  }
3142
 
@@ -3169,16 +3227,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
3169
  dev_ctx->description = prop.name;
3170
 
3171
  ggml_backend_dev_t dev = new ggml_backend_device {
3172
- /* .interface = */ ggml_backend_cuda_device_interface,
3173
- /* .reg = */ &reg,
3174
- /* .context = */ dev_ctx
3175
  };
3176
  ctx->devices.push_back(dev);
3177
  }
3178
 
3179
  reg = ggml_backend_reg {
3180
- /* .interface = */ ggml_backend_cuda_reg_interface,
3181
- /* .context = */ ctx
 
3182
  };
3183
  }
3184
 
@@ -3209,3 +3268,5 @@ ggml_backend_t ggml_backend_cuda_init(int device) {
3209
 
3210
  return cuda_backend;
3211
  }
 
 
 
3126
  return ctx->devices[index];
3127
  }
3128
 
3129
+ static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t reg) {
3130
+ static std::vector<ggml_backend_feature> features = []() {
3131
+ std::vector<ggml_backend_feature> features;
3132
+ #define _STRINGIFY(...) #__VA_ARGS__
3133
+ #define STRINGIFY(...) _STRINGIFY(__VA_ARGS__)
3134
+
3135
+ #ifdef __CUDA_ARCH_LIST__
3136
+ features.push_back({ "ARCHS", STRINGIFY(__CUDA_ARCH_LIST__) });
3137
+ #endif
3138
+
3139
+ #ifdef GGML_CUDA_FORCE_MMQ
3140
+ features.push_back({ "FORCE_MMQ", "1" });
3141
+ #endif
3142
+
3143
+ #ifdef GGML_CUDA_FORCE_CUBLAS
3144
+ features.push_back({ "FORCE_CUBLAS", "1" });
3145
+ #endif
3146
+
3147
+ #ifdef GGML_CUDA_NO_VMM
3148
+ features.push_back({ "NO_VMM", "1" });
3149
+ #endif
3150
+
3151
+ #ifdef GGML_CUDA_NO_PEER_COPY
3152
+ features.push_back({ "NO_PEER_COPY", "1" });
3153
+ #endif
3154
+
3155
+ #ifdef GGML_CUDA_F16
3156
+ features.push_back({ "F16", "1" });
3157
+ #endif
3158
+
3159
+ #ifdef GGML_CUDA_USE_GRAPHS
3160
+ features.push_back({ "USE_GRAPHS", "1" });
3161
+ #endif
3162
+
3163
+ #ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
3164
+ features.push_back({ "PEER_MAX_BATCH_SIZE", STRINGIFY(GGML_CUDA_PEER_MAX_BATCH_SIZE) });
3165
+ #endif
3166
+
3167
+ #ifdef GGML_CUDA_FA_ALL_QUANTS
3168
+ features.push_back({ "FA_ALL_QUANTS", "1" });
3169
+ #endif
3170
+
3171
+ #undef _STRINGIFY
3172
+ #undef STRINGIFY
3173
+
3174
+ features.push_back({ nullptr, nullptr });
3175
+
3176
+ return features;
3177
+ }();
3178
+
3179
+ return features.data();
3180
+
3181
+ GGML_UNUSED(reg);
3182
+ }
3183
+
3184
  static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
3185
  GGML_UNUSED(reg);
3186
  if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
 
3192
  if (strcmp(name, "ggml_backend_unregister_host_buffer") == 0) {
3193
  return (void *)ggml_backend_cuda_unregister_host_buffer;
3194
  }
3195
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
3196
+ return (void *)ggml_backend_cuda_get_features;
3197
+ }
3198
  return nullptr;
3199
  }
3200
 
 
3227
  dev_ctx->description = prop.name;
3228
 
3229
  ggml_backend_dev_t dev = new ggml_backend_device {
3230
+ /* .iface = */ ggml_backend_cuda_device_interface,
3231
+ /* .reg = */ &reg,
3232
+ /* .context = */ dev_ctx
3233
  };
3234
  ctx->devices.push_back(dev);
3235
  }
3236
 
3237
  reg = ggml_backend_reg {
3238
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
3239
+ /* .iface = */ ggml_backend_cuda_reg_interface,
3240
+ /* .context = */ ctx
3241
  };
3242
  }
3243
 
 
3268
 
3269
  return cuda_backend;
3270
  }
3271
+
3272
+ GGML_BACKEND_DL_IMPL(ggml_backend_cuda_reg)
ggml/src/ggml-cuda/ggml/CMakeLists.txt CHANGED
@@ -46,13 +46,10 @@ if (CUDAToolkit_FOUND)
46
  list(APPEND GGML_SOURCES_CUDA ${SRCS})
47
  endif()
48
 
49
- add_library(ggml-cuda
50
- ${GGML_HEADERS_CUDA}
51
- ${GGML_SOURCES_CUDA}
52
- )
53
-
54
- target_link_libraries(ggml-cuda PRIVATE ggml-base)
55
- target_include_directories(ggml-cuda PRIVATE . ..)
56
 
57
  add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
58
 
 
46
  list(APPEND GGML_SOURCES_CUDA ${SRCS})
47
  endif()
48
 
49
+ ggml_add_backend_library(ggml-cuda
50
+ ${GGML_HEADERS_CUDA}
51
+ ${GGML_SOURCES_CUDA}
52
+ )
 
 
 
53
 
54
  add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
55
 
ggml/src/ggml-hip/CMakeLists.txt CHANGED
@@ -64,12 +64,10 @@ else()
64
  list(APPEND GGML_SOURCES_ROCM ${SRCS})
65
  endif()
66
 
67
- add_library(ggml-hip
68
- ${GGML_HEADERS_ROCM}
69
- ${GGML_SOURCES_ROCM})
70
-
71
- target_link_libraries(ggml-hip PRIVATE ggml-base)
72
- target_include_directories(ggml-hip PRIVATE . ..)
73
 
74
  # TODO: do not use CUDA definitions for HIP
75
  target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
 
64
  list(APPEND GGML_SOURCES_ROCM ${SRCS})
65
  endif()
66
 
67
+ ggml_add_backend_library(ggml-hip
68
+ ${GGML_HEADERS_ROCM}
69
+ ${GGML_SOURCES_ROCM}
70
+ )
 
 
71
 
72
  # TODO: do not use CUDA definitions for HIP
73
  target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
ggml/src/ggml-kompute/CMakeLists.txt CHANGED
@@ -6,13 +6,13 @@ if (NOT glslc_executable)
6
  message(FATAL_ERROR "glslc not found")
7
  endif()
8
 
9
- add_library(ggml-kompute
10
- ggml-kompute.cpp
11
- ../../include/ggml-kompute.h
12
- )
13
 
14
  target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
15
- target_include_directories(ggml-kompute PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
16
 
17
  add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
18
 
 
6
  message(FATAL_ERROR "glslc not found")
7
  endif()
8
 
9
+ ggml_add_backend_library(ggml-kompute
10
+ ggml-kompute.cpp
11
+ ../../include/ggml-kompute.h
12
+ )
13
 
14
  target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
15
+ target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
16
 
17
  add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
18
 
ggml/src/ggml-kompute/ggml-kompute.cpp CHANGED
@@ -2176,9 +2176,12 @@ static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
2176
 
2177
  ggml_backend_reg_t ggml_backend_kompute_reg() {
2178
  static ggml_backend_reg reg = {
2179
- /* .iface = */ ggml_backend_kompute_reg_i,
2180
- /* .context = */ nullptr,
 
2181
  };
2182
 
2183
  return &reg;
2184
  }
 
 
 
2176
 
2177
  ggml_backend_reg_t ggml_backend_kompute_reg() {
2178
  static ggml_backend_reg reg = {
2179
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
2180
+ /* .iface = */ ggml_backend_kompute_reg_i,
2181
+ /* .context = */ nullptr,
2182
  };
2183
 
2184
  return &reg;
2185
  }
2186
+
2187
+ GGML_BACKEND_DL_IMPL(ggml_backend_kompute_reg)
ggml/src/ggml-metal/CMakeLists.txt CHANGED
@@ -4,19 +4,16 @@ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
4
 
5
  message(STATUS "Metal framework found")
6
 
7
- add_library(ggml-metal
8
- ggml-metal.m
9
- )
10
 
11
  target_link_libraries(ggml-metal PRIVATE
12
- ggml-base
13
  ${FOUNDATION_LIBRARY}
14
  ${METAL_FRAMEWORK}
15
  ${METALKIT_FRAMEWORK}
16
  )
17
 
18
- target_include_directories(ggml-metal PRIVATE . ..)
19
-
20
  if (GGML_METAL_NDEBUG)
21
  add_compile_definitions(GGML_METAL_NDEBUG)
22
  endif()
 
4
 
5
  message(STATUS "Metal framework found")
6
 
7
+ ggml_add_backend_library(ggml-metal
8
+ ggml-metal.m
9
+ )
10
 
11
  target_link_libraries(ggml-metal PRIVATE
 
12
  ${FOUNDATION_LIBRARY}
13
  ${METAL_FRAMEWORK}
14
  ${METALKIT_FRAMEWORK}
15
  )
16
 
 
 
17
  if (GGML_METAL_NDEBUG)
18
  add_compile_definitions(GGML_METAL_NDEBUG)
19
  endif()
ggml/src/ggml-metal/ggml-metal.m CHANGED
@@ -4448,19 +4448,45 @@ static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t r
4448
  GGML_UNUSED(index);
4449
  }
4450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4451
  static struct ggml_backend_reg_i ggml_backend_metal_reg_i = {
4452
  /* .get_name = */ ggml_backend_metal_reg_get_name,
4453
  /* .device_count = */ ggml_backend_metal_reg_device_count,
4454
  /* .device_get = */ ggml_backend_metal_reg_device_get,
4455
- /* .get_proc_address = */ NULL,
4456
  };
4457
 
4458
  ggml_backend_reg_t ggml_backend_metal_reg(void) {
4459
  // TODO: make this thread-safe somehow?
4460
  {
4461
  g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
4462
- /* .iface = */ ggml_backend_metal_reg_i,
4463
- /* .context = */ NULL,
 
4464
  };
4465
 
4466
  g_ggml_backend_metal_device = (struct ggml_backend_device) {
@@ -4472,3 +4498,5 @@ ggml_backend_reg_t ggml_backend_metal_reg(void) {
4472
 
4473
  return &g_ggml_backend_metal_reg;
4474
  }
 
 
 
4448
  GGML_UNUSED(index);
4449
  }
4450
 
4451
+ static struct ggml_backend_feature g_ggml_backend_metal_features[] = {
4452
+ #if defined(GGML_METAL_EMBED_LIBRARY)
4453
+ { "EMBED_LIBRARY", "1" },
4454
+ #endif
4455
+ #if defined(GGML_METAL_USE_BF16)
4456
+ { "BF16", "1" },
4457
+ #endif
4458
+ { nil, nil },
4459
+ };
4460
+
4461
+ static struct ggml_backend_feature * ggml_backend_metal_get_features(ggml_backend_reg_t reg) {
4462
+ return g_ggml_backend_metal_features;
4463
+
4464
+ GGML_UNUSED(reg);
4465
+ }
4466
+
4467
+ static void * ggml_backend_metal_get_proc_address(ggml_backend_reg_t reg, const char * name) {
4468
+ if (strcmp(name, "ggml_backend_get_features") == 0) {
4469
+ return (void *)ggml_backend_metal_get_features;
4470
+ }
4471
+
4472
+ return NULL;
4473
+
4474
+ GGML_UNUSED(reg);
4475
+ }
4476
  static struct ggml_backend_reg_i ggml_backend_metal_reg_i = {
4477
  /* .get_name = */ ggml_backend_metal_reg_get_name,
4478
  /* .device_count = */ ggml_backend_metal_reg_device_count,
4479
  /* .device_get = */ ggml_backend_metal_reg_device_get,
4480
+ /* .get_proc_address = */ ggml_backend_metal_get_proc_address,
4481
  };
4482
 
4483
  ggml_backend_reg_t ggml_backend_metal_reg(void) {
4484
  // TODO: make this thread-safe somehow?
4485
  {
4486
  g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
4487
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
4488
+ /* .iface = */ ggml_backend_metal_reg_i,
4489
+ /* .context = */ NULL,
4490
  };
4491
 
4492
  g_ggml_backend_metal_device = (struct ggml_backend_device) {
 
4498
 
4499
  return &g_ggml_backend_metal_reg;
4500
  }
4501
+
4502
+ GGML_BACKEND_DL_IMPL(ggml_backend_metal_reg)
ggml/src/ggml-musa/ggml/CMakeLists.txt CHANGED
@@ -47,12 +47,10 @@ if (MUSAToolkit_FOUND)
47
  set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
48
  endforeach()
49
 
50
- add_library(ggml-musa
51
- ${GGML_HEADERS_MUSA}
52
- ${GGML_SOURCES_MUSA})
53
-
54
- target_link_libraries(ggml-musa PRIVATE ggml-base)
55
- target_include_directories(ggml-musa PRIVATE . ..)
56
 
57
  # TODO: do not use CUDA definitions for MUSA
58
  target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
 
47
  set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
48
  endforeach()
49
 
50
+ ggml_add_backend_library(ggml-musa
51
+ ${GGML_HEADERS_MUSA}
52
+ ${GGML_SOURCES_MUSA}
53
+ )
 
 
54
 
55
  # TODO: do not use CUDA definitions for MUSA
56
  target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
ggml/src/ggml-rpc/CMakeLists.txt CHANGED
@@ -1,10 +1,8 @@
1
  message(STATUS "Using RPC backend")
2
 
3
- add_library(ggml-rpc
4
- ggml-rpc.cpp)
5
-
6
- target_link_libraries(ggml-rpc PRIVATE ggml-base)
7
- target_include_directories(ggml-rpc PRIVATE . ..)
8
 
9
  if (WIN32)
10
  target_link_libraries(ggml-rpc PRIVATE ws2_32)
 
1
  message(STATUS "Using RPC backend")
2
 
3
+ ggml_add_backend_library(ggml-rpc
4
+ ggml-rpc.cpp
5
+ )
 
 
6
 
7
  if (WIN32)
8
  target_link_libraries(ggml-rpc PRIVATE ws2_32)
ggml/src/ggml-rpc/ggml-rpc.cpp CHANGED
@@ -1369,8 +1369,9 @@ static const struct ggml_backend_reg_i ggml_backend_rpc_reg_i = {
1369
 
1370
  ggml_backend_reg_t ggml_backend_rpc_reg(void) {
1371
  static struct ggml_backend_reg ggml_backend_rpc_reg = {
1372
- /* .iface = */ ggml_backend_rpc_reg_i,
1373
- /* .context = */ NULL,
 
1374
  };
1375
 
1376
  return &ggml_backend_rpc_reg;
@@ -1401,3 +1402,5 @@ ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint) {
1401
 
1402
  return dev;
1403
  }
 
 
 
1369
 
1370
  ggml_backend_reg_t ggml_backend_rpc_reg(void) {
1371
  static struct ggml_backend_reg ggml_backend_rpc_reg = {
1372
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
1373
+ /* .iface = */ ggml_backend_rpc_reg_i,
1374
+ /* .context = */ NULL,
1375
  };
1376
 
1377
  return &ggml_backend_rpc_reg;
 
1402
 
1403
  return dev;
1404
  }
1405
+
1406
+ GGML_BACKEND_DL_IMPL(ggml_backend_rpc_reg)
ggml/src/ggml-sycl/CMakeLists.txt CHANGED
@@ -16,12 +16,10 @@ endif()
16
  message(STATUS "SYCL found")
17
  #todo: AOT
18
 
19
- add_library(ggml-sycl
20
- ggml-sycl.cpp
21
- ../../include/ggml-sycl.h)
22
-
23
- target_link_libraries(ggml-sycl PRIVATE ggml-base)
24
- target_include_directories(ggml-sycl PRIVATE . ..)
25
 
26
  if (GGML_SYCL_F16)
27
  if (GGML_SYCL_TARGET STREQUAL "AMD")
 
16
  message(STATUS "SYCL found")
17
  #todo: AOT
18
 
19
+ ggml_add_backend_library(ggml-sycl
20
+ ggml-sycl.cpp
21
+ ../../include/ggml-sycl.h
22
+ )
 
 
23
 
24
  if (GGML_SYCL_F16)
25
  if (GGML_SYCL_TARGET STREQUAL "AMD")
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -4637,16 +4637,17 @@ ggml_backend_reg_t ggml_backend_sycl_reg() {
4637
  dev_ctx->description = prop.get_name();
4638
 
4639
  ggml_backend_dev_t dev = new ggml_backend_device {
4640
- /* .interface = */ ggml_backend_sycl_device_interface,
4641
- /* .reg = */ &reg,
4642
- /* .context = */ dev_ctx
4643
  };
4644
  ctx->devices.push_back(dev);
4645
  }
4646
 
4647
  reg = ggml_backend_reg {
4648
- /* .interface = */ ggml_backend_sycl_reg_interface,
4649
- /* .context = */ ctx
 
4650
  };
4651
  }
4652
 
@@ -4678,3 +4679,4 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
4678
  return sycl_backend;
4679
  }
4680
 
 
 
4637
  dev_ctx->description = prop.get_name();
4638
 
4639
  ggml_backend_dev_t dev = new ggml_backend_device {
4640
+ /* .iface = */ ggml_backend_sycl_device_interface,
4641
+ /* .reg = */ &reg,
4642
+ /* .context = */ dev_ctx
4643
  };
4644
  ctx->devices.push_back(dev);
4645
  }
4646
 
4647
  reg = ggml_backend_reg {
4648
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
4649
+ /* .iface = */ ggml_backend_sycl_reg_interface,
4650
+ /* .context = */ ctx
4651
  };
4652
  }
4653
 
 
4679
  return sycl_backend;
4680
  }
4681
 
4682
+ GGML_BACKEND_DL_IMPL(ggml_backend_sycl_reg)
ggml/src/ggml-vulkan/CMakeLists.txt CHANGED
@@ -3,13 +3,13 @@ find_package(Vulkan COMPONENTS glslc REQUIRED)
3
  if (Vulkan_FOUND)
4
  message(STATUS "Vulkan found")
5
 
6
- add_library(ggml-vulkan
7
- ggml-vulkan.cpp
8
- ../../include/ggml-vulkan.h
9
- )
10
 
11
- target_link_libraries(ggml-vulkan PRIVATE ggml-base Vulkan::Vulkan)
12
- target_include_directories(ggml-vulkan PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
13
 
14
  # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
15
  # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
 
3
  if (Vulkan_FOUND)
4
  message(STATUS "Vulkan found")
5
 
6
+ ggml_add_backend_library(ggml-vulkan
7
+ ggml-vulkan.cpp
8
+ ../../include/ggml-vulkan.h
9
+ )
10
 
11
+ target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
12
+ target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
13
 
14
  # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
15
  # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -6738,8 +6738,9 @@ static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
6738
 
6739
  ggml_backend_reg_t ggml_backend_vk_reg() {
6740
  static ggml_backend_reg reg = {
6741
- /* .iface = */ ggml_backend_vk_reg_i,
6742
- /* .context = */ nullptr,
 
6743
  };
6744
 
6745
  return &reg;
@@ -7365,3 +7366,5 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) {
7365
  VK_LOG_DEBUG("END ggml_vk_check_results_1(" << tensor->name << ")");
7366
  }
7367
  #endif
 
 
 
6738
 
6739
  ggml_backend_reg_t ggml_backend_vk_reg() {
6740
  static ggml_backend_reg reg = {
6741
+ /* .api_version = */ GGML_BACKEND_API_VERSION,
6742
+ /* .iface = */ ggml_backend_vk_reg_i,
6743
+ /* .context = */ nullptr,
6744
  };
6745
 
6746
  return &reg;
 
7366
  VK_LOG_DEBUG("END ggml_vk_check_results_1(" << tensor->name << ")");
7367
  }
7368
  #endif
7369
+
7370
+ GGML_BACKEND_DL_IMPL(ggml_backend_vk_reg)
ggml/src/ggml.c CHANGED
@@ -7571,3 +7571,26 @@ void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7571
  g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7572
  g_logger_state.log_callback_user_data = user_data;
7573
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7571
  g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7572
  g_logger_state.log_callback_user_data = user_data;
7573
  }
7574
+
7575
+ void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7576
+ p->n_threads = n_threads;
7577
+ p->prio = 0; // default priority (usually means normal or inherited)
7578
+ p->poll = 50; // hybrid-polling enabled
7579
+ p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7580
+ p->paused = false; // threads are ready to go
7581
+ memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7582
+ }
7583
+
7584
+ struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7585
+ struct ggml_threadpool_params p;
7586
+ ggml_threadpool_params_init(&p, n_threads);
7587
+ return p;
7588
+ }
7589
+
7590
+ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7591
+ if (p0->n_threads != p1->n_threads ) return false;
7592
+ if (p0->prio != p1->prio ) return false;
7593
+ if (p0->poll != p1->poll ) return false;
7594
+ if (p0->strict_cpu != p1->strict_cpu ) return false;
7595
+ return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7596
+ }