ggml : add support for dynamic loading of backends (llama/10469)
* ggml : add support for dynamic loading of backends
---------
Co-authored-by: Georgi Gerganov <[email protected]>
- ggml/CMakeLists.txt +1 -0
- ggml/include/ggml-backend.h +15 -0
- ggml/include/ggml-cpu.h +6 -32
- ggml/include/ggml.h +31 -0
- ggml/src/CMakeLists.txt +33 -8
- ggml/src/ggml-backend-impl.h +33 -11
- ggml/src/ggml-backend-reg.cpp +239 -13
- ggml/src/ggml-blas/CMakeLists.txt +3 -6
- ggml/src/ggml-blas/ggml-blas.cpp +5 -2
- ggml/src/ggml-cann/CMakeLists.txt +3 -3
- ggml/src/ggml-cann/ggml-cann.cpp +8 -5
- ggml/src/ggml-cpu/CMakeLists.txt +9 -10
- ggml/src/ggml-cpu/ggml-cpu.c +0 -23
- ggml/src/ggml-cpu/ggml-cpu.cpp +38 -12
- ggml/src/ggml-cuda/ggml-cuda.cu +66 -5
- ggml/src/ggml-cuda/CMakeLists.txt +4 -7
- ggml/src/ggml-hip/CMakeLists.txt +4 -6
- ggml/src/ggml-kompute/CMakeLists.txt +5 -5
- ggml/src/ggml-kompute/ggml-kompute.cpp +5 -2
- ggml/src/ggml-metal/CMakeLists.txt +3 -6
- ggml/src/ggml-metal/ggml-metal.m +31 -3
- ggml/src/ggml-musa/CMakeLists.txt +4 -6
- ggml/src/ggml-rpc/CMakeLists.txt +3 -5
- ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
- ggml/src/ggml-sycl/CMakeLists.txt +4 -6
- ggml/src/ggml-sycl/ggml-sycl.cpp +7 -5
- ggml/src/ggml-vulkan/CMakeLists.txt +6 -6
- ggml/src/ggml-vulkan/ggml-vulkan.cpp +5 -2
- ggml/src/ggml.c +23 -0
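
With these changes an application can discover backends at runtime instead of linking them at build time. A minimal sketch of the intended usage, assuming the backend libraries sit next to the executable (the error handling below is illustrative, not part of this commit):

    #include "ggml-backend.h"
    #include <stdio.h>

    int main(void) {
        // load every known backend library that can be found (ggml-*.dll / libggml-*.so)
        ggml_backend_load_all();

        // enumerate the devices registered by the loaded backends
        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            printf("device %zu: %s\n", i, ggml_backend_dev_name(dev));
        }

        // pick the best available device and create a backend (stream) for it
        ggml_backend_t backend = ggml_backend_init_best();
        if (backend == NULL) {
            fprintf(stderr, "no usable backend found\n");
            return 1;
        }

        ggml_backend_free(backend);
        return 0;
    }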
ggml/CMakeLists.txt
CHANGED
@@ -33,6 +33,7 @@ else()
 endif()
 
 option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
 
 #
 # option list
ggml/include/ggml-backend.h
CHANGED
@@ -190,6 +190,14 @@ extern "C" {
     typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
     // Get additional buffer types provided by the device (returns a NULL-terminated array)
     typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
+    // Set the abort callback for the backend
+    typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
+    // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
+    struct ggml_backend_feature {
+        const char * name;
+        const char * value;
+    };
+    typedef struct ggml_backend_feature * (*ggml_backend_get_features_t)(ggml_backend_reg_t reg);
 
     //
     // Backend registry

@@ -214,6 +222,13 @@ extern "C" {
     // = ggml_backend_dev_init(ggml_backend_dev_by_type(GPU) OR ggml_backend_dev_by_type(CPU), NULL)
     GGML_API ggml_backend_t ggml_backend_init_best(void);
 
+    // Load a backend from a dynamic library and register it
+    GGML_API ggml_backend_reg_t ggml_backend_load(const char * path);
+    // Unload a backend if loaded dynamically and unregister it
+    GGML_API void ggml_backend_unload(ggml_backend_reg_t reg);
+    // Load all known backends from dynamic libraries
+    GGML_API void ggml_backend_load_all(void);
+
     //
     // Backend scheduler
     //
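
A short sketch of the load/unload pair declared above; the library path is an assumption (it follows the libggml-<name>.so naming that ggml_backend_load_all uses later in this diff):

    #include "ggml-backend.h"
    #include <stdio.h>

    int main(void) {
        // try to load a single backend from an explicit path (hypothetical location)
        ggml_backend_reg_t reg = ggml_backend_load("./libggml-blas.so");
        if (reg == NULL) {
            fprintf(stderr, "backend could not be loaded\n");
            return 1;
        }

        printf("loaded backend: %s (%zu devices)\n",
               ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));

        // unregister the backend and close the shared library again
        ggml_backend_unload(reg);
        return 0;
    }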
ggml/include/ggml-cpu.h
CHANGED
@@ -7,29 +7,6 @@
 extern "C" {
 #endif
 
-    // Scheduling priorities
-    enum ggml_sched_priority {
-        GGML_SCHED_PRIO_NORMAL,
-        GGML_SCHED_PRIO_MEDIUM,
-        GGML_SCHED_PRIO_HIGH,
-        GGML_SCHED_PRIO_REALTIME
-    };
-
-    // Threadpool params
-    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
-    struct ggml_threadpool_params {
-        bool                cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
-        int                 n_threads;                   // number of threads
-        enum ggml_sched_priority prio;                   // thread priority
-        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
-        bool                strict_cpu;                  // strict cpu placement
-        bool                paused;                      // start in paused state
-    };
-
-    struct ggml_threadpool; // forward declaration, see ggml.c
-
-    typedef struct ggml_threadpool * ggml_threadpool_t;
-
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {

@@ -75,14 +52,11 @@ extern "C" {
     GGML_BACKEND_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
     GGML_BACKEND_API void  ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
 
-    GGML_BACKEND_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
-    GGML_BACKEND_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
-    GGML_BACKEND_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
-    GGML_BACKEND_API struct ggml_threadpool *      ggml_threadpool_new           (struct ggml_threadpool_params * params);
-    GGML_BACKEND_API void                          ggml_threadpool_free          (struct ggml_threadpool * threadpool);
-    GGML_BACKEND_API int                           ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
-    GGML_BACKEND_API void                          ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
-    GGML_BACKEND_API void                          ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API struct ggml_threadpool * ggml_threadpool_new           (struct ggml_threadpool_params * params);
+    GGML_BACKEND_API void                     ggml_threadpool_free          (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API int                      ggml_threadpool_get_n_threads (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                     ggml_threadpool_pause         (struct ggml_threadpool * threadpool);
+    GGML_BACKEND_API void                     ggml_threadpool_resume        (struct ggml_threadpool * threadpool);
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data

@@ -104,10 +78,10 @@ extern "C" {
     GGML_BACKEND_API int ggml_cpu_has_sse3       (void);
     GGML_BACKEND_API int ggml_cpu_has_ssse3      (void);
     GGML_BACKEND_API int ggml_cpu_has_avx        (void);
+    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
     GGML_BACKEND_API int ggml_cpu_has_avx2       (void);
     GGML_BACKEND_API int ggml_cpu_has_f16c       (void);
     GGML_BACKEND_API int ggml_cpu_has_fma        (void);
-    GGML_BACKEND_API int ggml_cpu_has_avx_vnni   (void);
     GGML_BACKEND_API int ggml_cpu_has_avx512     (void);
     GGML_BACKEND_API int ggml_cpu_has_avx512_vbmi(void);
     GGML_BACKEND_API int ggml_cpu_has_avx512_vnni(void);
ggml/include/ggml.h
CHANGED
@@ -2215,6 +2215,37 @@ extern "C" {
 
     GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
 
+    // ggml threadpool
+    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
+    // the goal should be to create an API that other backends can use move everything to the ggml base
+
+    // scheduling priorities
+    enum ggml_sched_priority {
+        GGML_SCHED_PRIO_NORMAL,
+        GGML_SCHED_PRIO_MEDIUM,
+        GGML_SCHED_PRIO_HIGH,
+        GGML_SCHED_PRIO_REALTIME
+    };
+
+    // threadpool params
+    // Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
+    struct ggml_threadpool_params {
+        bool                cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int                 n_threads;                   // number of threads
+        enum ggml_sched_priority prio;                   // thread priority
+        uint32_t            poll;                        // polling level (0 - no polling, 100 - aggressive polling)
+        bool                strict_cpu;                  // strict cpu placement
+        bool                paused;                      // start in paused state
+    };
+
+    struct ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct ggml_threadpool * ggml_threadpool_t;
+
+    GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
+    GGML_API void                          ggml_threadpool_params_init   (struct ggml_threadpool_params * p, int n_threads);
+    GGML_API bool                          ggml_threadpool_params_match  (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);
+
 #ifdef __cplusplus
 }
 #endif
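
Since the threadpool parameter helpers now live in the base ggml API, callers can fill them in without pulling in the CPU backend header. A minimal sketch (the chosen field values are illustrative):

    #include "ggml.h"

    struct ggml_threadpool_params make_params(int n_threads) {
        // start from the defaults and tighten a few fields
        struct ggml_threadpool_params p = ggml_threadpool_params_default(n_threads);
        p.prio       = GGML_SCHED_PRIO_HIGH; // raise the thread priority
        p.poll       = 0;                    // no polling, sleep between graphs
        p.strict_cpu = false;                // keep the default affinity
        return p;
    }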
ggml/src/CMakeLists.txt
CHANGED
@@ -202,6 +202,10 @@ endif()
 
 # ggml
 
+if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
+    message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
+endif()
+
 add_library(ggml-base
             ../include/ggml.h
             ../include/ggml-alloc.h

@@ -226,6 +230,31 @@ add_library(ggml
 
 target_link_libraries(ggml PUBLIC ggml-base)
 
+if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+    target_link_libraries(ggml PRIVATE dl)
+endif()
+
+function(ggml_add_backend_library backend)
+    if (GGML_BACKEND_DL)
+        add_library(${backend} MODULE ${ARGN})
+        # write the shared library to the output directory
+        set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
+    else()
+        add_library(${backend} ${ARGN})
+        target_link_libraries(ggml PUBLIC ${backend})
+        install(TARGETS ${backend} LIBRARY)
+    endif()
+
+    target_link_libraries(${backend} PRIVATE ggml-base)
+    target_include_directories(${backend} PRIVATE ..)
+
+    if (${BUILD_SHARED_LIBS})
+        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
+        target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED)
+    endif()
+endfunction()
+
 function(ggml_add_backend backend)
     string(TOUPPER "GGML_${backend}" backend_id)
     if (${backend_id})

@@ -236,14 +265,10 @@ function(ggml_add_backend backend)
     # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
     if (${backend_id})
         message(STATUS "Including ${backend} backend")
-        if (${BUILD_SHARED_LIBS})
-            target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
-            target_compile_definitions(${backend_target} PUBLIC  GGML_BACKEND_SHARED)
+        if (NOT GGML_BACKEND_DL)
+            string(TOUPPER "GGML_USE_${backend}" backend_use)
+            target_compile_definitions(ggml PUBLIC ${backend_use})
         endif()
-        install(TARGETS ${backend_target} LIBRARY)
-        target_link_libraries(ggml PUBLIC ${backend_target})
-        string(TOUPPER "GGML_USE_${backend}" backend_use)
-        target_compile_definitions(ggml PUBLIC ${backend_use})
     endif()
     endif()
 endfunction()

@@ -256,10 +281,10 @@ ggml_add_backend(CUDA)
 ggml_add_backend(HIP)
 ggml_add_backend(Kompute)
 ggml_add_backend(METAL)
+ggml_add_backend(MUSA)
 ggml_add_backend(RPC)
 ggml_add_backend(SYCL)
 ggml_add_backend(Vulkan)
-ggml_add_backend(MUSA)
 
 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
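
To actually produce loadable backend modules, the new option has to be combined with shared libraries at configure time, for example (an illustrative invocation, not part of this diff): cmake -B build -DBUILD_SHARED_LIBS=ON -DGGML_BACKEND_DL=ON. Without GGML_BACKEND_DL, backends keep being built into (or linked against) libggml exactly as before.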
ggml/src/ggml-backend-impl.h
CHANGED
@@ -8,6 +8,8 @@
 extern "C" {
 #endif
 
+    #define GGML_BACKEND_API_VERSION 1
+
     //
     // Backend buffer type
     //

@@ -63,20 +65,20 @@ extern "C" {
         enum ggml_backend_buffer_usage usage;
     };
 
-    ggml_backend_buffer_t ggml_backend_buffer_init(
+    GGML_API ggml_backend_buffer_t ggml_backend_buffer_init(
                    ggml_backend_buffer_type_t buft,
             struct ggml_backend_buffer_i      iface,
                    void *                     context,
                    size_t                     size);
 
     // do not use directly, use ggml_backend_tensor_copy instead
-    bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
+    GGML_API bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
 
     // multi-buffer
     // buffer that contains a collection of buffers
-    ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
-    bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
-    void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
+    GGML_API ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers);
+    GGML_API bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer);
+    GGML_API void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
 
     //
     // Backend (stream)

@@ -199,17 +201,37 @@ extern "C" {
     };
 
     struct ggml_backend_reg {
+        int api_version; // initialize to GGML_BACKEND_API_VERSION
         struct ggml_backend_reg_i iface;
         void * context;
     };
 
     // Internal backend registry API
-    void ggml_backend_register(ggml_backend_reg_t reg);
-    void ggml_backend_device_register(ggml_backend_dev_t device);
+    GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
+    GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
+
+    // Add backend dynamic loading support to the backend
+    typedef ggml_backend_reg_t (*ggml_backend_init_t)(void);
+
+    #ifdef GGML_BACKEND_DL
+    #    ifdef __cplusplus
+    #        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
+                 extern "C" {                                                     \
+                     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void); \
+                 }                                                                \
+                 ggml_backend_reg_t ggml_backend_init(void) {                     \
+                     return reg_fn();                                             \
+                 }
+    #    else
+    #        define GGML_BACKEND_DL_IMPL(reg_fn)                                  \
+                 GGML_BACKEND_API ggml_backend_reg_t ggml_backend_init(void);     \
+                 ggml_backend_reg_t ggml_backend_init(void) {                     \
+                     return reg_fn();                                             \
+                 }
+    #    endif
+    #else
+    #    define GGML_BACKEND_DL_IMPL(reg_fn)
+    #endif
 
 #ifdef __cplusplus
 }
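
For backend authors the pattern is: set api_version to GGML_BACKEND_API_VERSION in the registration struct and expose the entry point with GGML_BACKEND_DL_IMPL. A condensed sketch modelled on the BLAS backend below; everything prefixed my_ is hypothetical and the interface callbacks are omitted:

    #include "ggml-backend-impl.h"

    // registration interface of the backend; callbacks omitted in this sketch
    static const struct ggml_backend_reg_i my_backend_reg_i = { 0 };

    ggml_backend_reg_t my_backend_reg(void) {
        static struct ggml_backend_reg reg;
        reg.api_version = GGML_BACKEND_API_VERSION; // checked by the loader
        reg.iface       = my_backend_reg_i;
        reg.context     = NULL;
        return &reg;
    }

    // when built with GGML_BACKEND_DL, this expands to an exported ggml_backend_init()
    // that the registry resolves with dlsym()/GetProcAddress() and calls to obtain the reg
    GGML_BACKEND_DL_IMPL(my_backend_reg)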
ggml/src/ggml-backend-reg.cpp
CHANGED
@@ -1,11 +1,29 @@
 #include "ggml-backend-impl.h"
 #include "ggml-backend.h"
-#include "ggml-cpu.h"
 #include "ggml-impl.h"
+#include <algorithm>
 #include <cstring>
+#include <string>
 #include <vector>
 
+#ifdef _WIN32
+#    define WIN32_LEAN_AND_MEAN
+#    ifndef NOMINMAX
+#        define NOMINMAX
+#    endif
+#    include <windows.h>
+#elif defined(__APPLE__)
+#    include <mach-o/dyld.h>
+#    include <dlfcn.h>
+#else
+#    include <dlfcn.h>
+#    include <unistd.h>
+#endif
+
 // Backend registry
+#ifdef GGML_USE_CPU
+#include "ggml-cpu.h"
+#endif
 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"

@@ -43,8 +61,13 @@
 #include "ggml-kompute.h"
 #endif
 
+struct ggml_backend_reg_entry {
+    ggml_backend_reg_t reg;
+    void * handle;
+};
+
 struct ggml_backend_registry {
-    std::vector<ggml_backend_reg_t> backends;
+    std::vector<ggml_backend_reg_entry> backends;
     std::vector<ggml_backend_dev_t> devices;
 
     ggml_backend_registry() {

@@ -75,11 +98,19 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_KOMPUTE
         register_backend(ggml_backend_kompute_reg());
 #endif
-
+#ifdef GGML_USE_CPU
         register_backend(ggml_backend_cpu_reg());
+#endif
     }
 
-    void register_backend(ggml_backend_reg_t reg) {
+    ~ggml_backend_registry() {
+        while (!backends.empty()) {
+            // use silent since the log system may have been destroyed at this point
+            unload_backend(backends.back().reg, true);
+        }
+    }
+
+    void register_backend(ggml_backend_reg_t reg, void * handle = nullptr) {
         if (!reg) {
             return;
         }

@@ -88,7 +119,7 @@ struct ggml_backend_registry {
         GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n",
             __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg));
 #endif
-        backends.push_back(reg);
+        backends.push_back({ reg, handle });
         for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
             register_device(ggml_backend_reg_dev_get(reg, i));
         }

@@ -100,6 +131,111 @@ struct ggml_backend_registry {
 #endif
         devices.push_back(device);
     }
+
+    ggml_backend_reg_t load_backend(const char * path, bool silent) {
+#ifdef _WIN32
+        // suppress error dialogs for missing DLLs
+        DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+        SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+        HMODULE handle = LoadLibraryA(path);
+
+        if (!handle) {
+            if (!silent) {
+                GGML_LOG_ERROR("%s: failed to load %s: %lu\n", __func__, path, GetLastError());
+            }
+            SetErrorMode(old_mode);
+            return nullptr;
+        }
+
+        ggml_backend_init_t backend_init = (ggml_backend_init_t) GetProcAddress(handle, "ggml_backend_init");
+
+        SetErrorMode(old_mode);
+
+        if (!backend_init) {
+            if (!silent) {
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %lu\n", __func__, path, GetLastError());
+            }
+            FreeLibrary(handle);
+            return nullptr;
+        }
+#else
+        void * handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
+
+        if (!handle) {
+            if (!silent) {
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path, dlerror());
+            }
+            return nullptr;
+        }
+
+        auto * backend_init = (ggml_backend_init_t) dlsym(handle, "ggml_backend_init");
+
+        if (!backend_init) {
+            if (!silent) {
+                GGML_LOG_ERROR("%s: failed to find ggml_backend_init in %s: %s\n", __func__, path, dlerror());
+            }
+            dlclose(handle);
+            return nullptr;
+        }
+#endif
+        ggml_backend_reg_t reg = backend_init();
+
+        if (!reg || reg->api_version != GGML_BACKEND_API_VERSION) {
+            if (!silent) {
+                if (!reg) {
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: ggml_backend_init returned NULL\n", __func__, path);
+                } else {
+                    GGML_LOG_ERROR("%s: failed to initialize backend from %s: incompatible API version (backend: %d, current: %d)\n",
+                        __func__, path, reg->api_version, GGML_BACKEND_API_VERSION);
+                }
+            }
+#ifdef _WIN32
+            FreeLibrary(handle);
+#else
+            dlclose(handle);
+#endif
+            return nullptr;
+        }
+
+        GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), path);
+        register_backend(reg, handle);
+        return reg;
+    }
+
+    void unload_backend(ggml_backend_reg_t reg, bool silent) {
+        auto it = std::find_if(backends.begin(), backends.end(),
+            [reg](ggml_backend_reg_entry entry) { return entry.reg == reg; });
+
+        if (it == backends.end()) {
+            if (!silent) {
+                GGML_LOG_ERROR("%s: backend not found\n", __func__);
+            }
+            return;
+        }
+
+        if (!silent) {
+            GGML_LOG_DEBUG("%s: unloading %s backend\n", __func__, ggml_backend_reg_name(reg));
+        }
+
+        // remove devices
+        devices.erase(
+            std::remove_if(devices.begin(), devices.end(),
+                [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
+            devices.end());
+
+        // unload library
+        if (it->handle) {
+#ifdef _WIN32
+            FreeLibrary((HMODULE) it->handle);
+#else
+            dlclose(it->handle);
+#endif
+        }
+
+        // remove backend
+        backends.erase(it);
+    }
 };
 
 static ggml_backend_registry & get_reg() {

@@ -123,7 +259,7 @@ size_t ggml_backend_reg_count() {
 
 ggml_backend_reg_t ggml_backend_reg_get(size_t index) {
     GGML_ASSERT(index < ggml_backend_reg_count());
-    return get_reg().backends[index];
+    return get_reg().backends[index].reg;
 }
 
 ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {

@@ -133,7 +269,7 @@ ggml_backend_reg_t ggml_backend_reg_by_name(const char * name) {
             return reg;
         }
     }
-    return NULL;
+    return nullptr;
 }
 
 // Device enumeration

@@ -153,7 +289,7 @@ ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
             return dev;
         }
     }
-    return NULL;
+    return nullptr;
 }
 
 ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {

@@ -163,14 +299,14 @@ ggml_backend_dev_t ggml_backend_dev_by_type(enum ggml_backend_dev_type type) {
             return dev;
         }
     }
-    return NULL;
+    return nullptr;
 }
 
 // Convenience functions
 ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params) {
     ggml_backend_dev_t dev = ggml_backend_dev_by_name(name);
     if (!dev) {
-        return NULL;
+        return nullptr;
     }
     return ggml_backend_dev_init(dev, params);
 }

@@ -178,7 +314,7 @@ ggml_backend_t ggml_backend_init_by_name(const char * name, const char * params)
 ggml_backend_t ggml_backend_init_by_type(enum ggml_backend_dev_type type, const char * params) {
     ggml_backend_dev_t dev = ggml_backend_dev_by_type(type);
     if (!dev) {
-        return NULL;
+        return nullptr;
     }
     return ggml_backend_dev_init(dev, params);
 }

@@ -189,7 +325,97 @@ ggml_backend_t ggml_backend_init_best(void) {
         dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
     }
     if (!dev) {
-        return NULL;
+        return nullptr;
     }
-    return ggml_backend_dev_init(dev, NULL);
+    return ggml_backend_dev_init(dev, nullptr);
+}
+
+// Dynamic loading
+ggml_backend_reg_t ggml_backend_load(const char * path) {
+    return get_reg().load_backend(path, false);
+}
+
+void ggml_backend_unload(ggml_backend_reg_t reg) {
+    get_reg().unload_backend(reg, true);
+}
+
+void ggml_backend_load_all() {
+    std::vector<std::string> search_prefix;
+
+    // add the executable directory to the search path
+    // FIXME: this is convenient for development, but it should probably be disabled in production
+
+#if defined(__APPLE__)
+    // get executable path
+    std::vector<char> path;
+    uint32_t size;
+    while (true) {
+        size = path.size();
+        if (_NSGetExecutablePath(path.data(), &size) == 0) {
+            break;
+        }
+        path.resize(size);
+    }
+    std::string base_path(path.data(), size);
+    // remove executable name
+    auto last_slash = base_path.find_last_of('/');
+    if (last_slash != std::string::npos) {
+        base_path = base_path.substr(0, last_slash);
+    }
+    search_prefix.push_back(base_path + "/");
+#elif defined(__linux__)
+    std::string base_path = ".";
+    std::vector<char> path(1024);
+    while (true) {
+        // get executable path
+        ssize_t len = readlink("/proc/self/exe", path.data(), path.size());
+        if (len == -1) {
+            break;
+        }
+        if (len < (ssize_t) path.size()) {
+            base_path = std::string(path.data(), len);
+            // remove executable name
+            auto last_slash = base_path.find_last_of('/');
+            if (last_slash != std::string::npos) {
+                base_path = base_path.substr(0, last_slash);
+            }
+            break;
+        }
+        path.resize(path.size() * 2);
+    }
+
+    search_prefix.push_back(base_path + "/");
+#endif
+
+    auto & reg = get_reg();
+
+    auto try_load = [&](const std::string & name) {
+        std::string os_name;
+#ifdef _WIN32
+        os_name = "ggml-" + name + ".dll";
+#else
+        os_name = "libggml-" + name + ".so";
+#endif
+        if (reg.load_backend(os_name.c_str(), true)) {
+            return;
+        }
+        for (const auto & prefix : search_prefix) {
+            if (reg.load_backend((prefix + os_name).c_str(), true)) {
+                return;
+            }
+        }
+    };
+
+    try_load("amx");
+    try_load("blas");
+    try_load("cann");
+    try_load("cuda");
+    try_load("hip");
+    try_load("kompute");
+    try_load("metal");
+    try_load("rpc");
+    try_load("sycl");
+    try_load("vulkan");
+    try_load("musa");
+    try_load("cpu");
 }
ggml/src/ggml-blas/CMakeLists.txt
CHANGED
@@ -11,12 +11,9 @@ find_package(BLAS)
 if (BLAS_FOUND)
     message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
 
-    add_library(ggml-blas
-                ggml-blas.cpp
-               )
-
-    target_link_libraries(ggml-blas PRIVATE ggml-base)
-    target_include_directories(ggml-blas PRIVATE . ..)
+    ggml_add_backend_library(ggml-blas
+                             ggml-blas.cpp
+                            )
 
     if (${GGML_BLAS_VENDOR} MATCHES "Apple")
         add_compile_definitions(ACCELERATE_NEW_LAPACK)
ggml/src/ggml-blas/ggml-blas.cpp
CHANGED
@@ -506,9 +506,12 @@ static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
 
 ggml_backend_reg_t ggml_backend_blas_reg(void) {
     static struct ggml_backend_reg ggml_backend_blas_reg = {
-        /* .iface   = */ ggml_backend_blas_reg_i,
-        /* .context = */ NULL,
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface       = */ ggml_backend_blas_reg_i,
+        /* .context     = */ NULL,
     };
 
     return &ggml_backend_blas_reg;
 }
+
+GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
ggml/src/ggml-cann/CMakeLists.txt
CHANGED
@@ -61,9 +61,9 @@ if (CANN_INSTALL_DIR)
 
     file(GLOB GGML_SOURCES_CANN "*.cpp")
 
-    add_library(ggml-cann ${GGML_SOURCES_CANN})
-    target_link_libraries(ggml-cann PRIVATE ggml-base ${CANN_LIBRARIES})
-    target_include_directories(ggml-cann PRIVATE . .. ${CANN_INCLUDE_DIRS})
+    ggml_add_backend_library(ggml-cann ${GGML_SOURCES_CANN})
+    target_link_libraries(ggml-cann PRIVATE ${CANN_LIBRARIES})
+    target_include_directories(ggml-cann PRIVATE ${CANN_INCLUDE_DIRS})
     target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
 
     target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
ggml/src/ggml-cann/ggml-cann.cpp
CHANGED
@@ -2064,16 +2064,17 @@ ggml_backend_reg_t ggml_backend_cann_reg() {
         dev_ctx->name = GGML_CANN_NAME + std::to_string(i);
         ggml_cann_set_device(i);
         ggml_backend_dev_t dev = new ggml_backend_device {
             /* .iface   = */ ggml_backend_cann_device_interface,
             /* .reg     = */ &reg,
             /* .context = */ dev_ctx
         };
         ctx->devices.push_back(dev);
     }
 
     reg = ggml_backend_reg {
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
         /* .iface       = */ ggml_backend_cann_reg_interface,
         /* .context     = */ ctx
     };
 }

@@ -2126,3 +2127,5 @@ void ggml_backend_cann_get_device_memory
     ggml_cann_set_device(device);
     ACL_CHECK(aclrtGetMemInfo(ACL_HBM_MEM, free, total));
 }
+
+GGML_BACKEND_DL_IMPL(ggml_backend_cann_reg)
ggml/src/ggml-cpu/CMakeLists.txt
CHANGED
@@ -1,14 +1,13 @@
-add_library(ggml-cpu
-            ggml-cpu.c
-            ggml-cpu.cpp
-            ggml-cpu-aarch64.c
-            ggml-cpu-aarch64.h
-            ggml-cpu-quants.c
-            ggml-cpu-quants.h
-            )
+ggml_add_backend_library(ggml-cpu
+                         ggml-cpu.c
+                         ggml-cpu.cpp
+                         ggml-cpu-aarch64.c
+                         ggml-cpu-aarch64.h
+                         ggml-cpu-quants.c
+                         ggml-cpu-quants.h
+                         )
 
-target_link_libraries(ggml-cpu PRIVATE ggml-base)
-target_include_directories(ggml-cpu PRIVATE . ..)
+target_include_directories(ggml-cpu PRIVATE .)
 
 if (APPLE AND GGML_ACCELERATE)
     find_library(ACCELERATE_FRAMEWORK Accelerate)
ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
@@ -13578,29 +13578,6 @@ static void ggml_graph_compute_kickoff
 
 #endif // GGML_USE_OPENMP
 
-void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
-    p->n_threads  = n_threads;
-    p->prio       = 0;     // default priority (usually means normal or inherited)
-    p->poll       = 50;    // hybrid-polling enabled
-    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
-    p->paused     = false; // threads are ready to go
-    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
-}
-
-struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
-    struct ggml_threadpool_params p;
-    ggml_threadpool_params_init(&p, n_threads);
-    return p;
-}
-
-bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
-    if (p0->n_threads  != p1->n_threads )  return false;
-    if (p0->prio       != p1->prio      )  return false;
-    if (p0->poll       != p1->poll      )  return false;
-    if (p0->strict_cpu != p1->strict_cpu)  return false;
-    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
-}
-
 static struct ggml_threadpool * ggml_threadpool_new_impl(
                  struct ggml_threadpool_params * tpp,
                  struct ggml_cgraph * cgraph,
ggml/src/ggml-cpu/ggml-cpu.cpp
CHANGED
@@ -541,16 +541,12 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device
     return &ggml_backend_cpu_device;
 }
 
-struct ggml_backend_feature {
-    const char * name;
-    const char * value;
-};
-
-// Not used yet
 // This is intended to replace the the ggml_cpu_has_* functions when loading the CPU backend dynamically,
 // and additionally to allow other backends to expose their own list of features that applications can query using the same API
 static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
     static std::vector<ggml_backend_feature> features = []() {
+        ggml_cpu_init();
+
         std::vector<ggml_backend_feature> features;
         if (ggml_cpu_has_sse3()) {
             features.push_back({ "SSE3", "1" });

@@ -561,6 +557,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features
         if (ggml_cpu_has_avx()) {
             features.push_back({ "AVX", "1" });
         }
+        if (ggml_cpu_has_avx_vnni()) {
+            features.push_back({ "AVX_VNNI", "1" });
+        }
         if (ggml_cpu_has_avx2()) {
             features.push_back({ "AVX2", "1" });
         }

@@ -570,9 +569,6 @@ static ggml_backend_feature * ggml_backend_cpu_get_features
         if (ggml_cpu_has_fma()) {
            features.push_back({ "FMA", "1" });
         }
-        if (ggml_cpu_has_avx_vnni()) {
-            features.push_back({ "AVX_VNNI", "1" });
-        }
         if (ggml_cpu_has_avx512()) {
             features.push_back({ "AVX512", "1" });
         }

@@ -619,6 +615,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features
         if (ggml_cpu_has_llamafile()) {
             features.push_back({ "LLAMAFILE", "1" });
         }
+        // TODO: rename this
+    #ifdef GGML_USE_CPU_AARCH64
+        features.push_back({ "AARCH64_REPACK", "1" });
+    #endif
 
         features.push_back({ nullptr, nullptr });
 

@@ -637,6 +637,29 @@ static void * ggml_backend_cpu_get_proc_address
     if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
         return (void *)ggml_backend_cpu_get_extra_bufts;
     }
+    if (strcmp(name, "ggml_backend_get_features") == 0) {
+        return (void *)ggml_backend_cpu_get_features;
+    }
+    if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
+        return (void *)ggml_backend_cpu_set_abort_callback;
+    }
+    if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
+        return (void *)ggml_numa_init;
+    }
+    if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
+        return (void *)ggml_is_numa;
+    }
+
+    // threadpool - TODO: move to ggml-base
+    if (strcmp(name, "ggml_threadpool_new") == 0) {
+        return (void *)ggml_threadpool_new;
+    }
+    if (strcmp(name, "ggml_threadpool_free") == 0) {
+        return (void *)ggml_threadpool_free;
+    }
+    if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
+        return (void *)ggml_backend_cpu_set_threadpool;
+    }
 
     return NULL;

@@ -655,9 +678,12 @@ ggml_backend_reg_t ggml_backend_cpu_reg(void) {
     ggml_cpu_init();
 
     static struct ggml_backend_reg ggml_backend_cpu_reg = {
-        /* .iface   = */ ggml_backend_cpu_reg_i,
-        /* .context = */ NULL,
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface       = */ ggml_backend_cpu_reg_i,
+        /* .context     = */ NULL,
    };
 
     return &ggml_backend_cpu_reg;
 }
+
+GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
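
The feature list is reached through the generic get_proc_address hook rather than a dedicated exported symbol, so it also works for dynamically loaded backends. A small sketch of a caller, with error handling kept minimal:

    #include "ggml-backend.h"
    #include <stdio.h>

    static void print_backend_features(ggml_backend_reg_t reg) {
        // resolve the optional "ggml_backend_get_features" entry point, if the backend provides one
        ggml_backend_get_features_t get_features = (ggml_backend_get_features_t)
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
        if (get_features == NULL) {
            return;
        }

        // the returned array is NULL-terminated
        for (struct ggml_backend_feature * f = get_features(reg); f->name != NULL; f++) {
            printf("%s: %s = %s\n", ggml_backend_reg_name(reg), f->name, f->value);
        }
    }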
ggml/src/ggml-cuda/ggml-cuda.cu
CHANGED
@@ -3126,6 +3126,61 @@ static ggml_backend_dev_t ggml_backend_cuda_reg_get_device
     return ctx->devices[index];
 }
 
+static ggml_backend_feature * ggml_backend_cuda_get_features(ggml_backend_reg_t reg) {
+    static std::vector<ggml_backend_feature> features = []() {
+        std::vector<ggml_backend_feature> features;
+    #define _STRINGIFY(...) #__VA_ARGS__
+    #define STRINGIFY(...) _STRINGIFY(__VA_ARGS__)
+
+    #ifdef __CUDA_ARCH_LIST__
+        features.push_back({ "ARCHS", STRINGIFY(__CUDA_ARCH_LIST__) });
+    #endif
+
+    #ifdef GGML_CUDA_FORCE_MMQ
+        features.push_back({ "FORCE_MMQ", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_FORCE_CUBLAS
+        features.push_back({ "FORCE_CUBLAS", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_NO_VMM
+        features.push_back({ "NO_VMM", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_NO_PEER_COPY
+        features.push_back({ "NO_PEER_COPY", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_F16
+        features.push_back({ "F16", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_USE_GRAPHS
+        features.push_back({ "USE_GRAPHS", "1" });
+    #endif
+
+    #ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
+        features.push_back({ "PEER_MAX_BATCH_SIZE", STRINGIFY(GGML_CUDA_PEER_MAX_BATCH_SIZE) });
+    #endif
+
+    #ifdef GGML_CUDA_FA_ALL_QUANTS
+        features.push_back({ "FA_ALL_QUANTS", "1" });
+    #endif
+
+    #undef _STRINGIFY
+    #undef STRINGIFY
+
+        features.push_back({ nullptr, nullptr });
+
+        return features;
+    }();
+
+    return features.data();
+
+    GGML_UNUSED(reg);
+}
+
 static void * ggml_backend_cuda_reg_get_proc_address(ggml_backend_reg_t reg, const char * name) {
     GGML_UNUSED(reg);
     if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {

@@ -3137,6 +3192,9 @@ static void * ggml_backend_cuda_reg_get_proc_address
     if (strcmp(name, "ggml_backend_unregister_host_buffer") == 0) {
         return (void *)ggml_backend_cuda_unregister_host_buffer;
     }
+    if (strcmp(name, "ggml_backend_get_features") == 0) {
+        return (void *)ggml_backend_cuda_get_features;
+    }
     return nullptr;
 }

@@ -3169,16 +3227,17 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
         dev_ctx->description = prop.name;
 
         ggml_backend_dev_t dev = new ggml_backend_device {
            /* .iface   = */ ggml_backend_cuda_device_interface,
            /* .reg     = */ &reg,
            /* .context = */ dev_ctx
        };
        ctx->devices.push_back(dev);
     }
 
     reg = ggml_backend_reg {
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
         /* .iface       = */ ggml_backend_cuda_reg_interface,
         /* .context     = */ ctx
     };
 }

@@ -3209,3 +3268,5 @@ ggml_backend_t ggml_backend_cuda_init(int device) {
 
     return cuda_backend;
 }
+
+GGML_BACKEND_DL_IMPL(ggml_backend_cuda_reg)
ggml/src/ggml-cuda/CMakeLists.txt
CHANGED
@@ -46,13 +46,10 @@ if (CUDAToolkit_FOUND)
         list(APPEND GGML_SOURCES_CUDA ${SRCS})
     endif()
 
-    add_library(ggml-cuda
-                ${GGML_HEADERS_CUDA}
-                ${GGML_SOURCES_CUDA}
-               )
-
-    target_link_libraries(ggml-cuda PRIVATE ggml-base)
-    target_include_directories(ggml-cuda PRIVATE . ..)
+    ggml_add_backend_library(ggml-cuda
+                             ${GGML_HEADERS_CUDA}
+                             ${GGML_SOURCES_CUDA}
+                            )
 
     add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
ggml/src/ggml-hip/CMakeLists.txt
CHANGED
@@ -64,12 +64,10 @@ else()
     list(APPEND GGML_SOURCES_ROCM ${SRCS})
 endif()
 
-add_library(ggml-hip
-            ${GGML_HEADERS_ROCM}
-            ${GGML_SOURCES_ROCM}
-           )
-
-target_link_libraries(ggml-hip PRIVATE ggml-base)
-target_include_directories(ggml-hip PRIVATE . ..)
+ggml_add_backend_library(ggml-hip
+                         ${GGML_HEADERS_ROCM}
+                         ${GGML_SOURCES_ROCM}
+                        )
 
 # TODO: do not use CUDA definitions for HIP
 target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
ggml/src/ggml-kompute/CMakeLists.txt
CHANGED
@@ -6,13 +6,13 @@ if (NOT glslc_executable)
     message(FATAL_ERROR "glslc not found")
 endif()
 
-add_library(ggml-kompute
-            ggml-kompute.cpp
-            ../../include/ggml-kompute.h
-           )
+ggml_add_backend_library(ggml-kompute
+                         ggml-kompute.cpp
+                         ../../include/ggml-kompute.h
+                        )
 
 target_link_libraries(ggml-kompute PRIVATE ggml-base kompute)
-target_include_directories(ggml-kompute PRIVATE . .. ${CMAKE_CURRENT_BINARY_DIR})
+target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
 add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
ggml/src/ggml-kompute/ggml-kompute.cpp
CHANGED
@@ -2176,9 +2176,12 @@ static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = {
 
 ggml_backend_reg_t ggml_backend_kompute_reg() {
     static ggml_backend_reg reg = {
-        /* .iface   = */ ggml_backend_kompute_reg_i,
-        /* .context = */ nullptr,
+        /* .api_version = */ GGML_BACKEND_API_VERSION,
+        /* .iface       = */ ggml_backend_kompute_reg_i,
+        /* .context     = */ nullptr,
     };
 
     return &reg;
 }
+
+GGML_BACKEND_DL_IMPL(ggml_backend_kompute_reg)
ggml/src/ggml-metal/CMakeLists.txt
CHANGED
@@ -4,19 +4,16 @@ find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
 
 message(STATUS "Metal framework found")
 
-add_library(ggml-metal
-            ggml-metal.m
-           )
+ggml_add_backend_library(ggml-metal
+                         ggml-metal.m
+                        )
 
 target_link_libraries(ggml-metal PRIVATE
-    ggml-base
     ${FOUNDATION_LIBRARY}
     ${METAL_FRAMEWORK}
     ${METALKIT_FRAMEWORK}
     )
 
-target_include_directories(ggml-metal PRIVATE . ..)
-
 if (GGML_METAL_NDEBUG)
     add_compile_definitions(GGML_METAL_NDEBUG)
 endif()
ggml/src/ggml-metal/ggml-metal.m
CHANGED
@@ -4448,19 +4448,45 @@ static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t r
| 4448 |     GGML_UNUSED(index);
| 4449 | }
| 4450 |
| 4451 |   static struct ggml_backend_reg_i ggml_backend_metal_reg_i = {
| 4452 |       /* .get_name         = */ ggml_backend_metal_reg_get_name,
| 4453 |       /* .device_count     = */ ggml_backend_metal_reg_device_count,
| 4454 |       /* .device_get       = */ ggml_backend_metal_reg_device_get,
| 4455 | -     /* .get_proc_address = */
| 4456 |   };
| 4457 |
| 4458 |   ggml_backend_reg_t ggml_backend_metal_reg(void) {
| 4459 |       // TODO: make this thread-safe somehow?
| 4460 |       {
| 4461 |           g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
| 4462 | -             /* .
| 4463 | -             /* .
| 4464 |           };
| 4465 |
| 4466 |           g_ggml_backend_metal_device = (struct ggml_backend_device) {
@@ -4472,3 +4498,5 @@ ggml_backend_reg_t ggml_backend_metal_reg(void) {
| 4472 |
| 4473 |       return &g_ggml_backend_metal_reg;
| 4474 |   }
| 4448 |     GGML_UNUSED(index);
| 4449 | }
| 4450 |
| 4451 | + static struct ggml_backend_feature g_ggml_backend_metal_features[] = {
| 4452 | + #if defined(GGML_METAL_EMBED_LIBRARY)
| 4453 | +     { "EMBED_LIBRARY", "1" },
| 4454 | + #endif
| 4455 | + #if defined(GGML_METAL_USE_BF16)
| 4456 | +     { "BF16", "1" },
| 4457 | + #endif
| 4458 | +     { nil, nil },
| 4459 | + };
| 4460 | +
| 4461 | + static struct ggml_backend_feature * ggml_backend_metal_get_features(ggml_backend_reg_t reg) {
| 4462 | +     return g_ggml_backend_metal_features;
| 4463 | +
| 4464 | +     GGML_UNUSED(reg);
| 4465 | + }
| 4466 | +
| 4467 | + static void * ggml_backend_metal_get_proc_address(ggml_backend_reg_t reg, const char * name) {
| 4468 | +     if (strcmp(name, "ggml_backend_get_features") == 0) {
| 4469 | +         return (void *)ggml_backend_metal_get_features;
| 4470 | +     }
| 4471 | +
| 4472 | +     return NULL;
| 4473 | +
| 4474 | +     GGML_UNUSED(reg);
| 4475 | + }
| 4476 |   static struct ggml_backend_reg_i ggml_backend_metal_reg_i = {
| 4477 |       /* .get_name         = */ ggml_backend_metal_reg_get_name,
| 4478 |       /* .device_count     = */ ggml_backend_metal_reg_device_count,
| 4479 |       /* .device_get       = */ ggml_backend_metal_reg_device_get,
| 4480 | +     /* .get_proc_address = */ ggml_backend_metal_get_proc_address,
| 4481 |   };
| 4482 |
| 4483 |   ggml_backend_reg_t ggml_backend_metal_reg(void) {
| 4484 |       // TODO: make this thread-safe somehow?
| 4485 |       {
| 4486 |           g_ggml_backend_metal_reg = (struct ggml_backend_reg) {
| 4487 | +             /* .api_version = */ GGML_BACKEND_API_VERSION,
| 4488 | +             /* .iface       = */ ggml_backend_metal_reg_i,
| 4489 | +             /* .context     = */ NULL,
| 4490 |           };
| 4491 |
| 4492 |           g_ggml_backend_metal_device = (struct ggml_backend_device) {
| 4498 |
| 4499 |       return &g_ggml_backend_metal_reg;
| 4500 |   }
| 4501 | +
| 4502 | + GGML_BACKEND_DL_IMPL(ggml_backend_metal_reg)
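The Metal changes show the pattern for exposing optional capabilities: a NULL-terminated ggml_backend_feature array returned by a function that is only reachable through get_proc_address under the name "ggml_backend_get_features". Below is a hedged sketch of how a client might query it, assuming the existing ggml_backend_reg_get_proc_address() and ggml_backend_reg_name() helpers; it is an illustration, not code from this change.

    #include <stdio.h>
    #include "ggml-backend.h"

    static void print_backend_features(ggml_backend_reg_t reg) {
        ggml_backend_get_features_t get_features = (ggml_backend_get_features_t)
            ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
        if (get_features == NULL) {
            printf("%s: no feature list exposed\n", ggml_backend_reg_name(reg));
            return;
        }
        // The array is terminated by an entry whose name is NULL ({ nil, nil } above).
        for (struct ggml_backend_feature * f = get_features(reg); f->name != NULL; f++) {
            printf("%s: %s = %s\n", ggml_backend_reg_name(reg), f->name, f->value);
        }
    }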
ggml/src/ggml-musa/ggml/CMakeLists.txt
CHANGED
@@ -47,12 +47,10 @@ if (MUSAToolkit_FOUND)
| 47 |   set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
| 48 |   endforeach()
| 49 |
| 50 | -
| 51 | -
| 52 | -
| 53 | -
| 54 | - target_link_libraries(ggml-musa PRIVATE ggml-base)
| 55 | - target_include_directories(ggml-musa PRIVATE . ..)
| 56 |
| 57 |   # TODO: do not use CUDA definitions for MUSA
| 58 |   target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
| 47 |   set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS "-x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22")
| 48 |   endforeach()
| 49 |
| 50 | + ggml_add_backend_library(ggml-musa
| 51 | +     ${GGML_HEADERS_MUSA}
| 52 | +     ${GGML_SOURCES_MUSA}
| 53 | +   )
| 54 |
| 55 |   # TODO: do not use CUDA definitions for MUSA
| 56 |   target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
ggml/src/ggml-rpc/CMakeLists.txt
CHANGED
@@ -1,10 +1,8 @@
| 1 |   message(STATUS "Using RPC backend")
| 2 |
| 3 | -
| 4 | -
| 5 | -
| 6 | - target_link_libraries(ggml-rpc PRIVATE ggml-base)
| 7 | - target_include_directories(ggml-rpc PRIVATE . ..)
| 8 |
| 9 |   if (WIN32)
| 10 |     target_link_libraries(ggml-rpc PRIVATE ws2_32)
| 1 |   message(STATUS "Using RPC backend")
| 2 |
| 3 | + ggml_add_backend_library(ggml-rpc
| 4 | +     ggml-rpc.cpp
| 5 | +   )
| 6 |
| 7 |   if (WIN32)
| 8 |     target_link_libraries(ggml-rpc PRIVATE ws2_32)
ggml/src/ggml-rpc/ggml-rpc.cpp
CHANGED
@@ -1369,8 +1369,9 @@ static const struct ggml_backend_reg_i ggml_backend_rpc_reg_i = {
| 1369 |
| 1370 |   ggml_backend_reg_t ggml_backend_rpc_reg(void) {
| 1371 |       static struct ggml_backend_reg ggml_backend_rpc_reg = {
| 1372 | -         /* .
| 1373 | -         /* .
| 1374 |       };
| 1375 |
| 1376 |       return &ggml_backend_rpc_reg;
@@ -1401,3 +1402,5 @@ ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint) {
| 1401 |
| 1402 |       return dev;
| 1403 |   }
| 1369 |
| 1370 |   ggml_backend_reg_t ggml_backend_rpc_reg(void) {
| 1371 |       static struct ggml_backend_reg ggml_backend_rpc_reg = {
| 1372 | +         /* .api_version = */ GGML_BACKEND_API_VERSION,
| 1373 | +         /* .iface       = */ ggml_backend_rpc_reg_i,
| 1374 | +         /* .context     = */ NULL,
| 1375 |       };
| 1376 |
| 1377 |       return &ggml_backend_rpc_reg;
| 1402 |
| 1403 |       return dev;
| 1404 |   }
| 1405 | +
| 1406 | + GGML_BACKEND_DL_IMPL(ggml_backend_rpc_reg)
ggml/src/ggml-sycl/CMakeLists.txt
CHANGED
@@ -16,12 +16,10 @@ endif()
| 16 |   message(STATUS "SYCL found")
| 17 |   #todo: AOT
| 18 |
| 19 | -
| 20 | -
| 21 | -
| 22 | -
| 23 | - target_link_libraries(ggml-sycl PRIVATE ggml-base)
| 24 | - target_include_directories(ggml-sycl PRIVATE . ..)
| 25 |
| 26 |   if (GGML_SYCL_F16)
| 27 |       if (GGML_SYCL_TARGET STREQUAL "AMD")
| 16 |   message(STATUS "SYCL found")
| 17 |   #todo: AOT
| 18 |
| 19 | + ggml_add_backend_library(ggml-sycl
| 20 | +     ggml-sycl.cpp
| 21 | +     ../../include/ggml-sycl.h
| 22 | +   )
| 23 |
| 24 |   if (GGML_SYCL_F16)
| 25 |       if (GGML_SYCL_TARGET STREQUAL "AMD")
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
@@ -4637,16 +4637,17 @@ ggml_backend_reg_t ggml_backend_sycl_reg() {
| 4637 |         dev_ctx->description = prop.get_name();
| 4638 |
| 4639 |         ggml_backend_dev_t dev = new ggml_backend_device {
| 4640 | -           /* .
| 4641 | -           /* .reg
| 4642 | -           /* .context
| 4643 |         };
| 4644 |         ctx->devices.push_back(dev);
| 4645 |     }
| 4646 |
| 4647 |     reg = ggml_backend_reg {
| 4648 | -       /* .
| 4649 | -       /* .
| 4650 |     };
| 4651 | }
| 4652 |
@@ -4678,3 +4679,4 @@ ggml_backend_t ggml_backend_sycl_init(int device) {
| 4678 |     return sycl_backend;
| 4679 | }
| 4680 |
| 4637 |         dev_ctx->description = prop.get_name();
| 4638 |
| 4639 |         ggml_backend_dev_t dev = new ggml_backend_device {
| 4640 | +           /* .iface   = */ ggml_backend_sycl_device_interface,
| 4641 | +           /* .reg     = */ &reg,
| 4642 | +           /* .context = */ dev_ctx
| 4643 |         };
| 4644 |         ctx->devices.push_back(dev);
| 4645 |     }
| 4646 |
| 4647 |     reg = ggml_backend_reg {
| 4648 | +       /* .api_version = */ GGML_BACKEND_API_VERSION,
| 4649 | +       /* .iface       = */ ggml_backend_sycl_reg_interface,
| 4650 | +       /* .context     = */ ctx
| 4651 |     };
| 4652 | }
| 4653 |
| 4679 |     return sycl_backend;
| 4680 | }
| 4681 |
| 4682 | + GGML_BACKEND_DL_IMPL(ggml_backend_sycl_reg)
ggml/src/ggml-vulkan/CMakeLists.txt
CHANGED
@@ -3,13 +3,13 @@ find_package(Vulkan COMPONENTS glslc REQUIRED)
| 3 |   if (Vulkan_FOUND)
| 4 |       message(STATUS "Vulkan found")
| 5 |
| 6 | -
| 7 | -
| 8 | -
| 9 | -
| 10 |
| 11 | - target_link_libraries(ggml-vulkan PRIVATE
| 12 | - target_include_directories(ggml-vulkan PRIVATE
| 13 |
| 14 |     # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
| 15 |     # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
| 3 |   if (Vulkan_FOUND)
| 4 |       message(STATUS "Vulkan found")
| 5 |
| 6 | +   ggml_add_backend_library(ggml-vulkan
| 7 | +       ggml-vulkan.cpp
| 8 | +       ../../include/ggml-vulkan.h
| 9 | +   )
| 10 |
| 11 | +   target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
| 12 | +   target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
| 13 |
| 14 |     # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
| 15 |     # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
@@ -6738,8 +6738,9 @@ static const struct ggml_backend_reg_i ggml_backend_vk_reg_i = {
| 6738 |
| 6739 |   ggml_backend_reg_t ggml_backend_vk_reg() {
| 6740 |       static ggml_backend_reg reg = {
| 6741 | -         /* .
| 6742 | -         /* .
| 6743 |       };
| 6744 |
| 6745 |       return &reg;
@@ -7365,3 +7366,5 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) {
| 7365 |     VK_LOG_DEBUG("END ggml_vk_check_results_1(" << tensor->name << ")");
| 7366 | }
| 7367 | #endif
| 6738 |
| 6739 |   ggml_backend_reg_t ggml_backend_vk_reg() {
| 6740 |       static ggml_backend_reg reg = {
| 6741 | +         /* .api_version = */ GGML_BACKEND_API_VERSION,
| 6742 | +         /* .iface       = */ ggml_backend_vk_reg_i,
| 6743 | +         /* .context     = */ nullptr,
| 6744 |       };
| 6745 |
| 6746 |       return &reg;
| 7366 |     VK_LOG_DEBUG("END ggml_vk_check_results_1(" << tensor->name << ")");
| 7367 | }
| 7368 | #endif
| 7369 | +
| 7370 | + GGML_BACKEND_DL_IMPL(ggml_backend_vk_reg)
ggml/src/ggml.c
CHANGED
@@ -7571,3 +7571,26 @@ void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
| 7571 |     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
| 7572 |     g_logger_state.log_callback_user_data = user_data;
| 7573 | }
| 7571 |     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
| 7572 |     g_logger_state.log_callback_user_data = user_data;
| 7573 | }
| 7574 | +
| 7575 | + void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
| 7576 | +     p->n_threads  = n_threads;
| 7577 | +     p->prio       = 0;     // default priority (usually means normal or inherited)
| 7578 | +     p->poll       = 50;    // hybrid-polling enabled
| 7579 | +     p->strict_cpu = false; // no strict placement (all threads share same cpumask)
| 7580 | +     p->paused     = false; // threads are ready to go
| 7581 | +     memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
| 7582 | + }
| 7583 | +
| 7584 | + struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
| 7585 | +     struct ggml_threadpool_params p;
| 7586 | +     ggml_threadpool_params_init(&p, n_threads);
| 7587 | +     return p;
| 7588 | + }
| 7589 | +
| 7590 | + bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
| 7591 | +     if (p0->n_threads  != p1->n_threads ) return false;
| 7592 | +     if (p0->prio       != p1->prio      ) return false;
| 7593 | +     if (p0->poll       != p1->poll      ) return false;
| 7594 | +     if (p0->strict_cpu != p1->strict_cpu) return false;
| 7595 | +     return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
| 7596 | + }
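The threadpool parameter helpers moved into ggml.c above are straightforward to use from client code. A small sketch of the intended usage follows; the thread count of 8 is arbitrary.

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_threadpool_params a = ggml_threadpool_params_default(8);

        struct ggml_threadpool_params b;
        ggml_threadpool_params_init(&b, 8);
        b.poll = 0; // turn off hybrid polling for this configuration

        // match() compares n_threads, prio, poll, strict_cpu and the cpumask,
        // so a and b differ here only because of the poll setting.
        printf("params match: %s\n", ggml_threadpool_params_match(&a, &b) ? "yes" : "no");
        return 0;
    }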