Spaces:
Sleeping
Sleeping
Neo Zhang Jianyu
commited on
fix set main gpu crash (llama/6339)
Browse files- ggml-sycl.cpp +22 -3
ggml-sycl.cpp
CHANGED
|
@@ -2968,7 +2968,7 @@ namespace dpct
|
|
| 2968 |
#include "ggml-common.h"
|
| 2969 |
|
| 2970 |
static int g_ggml_sycl_debug=0;
|
| 2971 |
-
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug)
|
| 2972 |
|
| 2973 |
#define CHECK_TRY_ERROR(expr) \
|
| 2974 |
[&]() { \
|
|
@@ -12868,6 +12868,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
|
|
| 12868 |
}
|
| 12869 |
|
| 12870 |
void ggml_backend_sycl_print_sycl_devices() {
|
|
|
|
| 12871 |
int device_count = dpct::dev_mgr::instance().device_count();
|
| 12872 |
std::map<std::string, size_t> DeviceNums;
|
| 12873 |
fprintf(stderr, "found %d SYCL devices:\n", device_count);
|
|
@@ -12925,7 +12926,9 @@ static void ggml_init_sycl() try {
|
|
| 12925 |
static bool initialized = false;
|
| 12926 |
|
| 12927 |
if (!initialized) {
|
|
|
|
| 12928 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
|
|
|
| 12929 |
fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
|
| 12930 |
|
| 12931 |
#if defined(GGML_SYCL_F16)
|
|
@@ -16039,6 +16042,7 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
|
|
| 16039 |
}
|
| 16040 |
|
| 16041 |
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
|
|
|
|
| 16042 |
for(int i=0;i<max_len;i++) id_list[i] = -1;
|
| 16043 |
|
| 16044 |
if (!g_sycl_gpu_mgr) {
|
|
@@ -16073,6 +16077,7 @@ catch (sycl::exception const &exc) {
|
|
| 16073 |
|
| 16074 |
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description,
|
| 16075 |
size_t description_size) try {
|
|
|
|
| 16076 |
dpct::device_info prop;
|
| 16077 |
int device_id = g_sycl_gpu_mgr->gpus[device];
|
| 16078 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
|
@@ -16087,6 +16092,7 @@ catch (sycl::exception const &exc) {
|
|
| 16087 |
|
| 16088 |
GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free,
|
| 16089 |
size_t *total) try {
|
|
|
|
| 16090 |
ggml_sycl_set_device(device);
|
| 16091 |
|
| 16092 |
/*
|
|
@@ -16438,7 +16444,8 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
|
| 16438 |
};
|
| 16439 |
|
| 16440 |
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
| 16441 |
-
|
|
|
|
| 16442 |
if (device_index>=g_device_count or device_index<0) {
|
| 16443 |
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
| 16444 |
device_index, g_device_count-1);
|
|
@@ -16808,6 +16815,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
|
|
| 16808 |
};
|
| 16809 |
|
| 16810 |
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
|
|
|
| 16811 |
ggml_init_sycl();
|
| 16812 |
// FIXME: this is not thread safe
|
| 16813 |
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
|
|
@@ -16880,6 +16888,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm
|
|
| 16880 |
}
|
| 16881 |
|
| 16882 |
ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
|
|
|
| 16883 |
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
|
| 16884 |
/* .iface = */ {
|
| 16885 |
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
|
|
@@ -17176,6 +17185,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
|
|
| 17176 |
}
|
| 17177 |
|
| 17178 |
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
|
|
|
| 17179 |
ggml_init_sycl();
|
| 17180 |
|
| 17181 |
check_allow_gpu_index(device);
|
|
@@ -17202,6 +17212,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
|
|
| 17202 |
}
|
| 17203 |
|
| 17204 |
GGML_CALL int ggml_backend_sycl_get_device_count() {
|
|
|
|
| 17205 |
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
| 17206 |
return g_sycl_gpu_mgr->get_gpu_count();
|
| 17207 |
}
|
|
@@ -17214,16 +17225,21 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_sycl_init(const char * params,
|
|
| 17214 |
}
|
| 17215 |
|
| 17216 |
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
|
|
|
|
| 17217 |
return g_sycl_gpu_mgr->get_index(device_id);
|
| 17218 |
}
|
| 17219 |
|
| 17220 |
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
|
|
|
|
| 17221 |
return g_sycl_gpu_mgr->gpus[device_index];
|
| 17222 |
}
|
| 17223 |
|
| 17224 |
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
|
| 17225 |
-
|
|
|
|
| 17226 |
fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
|
|
|
|
|
|
|
| 17227 |
if (g_sycl_gpu_mgr) {
|
| 17228 |
delete g_sycl_gpu_mgr;
|
| 17229 |
}
|
|
@@ -17234,6 +17250,9 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id
|
|
| 17234 |
}
|
| 17235 |
|
| 17236 |
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
|
|
|
|
|
|
|
|
|
|
| 17237 |
if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
|
| 17238 |
return;
|
| 17239 |
}
|
|
|
|
| 2968 |
#include "ggml-common.h"
|
| 2969 |
|
| 2970 |
static int g_ggml_sycl_debug=0;
|
| 2971 |
+
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) fprintf(stderr, __VA_ARGS__);}while(0)
|
| 2972 |
|
| 2973 |
#define CHECK_TRY_ERROR(expr) \
|
| 2974 |
[&]() { \
|
|
|
|
| 12868 |
}
|
| 12869 |
|
| 12870 |
void ggml_backend_sycl_print_sycl_devices() {
|
| 12871 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
|
| 12872 |
int device_count = dpct::dev_mgr::instance().device_count();
|
| 12873 |
std::map<std::string, size_t> DeviceNums;
|
| 12874 |
fprintf(stderr, "found %d SYCL devices:\n", device_count);
|
|
|
|
| 12926 |
static bool initialized = false;
|
| 12927 |
|
| 12928 |
if (!initialized) {
|
| 12929 |
+
fprintf(stderr, "[SYCL] call ggml_init_sycl\n");
|
| 12930 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 12931 |
+
|
| 12932 |
fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
|
| 12933 |
|
| 12934 |
#if defined(GGML_SYCL_F16)
|
|
|
|
| 16042 |
}
|
| 16043 |
|
| 16044 |
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
|
| 16045 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_gpu_list\n");
|
| 16046 |
for(int i=0;i<max_len;i++) id_list[i] = -1;
|
| 16047 |
|
| 16048 |
if (!g_sycl_gpu_mgr) {
|
|
|
|
| 16077 |
|
| 16078 |
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description,
|
| 16079 |
size_t description_size) try {
|
| 16080 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_device_description\n");
|
| 16081 |
dpct::device_info prop;
|
| 16082 |
int device_id = g_sycl_gpu_mgr->gpus[device];
|
| 16083 |
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
|
|
|
| 16092 |
|
| 16093 |
GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free,
|
| 16094 |
size_t *total) try {
|
| 16095 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
|
| 16096 |
ggml_sycl_set_device(device);
|
| 16097 |
|
| 16098 |
/*
|
|
|
|
| 16444 |
};
|
| 16445 |
|
| 16446 |
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
|
| 16447 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
|
| 16448 |
+
|
| 16449 |
if (device_index>=g_device_count or device_index<0) {
|
| 16450 |
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
|
| 16451 |
device_index, g_device_count-1);
|
|
|
|
| 16815 |
};
|
| 16816 |
|
| 16817 |
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
|
| 16818 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
|
| 16819 |
ggml_init_sycl();
|
| 16820 |
// FIXME: this is not thread safe
|
| 16821 |
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
|
|
|
|
| 16888 |
}
|
| 16889 |
|
| 16890 |
ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
|
| 16891 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
|
| 16892 |
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
|
| 16893 |
/* .iface = */ {
|
| 16894 |
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
|
|
|
|
| 17185 |
}
|
| 17186 |
|
| 17187 |
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
|
| 17188 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
|
| 17189 |
ggml_init_sycl();
|
| 17190 |
|
| 17191 |
check_allow_gpu_index(device);
|
|
|
|
| 17212 |
}
|
| 17213 |
|
| 17214 |
GGML_CALL int ggml_backend_sycl_get_device_count() {
|
| 17215 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
|
| 17216 |
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
| 17217 |
return g_sycl_gpu_mgr->get_gpu_count();
|
| 17218 |
}
|
|
|
|
| 17225 |
}
|
| 17226 |
|
| 17227 |
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
|
| 17228 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_index\n");
|
| 17229 |
return g_sycl_gpu_mgr->get_index(device_id);
|
| 17230 |
}
|
| 17231 |
|
| 17232 |
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
|
| 17233 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_id\n");
|
| 17234 |
return g_sycl_gpu_mgr->gpus[device_index];
|
| 17235 |
}
|
| 17236 |
|
| 17237 |
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
|
| 17238 |
+
ggml_init_sycl();
|
| 17239 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_single_device_mode\n");
|
| 17240 |
fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
|
| 17241 |
+
GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
|
| 17242 |
+
|
| 17243 |
if (g_sycl_gpu_mgr) {
|
| 17244 |
delete g_sycl_gpu_mgr;
|
| 17245 |
}
|
|
|
|
| 17250 |
}
|
| 17251 |
|
| 17252 |
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
|
| 17253 |
+
ggml_init_sycl();
|
| 17254 |
+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_mul_device_mode\n");
|
| 17255 |
+
|
| 17256 |
if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
|
| 17257 |
return;
|
| 17258 |
}
|