Neo Zhang Jianyu commited on
Commit
3bdb5e6
·
unverified ·
1 Parent(s): 80db462

fix set main gpu crash (llama/6339)

Browse files
Files changed (1) hide show
  1. ggml-sycl.cpp +22 -3
ggml-sycl.cpp CHANGED
@@ -2968,7 +2968,7 @@ namespace dpct
2968
  #include "ggml-common.h"
2969
 
2970
  static int g_ggml_sycl_debug=0;
2971
- #define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) printf(__VA_ARGS__);}while(0)
2972
 
2973
  #define CHECK_TRY_ERROR(expr) \
2974
  [&]() { \
@@ -12868,6 +12868,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
12868
  }
12869
 
12870
  void ggml_backend_sycl_print_sycl_devices() {
 
12871
  int device_count = dpct::dev_mgr::instance().device_count();
12872
  std::map<std::string, size_t> DeviceNums;
12873
  fprintf(stderr, "found %d SYCL devices:\n", device_count);
@@ -12925,7 +12926,9 @@ static void ggml_init_sycl() try {
12925
  static bool initialized = false;
12926
 
12927
  if (!initialized) {
 
12928
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
 
12929
  fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
12930
 
12931
  #if defined(GGML_SYCL_F16)
@@ -16039,6 +16042,7 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
16039
  }
16040
 
16041
  GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
 
16042
  for(int i=0;i<max_len;i++) id_list[i] = -1;
16043
 
16044
  if (!g_sycl_gpu_mgr) {
@@ -16073,6 +16077,7 @@ catch (sycl::exception const &exc) {
16073
 
16074
  GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description,
16075
  size_t description_size) try {
 
16076
  dpct::device_info prop;
16077
  int device_id = g_sycl_gpu_mgr->gpus[device];
16078
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
@@ -16087,6 +16092,7 @@ catch (sycl::exception const &exc) {
16087
 
16088
  GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free,
16089
  size_t *total) try {
 
16090
  ggml_sycl_set_device(device);
16091
 
16092
  /*
@@ -16438,7 +16444,8 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
16438
  };
16439
 
16440
  ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
16441
- ggml_init_sycl();
 
16442
  if (device_index>=g_device_count or device_index<0) {
16443
  printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
16444
  device_index, g_device_count-1);
@@ -16808,6 +16815,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
16808
  };
16809
 
16810
  GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
 
16811
  ggml_init_sycl();
16812
  // FIXME: this is not thread safe
16813
  static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
@@ -16880,6 +16888,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm
16880
  }
16881
 
16882
  ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
 
16883
  static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
16884
  /* .iface = */ {
16885
  /* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
@@ -17176,6 +17185,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
17176
  }
17177
 
17178
  GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
 
17179
  ggml_init_sycl();
17180
 
17181
  check_allow_gpu_index(device);
@@ -17202,6 +17212,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
17202
  }
17203
 
17204
  GGML_CALL int ggml_backend_sycl_get_device_count() {
 
17205
  if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
17206
  return g_sycl_gpu_mgr->get_gpu_count();
17207
  }
@@ -17214,16 +17225,21 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_sycl_init(const char * params,
17214
  }
17215
 
17216
  GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
 
17217
  return g_sycl_gpu_mgr->get_index(device_id);
17218
  }
17219
 
17220
  GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
 
17221
  return g_sycl_gpu_mgr->gpus[device_index];
17222
  }
17223
 
17224
  GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
17225
- GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
 
17226
  fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
 
 
17227
  if (g_sycl_gpu_mgr) {
17228
  delete g_sycl_gpu_mgr;
17229
  }
@@ -17234,6 +17250,9 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id
17234
  }
17235
 
17236
  GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
 
 
 
17237
  if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
17238
  return;
17239
  }
 
2968
  #include "ggml-common.h"
2969
 
2970
  static int g_ggml_sycl_debug=0;
2971
+ #define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) fprintf(stderr, __VA_ARGS__);}while(0)
2972
 
2973
  #define CHECK_TRY_ERROR(expr) \
2974
  [&]() { \
 
12868
  }
12869
 
12870
  void ggml_backend_sycl_print_sycl_devices() {
12871
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
12872
  int device_count = dpct::dev_mgr::instance().device_count();
12873
  std::map<std::string, size_t> DeviceNums;
12874
  fprintf(stderr, "found %d SYCL devices:\n", device_count);
 
12926
  static bool initialized = false;
12927
 
12928
  if (!initialized) {
12929
+ fprintf(stderr, "[SYCL] call ggml_init_sycl\n");
12930
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
12931
+
12932
  fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
12933
 
12934
  #if defined(GGML_SYCL_F16)
 
16042
  }
16043
 
16044
  GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
16045
+ GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_gpu_list\n");
16046
  for(int i=0;i<max_len;i++) id_list[i] = -1;
16047
 
16048
  if (!g_sycl_gpu_mgr) {
 
16077
 
16078
  GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description,
16079
  size_t description_size) try {
16080
+ GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_device_description\n");
16081
  dpct::device_info prop;
16082
  int device_id = g_sycl_gpu_mgr->gpus[device];
16083
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
 
16092
 
16093
  GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free,
16094
  size_t *total) try {
16095
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
16096
  ggml_sycl_set_device(device);
16097
 
16098
  /*
 
16444
  };
16445
 
16446
  ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
16447
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
16448
+
16449
  if (device_index>=g_device_count or device_index<0) {
16450
  printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
16451
  device_index, g_device_count-1);
 
16815
  };
16816
 
16817
  GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
16818
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
16819
  ggml_init_sycl();
16820
  // FIXME: this is not thread safe
16821
  static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
 
16888
  }
16889
 
16890
  ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
16891
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
16892
  static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
16893
  /* .iface = */ {
16894
  /* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
 
17185
  }
17186
 
17187
  GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
17188
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
17189
  ggml_init_sycl();
17190
 
17191
  check_allow_gpu_index(device);
 
17212
  }
17213
 
17214
  GGML_CALL int ggml_backend_sycl_get_device_count() {
17215
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
17216
  if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
17217
  return g_sycl_gpu_mgr->get_gpu_count();
17218
  }
 
17225
  }
17226
 
17227
  GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
17228
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_index\n");
17229
  return g_sycl_gpu_mgr->get_index(device_id);
17230
  }
17231
 
17232
  GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
17233
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_id\n");
17234
  return g_sycl_gpu_mgr->gpus[device_index];
17235
  }
17236
 
17237
  GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
17238
+ ggml_init_sycl();
17239
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_single_device_mode\n");
17240
  fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
17241
+ GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
17242
+
17243
  if (g_sycl_gpu_mgr) {
17244
  delete g_sycl_gpu_mgr;
17245
  }
 
17250
  }
17251
 
17252
  GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
17253
+ ggml_init_sycl();
17254
+ GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_mul_device_mode\n");
17255
+
17256
  if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
17257
  return;
17258
  }