Łukasz Ślusarczyk Romain Biessy commited on
Commit
c18969f
·
1 Parent(s): 607a196

SYCL: using graphs is configurable by environment variable and compile option (llama/12371)

Browse files

* alberto changes

* enable sycl graphs by env variable

* fixed compilation warnings in ggml-sycl.cpp

* renamed graph variables

* fix markdown in docs/backend/SYCL.md

Co-authored-by: Romain Biessy <[email protected]>

* fix markdown in docs/backend/SYCL.md again

* compiling graphs by default, renamed graph_enable to graph_disable

---------

Co-authored-by: Romain Biessy <[email protected]>

ggml/CMakeLists.txt CHANGED
@@ -191,6 +191,7 @@ option(GGML_OPENMP "ggml: use OpenMP"
191
  option(GGML_RPC "ggml: use RPC" OFF)
192
  option(GGML_SYCL "ggml: use SYCL" OFF)
193
  option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
 
194
  set (GGML_SYCL_TARGET "INTEL" CACHE STRING
195
  "ggml: sycl target device")
196
  set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
 
191
  option(GGML_RPC "ggml: use RPC" OFF)
192
  option(GGML_SYCL "ggml: use SYCL" OFF)
193
  option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
194
+ option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
195
  set (GGML_SYCL_TARGET "INTEL" CACHE STRING
196
  "ggml: sycl target device")
197
  set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
ggml/src/ggml-sycl/CMakeLists.txt CHANGED
@@ -66,6 +66,9 @@ if (WIN32)
66
  find_package(MKL REQUIRED)
67
  target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
68
  else()
 
 
 
69
  if (GGML_SYCL_TARGET STREQUAL "INTEL")
70
  target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
71
  elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
 
66
  find_package(MKL REQUIRED)
67
  target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
68
  else()
69
+ if (GGML_SYCL_GRAPH)
70
+ add_compile_definitions(GGML_SYCL_GRAPH)
71
+ endif()
72
  if (GGML_SYCL_TARGET STREQUAL "INTEL")
73
  target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
74
  elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
ggml/src/ggml-sycl/common.hpp CHANGED
@@ -301,6 +301,7 @@ inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
301
  return opt;
302
  }
303
 
 
304
  struct ggml_backend_sycl_context {
305
  int device;
306
  std::string name;
@@ -392,6 +393,10 @@ struct ggml_backend_sycl_context {
392
  return pool(device);
393
  }
394
 
 
 
 
 
395
  ggml_sycl_pool & host_pool(int device) {
396
  if (host_pools[device] == nullptr) {
397
  host_pools[device] = new_pool_for_host(stream(device, 0), device);
 
301
  return opt;
302
  }
303
 
304
+ namespace sycl_ex = sycl::ext::oneapi::experimental;
305
  struct ggml_backend_sycl_context {
306
  int device;
307
  std::string name;
 
393
  return pool(device);
394
  }
395
 
396
+ #ifdef GGML_SYCL_GRAPH
397
+ std::unique_ptr<sycl_ex::command_graph<sycl_ex::graph_state::executable>> exec_graph = nullptr;
398
+ #endif
399
+
400
  ggml_sycl_pool & host_pool(int device) {
401
  if (host_pools[device] == nullptr) {
402
  host_pools[device] = new_pool_for_host(stream(device, 0), device);
ggml/src/ggml-sycl/ggml-sycl.cpp CHANGED
@@ -46,6 +46,7 @@
46
  static bool g_sycl_loaded = false;
47
  int g_ggml_sycl_debug = 0;
48
  int g_ggml_sycl_disable_optimize = 0;
 
49
 
50
  static ggml_sycl_device_info ggml_sycl_init() {
51
  ggml_sycl_device_info info = {};
@@ -191,10 +192,12 @@ static void ggml_check_sycl() try {
191
  if (!initialized) {
192
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
193
  g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
 
194
  GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
195
  GGML_LOG_INFO("Running with Environment Variables:\n");
196
  GGML_LOG_INFO(" GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
197
  GGML_LOG_INFO(" GGML_SYCL_DISABLE_OPT: %d\n", g_ggml_sycl_disable_optimize);
 
198
  GGML_LOG_INFO("Build with Macros:\n");
199
  #if defined(GGML_SYCL_FORCE_MMQ)
200
  GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
@@ -3699,10 +3702,9 @@ static void optimize_graph_once(ggml_cgraph * cgraph, ggml_backend_sycl_context
3699
  if (ctx->opt_feature.reorder) opt_for_reorder(cgraph->nodes[i], stream);
3700
  }
3701
  }
3702
- static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
3703
- ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
3704
- ggml_sycl_set_main_device(sycl_ctx->device);
3705
 
 
 
3706
  if (!g_ggml_sycl_disable_optimize) optimize_graph_once(cgraph, sycl_ctx);
3707
 
3708
  for (int i = 0; i < cgraph->n_nodes; i++) {
@@ -3724,7 +3726,46 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
3724
  }
3725
  GGML_ASSERT(ok);
3726
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3727
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3728
  return GGML_STATUS_SUCCESS;
3729
  }
3730
 
 
46
  static bool g_sycl_loaded = false;
47
  int g_ggml_sycl_debug = 0;
48
  int g_ggml_sycl_disable_optimize = 0;
49
+ int g_ggml_sycl_disable_graph = 0;
50
 
51
  static ggml_sycl_device_info ggml_sycl_init() {
52
  ggml_sycl_device_info info = {};
 
192
  if (!initialized) {
193
  g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
194
  g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
195
+ g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
196
  GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
197
  GGML_LOG_INFO("Running with Environment Variables:\n");
198
  GGML_LOG_INFO(" GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
199
  GGML_LOG_INFO(" GGML_SYCL_DISABLE_OPT: %d\n", g_ggml_sycl_disable_optimize);
200
+ GGML_LOG_INFO(" GGML_SYCL_DISABLE_GRAPH: %d\n", g_ggml_sycl_disable_graph);
201
  GGML_LOG_INFO("Build with Macros:\n");
202
  #if defined(GGML_SYCL_FORCE_MMQ)
203
  GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
 
3702
  if (ctx->opt_feature.reorder) opt_for_reorder(cgraph->nodes[i], stream);
3703
  }
3704
  }
 
 
 
3705
 
3706
+ static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * sycl_ctx, ggml_cgraph * cgraph) {
3707
+ ggml_sycl_set_main_device(sycl_ctx->device);
3708
  if (!g_ggml_sycl_disable_optimize) optimize_graph_once(cgraph, sycl_ctx);
3709
 
3710
  for (int i = 0; i < cgraph->n_nodes; i++) {
 
3726
  }
3727
  GGML_ASSERT(ok);
3728
  }
3729
+ }
3730
+
3731
+ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
3732
+ auto * sycl_ctx = static_cast<ggml_backend_sycl_context *>(backend->context);
3733
+
3734
+ #ifdef GGML_SYCL_GRAPH
3735
+ if (!g_ggml_sycl_disable_graph) {
3736
+ if (!sycl_ctx->exec_graph && !dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_graph)) {
3737
+ GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);
3738
+ ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
3739
+ return GGML_STATUS_SUCCESS;
3740
+ }
3741
+
3742
+ sycl_ex::command_graph model_sycl_graph(*(sycl_ctx->stream()));
3743
+ model_sycl_graph.begin_recording(*(sycl_ctx->stream()));
3744
+ ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
3745
+ model_sycl_graph.end_recording();
3746
 
3747
+ if (!sycl_ctx->exec_graph) {
3748
+ auto exec_graph = model_sycl_graph.finalize({sycl_ex::property::graph::updatable{}});
3749
+ sycl_ctx->exec_graph = std::make_unique<
3750
+ sycl_ex::command_graph<sycl_ex::graph_state::executable>>(exec_graph);
3751
+ } else {
3752
+ try {
3753
+ sycl_ctx->exec_graph->update(model_sycl_graph);
3754
+ GGML_SYCL_DEBUG("[SYCL-GRAPH] update success\n");
3755
+ } catch (sycl::exception const & e) {
3756
+ GGML_SYCL_DEBUG("[SYCL-GRAPH] Exception when updating graph, %s\n", e.what());
3757
+ auto exec_graph = model_sycl_graph.finalize({sycl_ex::property::graph::updatable{}});
3758
+ sycl_ctx->exec_graph = std::make_unique<
3759
+ sycl_ex::command_graph<sycl_ex::graph_state::executable>>(exec_graph);
3760
+ }
3761
+ }
3762
+
3763
+ sycl_ctx->stream()->ext_oneapi_graph(*(sycl_ctx->exec_graph));
3764
+ } else
3765
+ #endif
3766
+ {
3767
+ ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
3768
+ }
3769
  return GGML_STATUS_SUCCESS;
3770
  }
3771