Spaces:
Sleeping
Sleeping
Łukasz Ślusarczyk
Romain Biessy
commited on
Commit
·
c18969f
1
Parent(s):
607a196
SYCL: using graphs is configurable by environment variable and compile option (llama/12371)
Browse files* alberto changes
* enable sycl graphs by env variable
* fixed compilation warnings in ggml-sycl.cpp
* renamed graph variables
* fix markdown in docs/backend/SYCL.md
Co-authored-by: Romain Biessy <[email protected]>
* fix markdown in docs/backend/SYCL.md again
* compiling graphs by default, renamed graph_enable to graph_disable
---------
Co-authored-by: Romain Biessy <[email protected]>
ggml/CMakeLists.txt
CHANGED
|
@@ -191,6 +191,7 @@ option(GGML_OPENMP "ggml: use OpenMP"
|
|
| 191 |
option(GGML_RPC "ggml: use RPC" OFF)
|
| 192 |
option(GGML_SYCL "ggml: use SYCL" OFF)
|
| 193 |
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
|
|
|
| 194 |
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
| 195 |
"ggml: sycl target device")
|
| 196 |
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
|
|
|
| 191 |
option(GGML_RPC "ggml: use RPC" OFF)
|
| 192 |
option(GGML_SYCL "ggml: use SYCL" OFF)
|
| 193 |
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
| 194 |
+
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
|
| 195 |
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
| 196 |
"ggml: sycl target device")
|
| 197 |
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
ggml/src/ggml-sycl/CMakeLists.txt
CHANGED
|
@@ -66,6 +66,9 @@ if (WIN32)
|
|
| 66 |
find_package(MKL REQUIRED)
|
| 67 |
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
| 68 |
else()
|
|
|
|
|
|
|
|
|
|
| 69 |
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
| 70 |
target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
| 71 |
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
|
|
|
| 66 |
find_package(MKL REQUIRED)
|
| 67 |
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
| 68 |
else()
|
| 69 |
+
if (GGML_SYCL_GRAPH)
|
| 70 |
+
add_compile_definitions(GGML_SYCL_GRAPH)
|
| 71 |
+
endif()
|
| 72 |
if (GGML_SYCL_TARGET STREQUAL "INTEL")
|
| 73 |
target_link_libraries(ggml-sycl PRIVATE sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
| 74 |
elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
|
ggml/src/ggml-sycl/common.hpp
CHANGED
|
@@ -301,6 +301,7 @@ inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
|
|
| 301 |
return opt;
|
| 302 |
}
|
| 303 |
|
|
|
|
| 304 |
struct ggml_backend_sycl_context {
|
| 305 |
int device;
|
| 306 |
std::string name;
|
|
@@ -392,6 +393,10 @@ struct ggml_backend_sycl_context {
|
|
| 392 |
return pool(device);
|
| 393 |
}
|
| 394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
ggml_sycl_pool & host_pool(int device) {
|
| 396 |
if (host_pools[device] == nullptr) {
|
| 397 |
host_pools[device] = new_pool_for_host(stream(device, 0), device);
|
|
|
|
| 301 |
return opt;
|
| 302 |
}
|
| 303 |
|
| 304 |
+
namespace sycl_ex = sycl::ext::oneapi::experimental;
|
| 305 |
struct ggml_backend_sycl_context {
|
| 306 |
int device;
|
| 307 |
std::string name;
|
|
|
|
| 393 |
return pool(device);
|
| 394 |
}
|
| 395 |
|
| 396 |
+
#ifdef GGML_SYCL_GRAPH
|
| 397 |
+
std::unique_ptr<sycl_ex::command_graph<sycl_ex::graph_state::executable>> exec_graph = nullptr;
|
| 398 |
+
#endif
|
| 399 |
+
|
| 400 |
ggml_sycl_pool & host_pool(int device) {
|
| 401 |
if (host_pools[device] == nullptr) {
|
| 402 |
host_pools[device] = new_pool_for_host(stream(device, 0), device);
|
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
|
@@ -46,6 +46,7 @@
|
|
| 46 |
static bool g_sycl_loaded = false;
|
| 47 |
int g_ggml_sycl_debug = 0;
|
| 48 |
int g_ggml_sycl_disable_optimize = 0;
|
|
|
|
| 49 |
|
| 50 |
static ggml_sycl_device_info ggml_sycl_init() {
|
| 51 |
ggml_sycl_device_info info = {};
|
|
@@ -191,10 +192,12 @@ static void ggml_check_sycl() try {
|
|
| 191 |
if (!initialized) {
|
| 192 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 193 |
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
|
|
|
|
| 194 |
GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
|
| 195 |
GGML_LOG_INFO("Running with Environment Variables:\n");
|
| 196 |
GGML_LOG_INFO(" GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
|
| 197 |
GGML_LOG_INFO(" GGML_SYCL_DISABLE_OPT: %d\n", g_ggml_sycl_disable_optimize);
|
|
|
|
| 198 |
GGML_LOG_INFO("Build with Macros:\n");
|
| 199 |
#if defined(GGML_SYCL_FORCE_MMQ)
|
| 200 |
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
|
|
@@ -3699,10 +3702,9 @@ static void optimize_graph_once(ggml_cgraph * cgraph, ggml_backend_sycl_context
|
|
| 3699 |
if (ctx->opt_feature.reorder) opt_for_reorder(cgraph->nodes[i], stream);
|
| 3700 |
}
|
| 3701 |
}
|
| 3702 |
-
static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
| 3703 |
-
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
|
| 3704 |
-
ggml_sycl_set_main_device(sycl_ctx->device);
|
| 3705 |
|
|
|
|
|
|
|
| 3706 |
if (!g_ggml_sycl_disable_optimize) optimize_graph_once(cgraph, sycl_ctx);
|
| 3707 |
|
| 3708 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
@@ -3724,7 +3726,46 @@ static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_
|
|
| 3724 |
}
|
| 3725 |
GGML_ASSERT(ok);
|
| 3726 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3727 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3728 |
return GGML_STATUS_SUCCESS;
|
| 3729 |
}
|
| 3730 |
|
|
|
|
| 46 |
static bool g_sycl_loaded = false;
|
| 47 |
int g_ggml_sycl_debug = 0;
|
| 48 |
int g_ggml_sycl_disable_optimize = 0;
|
| 49 |
+
int g_ggml_sycl_disable_graph = 0;
|
| 50 |
|
| 51 |
static ggml_sycl_device_info ggml_sycl_init() {
|
| 52 |
ggml_sycl_device_info info = {};
|
|
|
|
| 192 |
if (!initialized) {
|
| 193 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 194 |
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
|
| 195 |
+
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
| 196 |
GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
|
| 197 |
GGML_LOG_INFO("Running with Environment Variables:\n");
|
| 198 |
GGML_LOG_INFO(" GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
|
| 199 |
GGML_LOG_INFO(" GGML_SYCL_DISABLE_OPT: %d\n", g_ggml_sycl_disable_optimize);
|
| 200 |
+
GGML_LOG_INFO(" GGML_SYCL_DISABLE_GRAPH: %d\n", g_ggml_sycl_disable_graph);
|
| 201 |
GGML_LOG_INFO("Build with Macros:\n");
|
| 202 |
#if defined(GGML_SYCL_FORCE_MMQ)
|
| 203 |
GGML_LOG_INFO(" GGML_SYCL_FORCE_MMQ: yes\n");
|
|
|
|
| 3702 |
if (ctx->opt_feature.reorder) opt_for_reorder(cgraph->nodes[i], stream);
|
| 3703 |
}
|
| 3704 |
}
|
|
|
|
|
|
|
|
|
|
| 3705 |
|
| 3706 |
+
static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * sycl_ctx, ggml_cgraph * cgraph) {
|
| 3707 |
+
ggml_sycl_set_main_device(sycl_ctx->device);
|
| 3708 |
if (!g_ggml_sycl_disable_optimize) optimize_graph_once(cgraph, sycl_ctx);
|
| 3709 |
|
| 3710 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
|
|
| 3726 |
}
|
| 3727 |
GGML_ASSERT(ok);
|
| 3728 |
}
|
| 3729 |
+
}
|
| 3730 |
+
|
| 3731 |
+
static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
|
| 3732 |
+
auto * sycl_ctx = static_cast<ggml_backend_sycl_context *>(backend->context);
|
| 3733 |
+
|
| 3734 |
+
#ifdef GGML_SYCL_GRAPH
|
| 3735 |
+
if (!g_ggml_sycl_disable_graph) {
|
| 3736 |
+
if (!sycl_ctx->exec_graph && !dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_graph)) {
|
| 3737 |
+
GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);
|
| 3738 |
+
ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
|
| 3739 |
+
return GGML_STATUS_SUCCESS;
|
| 3740 |
+
}
|
| 3741 |
+
|
| 3742 |
+
sycl_ex::command_graph model_sycl_graph(*(sycl_ctx->stream()));
|
| 3743 |
+
model_sycl_graph.begin_recording(*(sycl_ctx->stream()));
|
| 3744 |
+
ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
|
| 3745 |
+
model_sycl_graph.end_recording();
|
| 3746 |
|
| 3747 |
+
if (!sycl_ctx->exec_graph) {
|
| 3748 |
+
auto exec_graph = model_sycl_graph.finalize({sycl_ex::property::graph::updatable{}});
|
| 3749 |
+
sycl_ctx->exec_graph = std::make_unique<
|
| 3750 |
+
sycl_ex::command_graph<sycl_ex::graph_state::executable>>(exec_graph);
|
| 3751 |
+
} else {
|
| 3752 |
+
try {
|
| 3753 |
+
sycl_ctx->exec_graph->update(model_sycl_graph);
|
| 3754 |
+
GGML_SYCL_DEBUG("[SYCL-GRAPH] update success\n");
|
| 3755 |
+
} catch (sycl::exception const & e) {
|
| 3756 |
+
GGML_SYCL_DEBUG("[SYCL-GRAPH] Exception when updating graph, %s\n", e.what());
|
| 3757 |
+
auto exec_graph = model_sycl_graph.finalize({sycl_ex::property::graph::updatable{}});
|
| 3758 |
+
sycl_ctx->exec_graph = std::make_unique<
|
| 3759 |
+
sycl_ex::command_graph<sycl_ex::graph_state::executable>>(exec_graph);
|
| 3760 |
+
}
|
| 3761 |
+
}
|
| 3762 |
+
|
| 3763 |
+
sycl_ctx->stream()->ext_oneapi_graph(*(sycl_ctx->exec_graph));
|
| 3764 |
+
} else
|
| 3765 |
+
#endif
|
| 3766 |
+
{
|
| 3767 |
+
ggml_backend_sycl_graph_compute_impl(sycl_ctx, cgraph);
|
| 3768 |
+
}
|
| 3769 |
return GGML_STATUS_SUCCESS;
|
| 3770 |
}
|
| 3771 |
|