Spaces:
Running
Running
Anton Mitkov
committed on
Commit
·
b25d3bf
1
Parent(s):
ae0c7b8
sycl: GGML_SYCL_DISABLE_OPT on by default for all Intel Devices (llama/13973)
Browse files
ggml/src/ggml-sycl/common.hpp
CHANGED
|
@@ -199,7 +199,7 @@ struct sycl_device_info {
|
|
| 199 |
// size_t smpb; // max. shared memory per block
|
| 200 |
bool vmm; // virtual memory support
|
| 201 |
size_t total_vram;
|
| 202 |
-
sycl_hw_info hw_info;
|
| 203 |
optimize_feature opt_feature;
|
| 204 |
};
|
| 205 |
|
|
@@ -286,29 +286,6 @@ struct ggml_tensor_extra_gpu {
|
|
| 286 |
|
| 287 |
void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
|
| 288 |
|
| 289 |
-
inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
|
| 290 |
-
optimize_feature opt;
|
| 291 |
-
|
| 292 |
-
opt.reorder =
|
| 293 |
-
(arch == syclex::architecture::intel_gpu_dg1 ||
|
| 294 |
-
arch == syclex::architecture::intel_gpu_acm_g10 ||
|
| 295 |
-
arch == syclex::architecture::intel_gpu_acm_g11 ||
|
| 296 |
-
arch == syclex::architecture::intel_gpu_acm_g12 ||
|
| 297 |
-
arch == syclex::architecture::intel_gpu_pvc ||
|
| 298 |
-
arch == syclex::architecture::intel_gpu_pvc_vg ||
|
| 299 |
-
arch == syclex::architecture::intel_gpu_mtl_u ||
|
| 300 |
-
arch == syclex::architecture::intel_gpu_mtl_s ||
|
| 301 |
-
arch == syclex::architecture::intel_gpu_mtl_h ||
|
| 302 |
-
arch == syclex::architecture::intel_gpu_arl_u ||
|
| 303 |
-
arch == syclex::architecture::intel_gpu_arl_s ||
|
| 304 |
-
arch == syclex::architecture::intel_gpu_arl_h ||
|
| 305 |
-
arch == syclex::architecture::intel_gpu_bmg_g21 ||
|
| 306 |
-
arch == syclex::architecture::intel_gpu_lnl_m
|
| 307 |
-
);
|
| 308 |
-
|
| 309 |
-
return opt;
|
| 310 |
-
}
|
| 311 |
-
|
| 312 |
namespace sycl_ex = sycl::ext::oneapi::experimental;
|
| 313 |
struct ggml_backend_sycl_context {
|
| 314 |
int device;
|
|
|
|
| 199 |
// size_t smpb; // max. shared memory per block
|
| 200 |
bool vmm; // virtual memory support
|
| 201 |
size_t total_vram;
|
| 202 |
+
//sycl_hw_info hw_info; // device id and arch, currently not used
|
| 203 |
optimize_feature opt_feature;
|
| 204 |
};
|
| 205 |
|
|
|
|
| 286 |
|
| 287 |
void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
|
| 288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
namespace sycl_ex = sycl::ext::oneapi::experimental;
|
| 290 |
struct ggml_backend_sycl_context {
|
| 291 |
int device;
|
ggml/src/ggml-sycl/ggml-sycl.cpp
CHANGED
|
@@ -83,9 +83,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
|
| 83 |
|
| 84 |
info.devices[i].cc =
|
| 85 |
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
| 86 |
-
info.devices[i].
|
| 87 |
-
info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch);
|
| 88 |
-
|
| 89 |
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
|
| 90 |
}
|
| 91 |
|
|
@@ -195,7 +193,7 @@ static void ggml_check_sycl() try {
|
|
| 195 |
|
| 196 |
if (!initialized) {
|
| 197 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 198 |
-
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT",
|
| 199 |
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
| 200 |
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
| 201 |
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
|
|
|
| 83 |
|
| 84 |
info.devices[i].cc =
|
| 85 |
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
| 86 |
+
info.devices[i].opt_feature.reorder = !device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu);
|
|
|
|
|
|
|
| 87 |
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
|
| 88 |
}
|
| 89 |
|
|
|
|
| 193 |
|
| 194 |
if (!initialized) {
|
| 195 |
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
| 196 |
+
g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
|
| 197 |
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
| 198 |
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
| 199 |
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
ggml/src/ggml-sycl/sycl_hw.cpp
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
#include "sycl_hw.hpp"
|
| 2 |
|
| 3 |
-
|
|
|
|
| 4 |
sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
|
| 5 |
sycl_hw_info res;
|
| 6 |
int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
|
|
@@ -11,3 +12,4 @@ sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
|
|
| 11 |
|
| 12 |
return res;
|
| 13 |
}
|
|
|
|
|
|
| 1 |
#include "sycl_hw.hpp"
|
| 2 |
|
| 3 |
+
// TODO: currently not used
|
| 4 |
+
/*
|
| 5 |
sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
|
| 6 |
sycl_hw_info res;
|
| 7 |
int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
|
|
|
|
| 12 |
|
| 13 |
return res;
|
| 14 |
}
|
| 15 |
+
*/
|
ggml/src/ggml-sycl/sycl_hw.hpp
CHANGED
|
@@ -10,6 +10,8 @@
|
|
| 10 |
|
| 11 |
namespace syclex = sycl::ext::oneapi::experimental;
|
| 12 |
|
|
|
|
|
|
|
| 13 |
struct sycl_hw_info {
|
| 14 |
syclex::architecture arch;
|
| 15 |
int32_t device_id;
|
|
@@ -18,6 +20,7 @@ struct sycl_hw_info {
|
|
| 18 |
bool is_in_vector(std::vector<int> &vec, int item);
|
| 19 |
|
| 20 |
sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
#endif // SYCL_HW_HPP
|
|
|
|
| 10 |
|
| 11 |
namespace syclex = sycl::ext::oneapi::experimental;
|
| 12 |
|
| 13 |
+
// TODO: currently not used
|
| 14 |
+
/*
|
| 15 |
struct sycl_hw_info {
|
| 16 |
syclex::architecture arch;
|
| 17 |
int32_t device_id;
|
|
|
|
| 20 |
bool is_in_vector(std::vector<int> &vec, int item);
|
| 21 |
|
| 22 |
sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
|
| 23 |
+
*/
|
| 24 |
|
| 25 |
|
| 26 |
#endif // SYCL_HW_HPP
|