Henry Linjamäki committed on
Commit
57028a7
·
1 Parent(s): 9e034be

opencl: use OpenCL C standard supported by the device (llama/12221)

Browse files

This patch adjusts llama.cpp so that it can run on PoCL, which does not
support OpenCL C 2.0 (`-cl-std=CL2.0`). The issue is solved by querying
the device for the OpenCL C versions it supports and compiling the kernels
with the highest one available.

ggml/CMakeLists.txt CHANGED
@@ -200,6 +200,8 @@ option(GGML_OPENCL "ggml: use OpenCL"
200
  option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
201
  option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
202
  option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
 
 
203
 
204
  # toolchain for vulkan-shaders-gen
205
  set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
 
200
  option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
201
  option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
202
  option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
203
+ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
204
+ "ggml: OpenCL API version to target")
205
 
206
  # toolchain for vulkan-shaders-gen
207
  set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
ggml/src/ggml-opencl/CMakeLists.txt CHANGED
@@ -15,6 +15,7 @@ if (GGML_OPENCL_PROFILING)
15
  endif ()
16
 
17
  add_compile_definitions(GGML_OPENCL_SOA_Q)
 
18
 
19
  if (GGML_OPENCL_USE_ADRENO_KERNELS)
20
  message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
 
15
  endif ()
16
 
17
  add_compile_definitions(GGML_OPENCL_SOA_Q)
18
+ add_compile_definitions(GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})
19
 
20
  if (GGML_OPENCL_USE_ADRENO_KERNELS)
21
  message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
ggml/src/ggml-opencl/ggml-opencl.cpp CHANGED
@@ -1,4 +1,4 @@
1
- #define CL_TARGET_OPENCL_VERSION 220
2
  #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
3
 
4
  // suppress warnings in CL headers for GCC and Clang
@@ -25,6 +25,8 @@
25
  #include <vector>
26
  #include <string>
27
  #include <cmath>
 
 
28
 
29
  #undef MIN
30
  #undef MAX
@@ -62,6 +64,97 @@ enum ADRENO_GPU_GEN {
62
  X1E,
63
  };
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
66
  if (strstr(device_name, "730") ||
67
  strstr(device_name, "740") ||
@@ -470,16 +563,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
470
  // A local ref of cl_device_id for convenience
471
  cl_device_id device = backend_ctx->device;
472
 
 
 
473
  // Check device OpenCL version, OpenCL 2.0 or above is required
474
- size_t device_ver_str_size;
475
- clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &device_ver_str_size);
476
- char *device_ver_buffer = (char *)alloca(device_ver_str_size + 1);
477
- clGetDeviceInfo(device, CL_DEVICE_VERSION, device_ver_str_size, device_ver_buffer, NULL);
478
- device_ver_buffer[device_ver_str_size] = '\0';
479
- GGML_LOG_INFO("ggml_opencl: device OpenCL version: %s\n", device_ver_buffer);
480
-
481
- if (strstr(device_ver_buffer, "OpenCL 2") == NULL &&
482
- strstr(device_ver_buffer, "OpenCL 3") == NULL) {
483
  GGML_LOG_ERROR("ggml_opencl: OpenCL 2.0 or above is required\n");
484
  return backend_ctx;
485
  }
@@ -516,8 +604,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
516
 
517
  // If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
518
  // optional in OpenCL 3.0 (cl_khr_subgroup is mandatory in OpenCL 2.x)
519
- if (strstr(device_ver_buffer, "OpenCL 3") &&
520
- strstr(ext_buffer, "cl_khr_subgroups") == NULL &&
521
  strstr(ext_buffer, "cl_intel_subgroups") == NULL) {
522
  GGML_LOG_ERROR("ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
523
  "(note that subgroups is an optional feature in OpenCL 3.0)\n");
@@ -581,9 +668,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
581
  const std::string kernel_src = read_file("ggml-opencl.cl");
582
  #endif
583
 
584
- std::string compile_opts =
585
- "-cl-std=CL2.0 -cl-mad-enable -cl-unsafe-math-optimizations "
586
- "-cl-finite-math-only -cl-fast-relaxed-math ";
 
 
 
587
  backend_ctx->program = build_program_from_source(context, device, kernel_src.c_str(), compile_opts);
588
 
589
  // Non matmul kernels.
@@ -693,10 +783,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
693
  CL_CHECK((backend_ctx->kernel_transpose_16 = clCreateKernel(backend_ctx->program_transpose_16, "kernel_transpose_16", &err), err));
694
 
695
  // Gemv general
696
- std::string CL_gemv_compile_opts =
697
- " -cl-std=CL2.0 "
698
- " -cl-mad-enable "
699
- " -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
700
  if (has_vector_subgroup_broadcast) {
701
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
702
  }
@@ -713,12 +803,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
713
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_general = clCreateKernel(backend_ctx->program_CL_gemv_general, "kernel_gemv_noshuffle", &err), err));
714
 
715
  // Gemv 2048, 16384
716
- CL_gemv_compile_opts =
717
- " -cl-std=CL2.0 "
718
- " -cl-mad-enable "
719
- " -DLINE_STRIDE_A=2048 "
720
- " -DBLOCK_STRIDE_A=16384 "
721
- " -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
722
  if (has_vector_subgroup_broadcast) {
723
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
724
  }
@@ -735,12 +825,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
735
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_4096, "kernel_gemv_noshuffle", &err), err));
736
 
737
  // Gemv 2048, 16384
738
- CL_gemv_compile_opts =
739
- " -cl-std=CL2.0 "
740
- " -cl-mad-enable "
741
- " -DLINE_STRIDE_A=2048 "
742
- " -DBLOCK_STRIDE_A=16384 "
743
- " -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
744
  if (has_vector_subgroup_broadcast) {
745
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
746
  }
@@ -750,12 +840,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
750
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_11008 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_11008, "kernel_gemv_noshuffle", &err), err));
751
 
752
  // Gemv 5504, 44032
753
- CL_gemv_compile_opts =
754
- " -cl-std=CL2.0 "
755
- " -cl-mad-enable "
756
- " -DLINE_STRIDE_A=5504 "
757
- " -DBLOCK_STRIDE_A=44032 "
758
- " -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
759
  if (has_vector_subgroup_broadcast) {
760
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
761
  }
@@ -765,12 +855,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
765
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_11008_1_4096, "kernel_gemv_noshuffle", &err), err));
766
 
767
  // Gemv 16000, 128000
768
- CL_gemv_compile_opts =
769
- " -cl-std=CL2.0 "
770
- " -cl-mad-enable "
771
- " -DLINE_STRIDE_A=16000 "
772
- " -DBLOCK_STRIDE_A=128000 "
773
- " -DSIMDGROUP_WIDTH=" + std::to_string(backend_ctx->adreno_wave_size);
774
  if (has_vector_subgroup_broadcast) {
775
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
776
  }
 
1
+ #define CL_TARGET_OPENCL_VERSION GGML_OPENCL_TARGET_VERSION
2
  #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
3
 
4
  // suppress warnings in CL headers for GCC and Clang
 
25
  #include <vector>
26
  #include <string>
27
  #include <cmath>
28
+ #include <memory>
29
+ #include <charconv>
30
 
31
  #undef MIN
32
  #undef MAX
 
64
  X1E,
65
  };
66
 
67
+ struct ggml_cl_version {
68
+ cl_uint major = 0;
69
+ cl_uint minor = 0;
70
+ };
71
+
72
+ // Parses a version string of form "XX.YY ". On an error returns ggml_cl_version with all zeroes.
73
+ static ggml_cl_version parse_cl_version(std::string_view str) {
74
+ size_t major_str_begin = 0;
75
+ size_t major_str_end = str.find(".", major_str_begin);
76
+ if (major_str_end == std::string::npos) {
77
+ return {};
78
+ }
79
+
80
+ size_t minor_str_begin = major_str_end + 1;
81
+ size_t minor_str_end = str.find(" ", minor_str_begin);
82
+ if (minor_str_end == std::string::npos) {
83
+ return {};
84
+ }
85
+
86
+ cl_uint version_major;
87
+ if (std::from_chars(str.data() + major_str_begin, str.data() + major_str_end, version_major).ec != std::errc{}) {
88
+ return {};
89
+ }
90
+
91
+ cl_uint version_minor;
92
+ if (std::from_chars(str.data() + minor_str_begin, str.data() + minor_str_end, version_minor).ec != std::errc{}) {
93
+ return {};
94
+ }
95
+ return { version_major, version_minor };
96
+ }
97
+
98
+ // Returns OpenCL platform's version. On an error returns ggml_cl_version with all zeroes.
99
+ static ggml_cl_version get_opencl_platform_version(cl_platform_id platform) {
100
+ size_t param_size;
101
+ CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, nullptr, &param_size));
102
+ std::unique_ptr<char[]> param_storage(new char[param_size]);
103
+ CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_VERSION, param_size, param_storage.get(), nullptr));
104
+
105
+ auto param_value = std::string_view(param_storage.get(), param_size);
106
+ const std::string version_prefix = "OpenCL "; // Suffix: "XX.YY <platform-specific-info>"
107
+ if (param_value.find(version_prefix) != 0) {
108
+ return {};
109
+ }
110
+ param_value.remove_prefix(version_prefix.length());
111
+ return parse_cl_version(param_value);
112
+ }
113
+
114
+ // Return a version to use in OpenCL C compilation. On an error returns ggml_cl_version with all zeroes.
115
+ static ggml_cl_version get_opencl_c_version(ggml_cl_version platform_version, cl_device_id device) {
116
+ size_t param_size;
117
+
118
+ #if CL_TARGET_OPENCL_VERSION >= 300
119
+ if (platform_version.major >= 3) {
120
+ CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr, &param_size));
121
+ if (!param_size) {
122
+ return {};
123
+ }
124
+
125
+ std::unique_ptr<cl_name_version[]> versions(new cl_name_version[param_size]);
126
+ CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, param_size, versions.get(), nullptr));
127
+ unsigned versions_count = param_size / sizeof(cl_name_version);
128
+
129
+ cl_version version_max = 0;
130
+ for (unsigned i = 0; i < versions_count; i++) {
131
+ version_max = std::max<cl_version>(versions[i].version, version_max);
132
+ }
133
+
134
+ return { CL_VERSION_MAJOR(version_max), CL_VERSION_MINOR(version_max) };
135
+ }
136
+ #else
137
+ GGML_UNUSED(platform_version);
138
+ #endif // CL_TARGET_OPENCL_VERSION >= 300
139
+
140
+ CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr, &param_size));
141
+ if (!param_size) {
142
+ return {};
143
+ }
144
+
145
+ std::unique_ptr<char[]> param_storage(new char[param_size]);
146
+ CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, param_size, param_storage.get(), nullptr));
147
+ auto param_value = std::string_view(param_storage.get(), param_size);
148
+
149
+ const std::string version_prefix = "OpenCL C "; // Suffix: "XX.YY <platform-specific-info>"
150
+ if (param_value.find(version_prefix) != 0) {
151
+ return {};
152
+ }
153
+ param_value.remove_prefix(version_prefix.length());
154
+
155
+ return parse_cl_version(param_value);
156
+ }
157
+
158
  static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
159
  if (strstr(device_name, "730") ||
160
  strstr(device_name, "740") ||
 
563
  // A local ref of cl_device_id for convenience
564
  cl_device_id device = backend_ctx->device;
565
 
566
+ ggml_cl_version platform_version = get_opencl_platform_version(default_device->platform->id);
567
+
568
  // Check the device's OpenCL C version; OpenCL C 2.0 or above is required
569
+ ggml_cl_version opencl_c_version = get_opencl_c_version(platform_version, device);
570
+ if (opencl_c_version.major < 2) {
 
 
 
 
 
 
 
571
  GGML_LOG_ERROR("ggml_opencl: OpenCL 2.0 or above is required\n");
572
  return backend_ctx;
573
  }
 
604
 
605
  // If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
606
  // optional in OpenCL 3.0 (cl_khr_subgroup is mandatory in OpenCL 2.x)
607
+ if (opencl_c_version.major == 3 && strstr(ext_buffer, "cl_khr_subgroups") == NULL &&
 
608
  strstr(ext_buffer, "cl_intel_subgroups") == NULL) {
609
  GGML_LOG_ERROR("ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) "
610
  "(note that subgroups is an optional feature in OpenCL 3.0)\n");
 
668
  const std::string kernel_src = read_file("ggml-opencl.cl");
669
  #endif
670
 
671
+ auto opencl_c_std =
672
+ std::string("CL") + std::to_string(opencl_c_version.major) + "." + std::to_string(opencl_c_version.minor);
673
+
674
+ std::string compile_opts = std::string("-cl-std=") + opencl_c_std +
675
+ " -cl-mad-enable -cl-unsafe-math-optimizations"
676
+ " -cl-finite-math-only -cl-fast-relaxed-math";
677
  backend_ctx->program = build_program_from_source(context, device, kernel_src.c_str(), compile_opts);
678
 
679
  // Non matmul kernels.
 
783
  CL_CHECK((backend_ctx->kernel_transpose_16 = clCreateKernel(backend_ctx->program_transpose_16, "kernel_transpose_16", &err), err));
784
 
785
  // Gemv general
786
+ std::string CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
787
+ " -cl-mad-enable "
788
+ " -DSIMDGROUP_WIDTH=" +
789
+ std::to_string(backend_ctx->adreno_wave_size);
790
  if (has_vector_subgroup_broadcast) {
791
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
792
  }
 
803
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_general = clCreateKernel(backend_ctx->program_CL_gemv_general, "kernel_gemv_noshuffle", &err), err));
804
 
805
  // Gemv 2048, 16384
806
+ CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
807
+ " -cl-mad-enable "
808
+ " -DLINE_STRIDE_A=2048 "
809
+ " -DBLOCK_STRIDE_A=16384 "
810
+ " -DSIMDGROUP_WIDTH=" +
811
+ std::to_string(backend_ctx->adreno_wave_size);
812
  if (has_vector_subgroup_broadcast) {
813
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
814
  }
 
825
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_4096, "kernel_gemv_noshuffle", &err), err));
826
 
827
  // Gemv 2048, 16384
828
+ CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
829
+ " -cl-mad-enable "
830
+ " -DLINE_STRIDE_A=2048 "
831
+ " -DBLOCK_STRIDE_A=16384 "
832
+ " -DSIMDGROUP_WIDTH=" +
833
+ std::to_string(backend_ctx->adreno_wave_size);
834
  if (has_vector_subgroup_broadcast) {
835
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
836
  }
 
840
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_11008 = clCreateKernel(backend_ctx->program_CL_gemv_4096_1_11008, "kernel_gemv_noshuffle", &err), err));
841
 
842
  // Gemv 5504, 44032
843
+ CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
844
+ " -cl-mad-enable "
845
+ " -DLINE_STRIDE_A=5504 "
846
+ " -DBLOCK_STRIDE_A=44032 "
847
+ " -DSIMDGROUP_WIDTH=" +
848
+ std::to_string(backend_ctx->adreno_wave_size);
849
  if (has_vector_subgroup_broadcast) {
850
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
851
  }
 
855
  CL_CHECK((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096 = clCreateKernel(backend_ctx->program_CL_gemv_11008_1_4096, "kernel_gemv_noshuffle", &err), err));
856
 
857
  // Gemv 16000, 128000
858
+ CL_gemv_compile_opts = std::string("-cl-std=") + opencl_c_std +
859
+ " -cl-mad-enable "
860
+ " -DLINE_STRIDE_A=16000 "
861
+ " -DBLOCK_STRIDE_A=128000 "
862
+ " -DSIMDGROUP_WIDTH=" +
863
+ std::to_string(backend_ctx->adreno_wave_size);
864
  if (has_vector_subgroup_broadcast) {
865
  CL_gemv_compile_opts += " -DVECTOR_SUB_GROUP_BROADCAT ";
866
  }