Spaces:
Running
Running
Commit
·
f76204e
1
Parent(s):
81a6cae
vulkan: fix diag_mask_inf (llama/11323)
Browse files
With robustbufferaccess disabled, this shader was showing OOB stores. There
is a bounds check in the code, but the workgroup dimensions were reversed vs
CUDA and it was running the wrong number of threads. So fix the workgroup
dimensions and disable robustness for this pipeline.
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
|
@@ -2012,7 +2012,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|
| 2012 |
ggml_vk_create_pipeline(device, device->pipeline_leaky_relu_f32, "leaky_relu_f32", leaky_relu_f32_len, leaky_relu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
| 2013 |
ggml_vk_create_pipeline(device, device->pipeline_tanh_f32, "tanh_f32", tanh_f32_len, tanh_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
| 2014 |
|
| 2015 |
-
ggml_vk_create_pipeline(device, device->pipeline_diag_mask_inf_f32, "diag_mask_inf_f32", diag_mask_inf_f32_len, diag_mask_inf_f32_data, "main", 2, sizeof(vk_op_diag_mask_push_constants), {
|
| 2016 |
|
| 2017 |
ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32, "soft_max_f32", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
|
| 2018 |
ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32_wg512, "soft_max_f32_wg512", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { 512 }, 1);
|
|
|
|
| 2012 |
ggml_vk_create_pipeline(device, device->pipeline_leaky_relu_f32, "leaky_relu_f32", leaky_relu_f32_len, leaky_relu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
| 2013 |
ggml_vk_create_pipeline(device, device->pipeline_tanh_f32, "tanh_f32", tanh_f32_len, tanh_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
| 2014 |
|
| 2015 |
+
ggml_vk_create_pipeline(device, device->pipeline_diag_mask_inf_f32, "diag_mask_inf_f32", diag_mask_inf_f32_len, diag_mask_inf_f32_data, "main", 2, sizeof(vk_op_diag_mask_push_constants), {1, 512, 1}, {}, 1, true);
|
| 2016 |
|
| 2017 |
ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32, "soft_max_f32", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);
|
| 2018 |
ggml_vk_create_pipeline(device, device->pipeline_soft_max_f32_wg512, "soft_max_f32_wg512", soft_max_f32_len, soft_max_f32_data, "main", 3, sizeof(vk_op_soft_max_push_constants), {1, 1, 1}, { 512 }, 1);
|
ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp
CHANGED
|
@@ -12,7 +12,7 @@ layout (push_constant) uniform parameter
|
|
| 12 |
|
| 13 |
#include "types.comp"
|
| 14 |
|
| 15 |
-
layout(local_size_x =
|
| 16 |
|
| 17 |
layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
|
| 18 |
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
|
|
|
|
| 12 |
|
| 13 |
#include "types.comp"
|
| 14 |
|
| 15 |
+
layout(local_size_x = 1, local_size_y = 512, local_size_z = 1) in;
|
| 16 |
|
| 17 |
layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
|
| 18 |
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
|