Erik Scholz
committed
Commit · 48e92ad
1 Parent(s): 22fb24a
vulkan : add fp16 support for the conv_2d kernel (llama/14872)
* add f16 to conv_2d testing
* weaken conv2d test error threshold
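
For orientation before the diff: a minimal sketch of the graph this change enables, with an F16 kernel (src0) feeding an F32 input (src1) and producing an F32 result (dst). It assumes ggml_conv_2d_direct is the public helper that emits a GGML_OP_CONV_2D node; the shapes and conv parameters are illustrative, not taken from the actual tests.

    #include "ggml.h"

    // Sketch only: the src0 = F16, src1 = F32, dst = F32 combination that the
    // new conv2d_f16_f32 pipeline handles.
    static struct ggml_tensor * conv2d_f16_example(struct ggml_context * ctx) {
        // kernel: KW x KH x Cin x Cout, stored as F16 (src0)
        struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, 8, 16);
        // input: W x H x Cin x N, stored as F32 (src1)
        struct ggml_tensor * input  = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 8, 1);
        // stride 1/1, padding 0/0, dilation 1/1; dst comes out as F32
        return ggml_conv_2d_direct(ctx, kernel, input, 1, 1, 0, 0, 1, 1);
    }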
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
@@ -484,6 +484,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_rwkv_wkv7_f32;
     vk_pipeline pipeline_opt_step_adamw_f32;
     vk_pipeline pipeline_conv2d_f32;
+    vk_pipeline pipeline_conv2d_f16_f32;
     vk_pipeline pipeline_conv2d_dw_whcn_f32;
     vk_pipeline pipeline_conv2d_dw_cwhn_f32;
@@ -3074,12 +3075,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
+        ggml_vk_create_pipeline(
+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
     } else {
         ggml_vk_create_pipeline(
             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
             false);
+        ggml_vk_create_pipeline(
+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
+            false);
     }

     ggml_vk_create_pipeline(device, device->pipeline_conv2d_dw_whcn_f32, "conv2d_dw_whcn_f32", conv2d_dw_whcn_f32_len, conv2d_dw_whcn_f32_data, "main", 3, sizeof(vk_op_conv2d_dw_push_constants), {512, 1, 1}, {}, 1);
@@ -6958,9 +6968,13 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
         }
         return nullptr;
     case GGML_OP_CONV_2D:
-        if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
+        if (src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
             ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) {
-            return ctx->device->pipeline_conv2d_f32;
+            if (src0->type == GGML_TYPE_F32) {
+                return ctx->device->pipeline_conv2d_f32;
+            } else if (src0->type == GGML_TYPE_F16) {
+                return ctx->device->pipeline_conv2d_f16_f32;
+            }
         }
         return nullptr;
     case GGML_OP_CONV_2D_DW:
@@ -8185,13 +8199,13 @@ static void ggml_vk_pool_2d(ggml_backend_vk_context * ctx, vk_context& subctx, c

 static void ggml_vk_conv_2d(ggml_backend_vk_context * ctx, vk_context & subctx, const ggml_tensor * src0,
                             const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     GGML_TENSOR_BINARY_OP_LOCALS

-    GGML_ASSERT(nb00 == sizeof(float));
+    GGML_ASSERT(nb00 == sizeof(float) || nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
     GGML_ASSERT(nb0 == sizeof(float));
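The relaxed stride assert above leans on a ggml invariant: for a contiguous, non-quantized tensor, nb[0] (named nb00 for src0 by GGML_TENSOR_BINARY_OP_LOCALS) equals the element size of the tensor's type. A hedged sketch of what the two asserts on src0 amount to:

    #include "ggml.h"

    // Sketch: a contiguous F16 kernel has a 2-byte element stride
    // (sizeof(ggml_fp16_t)), a contiguous F32 kernel a 4-byte one.
    static int conv2d_kernel_stride_ok(const struct ggml_tensor * src0) {
        size_t expected = src0->type == GGML_TYPE_F16 ? sizeof(ggml_fp16_t)
                                                      : sizeof(float);
        return ggml_is_contiguous(src0) && src0->nb[0] == expected;
    }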
@@ -10874,7 +10888,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
             const vk_device& device = ggml_vk_get_device(ctx->device);
             bool is_Apple = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_APPLE;
             // Channel-contiguous format is not supported yet.
-            return (op->src[0]->type == GGML_TYPE_F32 &&
+            return ((op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) &&
                     op->src[1]->type == GGML_TYPE_F32 &&
                     op->type == GGML_TYPE_F32 &&
                     ggml_is_contiguous(op->src[0]) &&
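This predicate is what the backend scheduler consults before offloading a node; after this commit an F16 kernel in src[0] passes it too. A minimal probe through the public API, where dev is an already-obtained Vulkan device and node a built GGML_OP_CONV_2D tensor (both assumed, not shown):

    #include "ggml-backend.h"

    // Sketch: ask the device whether it will take the conv_2d node.
    static bool vk_takes_conv2d(ggml_backend_dev_t dev, const struct ggml_tensor * node) {
        return ggml_backend_dev_supports_op(dev, node);
    }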
ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
CHANGED
@@ -656,6 +656,7 @@ void process_shaders() {
     string_to_spv("opt_step_adamw_f32", "opt_step_adamw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));

     string_to_spv("conv2d_f32", "conv2d_mm.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});
+    string_to_spv("conv2d_f16_f32", "conv2d_mm.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});

     string_to_spv("conv2d_dw_whcn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"WHCN", "1"}}));
     string_to_spv("conv2d_dw_cwhn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"CWHN", "1"}}));
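
The generator change is the whole story on the shader side: the fp16 variant reuses conv2d_mm.comp and only swaps A_TYPE from float to float16_t, leaving B_TYPE and D_TYPE at float. Each string_to_spv name is assumed to surface as a <name>_len / <name>_data symbol pair holding the compiled SPIR-V, which is what the ggml_vk_create_pipeline calls in ggml-vulkan.cpp consume. Illustrative declarations of that assumed contract:

    #include <stdint.h>

    // Sketch of the assumed generated interface, not the literal generated header:
    extern const uint64_t      conv2d_f16_f32_len;     // size of the SPIR-V blob in bytes
    extern const unsigned char conv2d_f16_f32_data[];  // compiled conv2d_mm.comp with A_TYPE = float16_t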