Erik Scholz committed
Commit 48e92ad · 1 Parent(s): 22fb24a

vulkan : add fp16 support for the conv_2d kernel (llama/14872)


* add f16 to conv_2d testing
* weaken conv2d test error threshold
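
For context, a minimal sketch of a graph node that exercises the new path (my own illustration, not code from this commit): the convolution kernel (src0) is created as GGML_TYPE_F16 while the input (src1) and the output stay GGML_TYPE_F32, matching the assertions added in ggml_vk_conv_2d below. The test error threshold is weakened because fp16 weights carry roughly 2^-11 relative rounding error, so the result can no longer match the fp32 reference as tightly. The constructor name ggml_conv_2d_direct and the tensor shapes here are assumptions about the ggml public API, not taken from this diff.

#include "ggml.h"

// Hypothetical usage sketch: build a GGML_OP_CONV_2D node with an fp16 kernel.
// ggml_conv_2d_direct is assumed to be the constructor that emits GGML_OP_CONV_2D;
// shapes follow ggml's [KW, KH, Cin, Cout] kernel and [W, H, Cin, N] input layout.
static struct ggml_tensor * build_f16_conv(struct ggml_context * ctx,
                                           struct ggml_tensor  * input /* F32, [W, H, Cin, N] */) {
    struct ggml_tensor * kernel = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 3, 3, 64, 128);
    // stride 1, padding 1, dilation 1; with this commit the Vulkan backend can run
    // this node directly via the new conv2d_f16_f32 pipeline
    return ggml_conv_2d_direct(ctx, kernel, input, 1, 1, 1, 1, 1, 1);
}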

ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -484,6 +484,7 @@ struct vk_device_struct {
     vk_pipeline pipeline_rwkv_wkv7_f32;
     vk_pipeline pipeline_opt_step_adamw_f32;
     vk_pipeline pipeline_conv2d_f32;
+    vk_pipeline pipeline_conv2d_f16_f32;
     vk_pipeline pipeline_conv2d_dw_whcn_f32;
     vk_pipeline pipeline_conv2d_dw_cwhn_f32;
 
@@ -3074,12 +3075,21 @@ static void ggml_vk_load_shaders(vk_device& device) {
             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
+        ggml_vk_create_pipeline(
+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true, true);
     } else {
         ggml_vk_create_pipeline(
             device, device->pipeline_conv2d_f32, "conv2d_f32", conv2d_f32_len, conv2d_f32_data, "main", 3,
             sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
             { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
             false);
+        ggml_vk_create_pipeline(
+            device, device->pipeline_conv2d_f16_f32, "conv2d_f16_f32", conv2d_f16_f32_len, conv2d_f16_f32_data, "main", 3,
+            sizeof(vk_op_conv2d_push_constants), { conv2d_BS_K, conv2d_BS_NPQ, 1 },
+            { conv2d_WG_SIZE, conv2d_BS_K, conv2d_BS_CRS, conv2d_BS_NPQ, conv2d_TS_K, use_collectives }, 1, true,
+            false);
     }
 
     ggml_vk_create_pipeline(device, device->pipeline_conv2d_dw_whcn_f32, "conv2d_dw_whcn_f32", conv2d_dw_whcn_f32_len, conv2d_dw_whcn_f32_data, "main", 3, sizeof(vk_op_conv2d_dw_push_constants), {512, 1, 1}, {}, 1);
@@ -6958,9 +6968,13 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
         }
         return nullptr;
     case GGML_OP_CONV_2D:
-        if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
+        if (src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 &&
             ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && ggml_is_contiguous(dst)) {
-            return ctx->device->pipeline_conv2d_f32;
+            if (src0->type == GGML_TYPE_F32) {
+                return ctx->device->pipeline_conv2d_f32;
+            } else if (src0->type == GGML_TYPE_F16) {
+                return ctx->device->pipeline_conv2d_f16_f32;
+            }
         }
         return nullptr;
     case GGML_OP_CONV_2D_DW:
@@ -8185,13 +8199,13 @@ static void ggml_vk_pool_2d(ggml_backend_vk_context * ctx, vk_context& subctx, c
 
 static void ggml_vk_conv_2d(ggml_backend_vk_context * ctx, vk_context & subctx, const ggml_tensor * src0,
                             const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
 
     GGML_TENSOR_BINARY_OP_LOCALS
 
-    GGML_ASSERT(nb00 == sizeof(float));
+    GGML_ASSERT(nb00 == sizeof(float) || nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
     GGML_ASSERT(nb0 == sizeof(float));
 
@@ -10874,7 +10888,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
             const vk_device& device = ggml_vk_get_device(ctx->device);
             bool is_Apple = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_APPLE;
             // Channel-contiguous format is not supported yet.
-            return (op->src[0]->type == GGML_TYPE_F32 &&
+            return ((op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) &&
                 op->src[1]->type == GGML_TYPE_F32 &&
                 op->type == GGML_TYPE_F32 &&
                 ggml_is_contiguous(op->src[0]) &&
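
With the changes above, ggml_vk_op_get_pipeline picks pipeline_conv2d_f16_f32 whenever the kernel tensor is F16, and the device-level supports_op check accepts both kernel types as long as all tensors are contiguous. A minimal sketch of how a caller could query that support through the public backend API (my illustration, not part of this commit; can_offload_conv is a hypothetical helper):

#include "ggml-backend.h"

// Ask the Vulkan backend whether it can run a given GGML_OP_CONV_2D node.
// With this commit the answer should be true for contiguous F32 or F16 kernels
// with F32 input and output.
static bool can_offload_conv(ggml_backend_t vk_backend, const struct ggml_tensor * conv_node) {
    return ggml_backend_supports_op(vk_backend, conv_node);
}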
ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp CHANGED
@@ -656,6 +656,7 @@ void process_shaders() {
     string_to_spv("opt_step_adamw_f32", "opt_step_adamw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
 
     string_to_spv("conv2d_f32", "conv2d_mm.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});
+    string_to_spv("conv2d_f16_f32", "conv2d_mm.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"USE_COLLECTIVES", "1"}});
 
     string_to_spv("conv2d_dw_whcn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"WHCN", "1"}}));
     string_to_spv("conv2d_dw_cwhn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"CWHN", "1"}}));