slaren committed on
Commit
5fd8fb7
·
unverified ·
1 Parent(s): 26aec77

cuda : fix LLAMA_CUDA_F16 (llama/5262)

Browse files
Files changed (1) hide show
  1. ggml-cuda.cu +3 -3
ggml-cuda.cu CHANGED
@@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(
8657
 
8658
  if (src1_convert_f16) {
8659
  src1_dfloat = src1_dfloat_a.alloc(ne00);
8660
- ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00,
8661
- ne00, 1, sizeof(float), 0, 0,
8662
- ne00, 1, sizeof(half), 0, 0, stream);
8663
  }
8664
  #else
8665
  const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
 
8657
 
8658
  if (src1_convert_f16) {
8659
  src1_dfloat = src1_dfloat_a.alloc(ne00);
8660
+ const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
8661
+ GGML_ASSERT(to_fp16_cuda != nullptr);
8662
+ to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
8663
  }
8664
  #else
8665
  const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion