Spaces:
Running
Running
slaren
committed on
cuda : fix LLAMA_CUDA_F16 (llama/5262)
Browse files
- ggml-cuda.cu +3 -3
ggml-cuda.cu
CHANGED
|
@@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(
|
|
| 8657 |
|
| 8658 |
if (src1_convert_f16) {
|
| 8659 |
src1_dfloat = src1_dfloat_a.alloc(ne00);
|
| 8660 |
-
|
| 8661 |
-
|
| 8662 |
-
|
| 8663 |
}
|
| 8664 |
#else
|
| 8665 |
const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
|
|
|
|
| 8657 |
|
| 8658 |
if (src1_convert_f16) {
|
| 8659 |
src1_dfloat = src1_dfloat_a.alloc(ne00);
|
| 8660 |
+
const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
|
| 8661 |
+
GGML_ASSERT(to_fp16_cuda != nullptr);
|
| 8662 |
+
to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
|
| 8663 |
}
|
| 8664 |
#else
|
| 8665 |
const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
|