Abhilash Majumder committed

ggml-sycl: Replace 3d ops with macro (llama/5458)

* use macro

* use macro

* fix format
ggml-sycl.cpp CHANGED: +14 -61
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -11578,11 +11578,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
     }
     char * dst_ptr = (char *) dst;
 
-    const int64_t ne0 = src->ne[0];
-    const int64_t nb0 = src->nb[0];
-    const int64_t nb1 = src->nb[1];
-    const int64_t nb2 = src->nb[2];
-    const int64_t nb3 = src->nb[3];
+    GGML_TENSOR_LOCALS_1(int64_t, ne, src, ne);
+    GGML_TENSOR_LOCALS(int64_t, nb, src, nb);
     const enum ggml_type type = src->type;
     const int64_t ts = ggml_type_size(type);
     const int64_t bs = ggml_blck_size(type);
@@ -12426,9 +12423,7 @@ inline void ggml_sycl_op_alibi(const ggml_tensor *src0, const ggml_tensor *src1,
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t ne02 = src0->ne[2];
+    GGML_TENSOR_LOCALS_3(int64_t, ne0, src0, ne);
     const int64_t nrows = ggml_nrows(src0);
 
     //const int n_past = ((int32_t *) dst->op_params)[0];
@@ -12758,15 +12753,9 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
                                  ggml_sycl_op_mul_mat_t op,
                                  const bool convert_src1_to_q8_1) try {
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t ne02 = src0->ne[2];
-    const int64_t ne03 = src0->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
 
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-    const int64_t ne13 = src1->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
     const int64_t nrows1 = ggml_nrows(src1);
 
     GGML_ASSERT(ne03 == ne13);
@@ -13337,23 +13326,13 @@ static void ggml_sycl_mul_mat_mat_batched_sycl(const ggml_tensor *src0,
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t ne02 = src0->ne[2];
-    const int64_t ne03 = src0->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
 
-    const int64_t nb01 = src0->nb[1];
-    const int64_t nb02 = src0->nb[2]; GGML_UNUSED(nb02);
-    const int64_t nb03 = src0->nb[3]; GGML_UNUSED(nb03);
+    GGML_TENSOR_LOCALS(int64_t, nb0, src0, nb);
 
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-    const int64_t ne13 = src1->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
 
-    const int64_t nb11 = src1->nb[1];
-    const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
-    const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
+    GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
 
     const int64_t ne1 = ggml_nelements(src1);
     const int64_t ne = ggml_nelements(dst);
@@ -13655,23 +13634,15 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
     GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
 
-    const int64_t ne00 = src00->ne[0];
-    const int64_t ne01 = src00->ne[1];
-    const int64_t ne02 = src00->ne[2];
-    const int64_t ne03 = src00->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
 
     //const int64_t nb01 = src00->nb[1];
-    const int64_t nb02 = src00->nb[2]; GGML_UNUSED(nb02);
-    const int64_t nb03 = src00->nb[3]; GGML_UNUSED(nb03);
+    GGML_TENSOR_LOCALS(int64_t, nb0, src00, nb);
 
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-    const int64_t ne13 = src1->ne[3];
+    GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
 
+    GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
     //const int64_t nb11 = src1->nb[1];
-    const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
-    const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
 
     const int64_t ne1 = ggml_nelements(src1);
     const int64_t ne = ggml_nelements(dst);
@@ -13940,25 +13911,7 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1,
     GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
     GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t ne02 = src0->ne[2];
-
-
-    const int64_t nb00 = src0->nb[0];
-    const int64_t nb01 = src0->nb[1];
-    const int64_t nb02 = src0->nb[2];
-    const int64_t nb03 = src0->nb[3];
-
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-
-
-    const int64_t nb10 = src1->nb[0];
-    const int64_t nb11 = src1->nb[1];
-    const int64_t nb12 = src1->nb[2];
-    const int64_t nb13 = src1->nb[3];
+    GGML_TENSOR_BINARY_OP_LOCALS;
 
     SYCL_CHECK(ggml_sycl_set_device(g_main_device));
    dpct::queue_ptr main_stream = g_syclStreams[g_main_device_index][0];
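
For reference, the GGML_TENSOR_LOCALS family used above declares per-dimension locals from a tensor's ne (extent) and nb (stride) arrays. Below is a minimal sketch of the expansion, paraphrased from the ggml headers; the exact upstream definitions may differ slightly between versions:

    /* Sketch of the GGML_TENSOR_LOCALS macros (paraphrased from the ggml
       headers). Each level declares one more <prefix>N local and casts it
       to void so unused dimensions do not trigger compiler warnings. */
    #define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
        const type prefix##0 = (pointer)->array[0];            \
        (void)(prefix##0);

    #define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
        GGML_TENSOR_LOCALS_1(type, prefix, pointer, array)     \
        const type prefix##1 = (pointer)->array[1];            \
        (void)(prefix##1);

    #define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
        GGML_TENSOR_LOCALS_2(type, prefix, pointer, array)     \
        const type prefix##2 = (pointer)->array[2];            \
        (void)(prefix##2);

    #define GGML_TENSOR_LOCALS(type, prefix, pointer, array)   \
        GGML_TENSOR_LOCALS_3(type, prefix, pointer, array)     \
        const type prefix##3 = (pointer)->array[3];            \
        (void)(prefix##3);

Under this sketch, a line such as GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); expands to the four declarations it replaces:

    const int64_t ne10 = (src1)->ne[0]; (void)(ne10);
    const int64_t ne11 = (src1)->ne[1]; (void)(ne11);
    const int64_t ne12 = (src1)->ne[2]; (void)(ne12);
    const int64_t ne13 = (src1)->ne[3]; (void)(ne13);

Because every expanded local is void-cast, the old GGML_UNUSED(nb02)-style annotations become redundant and are dropped together with the hand-written declarations. GGML_TENSOR_BINARY_OP_LOCALS, used in ggml_sycl_cpy, bundles these calls for src0, src1, and dst in one line.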