Neo Zhang committed on
Commit
328702a
·
unverified ·
1 Parent(s): 03d4b22

rm wait() (llama/7233)

Browse files
Files changed (1) hide show
  1. ggml-sycl.cpp +1 -24
ggml-sycl.cpp CHANGED
@@ -15564,26 +15564,6 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
15564
  const int64_t r2 = ne12/ne02;
15565
  const int64_t r3 = ne13/ne03;
15566
 
15567
- #if 0
15568
- // use syclGemmEx
15569
- {
15570
- for (int i13 = 0; i13 < ne13; ++i13) {
15571
- for (int i12 = 0; i12 < ne12; ++i12) {
15572
- int i03 = i13 / r3;
15573
- int i02 = i12 / r2;
15574
-
15575
- SYCL_CHECK(
15576
- syclGemmEx(g_sycl_handles[g_main_device], CUBLAS_OP_T, CUBLAS_OP_N,
15577
- ne01, ne11, ne10,
15578
- alpha, (const char *) src0_as_f16 + i02*src0->nb[2] + i03*src0->nb[3] , SYCL_R_16F, nb01/sizeof(half),
15579
- (const char *) src1_as_f16 + i12*src1->nb[2]/2 + i13*src1->nb[3]/2, SYCL_R_16F, nb11/sizeof(float),
15580
- beta, ( char *) dst_t + i12*nbd2 + i13*nbd3, cu_data_type, ne01,
15581
- cu_compute_type,
15582
- CUBLAS_GEMM_DEFAULT_TENSOR_OP));
15583
- }
15584
- }
15585
- }
15586
- #else
15587
  if (r2 == 1 && r3 == 1 && src0->nb[2]*src0->ne[2] == src0->nb[3] && src1->nb[2]*src1->ne[2] == src1->nb[3]) {
15588
  // there is no broadcast and src0, src1 are contiguous across dims 2, 3
15589
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
@@ -15595,7 +15575,6 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
15595
  nb11 / nb10, nb12 / nb10, beta,
15596
  (char *)dst_t, cu_data_type, ne01, nb2 / nb0,
15597
  ne12 * ne13, cu_compute_type)));
15598
- g_sycl_handles[g_main_device]->wait();
15599
  } else {
15600
  const int ne23 = ne12*ne13;
15601
 
@@ -15626,7 +15605,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
15626
  nb02, nb03, nb12_scaled, nb13_scaled,
15627
  nbd2, nbd3, r2, r3, item_ct1);
15628
  });
15629
- }).wait();
15630
  }
15631
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
15632
  *g_sycl_handles[g_main_device], oneapi::mkl::transpose::trans,
@@ -15637,9 +15616,7 @@ static void ggml_sycl_mul_mat_batched_sycl(const ggml_tensor *src0,
15637
  dpct::library_data_t::real_half, nb11 / nb10, beta,
15638
  (void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23,
15639
  cu_compute_type)));
15640
- g_sycl_handles[g_main_device]->wait();
15641
  }
15642
- #endif
15643
 
15644
  if (no_mixed_dtypes) {
15645
  const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);
 
15564
  const int64_t r2 = ne12/ne02;
15565
  const int64_t r3 = ne13/ne03;
15566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15567
  if (r2 == 1 && r3 == 1 && src0->nb[2]*src0->ne[2] == src0->nb[3] && src1->nb[2]*src1->ne[2] == src1->nb[3]) {
15568
  // there is no broadcast and src0, src1 are contiguous across dims 2, 3
15569
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
 
15575
  nb11 / nb10, nb12 / nb10, beta,
15576
  (char *)dst_t, cu_data_type, ne01, nb2 / nb0,
15577
  ne12 * ne13, cu_compute_type)));
 
15578
  } else {
15579
  const int ne23 = ne12*ne13;
15580
 
 
15605
  nb02, nb03, nb12_scaled, nb13_scaled,
15606
  nbd2, nbd3, r2, r3, item_ct1);
15607
  });
15608
+ });
15609
  }
15610
  SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch(
15611
  *g_sycl_handles[g_main_device], oneapi::mkl::transpose::trans,
 
15616
  dpct::library_data_t::real_half, nb11 / nb10, beta,
15617
  (void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23,
15618
  cu_compute_type)));
 
15619
  }
 
15620
 
15621
  if (no_mixed_dtypes) {
15622
  const to_fp32_sycl_t to_fp32_sycl = ggml_get_to_fp32_sycl(GGML_TYPE_F16);