Commit 7d66a68 · Parent: 52069b8
vulkan : do not use tensor->extra (llama/9407)

* vulkan : do not use tensor->extra

  This patch allows using the Vulkan backend with the RPC backend, as
  tensor->extra is no longer used.

  Ref: #8536

* Adapt GGML_VULKAN_CHECK_RESULTS to extra removal (llama/2)

---------

Co-authored-by: 0cc4m <[email protected]>
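The heart of the change: instead of hanging a ggml_tensor_extra_gpu side-structure off tensor->extra to remember which device buffer a tensor lives in and at what offset, the backend now derives both on demand, from the tensor's backend buffer context and from its data pointer, which the Vulkan allocator hands out relative to a fake base address (vk_ptr_base). The standalone sketch below illustrates that addressing scheme with simplified stand-in types (Tensor, Buffer, tensor_offset, base_ptr are hypothetical names, not the ggml API); the real helpers are vk_tensor_offset and ggml_backend_vk_buffer_context in the diff further down.

    // Standalone illustration of the offset-from-virtual-base scheme used by the patch.
    // Tensor, Buffer and all names below are simplified stand-ins, not ggml structures.
    #include <cstdint>
    #include <cstdio>

    // Same trick as vk_ptr_base: tensor "addresses" are offsets from a small
    // non-null constant rather than real host pointers.
    static void * const base_ptr = (void *)(uintptr_t) 0x1000;

    struct Buffer {            // stand-in for the device buffer (vk_buffer)
        uint64_t size;
    };

    struct Tensor {            // stand-in for ggml_tensor (only the fields used here)
        void *   data;         // base_ptr + byte offset into the device buffer
        Tensor * view_src;     // non-null if this tensor is a view of another tensor
        uint64_t view_offs;    // extra byte offset of the view inside view_src
    };

    // Mirrors the idea of vk_tensor_offset(): recover the byte offset inside the
    // device buffer from the fake pointer the allocator assigned.
    static uint64_t tensor_offset(const Tensor * t) {
        if (t->view_src) {
            return (uint8_t *) t->view_src->data - (uint8_t *) base_ptr;
        }
        return (uint8_t *) t->data - (uint8_t *) base_ptr;
    }

    int main() {
        Buffer buf { 1u << 20 };                              // 1 MiB device buffer
        Tensor a { (uint8_t *) base_ptr + 256, nullptr, 0 };  // tensor placed 256 bytes in
        Tensor v { nullptr, &a, 64 };                         // view 64 bytes into tensor a

        std::printf("a: offset %llu of %llu, view: offset %llu\n",
                    (unsigned long long) tensor_offset(&a),
                    (unsigned long long) buf.size,
                    (unsigned long long) (tensor_offset(&v) + v.view_offs));
        return 0;
    }

Because the offset is recomputed from the tensor's data pointer, no per-tensor GPU metadata has to travel with the tensor, which is what makes the backend usable behind the RPC backend.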
ggml/src/ggml-vulkan.cpp CHANGED (+148, -194)
@@ -433,16 +433,6 @@ struct vk_context_struct {
 typedef std::shared_ptr<vk_context_struct> vk_context;
 typedef std::weak_ptr<vk_context_struct> vk_context_ref;
 
-struct ggml_tensor_extra_gpu {
-    vk_buffer_ref buffer_gpu;
-    uint64_t offset;
-
-    void reset() {
-        buffer_gpu.reset();
-        offset = 0;
-    }
-};
-
 struct ggml_vk_garbage_collector {
     std::vector<vk_semaphore> tl_semaphores;
     std::vector<vk_semaphore> semaphores;
@@ -553,6 +543,31 @@ struct ggml_backend_vk_context {
     std::vector<vk_context_ref> tensor_ctxs;
 };
 
+static void * const vk_ptr_base = (void *)(uintptr_t) 0x1000; // NOLINT
+
+static uint64_t vk_tensor_offset(const ggml_tensor * tensor) {
+    if (tensor->view_src) {
+        return (uint8_t *) tensor->view_src->data - (uint8_t *) vk_ptr_base;
+    }
+    return (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
+}
+
+struct ggml_backend_vk_buffer_context {
+    vk_device_ref device;
+    vk_buffer dev_buffer;
+    std::string name;
+
+    ggml_backend_vk_buffer_context(vk_device_ref device, vk_buffer&& dev_buffer, std::string& name) :
+        device(device),
+        dev_buffer(dev_buffer),
+        name(name) {
+    }
+
+    ~ggml_backend_vk_buffer_context() {
+        ggml_vk_destroy_buffer(dev_buffer);
+    }
+};
+
 #ifdef GGML_VULKAN_MEMORY_DEBUG
 void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
     std::lock_guard<std::mutex> guard(log_mutex);
@@ -3076,9 +3091,9 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
     const uint64_t r2 = ne12 / ne02;
     const uint64_t r3 = ne13 / ne03;
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
 
     vk_buffer d_Qx;
     size_t qx_buf_offset = 0;
@@ -3180,8 +3195,8 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     GGML_ASSERT(d_D->size >= d_buf_offset + d_sz * ne02 * ne03);
     vk_buffer d_X;
@@ -3189,13 +3204,13 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
     vk_buffer d_Y;
     uint64_t y_buf_offset = 0;
     if (!src0_uma) {
-        d_Qx =
-        qx_buf_offset =
+        d_Qx = src0_buf_ctx->dev_buffer;
+        qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
        GGML_ASSERT(d_Qx != nullptr);
     }
     if (!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
        GGML_ASSERT(d_Qy != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3276,9 +3291,9 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
     const uint64_t r2 = ne12 / ne02;
     const uint64_t r3 = ne13 / ne03;
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
 
     vk_buffer d_Qx;
     size_t qx_buf_offset = 0;
@@ -3357,21 +3372,21 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
     vk_buffer d_Y;
     uint64_t y_buf_offset = 0;
     if(!src0_uma) {
-        d_Qx =
-        qx_buf_offset =
+        d_Qx = src0_buf_ctx->dev_buffer;
+        qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
        GGML_ASSERT(d_Qx != nullptr);
     }
     if(!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
        GGML_ASSERT(d_Qy != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3454,9 +3469,9 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
 
     GGML_ASSERT(ne11 == 1);
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
 
     vk_buffer d_Qy;
     size_t qy_buf_offset = 0;
@@ -3482,15 +3497,15 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
-    vk_buffer d_Qx =
-    const uint64_t qx_buf_offset =
+    vk_buffer d_Qx = src0_buf_ctx->dev_buffer;
+    const uint64_t qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
     GGML_ASSERT(d_Qx != nullptr);
     if (!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
 
@@ -3532,9 +3547,9 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
 
     GGML_ASSERT(ne11 == 1);
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
 
     vk_buffer d_Qy = nullptr;
     size_t qy_buf_offset = 0;
@@ -3561,15 +3576,15 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
-    vk_buffer d_Qx =
-    const uint64_t qx_buf_offset =
+    vk_buffer d_Qx = src0_buf_ctx->dev_buffer;
+    const uint64_t qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
     GGML_ASSERT(d_Qx != nullptr);
     if (!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
 
@@ -3631,10 +3646,10 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
 
     const uint64_t n_as = ne02;
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
+    ggml_backend_vk_buffer_context * ids_buf_ctx = (ggml_backend_vk_buffer_context *)ids->buffer->context;
 
     vk_buffer d_Qx;
     size_t qx_buf_offset = 0;
@@ -3731,26 +3746,26 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
     vk_buffer d_Y;
     uint64_t y_buf_offset = 0;
     if (!src0_uma) {
-        d_Qx =
-        qx_buf_offset =
+        d_Qx = src0_buf_ctx->dev_buffer;
+        qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if (!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if (!ids_uma) {
-        d_ids =
-        ids_buf_offset =
+        d_ids = ids_buf_ctx->dev_buffer;
+        ids_buf_offset = vk_tensor_offset(ids) + ids->view_offs;
         GGML_ASSERT(d_ids != nullptr);
     }
     if (qx_needs_dequant) {
@@ -3836,10 +3851,10 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
     const uint64_t ne22 = dst->ne[2];
     const uint64_t ne23 = dst->ne[3];
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
+    ggml_backend_vk_buffer_context * ids_buf_ctx = (ggml_backend_vk_buffer_context *)ids->buffer->context;
 
     vk_buffer d_Qx;
     size_t qx_buf_offset = 0;
@@ -3924,26 +3939,26 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
         return;
     }
 
-    vk_buffer d_D =
-    const uint64_t d_buf_offset =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
+    const uint64_t d_buf_offset = vk_tensor_offset(dst) + dst->view_offs;
     GGML_ASSERT(d_D != nullptr);
     vk_buffer d_X;
     uint64_t x_buf_offset = 0;
     vk_buffer d_Y;
     uint64_t y_buf_offset = 0;
     if(!src0_uma) {
-        d_Qx =
-        qx_buf_offset =
+        d_Qx = src0_buf_ctx->dev_buffer;
+        qx_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
         GGML_ASSERT(d_Qx != nullptr);
     }
     if(!src1_uma) {
-        d_Qy =
-        qy_buf_offset =
+        d_Qy = src1_buf_ctx->dev_buffer;
+        qy_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
         GGML_ASSERT(d_Qy != nullptr);
     }
     if(!ids_uma) {
-        d_ids =
-        ids_buf_offset =
+        d_ids = ids_buf_ctx->dev_buffer;
+        ids_buf_offset = vk_tensor_offset(ids) + ids->view_offs;
         GGML_ASSERT(d_ids != nullptr);
     }
     if (qx_needs_dequant) {
@@ -4250,7 +4265,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
     std::cerr << "), " << ggml_op_name(op) << ", " << (dryrun ? "dryrun" : "") << ")");
     GGML_ASSERT(op == GGML_OP_GET_ROWS || (!ggml_is_quantized(src0->type) && (src1 == nullptr || !ggml_is_quantized(src1->type)))); // NOLINT
     GGML_ASSERT(ggml_vk_op_supports_incontiguous(op) || ggml_vk_dim01_contiguous(src0)); // NOLINT
-    GGML_ASSERT(dst->
+    GGML_ASSERT(dst->buffer != nullptr);
     const uint64_t ne00 = src0->ne[0];
     const uint64_t ne01 = src0->ne[1];
     const uint64_t ne02 = src0->ne[2];
@@ -4296,10 +4311,10 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
 
     const bool op_supports_incontiguous = ggml_vk_op_supports_incontiguous(op);
 
+    ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
+    ggml_backend_vk_buffer_context * src0_buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+    ggml_backend_vk_buffer_context * src1_buf_ctx = use_src1 ? (ggml_backend_vk_buffer_context *)src1->buffer->context : nullptr;
+    ggml_backend_vk_buffer_context * src2_buf_ctx = use_src2 ? (ggml_backend_vk_buffer_context *)src2->buffer->context : nullptr;
 
     vk_buffer d_X = nullptr;
     size_t x_buf_offset = 0;
@@ -4330,7 +4345,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
     uint64_t z_sz = use_src2 ? ggml_type_size(src2->type) * ne2 : 0;
     uint64_t d_sz = ggml_type_size(dst->type) * ned;
 
-    vk_buffer d_D =
+    vk_buffer d_D = dst_buf_ctx->dev_buffer;
 
     // Workaround for tiny tensor inputs on ROPE
     if (op == GGML_OP_ROPE && use_src1 && y_sz > d_D->size) {
@@ -4338,21 +4353,21 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
     }
 
     GGML_ASSERT(d_D != nullptr);
-    uint64_t d_buf_offset = ((
-    GGML_ASSERT(d_buf_offset ==
+    uint64_t d_buf_offset = ((vk_tensor_offset(dst) + dst->view_offs) / ctx->device->properties.limits.minStorageBufferOffsetAlignment) * ctx->device->properties.limits.minStorageBufferOffsetAlignment;
+    GGML_ASSERT(d_buf_offset == vk_tensor_offset(dst) || op == GGML_OP_CPY); // NOLINT
     if(!src0_uma) {
-        d_X =
-        x_buf_offset =
+        d_X = src0_buf_ctx->dev_buffer;
+        x_buf_offset = vk_tensor_offset(src0) + src0->view_offs;
         GGML_ASSERT(d_X != nullptr);
     }
     if (use_src1 && !src1_uma) {
-        d_Y =
-        y_buf_offset =
+        d_Y = src1_buf_ctx->dev_buffer;
+        y_buf_offset = vk_tensor_offset(src1) + src1->view_offs;
         GGML_ASSERT(d_Y != nullptr);
     }
     if (use_src2 && !src2_uma) {
-        d_Z =
-        z_buf_offset =
+        d_Z = src2_buf_ctx->dev_buffer;
+        z_buf_offset = vk_tensor_offset(src2) + src2->view_offs;
         GGML_ASSERT(d_Z != nullptr);
     }
 
@@ -4531,11 +4546,10 @@ static void ggml_vk_get_rows(ggml_backend_vk_context * ctx, vk_context& subctx,
 }
 
 static void ggml_vk_acc(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
-    ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
     const uint32_t src0_type_size = ggml_type_size(src0->type);
     const uint32_t src1_type_size = ggml_type_size(src1->type);
     const uint32_t dst_type_size = ggml_type_size(dst->type);
-    const uint32_t d_offset = ((
+    const uint32_t d_offset = ((vk_tensor_offset(dst) + dst->view_offs) % ctx->device->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size;
 
     int nb1 = dst->op_params[0] / 4; // 4 bytes of float32
     int nb2 = dst->op_params[1] / 4; // 4 bytes of float32
@@ -4724,10 +4738,9 @@ static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, co
 }
 
 static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
-    ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) dst->extra;
     const uint32_t src0_type_size = ggml_type_size(src0->type);
     const uint32_t dst_type_size = ggml_type_size(dst->type);
-    const uint32_t d_offset = ((
+    const uint32_t d_offset = ((vk_tensor_offset(dst) + dst->view_offs) % ctx->device->properties.limits.minStorageBufferOffsetAlignment) / dst_type_size;
 
     ggml_vk_op_f32<vk_op_unary_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, {
         (uint32_t)ggml_nelements(src0),
@@ -5535,14 +5548,6 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
 }
 #endif
 
-static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) {
-    VK_LOG_DEBUG("ggml_vk_create_extra(" << tensor << " (" << tensor->name << ", " << ggml_op_name(tensor->op) << "))");
-    ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
-    extra->reset();
-    tensor->extra = extra;
-    return extra;
-}
-
 static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
 #if defined(GGML_VULKAN_RUN_TESTS)
     ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
@@ -5711,9 +5716,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* t
 // Returns true if node has enqueued work into the queue, false otherwise
 // If submit is true the current all operations queued so far are being submitted to Vulkan to overlap cmdlist creation and GPU execution.
 static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool last_node, bool submit){
-    if (ggml_is_empty(node) || extra == nullptr) {
+    if (ggml_is_empty(node) || !node->buffer) {
         return false;
     }
 
@@ -5965,7 +5968,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
 }
 
 static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * tensor, int tensor_idx, bool use_fence = true){
+    ggml_backend_buffer * buf = nullptr;
 
     switch (tensor->op) {
     case GGML_OP_ADD:
@@ -6001,7 +6004,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
     case GGML_OP_TIMESTEP_EMBEDDING:
     case GGML_OP_LEAKY_RELU:
     case GGML_OP_REPEAT:
+        buf = tensor->buffer;
 
         break;
     case GGML_OP_UNARY:
@@ -6011,7 +6014,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
     case GGML_UNARY_OP_GELU_QUICK:
     case GGML_UNARY_OP_RELU:
     case GGML_UNARY_OP_TANH:
+        buf = tensor->buffer;
         break;
     default:
         return false;
@@ -6019,14 +6022,14 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
         break;
     case GGML_OP_MUL_MAT:
     case GGML_OP_MUL_MAT_ID:
+        buf = tensor->buffer;
 
         break;
     default:
        return false;
    }
 
-    if (
+    if (buf == nullptr) {
        return false;
    }
 
@@ -6167,42 +6170,6 @@ GGML_CALL static void ggml_vk_get_device_description(int device, char * descript
 
 // device backend
 
-static void * const vk_ptr_base = (void *)(uintptr_t) 0x1000; // NOLINT
-
-struct ggml_backend_vk_buffer_context {
-    vk_device_ref device;
-    vk_buffer dev_buffer;
-    ggml_tensor_extra_gpu * temp_tensor_extras = nullptr;
-    size_t temp_tensor_extra_index = 0;
-    std::string name;
-
-    ggml_backend_vk_buffer_context(vk_device_ref device, vk_buffer&& dev_buffer, std::string& name) :
-        device(device),
-        dev_buffer(dev_buffer),
-        name(name) {
-    }
-
-    ~ggml_backend_vk_buffer_context() {
-        ggml_vk_destroy_buffer(dev_buffer);
-        if (temp_tensor_extras != nullptr) {
-            delete[] temp_tensor_extras;
-        }
-    }
-
-    ggml_tensor_extra_gpu * ggml_vk_alloc_temp_tensor_extra() {
-        if (temp_tensor_extras == nullptr) {
-            temp_tensor_extras = new ggml_tensor_extra_gpu[GGML_VK_MAX_NODES];
-        }
-
-        size_t alloc_index = temp_tensor_extra_index;
-        temp_tensor_extra_index = (temp_tensor_extra_index + 1) % GGML_VK_MAX_NODES;
-        ggml_tensor_extra_gpu * extra = &temp_tensor_extras[alloc_index];
-        extra->reset();
-
-        return extra;
-    }
-};
-
 GGML_CALL static const char * ggml_backend_vk_buffer_get_name(ggml_backend_buffer_t buffer) {
     ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
     return ctx->name.c_str();
@@ -6227,51 +6194,37 @@ GGML_CALL static void * ggml_backend_vk_buffer_get_base(ggml_backend_buffer_t bu
 
 GGML_CALL static void ggml_backend_vk_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     VK_LOG_DEBUG("ggml_backend_vk_buffer_init_tensor(" << buffer << " (" << buffer->context << "), " << tensor << ")");
-    ggml_backend_vk_buffer_context * ctx = (ggml_backend_vk_buffer_context *)buffer->context;
-
     if (tensor->view_src != nullptr) {
         GGML_ASSERT(tensor->view_src->buffer->buft == buffer->buft);
-        GGML_ASSERT(tensor->view_src->extra != nullptr);
-        tensor->extra = tensor->view_src->extra;
-    } else {
-        ggml_tensor_extra_gpu * extra = ctx->ggml_vk_alloc_temp_tensor_extra();
-        extra->buffer_gpu = ctx->dev_buffer;
-        extra->offset = (uint8_t *) tensor->data - (uint8_t *) vk_ptr_base;
-        tensor->extra = extra;
     }
 }
 
 GGML_CALL static void ggml_backend_vk_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     VK_LOG_DEBUG("ggml_backend_vk_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")");
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context;
+    vk_buffer buf = buf_ctx->dev_buffer;
 
-    ggml_vk_buffer_write(buf, extra->offset + tensor->view_offs + offset, data, size);
-
-    GGML_UNUSED(buffer);
+    ggml_vk_buffer_write(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static void ggml_backend_vk_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     VK_LOG_DEBUG("ggml_backend_vk_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")");
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)buffer->context;
 
-    vk_buffer buf = extra->buffer_gpu.lock();
+    vk_buffer buf = buf_ctx->dev_buffer;
 
+    ggml_vk_buffer_read(buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static bool ggml_backend_vk_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
     if (ggml_backend_buffer_is_vk(src->buffer)) {
+        ggml_backend_vk_buffer_context * src_buf_ctx = (ggml_backend_vk_buffer_context *)src->buffer->context;
+        ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
 
-        vk_buffer src_buf =
-        vk_buffer dst_buf =
+        vk_buffer src_buf = src_buf_ctx->dev_buffer;
+        vk_buffer dst_buf = dst_buf_ctx->dev_buffer;
 
-        ggml_vk_buffer_copy(dst_buf,
+        ggml_vk_buffer_copy(dst_buf, vk_tensor_offset(dst) + dst->view_offs, src_buf, vk_tensor_offset(src) + src->view_offs, ggml_nbytes(src));
 
         return true;
     }
@@ -6449,7 +6402,7 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
     GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
 
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
     vk_context transfer_ctx;
 
@@ -6462,9 +6415,9 @@ GGML_CALL static void ggml_backend_vk_set_tensor_async(ggml_backend_t backend, g
         transfer_ctx = ctx->transfer_ctx.lock();
     }
 
-    vk_buffer buf =
+    vk_buffer buf = buf_ctx->dev_buffer;
 
-    ggml_vk_buffer_write_async(transfer_ctx, buf,
+    ggml_vk_buffer_write_async(transfer_ctx, buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@@ -6472,7 +6425,7 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
     GGML_ASSERT((tensor->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || tensor->buffer->buft == ggml_backend_vk_host_buffer_type()) && "unsupported buffer type");
 
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
     vk_context transfer_ctx;
 
@@ -6485,17 +6438,17 @@ GGML_CALL static void ggml_backend_vk_get_tensor_async(ggml_backend_t backend, c
         transfer_ctx = ctx->transfer_ctx.lock();
     }
 
-    vk_buffer buf =
+    vk_buffer buf = buf_ctx->dev_buffer;
 
-    ggml_vk_buffer_read_async(transfer_ctx, buf,
+    ggml_vk_buffer_read_async(transfer_ctx, buf, vk_tensor_offset(tensor) + tensor->view_offs + offset, data, size);
 }
 
 GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) {
     VK_LOG_DEBUG("ggml_backend_vk_cpy_tensor_async()");
     ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context;
     if ((dst->buffer->buft == ggml_backend_vk_get_default_buffer_type(backend) || dst->buffer->buft == ggml_backend_vk_host_buffer_type()) && ggml_backend_buffer_is_vk(src->buffer)) {
+        ggml_backend_vk_buffer_context * src_buf_ctx = (ggml_backend_vk_buffer_context *)src->buffer->context;
+        ggml_backend_vk_buffer_context * dst_buf_ctx = (ggml_backend_vk_buffer_context *)dst->buffer->context;
 
         vk_context transfer_ctx;
 
@@ -6508,10 +6461,10 @@ GGML_CALL static bool ggml_backend_vk_cpy_tensor_async(ggml_backend_t backend, c
             transfer_ctx = ctx->transfer_ctx.lock();
         }
 
-        vk_buffer src_buf =
-        vk_buffer dst_buf =
+        vk_buffer src_buf = src_buf_ctx->dev_buffer;
+        vk_buffer dst_buf = dst_buf_ctx->dev_buffer;
 
-        ggml_vk_buffer_copy_async(transfer_ctx, dst_buf,
+        ggml_vk_buffer_copy_async(transfer_ctx, dst_buf, vk_tensor_offset(dst) + dst->view_offs, src_buf, vk_tensor_offset(src) + src->view_offs, ggml_nbytes(src));
         return true;
     }
 
@@ -6949,10 +6902,10 @@ static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name)
     const size_t tensor_size = ggml_nbytes(tensor);
     tensor_data = malloc(tensor_size);
 
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
-    vk_buffer buffer_gpu =
-    ggml_vk_buffer_read(buffer_gpu,
+    vk_buffer buffer_gpu = buf_ctx->dev_buffer;
+    ggml_vk_buffer_read(buffer_gpu, vk_tensor_offset(tensor) + tensor->view_offs, tensor_data, tensor_size);
 }
 
 std::cerr << "TENSOR CHECK " << name << " (" << tensor->name << "): " << ggml_op_name(tensor->op) << std::endl;
@@ -7026,9 +6979,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
         memcpy(src0_clone->data, src0->data, src0_size);
         memcpy(src0_clone->nb, src0->nb, sizeof(size_t) * GGML_MAX_DIMS);
     } else if (ggml_backend_buffer_is_vk(src0->buffer)) {
-        vk_buffer buffer_gpu =
-        uint64_t offset =
+        ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src0->buffer->context;
+        vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+        uint64_t offset = vk_tensor_offset(src0) + src0->view_offs;
         if (!ggml_is_contiguous(src0) && ggml_vk_dim01_contiguous(src0)) {
             for (int i3 = 0; i3 < src0->ne[3]; i3++) {
                 for (int i2 = 0; i2 < src0->ne[2]; i2++) {
@@ -7068,9 +7021,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
         memcpy(src1_clone->data, src1->data, src1_size);
         memcpy(src1_clone->nb, src1->nb, sizeof(size_t) * GGML_MAX_DIMS);
     } else if (ggml_backend_buffer_is_vk(src1->buffer)) {
-        vk_buffer buffer_gpu =
-        uint64_t offset =
+        ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src1->buffer->context;
+        vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+        uint64_t offset = vk_tensor_offset(src1) + src1->view_offs;
        if (!ggml_is_contiguous(src1) && ggml_vk_dim01_contiguous(src1)) {
            for (int i3 = 0; i3 < src1->ne[3]; i3++) {
                for (int i2 = 0; i2 < src1->ne[2]; i2++) {
@@ -7110,9 +7063,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
         memcpy(src2_clone->data, src2->data, src2_size);
         memcpy(src2_clone->nb, src2->nb, sizeof(size_t) * GGML_MAX_DIMS);
     } else if (ggml_backend_buffer_is_vk(src2->buffer)) {
-        vk_buffer buffer_gpu =
-        uint64_t offset =
+        ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)src2->buffer->context;
+        vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+        uint64_t offset = vk_tensor_offset(src2) + src2->view_offs;
         if (!ggml_is_contiguous(src2) && ggml_vk_dim01_contiguous(src2)) {
             for (int i3 = 0; i3 < src2->ne[3]; i3++) {
                 for (int i2 = 0; i2 < src2->ne[2]; i2++) {
@@ -7167,7 +7120,7 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) {
     } else if (tensor->op == GGML_OP_PAD) {
         tensor_clone = ggml_pad(ggml_ctx, src0_clone, tensor->ne[0] - src0_clone->ne[0], tensor->ne[1] - src0_clone->ne[1], tensor->ne[2] - src0_clone->ne[2], tensor->ne[3] - src0_clone->ne[3]);
     } else if (tensor->op == GGML_OP_REPEAT) {
-        tensor_clone = ggml_repeat(ggml_ctx, src0_clone,
+        tensor_clone = ggml_repeat(ggml_ctx, src0_clone, tensor);
     } else if (tensor->op == GGML_OP_ADD) {
         tensor_clone = ggml_add(ggml_ctx, src0_clone, src1_clone);
     } else if (tensor->op == GGML_OP_ACC) {
@@ -7312,14 +7265,15 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) {
     size_t tensor_size = ggml_nbytes(tensor);
     tensor_data = malloc(tensor_size);
 
+    ggml_backend_vk_buffer_context * buf_ctx = (ggml_backend_vk_buffer_context *)tensor->buffer->context;
 
-    vk_buffer buffer_gpu =
+    vk_buffer& buffer_gpu = buf_ctx->dev_buffer;
+    uint64_t offset = vk_tensor_offset(tensor) + tensor->view_offs;
+    if (offset + tensor_size >= buffer_gpu->size) {
+        tensor_size = buffer_gpu->size - offset;
     }
 
-    ggml_vk_buffer_read(buffer_gpu,
+    ggml_vk_buffer_read(buffer_gpu, offset, tensor_data, tensor_size);
 }
 
 float first_error_result = -1.0f;