Spaces:
Running
Running
ggml : barrier refactor + static functions
Browse files
ggml.c
CHANGED
|
@@ -1217,6 +1217,24 @@ struct ggml_state {
|
|
| 1217 |
static struct ggml_state g_state;
|
| 1218 |
static atomic_int g_state_barrier = 0;
|
| 1219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1220 |
////////////////////////////////////////////////////////////////////////////////
|
| 1221 |
|
| 1222 |
void ggml_print_object(const struct ggml_object * obj) {
|
|
@@ -1346,32 +1364,45 @@ int ggml_up64(int n) {
|
|
| 1346 |
|
| 1347 |
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
| 1348 |
// make this function thread safe
|
| 1349 |
-
|
| 1350 |
-
int processing = atomic_fetch_add(&g_state_barrier, 1);
|
| 1351 |
-
while (processing > 0) {
|
| 1352 |
-
// wait for other threads to finish
|
| 1353 |
-
atomic_fetch_sub(&g_state_barrier, 1);
|
| 1354 |
-
sched_yield();
|
| 1355 |
-
processing = atomic_fetch_add(&g_state_barrier, 1);
|
| 1356 |
-
}
|
| 1357 |
-
}
|
| 1358 |
|
| 1359 |
static bool is_first_call = true;
|
|
|
|
| 1360 |
if (is_first_call) {
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
| 1369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
}
|
| 1371 |
|
| 1372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1373 |
|
| 1374 |
-
|
|
|
|
| 1375 |
|
| 1376 |
is_first_call = false;
|
| 1377 |
}
|
|
@@ -1379,14 +1410,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
| 1379 |
// find non-used context in g_state
|
| 1380 |
struct ggml_context * ctx = NULL;
|
| 1381 |
|
| 1382 |
-
static bool first_time = true;
|
| 1383 |
-
if (first_time) {
|
| 1384 |
-
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
|
| 1385 |
-
g_state.contexts[i].used = false;
|
| 1386 |
-
}
|
| 1387 |
-
first_time = false;
|
| 1388 |
-
}
|
| 1389 |
-
|
| 1390 |
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
|
| 1391 |
if (!g_state.contexts[i].used) {
|
| 1392 |
g_state.contexts[i].used = true;
|
|
@@ -1400,7 +1423,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
| 1400 |
if (ctx == NULL) {
|
| 1401 |
GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
|
| 1402 |
|
| 1403 |
-
|
| 1404 |
|
| 1405 |
return NULL;
|
| 1406 |
}
|
|
@@ -1418,22 +1441,16 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
| 1418 |
|
| 1419 |
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
| 1420 |
|
| 1421 |
-
|
| 1422 |
|
| 1423 |
return ctx;
|
| 1424 |
}
|
| 1425 |
|
| 1426 |
void ggml_free(struct ggml_context * ctx) {
|
| 1427 |
// make this function thread safe
|
| 1428 |
-
|
| 1429 |
-
|
| 1430 |
-
|
| 1431 |
-
// wait for other threads to finish
|
| 1432 |
-
atomic_fetch_sub(&g_state_barrier, 1);
|
| 1433 |
-
sched_yield();
|
| 1434 |
-
processing = atomic_fetch_add(&g_state_barrier, 1);
|
| 1435 |
-
}
|
| 1436 |
-
}
|
| 1437 |
|
| 1438 |
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
|
| 1439 |
if (&g_state.contexts[i].context == ctx) {
|
|
@@ -1446,15 +1463,16 @@ void ggml_free(struct ggml_context * ctx) {
|
|
| 1446 |
free(ctx->mem_buffer);
|
| 1447 |
}
|
| 1448 |
|
| 1449 |
-
|
| 1450 |
-
|
| 1451 |
-
return;
|
| 1452 |
}
|
| 1453 |
}
|
| 1454 |
|
| 1455 |
-
|
|
|
|
|
|
|
| 1456 |
|
| 1457 |
-
|
| 1458 |
}
|
| 1459 |
|
| 1460 |
size_t ggml_used_mem(const struct ggml_context * ctx) {
|
|
@@ -3035,7 +3053,7 @@ void ggml_set_param(
|
|
| 3035 |
|
| 3036 |
// ggml_compute_forward_dup
|
| 3037 |
|
| 3038 |
-
void ggml_compute_forward_dup_f16(
|
| 3039 |
const struct ggml_compute_params * params,
|
| 3040 |
const struct ggml_tensor * src0,
|
| 3041 |
struct ggml_tensor * dst) {
|
|
@@ -3139,7 +3157,7 @@ void ggml_compute_forward_dup_f16(
|
|
| 3139 |
}
|
| 3140 |
}
|
| 3141 |
|
| 3142 |
-
void ggml_compute_forward_dup_f32(
|
| 3143 |
const struct ggml_compute_params * params,
|
| 3144 |
const struct ggml_tensor * src0,
|
| 3145 |
struct ggml_tensor * dst) {
|
|
@@ -3243,7 +3261,7 @@ void ggml_compute_forward_dup_f32(
|
|
| 3243 |
}
|
| 3244 |
}
|
| 3245 |
|
| 3246 |
-
void ggml_compute_forward_dup(
|
| 3247 |
const struct ggml_compute_params * params,
|
| 3248 |
const struct ggml_tensor * src0,
|
| 3249 |
struct ggml_tensor * dst) {
|
|
@@ -3268,7 +3286,7 @@ void ggml_compute_forward_dup(
|
|
| 3268 |
|
| 3269 |
// ggml_compute_forward_add
|
| 3270 |
|
| 3271 |
-
void ggml_compute_forward_add_f32(
|
| 3272 |
const struct ggml_compute_params * params,
|
| 3273 |
const struct ggml_tensor * src0,
|
| 3274 |
const struct ggml_tensor * src1,
|
|
@@ -3321,7 +3339,7 @@ void ggml_compute_forward_add_f32(
|
|
| 3321 |
}
|
| 3322 |
}
|
| 3323 |
|
| 3324 |
-
void ggml_compute_forward_add(
|
| 3325 |
const struct ggml_compute_params * params,
|
| 3326 |
const struct ggml_tensor * src0,
|
| 3327 |
const struct ggml_tensor * src1,
|
|
@@ -3344,7 +3362,7 @@ void ggml_compute_forward_add(
|
|
| 3344 |
|
| 3345 |
// ggml_compute_forward_sub
|
| 3346 |
|
| 3347 |
-
void ggml_compute_forward_sub_f32(
|
| 3348 |
const struct ggml_compute_params * params,
|
| 3349 |
const struct ggml_tensor * src0,
|
| 3350 |
const struct ggml_tensor * src1,
|
|
@@ -3371,7 +3389,7 @@ void ggml_compute_forward_sub_f32(
|
|
| 3371 |
}
|
| 3372 |
}
|
| 3373 |
|
| 3374 |
-
void ggml_compute_forward_sub(
|
| 3375 |
const struct ggml_compute_params * params,
|
| 3376 |
const struct ggml_tensor * src0,
|
| 3377 |
const struct ggml_tensor * src1,
|
|
@@ -3394,7 +3412,7 @@ void ggml_compute_forward_sub(
|
|
| 3394 |
|
| 3395 |
// ggml_compute_forward_mul
|
| 3396 |
|
| 3397 |
-
void ggml_compute_forward_mul_f32(
|
| 3398 |
const struct ggml_compute_params * params,
|
| 3399 |
const struct ggml_tensor * src0,
|
| 3400 |
const struct ggml_tensor * src1,
|
|
@@ -3421,7 +3439,7 @@ void ggml_compute_forward_mul_f32(
|
|
| 3421 |
}
|
| 3422 |
}
|
| 3423 |
|
| 3424 |
-
void ggml_compute_forward_mul(
|
| 3425 |
const struct ggml_compute_params * params,
|
| 3426 |
const struct ggml_tensor * src0,
|
| 3427 |
const struct ggml_tensor * src1,
|
|
@@ -3444,7 +3462,7 @@ void ggml_compute_forward_mul(
|
|
| 3444 |
|
| 3445 |
// ggml_compute_forward_div
|
| 3446 |
|
| 3447 |
-
void ggml_compute_forward_div_f32(
|
| 3448 |
const struct ggml_compute_params * params,
|
| 3449 |
const struct ggml_tensor * src0,
|
| 3450 |
const struct ggml_tensor * src1,
|
|
@@ -3471,7 +3489,7 @@ void ggml_compute_forward_div_f32(
|
|
| 3471 |
}
|
| 3472 |
}
|
| 3473 |
|
| 3474 |
-
void ggml_compute_forward_div(
|
| 3475 |
const struct ggml_compute_params * params,
|
| 3476 |
const struct ggml_tensor * src0,
|
| 3477 |
const struct ggml_tensor * src1,
|
|
@@ -3494,7 +3512,7 @@ void ggml_compute_forward_div(
|
|
| 3494 |
|
| 3495 |
// ggml_compute_forward_sqr
|
| 3496 |
|
| 3497 |
-
void ggml_compute_forward_sqr_f32(
|
| 3498 |
const struct ggml_compute_params * params,
|
| 3499 |
const struct ggml_tensor * src0,
|
| 3500 |
struct ggml_tensor * dst) {
|
|
@@ -3518,7 +3536,7 @@ void ggml_compute_forward_sqr_f32(
|
|
| 3518 |
}
|
| 3519 |
}
|
| 3520 |
|
| 3521 |
-
void ggml_compute_forward_sqr(
|
| 3522 |
const struct ggml_compute_params * params,
|
| 3523 |
const struct ggml_tensor * src0,
|
| 3524 |
struct ggml_tensor * dst) {
|
|
@@ -3540,7 +3558,7 @@ void ggml_compute_forward_sqr(
|
|
| 3540 |
|
| 3541 |
// ggml_compute_forward_sqrt
|
| 3542 |
|
| 3543 |
-
void ggml_compute_forward_sqrt_f32(
|
| 3544 |
const struct ggml_compute_params * params,
|
| 3545 |
const struct ggml_tensor * src0,
|
| 3546 |
struct ggml_tensor * dst) {
|
|
@@ -3564,7 +3582,7 @@ void ggml_compute_forward_sqrt_f32(
|
|
| 3564 |
}
|
| 3565 |
}
|
| 3566 |
|
| 3567 |
-
void ggml_compute_forward_sqrt(
|
| 3568 |
const struct ggml_compute_params * params,
|
| 3569 |
const struct ggml_tensor * src0,
|
| 3570 |
struct ggml_tensor * dst) {
|
|
@@ -3586,7 +3604,7 @@ void ggml_compute_forward_sqrt(
|
|
| 3586 |
|
| 3587 |
// ggml_compute_forward_sum
|
| 3588 |
|
| 3589 |
-
void ggml_compute_forward_sum_f32(
|
| 3590 |
const struct ggml_compute_params * params,
|
| 3591 |
const struct ggml_tensor * src0,
|
| 3592 |
struct ggml_tensor * dst) {
|
|
@@ -3622,7 +3640,7 @@ void ggml_compute_forward_sum_f32(
|
|
| 3622 |
}
|
| 3623 |
}
|
| 3624 |
|
| 3625 |
-
void ggml_compute_forward_sum(
|
| 3626 |
const struct ggml_compute_params * params,
|
| 3627 |
const struct ggml_tensor * src0,
|
| 3628 |
struct ggml_tensor * dst) {
|
|
@@ -3644,7 +3662,7 @@ void ggml_compute_forward_sum(
|
|
| 3644 |
|
| 3645 |
// ggml_compute_forward_mean
|
| 3646 |
|
| 3647 |
-
void ggml_compute_forward_mean_f32(
|
| 3648 |
const struct ggml_compute_params * params,
|
| 3649 |
const struct ggml_tensor * src0,
|
| 3650 |
struct ggml_tensor * dst) {
|
|
@@ -3699,7 +3717,7 @@ void ggml_compute_forward_mean_f32(
|
|
| 3699 |
}
|
| 3700 |
}
|
| 3701 |
|
| 3702 |
-
void ggml_compute_forward_mean(
|
| 3703 |
const struct ggml_compute_params * params,
|
| 3704 |
const struct ggml_tensor * src0,
|
| 3705 |
struct ggml_tensor * dst) {
|
|
@@ -3721,7 +3739,7 @@ void ggml_compute_forward_mean(
|
|
| 3721 |
|
| 3722 |
// ggml_compute_forward_repeat
|
| 3723 |
|
| 3724 |
-
void ggml_compute_forward_repeat_f32(
|
| 3725 |
const struct ggml_compute_params * params,
|
| 3726 |
const struct ggml_tensor * src0,
|
| 3727 |
struct ggml_tensor * dst) {
|
|
@@ -3761,7 +3779,7 @@ void ggml_compute_forward_repeat_f32(
|
|
| 3761 |
}
|
| 3762 |
}
|
| 3763 |
|
| 3764 |
-
void ggml_compute_forward_repeat(
|
| 3765 |
const struct ggml_compute_params * params,
|
| 3766 |
const struct ggml_tensor * src0,
|
| 3767 |
struct ggml_tensor * dst) {
|
|
@@ -3783,7 +3801,7 @@ void ggml_compute_forward_repeat(
|
|
| 3783 |
|
| 3784 |
// ggml_compute_forward_abs
|
| 3785 |
|
| 3786 |
-
void ggml_compute_forward_abs_f32(
|
| 3787 |
const struct ggml_compute_params * params,
|
| 3788 |
const struct ggml_tensor * src0,
|
| 3789 |
struct ggml_tensor * dst) {
|
|
@@ -3807,7 +3825,7 @@ void ggml_compute_forward_abs_f32(
|
|
| 3807 |
}
|
| 3808 |
}
|
| 3809 |
|
| 3810 |
-
void ggml_compute_forward_abs(
|
| 3811 |
const struct ggml_compute_params * params,
|
| 3812 |
const struct ggml_tensor * src0,
|
| 3813 |
struct ggml_tensor * dst) {
|
|
@@ -3829,7 +3847,7 @@ void ggml_compute_forward_abs(
|
|
| 3829 |
|
| 3830 |
// ggml_compute_forward_sgn
|
| 3831 |
|
| 3832 |
-
void ggml_compute_forward_sgn_f32(
|
| 3833 |
const struct ggml_compute_params * params,
|
| 3834 |
const struct ggml_tensor * src0,
|
| 3835 |
struct ggml_tensor * dst) {
|
|
@@ -3853,7 +3871,7 @@ void ggml_compute_forward_sgn_f32(
|
|
| 3853 |
}
|
| 3854 |
}
|
| 3855 |
|
| 3856 |
-
void ggml_compute_forward_sgn(
|
| 3857 |
const struct ggml_compute_params * params,
|
| 3858 |
const struct ggml_tensor * src0,
|
| 3859 |
struct ggml_tensor * dst) {
|
|
@@ -3875,7 +3893,7 @@ void ggml_compute_forward_sgn(
|
|
| 3875 |
|
| 3876 |
// ggml_compute_forward_neg
|
| 3877 |
|
| 3878 |
-
void ggml_compute_forward_neg_f32(
|
| 3879 |
const struct ggml_compute_params * params,
|
| 3880 |
const struct ggml_tensor * src0,
|
| 3881 |
struct ggml_tensor * dst) {
|
|
@@ -3899,7 +3917,7 @@ void ggml_compute_forward_neg_f32(
|
|
| 3899 |
}
|
| 3900 |
}
|
| 3901 |
|
| 3902 |
-
void ggml_compute_forward_neg(
|
| 3903 |
const struct ggml_compute_params * params,
|
| 3904 |
const struct ggml_tensor * src0,
|
| 3905 |
struct ggml_tensor * dst) {
|
|
@@ -3921,7 +3939,7 @@ void ggml_compute_forward_neg(
|
|
| 3921 |
|
| 3922 |
// ggml_compute_forward_step
|
| 3923 |
|
| 3924 |
-
void ggml_compute_forward_step_f32(
|
| 3925 |
const struct ggml_compute_params * params,
|
| 3926 |
const struct ggml_tensor * src0,
|
| 3927 |
struct ggml_tensor * dst) {
|
|
@@ -3945,7 +3963,7 @@ void ggml_compute_forward_step_f32(
|
|
| 3945 |
}
|
| 3946 |
}
|
| 3947 |
|
| 3948 |
-
void ggml_compute_forward_step(
|
| 3949 |
const struct ggml_compute_params * params,
|
| 3950 |
const struct ggml_tensor * src0,
|
| 3951 |
struct ggml_tensor * dst) {
|
|
@@ -3967,7 +3985,7 @@ void ggml_compute_forward_step(
|
|
| 3967 |
|
| 3968 |
// ggml_compute_forward_relu
|
| 3969 |
|
| 3970 |
-
void ggml_compute_forward_relu_f32(
|
| 3971 |
const struct ggml_compute_params * params,
|
| 3972 |
const struct ggml_tensor * src0,
|
| 3973 |
struct ggml_tensor * dst) {
|
|
@@ -3991,7 +4009,7 @@ void ggml_compute_forward_relu_f32(
|
|
| 3991 |
}
|
| 3992 |
}
|
| 3993 |
|
| 3994 |
-
void ggml_compute_forward_relu(
|
| 3995 |
const struct ggml_compute_params * params,
|
| 3996 |
const struct ggml_tensor * src0,
|
| 3997 |
struct ggml_tensor * dst) {
|
|
@@ -4013,7 +4031,7 @@ void ggml_compute_forward_relu(
|
|
| 4013 |
|
| 4014 |
// ggml_compute_forward_gelu
|
| 4015 |
|
| 4016 |
-
void ggml_compute_forward_gelu_f32(
|
| 4017 |
const struct ggml_compute_params * params,
|
| 4018 |
const struct ggml_tensor * src0,
|
| 4019 |
struct ggml_tensor * dst) {
|
|
@@ -4054,7 +4072,7 @@ void ggml_compute_forward_gelu_f32(
|
|
| 4054 |
}
|
| 4055 |
}
|
| 4056 |
|
| 4057 |
-
void ggml_compute_forward_gelu(
|
| 4058 |
const struct ggml_compute_params * params,
|
| 4059 |
const struct ggml_tensor * src0,
|
| 4060 |
struct ggml_tensor * dst) {
|
|
@@ -4076,7 +4094,7 @@ void ggml_compute_forward_gelu(
|
|
| 4076 |
|
| 4077 |
// ggml_compute_forward_norm
|
| 4078 |
|
| 4079 |
-
void ggml_compute_forward_norm_f32(
|
| 4080 |
const struct ggml_compute_params * params,
|
| 4081 |
const struct ggml_tensor * src0,
|
| 4082 |
struct ggml_tensor * dst) {
|
|
@@ -4136,7 +4154,7 @@ void ggml_compute_forward_norm_f32(
|
|
| 4136 |
}
|
| 4137 |
}
|
| 4138 |
|
| 4139 |
-
void ggml_compute_forward_norm(
|
| 4140 |
const struct ggml_compute_params * params,
|
| 4141 |
const struct ggml_tensor * src0,
|
| 4142 |
struct ggml_tensor * dst) {
|
|
@@ -4158,9 +4176,10 @@ void ggml_compute_forward_norm(
|
|
| 4158 |
|
| 4159 |
// ggml_compute_forward_mul_mat
|
| 4160 |
|
|
|
|
| 4161 |
// helper function to determine if it is better to use BLAS or not
|
| 4162 |
// for large matrices, BLAS is faster
|
| 4163 |
-
bool ggml_compute_forward_mul_mat_use_blas(
|
| 4164 |
const struct ggml_tensor * src0,
|
| 4165 |
const struct ggml_tensor * src1,
|
| 4166 |
struct ggml_tensor * dst) {
|
|
@@ -4179,8 +4198,9 @@ bool ggml_compute_forward_mul_mat_use_blas(
|
|
| 4179 |
|
| 4180 |
return false;
|
| 4181 |
}
|
|
|
|
| 4182 |
|
| 4183 |
-
void ggml_compute_forward_mul_mat_f32(
|
| 4184 |
const struct ggml_compute_params * params,
|
| 4185 |
const struct ggml_tensor * src0,
|
| 4186 |
const struct ggml_tensor * src1,
|
|
@@ -4423,7 +4443,7 @@ void ggml_compute_forward_mul_mat_f32(
|
|
| 4423 |
//}
|
| 4424 |
}
|
| 4425 |
|
| 4426 |
-
void ggml_compute_forward_mul_mat_f16_f32(
|
| 4427 |
const struct ggml_compute_params * params,
|
| 4428 |
const struct ggml_tensor * src0,
|
| 4429 |
const struct ggml_tensor * src1,
|
|
@@ -4727,7 +4747,7 @@ void ggml_compute_forward_mul_mat_f16_f32(
|
|
| 4727 |
//}
|
| 4728 |
}
|
| 4729 |
|
| 4730 |
-
void ggml_compute_forward_mul_mat(
|
| 4731 |
const struct ggml_compute_params * params,
|
| 4732 |
const struct ggml_tensor * src0,
|
| 4733 |
const struct ggml_tensor * src1,
|
|
@@ -4753,7 +4773,7 @@ void ggml_compute_forward_mul_mat(
|
|
| 4753 |
|
| 4754 |
// ggml_compute_forward_scale
|
| 4755 |
|
| 4756 |
-
void ggml_compute_forward_scale_f32(
|
| 4757 |
const struct ggml_compute_params * params,
|
| 4758 |
const struct ggml_tensor * src0,
|
| 4759 |
const struct ggml_tensor * src1,
|
|
@@ -4788,7 +4808,7 @@ void ggml_compute_forward_scale_f32(
|
|
| 4788 |
}
|
| 4789 |
}
|
| 4790 |
|
| 4791 |
-
void ggml_compute_forward_scale(
|
| 4792 |
const struct ggml_compute_params * params,
|
| 4793 |
const struct ggml_tensor * src0,
|
| 4794 |
const struct ggml_tensor * src1,
|
|
@@ -4811,7 +4831,7 @@ void ggml_compute_forward_scale(
|
|
| 4811 |
|
| 4812 |
// ggml_compute_forward_cpy
|
| 4813 |
|
| 4814 |
-
void ggml_compute_forward_cpy(
|
| 4815 |
const struct ggml_compute_params * params,
|
| 4816 |
const struct ggml_tensor * src0,
|
| 4817 |
struct ggml_tensor * dst) {
|
|
@@ -4820,7 +4840,7 @@ void ggml_compute_forward_cpy(
|
|
| 4820 |
|
| 4821 |
// ggml_compute_forward_reshape
|
| 4822 |
|
| 4823 |
-
void ggml_compute_forward_reshape(
|
| 4824 |
const struct ggml_compute_params * params,
|
| 4825 |
const struct ggml_tensor * src0,
|
| 4826 |
struct ggml_tensor * dst) {
|
|
@@ -4832,7 +4852,7 @@ void ggml_compute_forward_reshape(
|
|
| 4832 |
|
| 4833 |
// ggml_compute_forward_view
|
| 4834 |
|
| 4835 |
-
void ggml_compute_forward_view(
|
| 4836 |
const struct ggml_compute_params * params,
|
| 4837 |
const struct ggml_tensor * src0) {
|
| 4838 |
// NOP
|
|
@@ -4842,7 +4862,7 @@ void ggml_compute_forward_view(
|
|
| 4842 |
|
| 4843 |
// ggml_compute_forward_permute
|
| 4844 |
|
| 4845 |
-
void ggml_compute_forward_permute(
|
| 4846 |
const struct ggml_compute_params * params,
|
| 4847 |
const struct ggml_tensor * src0) {
|
| 4848 |
// NOP
|
|
@@ -4852,7 +4872,7 @@ void ggml_compute_forward_permute(
|
|
| 4852 |
|
| 4853 |
// ggml_compute_forward_transpose
|
| 4854 |
|
| 4855 |
-
void ggml_compute_forward_transpose(
|
| 4856 |
const struct ggml_compute_params * params,
|
| 4857 |
const struct ggml_tensor * src0) {
|
| 4858 |
// NOP
|
|
@@ -4862,7 +4882,7 @@ void ggml_compute_forward_transpose(
|
|
| 4862 |
|
| 4863 |
// ggml_compute_forward_get_rows
|
| 4864 |
|
| 4865 |
-
void ggml_compute_forward_get_rows_f16(
|
| 4866 |
const struct ggml_compute_params * params,
|
| 4867 |
const struct ggml_tensor * src0,
|
| 4868 |
const struct ggml_tensor * src1,
|
|
@@ -4890,7 +4910,7 @@ void ggml_compute_forward_get_rows_f16(
|
|
| 4890 |
}
|
| 4891 |
}
|
| 4892 |
|
| 4893 |
-
void ggml_compute_forward_get_rows_f32(
|
| 4894 |
const struct ggml_compute_params * params,
|
| 4895 |
const struct ggml_tensor * src0,
|
| 4896 |
const struct ggml_tensor * src1,
|
|
@@ -4917,7 +4937,7 @@ void ggml_compute_forward_get_rows_f32(
|
|
| 4917 |
}
|
| 4918 |
}
|
| 4919 |
|
| 4920 |
-
void ggml_compute_forward_get_rows(
|
| 4921 |
const struct ggml_compute_params * params,
|
| 4922 |
const struct ggml_tensor * src0,
|
| 4923 |
const struct ggml_tensor * src1,
|
|
@@ -4943,7 +4963,7 @@ void ggml_compute_forward_get_rows(
|
|
| 4943 |
|
| 4944 |
// ggml_compute_forward_diag_mask_inf
|
| 4945 |
|
| 4946 |
-
void ggml_compute_forward_diag_mask_inf_f32(
|
| 4947 |
const struct ggml_compute_params * params,
|
| 4948 |
const struct ggml_tensor * src0,
|
| 4949 |
const struct ggml_tensor * src1,
|
|
@@ -4979,7 +4999,7 @@ void ggml_compute_forward_diag_mask_inf_f32(
|
|
| 4979 |
}
|
| 4980 |
}
|
| 4981 |
|
| 4982 |
-
void ggml_compute_forward_diag_mask_inf(
|
| 4983 |
const struct ggml_compute_params * params,
|
| 4984 |
const struct ggml_tensor * src0,
|
| 4985 |
const struct ggml_tensor * src1,
|
|
@@ -5002,7 +5022,7 @@ void ggml_compute_forward_diag_mask_inf(
|
|
| 5002 |
|
| 5003 |
// ggml_compute_forward_soft_max
|
| 5004 |
|
| 5005 |
-
void ggml_compute_forward_soft_max_f32(
|
| 5006 |
const struct ggml_compute_params * params,
|
| 5007 |
const struct ggml_tensor * src0,
|
| 5008 |
struct ggml_tensor * dst) {
|
|
@@ -5073,7 +5093,7 @@ void ggml_compute_forward_soft_max_f32(
|
|
| 5073 |
}
|
| 5074 |
}
|
| 5075 |
|
| 5076 |
-
void ggml_compute_forward_soft_max(
|
| 5077 |
const struct ggml_compute_params * params,
|
| 5078 |
const struct ggml_tensor * src0,
|
| 5079 |
struct ggml_tensor * dst) {
|
|
@@ -5095,7 +5115,7 @@ void ggml_compute_forward_soft_max(
|
|
| 5095 |
|
| 5096 |
// ggml_compute_forward_rope
|
| 5097 |
|
| 5098 |
-
void ggml_compute_forward_rope_f32(
|
| 5099 |
const struct ggml_compute_params * params,
|
| 5100 |
const struct ggml_tensor * src0,
|
| 5101 |
const struct ggml_tensor * src1,
|
|
@@ -5152,7 +5172,7 @@ void ggml_compute_forward_rope_f32(
|
|
| 5152 |
}
|
| 5153 |
}
|
| 5154 |
|
| 5155 |
-
void ggml_compute_forward_rope(
|
| 5156 |
const struct ggml_compute_params * params,
|
| 5157 |
const struct ggml_tensor * src0,
|
| 5158 |
const struct ggml_tensor * src1,
|
|
@@ -5175,7 +5195,7 @@ void ggml_compute_forward_rope(
|
|
| 5175 |
|
| 5176 |
// ggml_compute_forward_conv_1d_1s
|
| 5177 |
|
| 5178 |
-
void ggml_compute_forward_conv_1d_1s_f16_f32(
|
| 5179 |
const struct ggml_compute_params * params,
|
| 5180 |
const struct ggml_tensor * src0,
|
| 5181 |
const struct ggml_tensor * src1,
|
|
@@ -5295,7 +5315,7 @@ void ggml_compute_forward_conv_1d_1s_f16_f32(
|
|
| 5295 |
}
|
| 5296 |
}
|
| 5297 |
|
| 5298 |
-
void ggml_compute_forward_conv_1d_1s_f32(
|
| 5299 |
const struct ggml_compute_params * params,
|
| 5300 |
const struct ggml_tensor * src0,
|
| 5301 |
const struct ggml_tensor * src1,
|
|
@@ -5415,7 +5435,7 @@ void ggml_compute_forward_conv_1d_1s_f32(
|
|
| 5415 |
}
|
| 5416 |
}
|
| 5417 |
|
| 5418 |
-
void ggml_compute_forward_conv_1d_1s(
|
| 5419 |
const struct ggml_compute_params * params,
|
| 5420 |
const struct ggml_tensor * src0,
|
| 5421 |
const struct ggml_tensor * src1,
|
|
@@ -5441,7 +5461,7 @@ void ggml_compute_forward_conv_1d_1s(
|
|
| 5441 |
|
| 5442 |
// ggml_compute_forward_conv_1d_2s
|
| 5443 |
|
| 5444 |
-
void ggml_compute_forward_conv_1d_2s_f16_f32(
|
| 5445 |
const struct ggml_compute_params * params,
|
| 5446 |
const struct ggml_tensor * src0,
|
| 5447 |
const struct ggml_tensor * src1,
|
|
@@ -5561,7 +5581,7 @@ void ggml_compute_forward_conv_1d_2s_f16_f32(
|
|
| 5561 |
}
|
| 5562 |
}
|
| 5563 |
|
| 5564 |
-
void ggml_compute_forward_conv_1d_2s_f32(
|
| 5565 |
const struct ggml_compute_params * params,
|
| 5566 |
const struct ggml_tensor * src0,
|
| 5567 |
const struct ggml_tensor * src1,
|
|
@@ -5681,7 +5701,7 @@ void ggml_compute_forward_conv_1d_2s_f32(
|
|
| 5681 |
}
|
| 5682 |
}
|
| 5683 |
|
| 5684 |
-
void ggml_compute_forward_conv_1d_2s(
|
| 5685 |
const struct ggml_compute_params * params,
|
| 5686 |
const struct ggml_tensor * src0,
|
| 5687 |
const struct ggml_tensor * src1,
|
|
@@ -5707,7 +5727,7 @@ void ggml_compute_forward_conv_1d_2s(
|
|
| 5707 |
|
| 5708 |
// ggml_compute_forward_flash_attn
|
| 5709 |
|
| 5710 |
-
void ggml_compute_forward_flash_attn_f32(
|
| 5711 |
const struct ggml_compute_params * params,
|
| 5712 |
const struct ggml_tensor * q,
|
| 5713 |
const struct ggml_tensor * k,
|
|
@@ -5888,7 +5908,7 @@ void ggml_compute_forward_flash_attn_f32(
|
|
| 5888 |
}
|
| 5889 |
}
|
| 5890 |
|
| 5891 |
-
void ggml_compute_forward_flash_attn_f16(
|
| 5892 |
const struct ggml_compute_params * params,
|
| 5893 |
const struct ggml_tensor * q,
|
| 5894 |
const struct ggml_tensor * k,
|
|
@@ -6075,7 +6095,7 @@ void ggml_compute_forward_flash_attn_f16(
|
|
| 6075 |
}
|
| 6076 |
}
|
| 6077 |
|
| 6078 |
-
void ggml_compute_forward_flash_attn(
|
| 6079 |
const struct ggml_compute_params * params,
|
| 6080 |
const struct ggml_tensor * q,
|
| 6081 |
const struct ggml_tensor * k,
|
|
@@ -6103,7 +6123,7 @@ void ggml_compute_forward_flash_attn(
|
|
| 6103 |
|
| 6104 |
// ggml_compute_forward_flash_ff
|
| 6105 |
|
| 6106 |
-
void ggml_compute_forward_flash_ff_f16(
|
| 6107 |
const struct ggml_compute_params * params,
|
| 6108 |
const struct ggml_tensor * a, // F16
|
| 6109 |
const struct ggml_tensor * b0, // F16 fc_w
|
|
@@ -6283,7 +6303,7 @@ void ggml_compute_forward_flash_ff_f16(
|
|
| 6283 |
}
|
| 6284 |
}
|
| 6285 |
|
| 6286 |
-
void ggml_compute_forward_flash_ff(
|
| 6287 |
const struct ggml_compute_params * params,
|
| 6288 |
const struct ggml_tensor * a,
|
| 6289 |
const struct ggml_tensor * b0,
|
|
@@ -6312,7 +6332,7 @@ void ggml_compute_forward_flash_ff(
|
|
| 6312 |
|
| 6313 |
/////////////////////////////////
|
| 6314 |
|
| 6315 |
-
void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
|
| 6316 |
assert(params);
|
| 6317 |
|
| 6318 |
switch (tensor->op) {
|
|
@@ -6460,7 +6480,7 @@ void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tenso
|
|
| 6460 |
|
| 6461 |
////////////////////////////////////////////////////////////////////////////////
|
| 6462 |
|
| 6463 |
-
void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
|
| 6464 |
struct ggml_tensor * src0 = tensor->src0;
|
| 6465 |
struct ggml_tensor * src1 = tensor->src1;
|
| 6466 |
|
|
@@ -6704,7 +6724,7 @@ void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tenso
|
|
| 6704 |
}
|
| 6705 |
}
|
| 6706 |
|
| 6707 |
-
void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
| 6708 |
if (node->grad == NULL) {
|
| 6709 |
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
| 6710 |
// it can also happen during forward pass, if the user performs computations with constants
|
|
@@ -6755,7 +6775,7 @@ void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node)
|
|
| 6755 |
}
|
| 6756 |
}
|
| 6757 |
|
| 6758 |
-
void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
| 6759 |
if (!expand) {
|
| 6760 |
cgraph->n_nodes = 0;
|
| 6761 |
cgraph->n_leafs = 0;
|
|
@@ -6866,6 +6886,11 @@ typedef int ggml_lock_t;
|
|
| 6866 |
|
| 6867 |
#define GGML_LOCK_INITIALIZER 0
|
| 6868 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6869 |
#else
|
| 6870 |
|
| 6871 |
//typedef pthread_spinlock_t ggml_lock_t;
|
|
@@ -6884,6 +6909,11 @@ typedef int ggml_lock_t;
|
|
| 6884 |
|
| 6885 |
#define GGML_LOCK_INITIALIZER 0
|
| 6886 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6887 |
#endif
|
| 6888 |
|
| 6889 |
struct ggml_compute_state_shared {
|
|
@@ -6898,7 +6928,7 @@ struct ggml_compute_state_shared {
|
|
| 6898 |
};
|
| 6899 |
|
| 6900 |
struct ggml_compute_state {
|
| 6901 |
-
|
| 6902 |
|
| 6903 |
struct ggml_compute_params params;
|
| 6904 |
struct ggml_tensor * node;
|
|
@@ -6906,16 +6936,7 @@ struct ggml_compute_state {
|
|
| 6906 |
struct ggml_compute_state_shared * shared;
|
| 6907 |
};
|
| 6908 |
|
| 6909 |
-
|
| 6910 |
-
void * ggml_graph_compute_one(void * data) {
|
| 6911 |
-
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
| 6912 |
-
|
| 6913 |
-
ggml_compute_forward(&state->params, state->node);
|
| 6914 |
-
|
| 6915 |
-
return NULL;
|
| 6916 |
-
}
|
| 6917 |
-
|
| 6918 |
-
thread_ret_t ggml_graph_compute_thread(void * data) {
|
| 6919 |
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
| 6920 |
|
| 6921 |
const int n_threads = state->shared->n_threads;
|
|
@@ -6995,7 +7016,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
| 6995 |
.node = NULL,
|
| 6996 |
.shared = &state_shared,
|
| 6997 |
};
|
| 6998 |
-
int rc =
|
| 6999 |
assert(rc == 0);
|
| 7000 |
UNUSED(rc);
|
| 7001 |
}
|
|
@@ -7339,7 +7360,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|
| 7339 |
atomic_store(&state_shared.has_work, true);
|
| 7340 |
|
| 7341 |
for (int j = 0; j < n_threads - 1; j++) {
|
| 7342 |
-
int rc =
|
| 7343 |
assert(rc == 0);
|
| 7344 |
UNUSED(rc);
|
| 7345 |
}
|
|
@@ -7417,7 +7438,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
| 7417 |
}
|
| 7418 |
|
| 7419 |
// check if node is part of the graph
|
| 7420 |
-
bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
| 7421 |
if (cgraph == NULL) {
|
| 7422 |
return true;
|
| 7423 |
}
|
|
@@ -7431,7 +7452,7 @@ bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor
|
|
| 7431 |
return false;
|
| 7432 |
}
|
| 7433 |
|
| 7434 |
-
struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
| 7435 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 7436 |
struct ggml_tensor * parent = cgraph->nodes[i];
|
| 7437 |
|
|
@@ -7560,7 +7581,7 @@ label=\"<x>CONST %d [%d, %d]\"; ]\n",
|
|
| 7560 |
|
| 7561 |
////////////////////////////////////////////////////////////////////////////////
|
| 7562 |
|
| 7563 |
-
void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) {
|
| 7564 |
int i = 0;
|
| 7565 |
for (int p = 0; p < np; ++p) {
|
| 7566 |
const int ne = ggml_nelements(ps[p]) ;
|
|
@@ -7571,7 +7592,7 @@ void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float *
|
|
| 7571 |
}
|
| 7572 |
}
|
| 7573 |
|
| 7574 |
-
void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
|
| 7575 |
int i = 0;
|
| 7576 |
for (int p = 0; p < np; ++p) {
|
| 7577 |
const int ne = ggml_nelements(ps[p]) ;
|
|
@@ -7582,7 +7603,7 @@ void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
|
|
| 7582 |
}
|
| 7583 |
}
|
| 7584 |
|
| 7585 |
-
void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
|
| 7586 |
int i = 0;
|
| 7587 |
for (int p = 0; p < np; ++p) {
|
| 7588 |
const int ne = ggml_nelements(ps[p]) ;
|
|
@@ -7599,7 +7620,7 @@ void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
|
|
| 7599 |
// ref: https://arxiv.org/pdf/1412.6980.pdf
|
| 7600 |
//
|
| 7601 |
|
| 7602 |
-
enum ggml_opt_result ggml_opt_adam(
|
| 7603 |
struct ggml_context * ctx,
|
| 7604 |
struct ggml_opt_params params,
|
| 7605 |
struct ggml_tensor * f,
|
|
@@ -7892,7 +7913,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
| 7892 |
return GGML_LINESEARCH_FAIL;
|
| 7893 |
}
|
| 7894 |
|
| 7895 |
-
enum ggml_opt_result ggml_opt_lbfgs(
|
| 7896 |
struct ggml_context * ctx,
|
| 7897 |
struct ggml_opt_params params,
|
| 7898 |
struct ggml_tensor * f,
|
|
|
|
| 1217 |
static struct ggml_state g_state;
|
| 1218 |
static atomic_int g_state_barrier = 0;
|
| 1219 |
|
| 1220 |
+
// barrier via spin lock
|
| 1221 |
+
inline static void ggml_critical_section_start() {
|
| 1222 |
+
int processing = atomic_fetch_add(&g_state_barrier, 1);
|
| 1223 |
+
|
| 1224 |
+
while (processing > 0) {
|
| 1225 |
+
// wait for other threads to finish
|
| 1226 |
+
atomic_fetch_sub(&g_state_barrier, 1);
|
| 1227 |
+
sched_yield(); // TODO: reconsider this
|
| 1228 |
+
processing = atomic_fetch_add(&g_state_barrier, 1);
|
| 1229 |
+
}
|
| 1230 |
+
}
|
| 1231 |
+
|
| 1232 |
+
// TODO: make this somehow automatically executed
|
| 1233 |
+
// some sort of "sentry" mechanism
|
| 1234 |
+
inline static void ggml_critical_section_end() {
|
| 1235 |
+
atomic_fetch_sub(&g_state_barrier, 1);
|
| 1236 |
+
}
|
| 1237 |
+
|
| 1238 |
////////////////////////////////////////////////////////////////////////////////
|
| 1239 |
|
| 1240 |
void ggml_print_object(const struct ggml_object * obj) {
|
|
|
|
| 1364 |
|
| 1365 |
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
| 1366 |
// make this function thread safe
|
| 1367 |
+
ggml_critical_section_start();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1368 |
|
| 1369 |
static bool is_first_call = true;
|
| 1370 |
+
|
| 1371 |
if (is_first_call) {
|
| 1372 |
+
// initialize GELU and EXP tables
|
| 1373 |
+
{
|
| 1374 |
+
const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
|
| 1375 |
+
|
| 1376 |
+
ggml_fp16_t ii;
|
| 1377 |
+
for (int i = 0; i < (1 << 16); ++i) {
|
| 1378 |
+
uint16_t ui = i;
|
| 1379 |
+
memcpy(&ii, &ui, sizeof(ii));
|
| 1380 |
+
const float f = GGML_FP16_TO_FP32(ii);
|
| 1381 |
+
table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
|
| 1382 |
+
table_exp_f16[i] = GGML_FP32_TO_FP16(exp(f));
|
| 1383 |
+
}
|
| 1384 |
+
|
| 1385 |
+
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
| 1386 |
+
|
| 1387 |
+
GGML_PRINT_DEBUG("%s: GELU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
| 1388 |
}
|
| 1389 |
|
| 1390 |
+
// initialize g_state
|
| 1391 |
+
{
|
| 1392 |
+
const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
|
| 1393 |
+
|
| 1394 |
+
g_state = (struct ggml_state) {
|
| 1395 |
+
/*.contexts =*/ { 0 },
|
| 1396 |
+
};
|
| 1397 |
+
|
| 1398 |
+
for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) {
|
| 1399 |
+
g_state.contexts[i].used = false;
|
| 1400 |
+
}
|
| 1401 |
+
|
| 1402 |
+
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
|
| 1403 |
|
| 1404 |
+
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
|
| 1405 |
+
}
|
| 1406 |
|
| 1407 |
is_first_call = false;
|
| 1408 |
}
|
|
|
|
| 1410 |
// find non-used context in g_state
|
| 1411 |
struct ggml_context * ctx = NULL;
|
| 1412 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1413 |
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
|
| 1414 |
if (!g_state.contexts[i].used) {
|
| 1415 |
g_state.contexts[i].used = true;
|
|
|
|
| 1423 |
if (ctx == NULL) {
|
| 1424 |
GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
|
| 1425 |
|
| 1426 |
+
ggml_critical_section_end();
|
| 1427 |
|
| 1428 |
return NULL;
|
| 1429 |
}
|
|
|
|
| 1441 |
|
| 1442 |
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
| 1443 |
|
| 1444 |
+
ggml_critical_section_end();
|
| 1445 |
|
| 1446 |
return ctx;
|
| 1447 |
}
|
| 1448 |
|
| 1449 |
void ggml_free(struct ggml_context * ctx) {
|
| 1450 |
// make this function thread safe
|
| 1451 |
+
ggml_critical_section_start();
|
| 1452 |
+
|
| 1453 |
+
bool found = false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1454 |
|
| 1455 |
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
|
| 1456 |
if (&g_state.contexts[i].context == ctx) {
|
|
|
|
| 1463 |
free(ctx->mem_buffer);
|
| 1464 |
}
|
| 1465 |
|
| 1466 |
+
found = true;
|
| 1467 |
+
break;
|
|
|
|
| 1468 |
}
|
| 1469 |
}
|
| 1470 |
|
| 1471 |
+
if (!found) {
|
| 1472 |
+
GGML_PRINT_DEBUG("%s: context not found\n", __func__);
|
| 1473 |
+
}
|
| 1474 |
|
| 1475 |
+
ggml_critical_section_end();
|
| 1476 |
}
|
| 1477 |
|
| 1478 |
size_t ggml_used_mem(const struct ggml_context * ctx) {
|
|
|
|
| 3053 |
|
| 3054 |
// ggml_compute_forward_dup
|
| 3055 |
|
| 3056 |
+
static void ggml_compute_forward_dup_f16(
|
| 3057 |
const struct ggml_compute_params * params,
|
| 3058 |
const struct ggml_tensor * src0,
|
| 3059 |
struct ggml_tensor * dst) {
|
|
|
|
| 3157 |
}
|
| 3158 |
}
|
| 3159 |
|
| 3160 |
+
static void ggml_compute_forward_dup_f32(
|
| 3161 |
const struct ggml_compute_params * params,
|
| 3162 |
const struct ggml_tensor * src0,
|
| 3163 |
struct ggml_tensor * dst) {
|
|
|
|
| 3261 |
}
|
| 3262 |
}
|
| 3263 |
|
| 3264 |
+
static void ggml_compute_forward_dup(
|
| 3265 |
const struct ggml_compute_params * params,
|
| 3266 |
const struct ggml_tensor * src0,
|
| 3267 |
struct ggml_tensor * dst) {
|
|
|
|
| 3286 |
|
| 3287 |
// ggml_compute_forward_add
|
| 3288 |
|
| 3289 |
+
static void ggml_compute_forward_add_f32(
|
| 3290 |
const struct ggml_compute_params * params,
|
| 3291 |
const struct ggml_tensor * src0,
|
| 3292 |
const struct ggml_tensor * src1,
|
|
|
|
| 3339 |
}
|
| 3340 |
}
|
| 3341 |
|
| 3342 |
+
static void ggml_compute_forward_add(
|
| 3343 |
const struct ggml_compute_params * params,
|
| 3344 |
const struct ggml_tensor * src0,
|
| 3345 |
const struct ggml_tensor * src1,
|
|
|
|
| 3362 |
|
| 3363 |
// ggml_compute_forward_sub
|
| 3364 |
|
| 3365 |
+
static void ggml_compute_forward_sub_f32(
|
| 3366 |
const struct ggml_compute_params * params,
|
| 3367 |
const struct ggml_tensor * src0,
|
| 3368 |
const struct ggml_tensor * src1,
|
|
|
|
| 3389 |
}
|
| 3390 |
}
|
| 3391 |
|
| 3392 |
+
static void ggml_compute_forward_sub(
|
| 3393 |
const struct ggml_compute_params * params,
|
| 3394 |
const struct ggml_tensor * src0,
|
| 3395 |
const struct ggml_tensor * src1,
|
|
|
|
| 3412 |
|
| 3413 |
// ggml_compute_forward_mul
|
| 3414 |
|
| 3415 |
+
static void ggml_compute_forward_mul_f32(
|
| 3416 |
const struct ggml_compute_params * params,
|
| 3417 |
const struct ggml_tensor * src0,
|
| 3418 |
const struct ggml_tensor * src1,
|
|
|
|
| 3439 |
}
|
| 3440 |
}
|
| 3441 |
|
| 3442 |
+
static void ggml_compute_forward_mul(
|
| 3443 |
const struct ggml_compute_params * params,
|
| 3444 |
const struct ggml_tensor * src0,
|
| 3445 |
const struct ggml_tensor * src1,
|
|
|
|
| 3462 |
|
| 3463 |
// ggml_compute_forward_div
|
| 3464 |
|
| 3465 |
+
static void ggml_compute_forward_div_f32(
|
| 3466 |
const struct ggml_compute_params * params,
|
| 3467 |
const struct ggml_tensor * src0,
|
| 3468 |
const struct ggml_tensor * src1,
|
|
|
|
| 3489 |
}
|
| 3490 |
}
|
| 3491 |
|
| 3492 |
+
static void ggml_compute_forward_div(
|
| 3493 |
const struct ggml_compute_params * params,
|
| 3494 |
const struct ggml_tensor * src0,
|
| 3495 |
const struct ggml_tensor * src1,
|
|
|
|
| 3512 |
|
| 3513 |
// ggml_compute_forward_sqr
|
| 3514 |
|
| 3515 |
+
static void ggml_compute_forward_sqr_f32(
|
| 3516 |
const struct ggml_compute_params * params,
|
| 3517 |
const struct ggml_tensor * src0,
|
| 3518 |
struct ggml_tensor * dst) {
|
|
|
|
| 3536 |
}
|
| 3537 |
}
|
| 3538 |
|
| 3539 |
+
static void ggml_compute_forward_sqr(
|
| 3540 |
const struct ggml_compute_params * params,
|
| 3541 |
const struct ggml_tensor * src0,
|
| 3542 |
struct ggml_tensor * dst) {
|
|
|
|
| 3558 |
|
| 3559 |
// ggml_compute_forward_sqrt
|
| 3560 |
|
| 3561 |
+
static void ggml_compute_forward_sqrt_f32(
|
| 3562 |
const struct ggml_compute_params * params,
|
| 3563 |
const struct ggml_tensor * src0,
|
| 3564 |
struct ggml_tensor * dst) {
|
|
|
|
| 3582 |
}
|
| 3583 |
}
|
| 3584 |
|
| 3585 |
+
static void ggml_compute_forward_sqrt(
|
| 3586 |
const struct ggml_compute_params * params,
|
| 3587 |
const struct ggml_tensor * src0,
|
| 3588 |
struct ggml_tensor * dst) {
|
|
|
|
| 3604 |
|
| 3605 |
// ggml_compute_forward_sum
|
| 3606 |
|
| 3607 |
+
static void ggml_compute_forward_sum_f32(
|
| 3608 |
const struct ggml_compute_params * params,
|
| 3609 |
const struct ggml_tensor * src0,
|
| 3610 |
struct ggml_tensor * dst) {
|
|
|
|
| 3640 |
}
|
| 3641 |
}
|
| 3642 |
|
| 3643 |
+
static void ggml_compute_forward_sum(
|
| 3644 |
const struct ggml_compute_params * params,
|
| 3645 |
const struct ggml_tensor * src0,
|
| 3646 |
struct ggml_tensor * dst) {
|
|
|
|
| 3662 |
|
| 3663 |
// ggml_compute_forward_mean
|
| 3664 |
|
| 3665 |
+
static void ggml_compute_forward_mean_f32(
|
| 3666 |
const struct ggml_compute_params * params,
|
| 3667 |
const struct ggml_tensor * src0,
|
| 3668 |
struct ggml_tensor * dst) {
|
|
|
|
| 3717 |
}
|
| 3718 |
}
|
| 3719 |
|
| 3720 |
+
static void ggml_compute_forward_mean(
|
| 3721 |
const struct ggml_compute_params * params,
|
| 3722 |
const struct ggml_tensor * src0,
|
| 3723 |
struct ggml_tensor * dst) {
|
|
|
|
| 3739 |
|
| 3740 |
// ggml_compute_forward_repeat
|
| 3741 |
|
| 3742 |
+
static void ggml_compute_forward_repeat_f32(
|
| 3743 |
const struct ggml_compute_params * params,
|
| 3744 |
const struct ggml_tensor * src0,
|
| 3745 |
struct ggml_tensor * dst) {
|
|
|
|
| 3779 |
}
|
| 3780 |
}
|
| 3781 |
|
| 3782 |
+
static void ggml_compute_forward_repeat(
|
| 3783 |
const struct ggml_compute_params * params,
|
| 3784 |
const struct ggml_tensor * src0,
|
| 3785 |
struct ggml_tensor * dst) {
|
|
|
|
| 3801 |
|
| 3802 |
// ggml_compute_forward_abs
|
| 3803 |
|
| 3804 |
+
static void ggml_compute_forward_abs_f32(
|
| 3805 |
const struct ggml_compute_params * params,
|
| 3806 |
const struct ggml_tensor * src0,
|
| 3807 |
struct ggml_tensor * dst) {
|
|
|
|
| 3825 |
}
|
| 3826 |
}
|
| 3827 |
|
| 3828 |
+
static void ggml_compute_forward_abs(
|
| 3829 |
const struct ggml_compute_params * params,
|
| 3830 |
const struct ggml_tensor * src0,
|
| 3831 |
struct ggml_tensor * dst) {
|
|
|
|
| 3847 |
|
| 3848 |
// ggml_compute_forward_sgn
|
| 3849 |
|
| 3850 |
+
static void ggml_compute_forward_sgn_f32(
|
| 3851 |
const struct ggml_compute_params * params,
|
| 3852 |
const struct ggml_tensor * src0,
|
| 3853 |
struct ggml_tensor * dst) {
|
|
|
|
| 3871 |
}
|
| 3872 |
}
|
| 3873 |
|
| 3874 |
+
static void ggml_compute_forward_sgn(
|
| 3875 |
const struct ggml_compute_params * params,
|
| 3876 |
const struct ggml_tensor * src0,
|
| 3877 |
struct ggml_tensor * dst) {
|
|
|
|
| 3893 |
|
| 3894 |
// ggml_compute_forward_neg
|
| 3895 |
|
| 3896 |
+
static void ggml_compute_forward_neg_f32(
|
| 3897 |
const struct ggml_compute_params * params,
|
| 3898 |
const struct ggml_tensor * src0,
|
| 3899 |
struct ggml_tensor * dst) {
|
|
|
|
| 3917 |
}
|
| 3918 |
}
|
| 3919 |
|
| 3920 |
+
static void ggml_compute_forward_neg(
|
| 3921 |
const struct ggml_compute_params * params,
|
| 3922 |
const struct ggml_tensor * src0,
|
| 3923 |
struct ggml_tensor * dst) {
|
|
|
|
| 3939 |
|
| 3940 |
// ggml_compute_forward_step
|
| 3941 |
|
| 3942 |
+
static void ggml_compute_forward_step_f32(
|
| 3943 |
const struct ggml_compute_params * params,
|
| 3944 |
const struct ggml_tensor * src0,
|
| 3945 |
struct ggml_tensor * dst) {
|
|
|
|
| 3963 |
}
|
| 3964 |
}
|
| 3965 |
|
| 3966 |
+
static void ggml_compute_forward_step(
|
| 3967 |
const struct ggml_compute_params * params,
|
| 3968 |
const struct ggml_tensor * src0,
|
| 3969 |
struct ggml_tensor * dst) {
|
|
|
|
| 3985 |
|
| 3986 |
// ggml_compute_forward_relu
|
| 3987 |
|
| 3988 |
+
static void ggml_compute_forward_relu_f32(
|
| 3989 |
const struct ggml_compute_params * params,
|
| 3990 |
const struct ggml_tensor * src0,
|
| 3991 |
struct ggml_tensor * dst) {
|
|
|
|
| 4009 |
}
|
| 4010 |
}
|
| 4011 |
|
| 4012 |
+
static void ggml_compute_forward_relu(
|
| 4013 |
const struct ggml_compute_params * params,
|
| 4014 |
const struct ggml_tensor * src0,
|
| 4015 |
struct ggml_tensor * dst) {
|
|
|
|
| 4031 |
|
| 4032 |
// ggml_compute_forward_gelu
|
| 4033 |
|
| 4034 |
+
static void ggml_compute_forward_gelu_f32(
|
| 4035 |
const struct ggml_compute_params * params,
|
| 4036 |
const struct ggml_tensor * src0,
|
| 4037 |
struct ggml_tensor * dst) {
|
|
|
|
| 4072 |
}
|
| 4073 |
}
|
| 4074 |
|
| 4075 |
+
static void ggml_compute_forward_gelu(
|
| 4076 |
const struct ggml_compute_params * params,
|
| 4077 |
const struct ggml_tensor * src0,
|
| 4078 |
struct ggml_tensor * dst) {
|
|
|
|
| 4094 |
|
| 4095 |
// ggml_compute_forward_norm
|
| 4096 |
|
| 4097 |
+
static void ggml_compute_forward_norm_f32(
|
| 4098 |
const struct ggml_compute_params * params,
|
| 4099 |
const struct ggml_tensor * src0,
|
| 4100 |
struct ggml_tensor * dst) {
|
|
|
|
| 4154 |
}
|
| 4155 |
}
|
| 4156 |
|
| 4157 |
+
static void ggml_compute_forward_norm(
|
| 4158 |
const struct ggml_compute_params * params,
|
| 4159 |
const struct ggml_tensor * src0,
|
| 4160 |
struct ggml_tensor * dst) {
|
|
|
|
| 4176 |
|
| 4177 |
// ggml_compute_forward_mul_mat
|
| 4178 |
|
| 4179 |
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
| 4180 |
// helper function to determine if it is better to use BLAS or not
|
| 4181 |
// for large matrices, BLAS is faster
|
| 4182 |
+
static bool ggml_compute_forward_mul_mat_use_blas(
|
| 4183 |
const struct ggml_tensor * src0,
|
| 4184 |
const struct ggml_tensor * src1,
|
| 4185 |
struct ggml_tensor * dst) {
|
|
|
|
| 4198 |
|
| 4199 |
return false;
|
| 4200 |
}
|
| 4201 |
+
#endif
|
| 4202 |
|
| 4203 |
+
static void ggml_compute_forward_mul_mat_f32(
|
| 4204 |
const struct ggml_compute_params * params,
|
| 4205 |
const struct ggml_tensor * src0,
|
| 4206 |
const struct ggml_tensor * src1,
|
|
|
|
| 4443 |
//}
|
| 4444 |
}
|
| 4445 |
|
| 4446 |
+
static void ggml_compute_forward_mul_mat_f16_f32(
|
| 4447 |
const struct ggml_compute_params * params,
|
| 4448 |
const struct ggml_tensor * src0,
|
| 4449 |
const struct ggml_tensor * src1,
|
|
|
|
| 4747 |
//}
|
| 4748 |
}
|
| 4749 |
|
| 4750 |
+
static void ggml_compute_forward_mul_mat(
|
| 4751 |
const struct ggml_compute_params * params,
|
| 4752 |
const struct ggml_tensor * src0,
|
| 4753 |
const struct ggml_tensor * src1,
|
|
|
|
| 4773 |
|
| 4774 |
// ggml_compute_forward_scale
|
| 4775 |
|
| 4776 |
+
static void ggml_compute_forward_scale_f32(
|
| 4777 |
const struct ggml_compute_params * params,
|
| 4778 |
const struct ggml_tensor * src0,
|
| 4779 |
const struct ggml_tensor * src1,
|
|
|
|
| 4808 |
}
|
| 4809 |
}
|
| 4810 |
|
| 4811 |
+
static void ggml_compute_forward_scale(
|
| 4812 |
const struct ggml_compute_params * params,
|
| 4813 |
const struct ggml_tensor * src0,
|
| 4814 |
const struct ggml_tensor * src1,
|
|
|
|
| 4831 |
|
| 4832 |
// ggml_compute_forward_cpy
|
| 4833 |
|
| 4834 |
+
static void ggml_compute_forward_cpy(
|
| 4835 |
const struct ggml_compute_params * params,
|
| 4836 |
const struct ggml_tensor * src0,
|
| 4837 |
struct ggml_tensor * dst) {
|
|
|
|
| 4840 |
|
| 4841 |
// ggml_compute_forward_reshape
|
| 4842 |
|
| 4843 |
+
static void ggml_compute_forward_reshape(
|
| 4844 |
const struct ggml_compute_params * params,
|
| 4845 |
const struct ggml_tensor * src0,
|
| 4846 |
struct ggml_tensor * dst) {
|
|
|
|
| 4852 |
|
| 4853 |
// ggml_compute_forward_view
|
| 4854 |
|
| 4855 |
+
static void ggml_compute_forward_view(
|
| 4856 |
const struct ggml_compute_params * params,
|
| 4857 |
const struct ggml_tensor * src0) {
|
| 4858 |
// NOP
|
|
|
|
| 4862 |
|
| 4863 |
// ggml_compute_forward_permute
|
| 4864 |
|
| 4865 |
+
static void ggml_compute_forward_permute(
|
| 4866 |
const struct ggml_compute_params * params,
|
| 4867 |
const struct ggml_tensor * src0) {
|
| 4868 |
// NOP
|
|
|
|
| 4872 |
|
| 4873 |
// ggml_compute_forward_transpose
|
| 4874 |
|
| 4875 |
+
static void ggml_compute_forward_transpose(
|
| 4876 |
const struct ggml_compute_params * params,
|
| 4877 |
const struct ggml_tensor * src0) {
|
| 4878 |
// NOP
|
|
|
|
| 4882 |
|
| 4883 |
// ggml_compute_forward_get_rows
|
| 4884 |
|
| 4885 |
+
static void ggml_compute_forward_get_rows_f16(
|
| 4886 |
const struct ggml_compute_params * params,
|
| 4887 |
const struct ggml_tensor * src0,
|
| 4888 |
const struct ggml_tensor * src1,
|
|
|
|
| 4910 |
}
|
| 4911 |
}
|
| 4912 |
|
| 4913 |
+
static void ggml_compute_forward_get_rows_f32(
|
| 4914 |
const struct ggml_compute_params * params,
|
| 4915 |
const struct ggml_tensor * src0,
|
| 4916 |
const struct ggml_tensor * src1,
|
|
|
|
| 4937 |
}
|
| 4938 |
}
|
| 4939 |
|
| 4940 |
+
static void ggml_compute_forward_get_rows(
|
| 4941 |
const struct ggml_compute_params * params,
|
| 4942 |
const struct ggml_tensor * src0,
|
| 4943 |
const struct ggml_tensor * src1,
|
|
|
|
| 4963 |
|
| 4964 |
// ggml_compute_forward_diag_mask_inf
|
| 4965 |
|
| 4966 |
+
static void ggml_compute_forward_diag_mask_inf_f32(
|
| 4967 |
const struct ggml_compute_params * params,
|
| 4968 |
const struct ggml_tensor * src0,
|
| 4969 |
const struct ggml_tensor * src1,
|
|
|
|
| 4999 |
}
|
| 5000 |
}
|
| 5001 |
|
| 5002 |
+
static void ggml_compute_forward_diag_mask_inf(
|
| 5003 |
const struct ggml_compute_params * params,
|
| 5004 |
const struct ggml_tensor * src0,
|
| 5005 |
const struct ggml_tensor * src1,
|
|
|
|
| 5022 |
|
| 5023 |
// ggml_compute_forward_soft_max
|
| 5024 |
|
| 5025 |
+
static void ggml_compute_forward_soft_max_f32(
|
| 5026 |
const struct ggml_compute_params * params,
|
| 5027 |
const struct ggml_tensor * src0,
|
| 5028 |
struct ggml_tensor * dst) {
|
|
|
|
| 5093 |
}
|
| 5094 |
}
|
| 5095 |
|
| 5096 |
+
static void ggml_compute_forward_soft_max(
|
| 5097 |
const struct ggml_compute_params * params,
|
| 5098 |
const struct ggml_tensor * src0,
|
| 5099 |
struct ggml_tensor * dst) {
|
|
|
|
| 5115 |
|
| 5116 |
// ggml_compute_forward_rope
|
| 5117 |
|
| 5118 |
+
static void ggml_compute_forward_rope_f32(
|
| 5119 |
const struct ggml_compute_params * params,
|
| 5120 |
const struct ggml_tensor * src0,
|
| 5121 |
const struct ggml_tensor * src1,
|
|
|
|
| 5172 |
}
|
| 5173 |
}
|
| 5174 |
|
| 5175 |
+
static void ggml_compute_forward_rope(
|
| 5176 |
const struct ggml_compute_params * params,
|
| 5177 |
const struct ggml_tensor * src0,
|
| 5178 |
const struct ggml_tensor * src1,
|
|
|
|
| 5195 |
|
| 5196 |
// ggml_compute_forward_conv_1d_1s
|
| 5197 |
|
| 5198 |
+
static void ggml_compute_forward_conv_1d_1s_f16_f32(
|
| 5199 |
const struct ggml_compute_params * params,
|
| 5200 |
const struct ggml_tensor * src0,
|
| 5201 |
const struct ggml_tensor * src1,
|
|
|
|
| 5315 |
}
|
| 5316 |
}
|
| 5317 |
|
| 5318 |
+
static void ggml_compute_forward_conv_1d_1s_f32(
|
| 5319 |
const struct ggml_compute_params * params,
|
| 5320 |
const struct ggml_tensor * src0,
|
| 5321 |
const struct ggml_tensor * src1,
|
|
|
|
| 5435 |
}
|
| 5436 |
}
|
| 5437 |
|
| 5438 |
+
static void ggml_compute_forward_conv_1d_1s(
|
| 5439 |
const struct ggml_compute_params * params,
|
| 5440 |
const struct ggml_tensor * src0,
|
| 5441 |
const struct ggml_tensor * src1,
|
|
|
|
| 5461 |
|
| 5462 |
// ggml_compute_forward_conv_1d_2s
|
| 5463 |
|
| 5464 |
+
static void ggml_compute_forward_conv_1d_2s_f16_f32(
|
| 5465 |
const struct ggml_compute_params * params,
|
| 5466 |
const struct ggml_tensor * src0,
|
| 5467 |
const struct ggml_tensor * src1,
|
|
|
|
| 5581 |
}
|
| 5582 |
}
|
| 5583 |
|
| 5584 |
+
static void ggml_compute_forward_conv_1d_2s_f32(
|
| 5585 |
const struct ggml_compute_params * params,
|
| 5586 |
const struct ggml_tensor * src0,
|
| 5587 |
const struct ggml_tensor * src1,
|
|
|
|
| 5701 |
}
|
| 5702 |
}
|
| 5703 |
|
| 5704 |
+
static void ggml_compute_forward_conv_1d_2s(
|
| 5705 |
const struct ggml_compute_params * params,
|
| 5706 |
const struct ggml_tensor * src0,
|
| 5707 |
const struct ggml_tensor * src1,
|
|
|
|
| 5727 |
|
| 5728 |
// ggml_compute_forward_flash_attn
|
| 5729 |
|
| 5730 |
+
static void ggml_compute_forward_flash_attn_f32(
|
| 5731 |
const struct ggml_compute_params * params,
|
| 5732 |
const struct ggml_tensor * q,
|
| 5733 |
const struct ggml_tensor * k,
|
|
|
|
| 5908 |
}
|
| 5909 |
}
|
| 5910 |
|
| 5911 |
+
static void ggml_compute_forward_flash_attn_f16(
|
| 5912 |
const struct ggml_compute_params * params,
|
| 5913 |
const struct ggml_tensor * q,
|
| 5914 |
const struct ggml_tensor * k,
|
|
|
|
| 6095 |
}
|
| 6096 |
}
|
| 6097 |
|
| 6098 |
+
static void ggml_compute_forward_flash_attn(
|
| 6099 |
const struct ggml_compute_params * params,
|
| 6100 |
const struct ggml_tensor * q,
|
| 6101 |
const struct ggml_tensor * k,
|
|
|
|
| 6123 |
|
| 6124 |
// ggml_compute_forward_flash_ff
|
| 6125 |
|
| 6126 |
+
static void ggml_compute_forward_flash_ff_f16(
|
| 6127 |
const struct ggml_compute_params * params,
|
| 6128 |
const struct ggml_tensor * a, // F16
|
| 6129 |
const struct ggml_tensor * b0, // F16 fc_w
|
|
|
|
| 6303 |
}
|
| 6304 |
}
|
| 6305 |
|
| 6306 |
+
static void ggml_compute_forward_flash_ff(
|
| 6307 |
const struct ggml_compute_params * params,
|
| 6308 |
const struct ggml_tensor * a,
|
| 6309 |
const struct ggml_tensor * b0,
|
|
|
|
| 6332 |
|
| 6333 |
/////////////////////////////////
|
| 6334 |
|
| 6335 |
+
static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
|
| 6336 |
assert(params);
|
| 6337 |
|
| 6338 |
switch (tensor->op) {
|
|
|
|
| 6480 |
|
| 6481 |
////////////////////////////////////////////////////////////////////////////////
|
| 6482 |
|
| 6483 |
+
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
|
| 6484 |
struct ggml_tensor * src0 = tensor->src0;
|
| 6485 |
struct ggml_tensor * src1 = tensor->src1;
|
| 6486 |
|
|
|
|
| 6724 |
}
|
| 6725 |
}
|
| 6726 |
|
| 6727 |
+
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
| 6728 |
if (node->grad == NULL) {
|
| 6729 |
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
| 6730 |
// it can also happen during forward pass, if the user performs computations with constants
|
|
|
|
| 6775 |
}
|
| 6776 |
}
|
| 6777 |
|
| 6778 |
+
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
| 6779 |
if (!expand) {
|
| 6780 |
cgraph->n_nodes = 0;
|
| 6781 |
cgraph->n_leafs = 0;
|
|
|
|
| 6886 |
|
| 6887 |
#define GGML_LOCK_INITIALIZER 0
|
| 6888 |
|
| 6889 |
+
typedef pthread_t ggml_thread_t;
|
| 6890 |
+
|
| 6891 |
+
#define ggml_thread_create pthread_create
|
| 6892 |
+
#define ggml_thread_join pthread_join
|
| 6893 |
+
|
| 6894 |
#else
|
| 6895 |
|
| 6896 |
//typedef pthread_spinlock_t ggml_lock_t;
|
|
|
|
| 6909 |
|
| 6910 |
#define GGML_LOCK_INITIALIZER 0
|
| 6911 |
|
| 6912 |
+
typedef pthread_t ggml_thread_t;
|
| 6913 |
+
|
| 6914 |
+
#define ggml_thread_create pthread_create
|
| 6915 |
+
#define ggml_thread_join pthread_join
|
| 6916 |
+
|
| 6917 |
#endif
|
| 6918 |
|
| 6919 |
struct ggml_compute_state_shared {
|
|
|
|
| 6928 |
};
|
| 6929 |
|
| 6930 |
struct ggml_compute_state {
|
| 6931 |
+
ggml_thread_t thrd;
|
| 6932 |
|
| 6933 |
struct ggml_compute_params params;
|
| 6934 |
struct ggml_tensor * node;
|
|
|
|
| 6936 |
struct ggml_compute_state_shared * shared;
|
| 6937 |
};
|
| 6938 |
|
| 6939 |
+
static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6940 |
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
|
| 6941 |
|
| 6942 |
const int n_threads = state->shared->n_threads;
|
|
|
|
| 7016 |
.node = NULL,
|
| 7017 |
.shared = &state_shared,
|
| 7018 |
};
|
| 7019 |
+
int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
|
| 7020 |
assert(rc == 0);
|
| 7021 |
UNUSED(rc);
|
| 7022 |
}
|
|
|
|
| 7360 |
atomic_store(&state_shared.has_work, true);
|
| 7361 |
|
| 7362 |
for (int j = 0; j < n_threads - 1; j++) {
|
| 7363 |
+
int rc = ggml_thread_join(workers[j].thrd, NULL);
|
| 7364 |
assert(rc == 0);
|
| 7365 |
UNUSED(rc);
|
| 7366 |
}
|
|
|
|
| 7438 |
}
|
| 7439 |
|
| 7440 |
// check if node is part of the graph
|
| 7441 |
+
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
| 7442 |
if (cgraph == NULL) {
|
| 7443 |
return true;
|
| 7444 |
}
|
|
|
|
| 7452 |
return false;
|
| 7453 |
}
|
| 7454 |
|
| 7455 |
+
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
|
| 7456 |
for (int i = 0; i < cgraph->n_nodes; i++) {
|
| 7457 |
struct ggml_tensor * parent = cgraph->nodes[i];
|
| 7458 |
|
|
|
|
| 7581 |
|
| 7582 |
////////////////////////////////////////////////////////////////////////////////
|
| 7583 |
|
| 7584 |
+
static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) {
|
| 7585 |
int i = 0;
|
| 7586 |
for (int p = 0; p < np; ++p) {
|
| 7587 |
const int ne = ggml_nelements(ps[p]) ;
|
|
|
|
| 7592 |
}
|
| 7593 |
}
|
| 7594 |
|
| 7595 |
+
static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
|
| 7596 |
int i = 0;
|
| 7597 |
for (int p = 0; p < np; ++p) {
|
| 7598 |
const int ne = ggml_nelements(ps[p]) ;
|
|
|
|
| 7603 |
}
|
| 7604 |
}
|
| 7605 |
|
| 7606 |
+
static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
|
| 7607 |
int i = 0;
|
| 7608 |
for (int p = 0; p < np; ++p) {
|
| 7609 |
const int ne = ggml_nelements(ps[p]) ;
|
|
|
|
| 7620 |
// ref: https://arxiv.org/pdf/1412.6980.pdf
|
| 7621 |
//
|
| 7622 |
|
| 7623 |
+
static enum ggml_opt_result ggml_opt_adam(
|
| 7624 |
struct ggml_context * ctx,
|
| 7625 |
struct ggml_opt_params params,
|
| 7626 |
struct ggml_tensor * f,
|
|
|
|
| 7913 |
return GGML_LINESEARCH_FAIL;
|
| 7914 |
}
|
| 7915 |
|
| 7916 |
+
static enum ggml_opt_result ggml_opt_lbfgs(
|
| 7917 |
struct ggml_context * ctx,
|
| 7918 |
struct ggml_opt_params params,
|
| 7919 |
struct ggml_tensor * f,
|