Spaces:
Sleeping
Sleeping
ggml : avoid duplicating function calls using MIN/MAX macros (llama/5325)
Browse files
* Avoid duplicating function calls when using MIN/MAX macros.
Since these macros textually copy "a" and "b", they ask the compiler to evaluate one of them twice. The compiler has no problem removing the duplication in something like MAX(0, x + 2), but in some cases the arguments are function calls, and those calls simply happen twice.
By explicitly evaluating each expression once and storing the result in a local variable before passing it to the macro, we get smaller and faster code without duplicate calls. See ggml_rope_yarn_corr_dims in Compiler Explorer:
https://godbolt.org/z/Ee4KMrvKh
Code behaves exactly the same.
* Update ggml.c
---------
Co-authored-by: Georgi Gerganov <[email protected]>
ggml.c
CHANGED
|
@@ -2470,7 +2470,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
| 2470 |
size_t max_size = 0;
|
| 2471 |
|
| 2472 |
for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
|
| 2473 |
-
|
|
|
|
| 2474 |
}
|
| 2475 |
|
| 2476 |
return max_size;
|
|
@@ -11887,8 +11888,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
|
|
| 11887 |
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
|
| 11888 |
) {
|
| 11889 |
// start and end correction dims
|
| 11890 |
-
|
| 11891 |
-
|
|
|
|
|
|
|
| 11892 |
}
|
| 11893 |
|
| 11894 |
static void ggml_compute_forward_rope_f32(
|
|
|
|
| 2470 |
size_t max_size = 0;
|
| 2471 |
|
| 2472 |
for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
|
| 2473 |
+
size_t bytes = ggml_nbytes(tensor);
|
| 2474 |
+
max_size = MAX(max_size, bytes);
|
| 2475 |
}
|
| 2476 |
|
| 2477 |
return max_size;
|
|
|
|
| 11888 |
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
|
| 11889 |
) {
|
| 11890 |
// start and end correction dims
|
| 11891 |
+
float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base));
|
| 11892 |
+
float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base));
|
| 11893 |
+
dims[0] = MAX(0, start);
|
| 11894 |
+
dims[1] = MIN(n_dims - 1, end);
|
| 11895 |
}
|
| 11896 |
|
| 11897 |
static void ggml_compute_forward_rope_f32(
|