Dr. Tom Murphy VII Ph.D ggerganov committed on
Commit
9bb2b0a
·
unverified ·
1 Parent(s): 11e5f6b

ggml : avoid duplicating function calls using MIN/MAX macros (llama/5325)

Browse files

* Avoid duplicating function calls when using MIN/MAX macros.

Since these macros paste their arguments "a" and "b" textually into the expansion, they ask the compiler to evaluate one of them twice. The compiler doesn't have a problem with removing the duplication in something like MAX(0, x + 2), but in some cases we're calling functions, and those calls just happen twice.
By explicitly evaluating the expression once into a local variable before passing it to the macro, we get smaller and faster code without duplicate calls. See ggml_rope_yarn_corr_dims in Compiler Explorer:

https://godbolt.org/z/Ee4KMrvKh

Code behaves exactly the same.

* Update ggml.c

---------

Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (1) hide show
  1. ggml.c +6 -3
ggml.c CHANGED
@@ -2470,7 +2470,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
2470
  size_t max_size = 0;
2471
 
2472
  for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
2473
- max_size = MAX(max_size, ggml_nbytes(tensor));
 
2474
  }
2475
 
2476
  return max_size;
@@ -11887,8 +11888,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
11887
  int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
11888
  ) {
11889
  // start and end correction dims
11890
- dims[0] = MAX(0, floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)));
11891
- dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)));
 
 
11892
  }
11893
 
11894
  static void ggml_compute_forward_rope_f32(
 
2470
  size_t max_size = 0;
2471
 
2472
  for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
2473
+ size_t bytes = ggml_nbytes(tensor);
2474
+ max_size = MAX(max_size, bytes);
2475
  }
2476
 
2477
  return max_size;
 
11888
  int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
11889
  ) {
11890
  // start and end correction dims
11891
+ float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base));
11892
+ float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base));
11893
+ dims[0] = MAX(0, start);
11894
+ dims[1] = MIN(n_dims - 1, end);
11895
  }
11896
 
11897
  static void ggml_compute_forward_rope_f32(