ggerganov committed
Commit 982cdf8 (unverified) · Parent(s): 1e5ddb0

talk-llama : fix new rope interface

Files changed (1):
  examples/talk-llama/llama.cpp (+2, -9)
@@ -281,13 +281,6 @@ static T checked_mul(T a, T b) {
     return ret;
 }
 
-static size_t checked_div(size_t a, size_t b) {
-    if (b == 0 || a % b != 0) {
-        throw format("error dividing %zu / %zu", a, b);
-    }
-    return a / b;
-}
-
 static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
     char buf[256];
     snprintf(buf, sizeof(buf), "%5u", ne.at(0));
@@ -1237,8 +1230,8 @@ static bool llama_eval_internal(
         // self-attention
         {
             // compute Q and K and RoPE them
-            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
+            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
             ggml_set_name(Qcur, "Qcur");
             ggml_set_name(Kcur, "Kcur");
 
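
The change tracks the updated ggml RoPE API: ggml_rope_inplace now takes one more trailing integer argument, which both call sites pass as 0. A minimal sketch of the interface change is below; the parameter names (in particular n_ctx for the new trailing argument) are assumptions paraphrased from the ggml headers of this period rather than taken from this diff.

// Sketch only: parameter names are assumptions, not shown in this commit.

// Old interface (what talk-llama's copy of llama.cpp still called):
//   struct ggml_tensor * ggml_rope_inplace(struct ggml_context * ctx, struct ggml_tensor * a,
//                                           int n_past, int n_dims, int mode);

// New interface (adds a trailing context-size argument, assumed to be n_ctx):
//   struct ggml_tensor * ggml_rope_inplace(struct ggml_context * ctx, struct ggml_tensor * a,
//                                           int n_past, int n_dims, int mode, int n_ctx);

// Updated call shape used in llama_eval_internal(): mode stays 0 and the new
// trailing argument is passed as 0, preserving the previous behavior.
struct ggml_tensor * Qcur = ggml_rope_inplace(
        ctx0,
        ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N),
        n_past, n_rot, /*mode=*/ 0, /*n_ctx (assumed name)=*/ 0);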