ggerganov committed
Commit bfa259c · unverified · 1 Parent(s): 2daf96b

whisper : improve printfs

Files changed (1):
  whisper.cpp +14 -13
whisper.cpp CHANGED

@@ -518,15 +518,6 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
         wctx.buf_memory.resize(MEM_REQ_MEMORY.at(model.type));
         wctx.buf_compute.resize(std::max(MEM_REQ_ENCODE.at(model.type), MEM_REQ_DECODE.at(model.type)));
         wctx.buf_compute_layer.resize(std::max(MEM_REQ_ENCODE_LAYER.at(model.type), MEM_REQ_DECODE_LAYER.at(model.type)));
-
-        // this is the total memory required to run the inference
-        const size_t mem_required =
-                wctx.buf_model->size() +
-                wctx.buf_memory.size() +
-                wctx.buf_compute.size() +
-                wctx.buf_compute_layer.size();
-
-        fprintf(stderr, "%s: mem_required = %.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
     }
 
     // load mel filters
@@ -599,11 +590,21 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
         }
     }
 
+    {
+        // this is the total memory required to run the inference
+        const size_t mem_required =
+                wctx.buf_model->size() +
+                wctx.buf_memory.size() +
+                wctx.buf_compute.size() +
+                wctx.buf_compute_layer.size();
+
+        fprintf(stderr, "%s: mem_required = %7.2f MB\n", __func__, mem_required / 1024.0 / 1024.0);
+    }
+
     // for the big tensors, we have the option to store the data in 16-bit floats
     // in order to save memory and also to speed up the computation
     const ggml_type wtype = model.hparams.f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
 
-
     size_t ctx_size = 0;
     size_t ctx_mem_size = 0;
 
@@ -722,7 +723,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
 
         ctx_size += (15 + 15*n_audio_layer + 24*n_text_layer)*256; // object overhead
 
-        fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
+        fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/(1024.0*1024.0));
     }
 
     // create the ggml context
@@ -983,7 +984,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
             ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v) +
             ggml_nbytes(model.memory_cross_k) + ggml_nbytes(model.memory_cross_v);
 
-        fprintf(stderr, "%s: memory size = %8.2f MB\n", __func__, memory_size/1024.0/1024.0);
+        fprintf(stderr, "%s: memory size = %7.2f MB\n", __func__, memory_size/1024.0/1024.0);
     }
 
     // load weights
@@ -1047,7 +1048,7 @@ static bool whisper_model_load(const std::string & fname, whisper_context & wctx
             model.n_loaded++;
         }
 
-        fprintf(stderr, "%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
+        fprintf(stderr, "%s: model size = %7.2f MB\n", __func__, total_size/1024.0/1024.0);
 
         if (model.n_loaded == 0) {
             fprintf(stderr, "%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
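
For context on the formatting part of this change, here is a minimal standalone C++ sketch (the labels and sizes below are made up for illustration, not taken from the commit) showing why a fixed-width specifier such as %7.2f keeps the reported MB values in one right-aligned column, whereas the previous mix of %.2f, %6.2f and %8.2f let the columns drift:

#include <cstdio>

int main() {
    // Hypothetical sizes in MB, only to illustrate the alignment behavior.
    const char * labels[]   = { "mem_required", "ggml ctx size", "memory size", "model size" };
    const double sizes_mb[] = { 506.23, 17.92, 140.60, 74.36 };

    for (int i = 0; i < 4; ++i) {
        // %7.2f reserves a minimum field width of 7 characters for the number
        // (e.g. " 506.23", "  17.92"), so values up to 9999.99 MB line up.
        // %-13s is used here only to pad the labels; the actual whisper.cpp
        // printfs spell out each label in its own format string.
        fprintf(stderr, "%-13s = %7.2f MB\n", labels[i], sizes_mb[i]);
    }

    return 0;
}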