ggerganov committed on
Commit
96829a5
·
unverified ·
1 Parent(s): 2110f23

whisper : improve decoding strategy (#244)

Browse files

- Clear the past prompt when there is only a very short stretch of audio left to process.
My observation is that in these cases the decoder tends to repeat itself and
hallucinate, and I think this is induced by the existing prompt.
- When we fail to sample a timestamp token, retry after clearing the past
prompt. If it fails again, we advance the window by 1 second.

Files changed (1) hide show
  1. whisper.cpp +15 -2
whisper.cpp CHANGED
@@ -2650,10 +2650,17 @@ int whisper_full(
2650
  }
2651
  }
2652
 
 
2653
  if (seek + 100 >= seek_end) {
2654
  break;
2655
  }
2656
 
 
 
 
 
 
 
2657
  if (params.encoder_begin_callback) {
2658
  if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
2659
  fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
@@ -2780,8 +2787,14 @@ int whisper_full(
2780
  }
2781
 
2782
  if (failed) {
2783
- fprintf(stderr, "\n%s: failed to generate timestamp token - using fallback strategy\n\n", __func__);
2784
- seek += 100;
 
 
 
 
 
 
2785
  continue;
2786
  }
2787
 
 
2650
  }
2651
  }
2652
 
2653
+ // if only 1 second left, then stop
2654
  if (seek + 100 >= seek_end) {
2655
  break;
2656
  }
2657
 
2658
+ // if there is a very short audio segment left to process, we remove any past prompt since it tends
2659
+ // to confuse the decoder and often make it repeat or hallucinate stuff
2660
+ if (seek > seek_start && seek + 500 >= seek_end) {
2661
+ prompt_past.clear();
2662
+ }
2663
+
2664
  if (params.encoder_begin_callback) {
2665
  if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
2666
  fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
 
2787
  }
2788
 
2789
  if (failed) {
2790
+ // when we fail to sample timestamp token, retry by clearing the past prompt
2791
+ // if it fails again, then we advance the window by 1 second
2792
+ if (prompt_past.size() > 0) {
2793
+ prompt_past.clear();
2794
+ } else {
2795
+ fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
2796
+ seek += 100;
2797
+ }
2798
  continue;
2799
  }
2800