Spaces:
Sleeping
Sleeping
whisper : improve decoding strategy (#244)
Browse files
- Clear past prompt when there is very short audio left for processing.
My observation is that in these cases the decoding tends to repeat and
hallucinate stuff, and I think this is induced by the existing prompt.
- When we fail to sample a timestamp token, retry by clearing the past
prompt. If it fails again, then we advance the window by 1 second.
- whisper.cpp +15 -2
whisper.cpp
CHANGED
|
@@ -2650,10 +2650,17 @@ int whisper_full(
|
|
| 2650 |
}
|
| 2651 |
}
|
| 2652 |
|
|
|
|
| 2653 |
if (seek + 100 >= seek_end) {
|
| 2654 |
break;
|
| 2655 |
}
|
| 2656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2657 |
if (params.encoder_begin_callback) {
|
| 2658 |
if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
|
| 2659 |
fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
|
|
@@ -2780,8 +2787,14 @@ int whisper_full(
|
|
| 2780 |
}
|
| 2781 |
|
| 2782 |
if (failed) {
|
| 2783 |
-
|
| 2784 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2785 |
continue;
|
| 2786 |
}
|
| 2787 |
|
|
|
|
| 2650 |
}
|
| 2651 |
}
|
| 2652 |
|
| 2653 |
+
// if only 1 second left, then stop
|
| 2654 |
if (seek + 100 >= seek_end) {
|
| 2655 |
break;
|
| 2656 |
}
|
| 2657 |
|
| 2658 |
+
// if there is a very short audio segment left to process, we remove any past prompt since it tends
|
| 2659 |
+
// to confuse the decoder and often make it repeat or hallucinate stuff
|
| 2660 |
+
if (seek > seek_start && seek + 500 >= seek_end) {
|
| 2661 |
+
prompt_past.clear();
|
| 2662 |
+
}
|
| 2663 |
+
|
| 2664 |
if (params.encoder_begin_callback) {
|
| 2665 |
if (params.encoder_begin_callback(ctx, params.encoder_begin_callback_user_data) == false) {
|
| 2666 |
fprintf(stderr, "%s: encoder_begin_callback returned false - aborting\n", __func__);
|
|
|
|
| 2787 |
}
|
| 2788 |
|
| 2789 |
if (failed) {
|
| 2790 |
+
// when we fail to sample timestamp token, retry by clearing the past prompt
|
| 2791 |
+
// if it fails again, then we advance the window by 1 second
|
| 2792 |
+
if (prompt_past.size() > 0) {
|
| 2793 |
+
prompt_past.clear();
|
| 2794 |
+
} else {
|
| 2795 |
+
fprintf(stderr, "\n%s: failed to generate timestamp token - skipping one second\n\n", __func__);
|
| 2796 |
+
seek += 100;
|
| 2797 |
+
}
|
| 2798 |
continue;
|
| 2799 |
}
|
| 2800 |
|