Spaces:

natasa365
/

whisper.cpp

Running

App Files Community

semiformal-net Paul Edwards

ggerganov committed on Dec 1, 2022

Commit

b3a9b29

unverified ·

1 Parent(s): ecd7237

livestream : fix losing words across audio chunk (#195)

Browse files

* improve livestream script

* Update examples/livestream.sh

Co-authored-by: Georgi Gerganov <[email protected]>

Co-authored-by: Paul Edwards <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (1) hide show

examples/livestream.sh +33 -13

examples/livestream.sh CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/bin/bash
 # Transcribe audio livestream by feeding ffmpeg output to whisper.cpp at regular intervals
 # Idea by @semiformal-net
 # ref: https://github.com/ggerganov/whisper.cpp/issues/185
@@ -10,14 +10,15 @@
 #
 url="http://a.files.bbci.co.uk/media/live/manifesto/audio/simulcast/hls/nonuk/sbr_low/ak/bbc_world_service.m3u8"
-step_ms=10000
 model="base.en"
 if [ -z "$1" ]; then
-    echo "Usage: $0 stream_url [step_ms] [model]"
     echo ""
     echo "  Example:"
-    echo "    $0 $url $step_ms $model"
     echo ""
     echo "No url specified, using default: $url"
 else
@@ -25,7 +26,7 @@ else
 fi
 if [ -n "$2" ]; then
-    step_ms="$2"
 fi
 if [ -n "$3" ]; then
@@ -54,16 +55,35 @@ fi
 running=1
-trap "running=0" SIGINT SIGTERM
-printf "[+] Transcribing stream with model '$model', step_ms $step_ms (press Ctrl+C to stop):\n\n"
 while [ $running -eq 1 ]; do
-    ffmpeg -y -re -probesize 32 -i $url -ar 16000 -ac 1 -c:a pcm_s16le -t ${step_ms}ms /tmp/whisper-live0.wav > /dev/null 2> /tmp/whisper-live.err
-    if [ $? -ne 0 ]; then
-        printf "Error: ffmpeg failed to capture audio stream\n"
-        exit 1
     fi
-    mv /tmp/whisper-live0.wav /tmp/whisper-live.wav
-    ./main -t 8 -m ./models/ggml-small.en.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1 &
 done

 #!/bin/bash
+set -eo pipefail
 # Transcribe audio livestream by feeding ffmpeg output to whisper.cpp at regular intervals
 # Idea by @semiformal-net
 # ref: https://github.com/ggerganov/whisper.cpp/issues/185
 #
 url="http://a.files.bbci.co.uk/media/live/manifesto/audio/simulcast/hls/nonuk/sbr_low/ak/bbc_world_service.m3u8"
+fmt=aac # the audio format extension of the stream (TODO: auto detect)
+step_s=30
 model="base.en"
 if [ -z "$1" ]; then
+    echo "Usage: $0 stream_url [step_s] [model]"
     echo ""
     echo "  Example:"
+    echo "    $0 $url $step_s $model"
     echo ""
     echo "No url specified, using default: $url"
 else
 fi
 if [ -n "$2" ]; then
+    step_s="$2"
 fi
 if [ -n "$3" ]; then
 running=1
+#trap "running=0" SIGINT SIGTERM
+printf "[+] Transcribing stream with model '$model', step_s $step_s (press Ctrl+C to stop):\n\n"
+# continuous stream in native fmt (this file will grow forever!)
+ffmpeg -loglevel quiet -y -re -probesize 32 -i $url -c copy /tmp/whisper-live0.${fmt}  &
+if [ $? -ne 0 ]; then
+    printf "Error: ffmpeg failed to capture audio stream\n"
+    exit 1
+fi
+printf "Buffering audio. Please wait...\n"
+# The initial buffer can end up shorter than step_s even after sleeping for step_s, so wait for twice step_s before extracting the first chunk
+sleep $(($step_s*2))
+i=0
 while [ $running -eq 1 ]; do
+    # Reset the bash built-in SECONDS so the wait at the end of this iteration measures time elapsed since the iteration began:
+    # > "This variable expands to the number of seconds since the shell was started. Assignment to this variable resets the count to the value assigned, and the expanded value becomes the value assigned
+    # > plus the number of seconds since the assignment."
+    SECONDS=0
+    # Extract the next chunk from the growing capture file above and transcode it to 16 kHz mono WAV. -ss sets the start time; for every chunk after the first it is moved 0.5 s earlier so that words straddling the chunk boundary are not lost.
+    if [ $i -gt 0 ]; then
+        ffmpeg -loglevel quiet -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s-1)).5 -t $step_s /tmp/whisper-live.wav
+    else
+        ffmpeg -loglevel quiet -noaccurate_seek -i /tmp/whisper-live0.${fmt} -y -ar 16000 -ac 1 -c:a pcm_s16le -ss $(($i*$step_s)) -t $step_s /tmp/whisper-live.wav
     fi
+    ./main -t 8 -m ./models/ggml-base.en.bin -f /tmp/whisper-live.wav --no-timestamps -otxt 2> /tmp/whispererr | tail -n 1
+    echo
+    while [ $SECONDS -lt $step_s ]; do
+        sleep 1
+    done
+    ((i=i+1))
 done