# Page-status residue captured together with this source: "Spaces: Sleeping".
import os | |
from speechbrain.inference.separation import SepformerSeparation as separator | |
import torchaudio | |
import gradio as gr | |
from moviepy.editor import VideoFileClip | |
def convert_video_to_audio(video_input):
    """Extract the audio track of a video file into an AAC-encoded ``.m4a`` file.

    The output path is the video's path with its final extension replaced by
    ``.m4a``.

    Args:
        video_input: Path to the video file.

    Returns:
        The normalized path of the audio file that was written.

    Raises:
        ValueError: If the video clip exposes no audio track.
    """
    # splitext strips only the last extension, so dots elsewhere in the path
    # (e.g. "./clip.mp4" or "/tmp/run.1/clip.final.mp4") are left intact.
    stem, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{stem}.m4a")

    video_clip = VideoFileClip(video_input)
    try:
        audio_clip = video_clip.audio
        # NOTE(review): moviepy is expected to leave ``audio`` as None for
        # silent videos; fail with a readable message instead of an
        # AttributeError on None.
        if audio_clip is None:
            raise ValueError(f"No audio track found in video: {video_input}")
        try:
            audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
        finally:
            audio_clip.close()
    finally:
        # Always release the underlying reader, even if encoding failed.
        video_clip.close()
    return audio_clip_filepath
# Loaded separator instances keyed by model source, so the pretrained model is
# built once per process instead of on every request.
_SEPARATOR_CACHE = {}


def _get_separator():
    """Return the shared enhancement model, loading it on first use."""
    source = "speechbrain/sepformer-whamr-enhancement"
    if source not in _SEPARATOR_CACHE:
        _SEPARATOR_CACHE[source] = separator.from_hparams(
            source=source,
            savedir='pretrained_models/sepformer-whamr-enhancement',
        )
    return _SEPARATOR_CACHE[source]


def speechbrain(input_obj, input_obj_type):
    """Run speech enhancement on an audio or video file and save the result.

    Args:
        input_obj: Path to the uploaded media file.
        input_obj_type: ``"video"`` to extract the audio track from a video
            first; any other value treats ``input_obj`` as an audio file.

    Returns:
        Path to a newly created ``.wav`` file holding the first estimated
        source, written with a sample rate of 8000 Hz.

    Raises:
        ValueError: If ``input_obj`` is empty or ``None`` (no file supplied).
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    # Reject a missing upload before doing any expensive work.
    if not input_obj:
        raise ValueError("No input file was provided.")

    if input_obj_type == "video":
        aud = convert_video_to_audio(input_obj)
    else:
        aud = input_obj

    est_sources = _get_separator().separate_file(path=aud)

    # A unique file per call prevents concurrent requests from overwriting
    # each other's output (the old fixed "clean_audio_file.wav" was shared).
    fd, output_path = tempfile.mkstemp(prefix="clean_audio_", suffix=".wav")
    os.close(fd)  # torchaudio reopens the path itself

    # Keep only the first estimated source; tensors must be on CPU to save.
    torchaudio.save(output_path, est_sources[:, :, 0].detach().cpu(), 8000)
    return output_path
def _enhancement_tab(media_input, file_type):
    """Build one Interface that passes *media_input* and its type to ``speechbrain``."""
    gr.Interface(
        fn=speechbrain,
        inputs=[
            media_input,
            # Single-choice radio: forwards the file type as the second argument.
            gr.Radio(choices=[file_type], value=file_type, label="File Type"),
        ],
        outputs=[
            gr.Audio(label="Output Audio", type="filepath"),
        ],
    )


def main():
    """Build the Speech Enhancement demo (video and audio tabs) and launch it."""
    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Ocean()) as demo:
        gr.Markdown("Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below.")
        # The second link previously had a bare '_blank' with no attribute
        # name; it is now a proper target='_blank'.
        gr.Markdown("<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>")
        # Example inputs are currently disabled:
        # examples = [
        #     ['samples_audio_samples_test_mixture.wav']
        # ]
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                _enhancement_tab(gr.Video(), "video")
            with gr.Tab("Audio", id="audio"):
                _enhancement_tab(gr.Audio(type="filepath"), "audio")
    # Launch once the layout is fully defined.
    demo.launch()
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()