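"""Gradio app for separating the instrumental stem from a song with a MelBand Roformer checkpoint."""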
import gradio as gr
import torch
import torchaudio
import yaml
from huggingface_hub import hf_hub_download

# config.yaml must define the target sample_rate read below; this sketch also
# assumes it stores the model's constructor kwargs under config['model'].
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)
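
# An illustrative config.yaml shape that would satisfy the keys read in this
# file (values are placeholders, not the checkpoint's real hyperparameters):
#
#   sample_rate: 44100
#   model:
#     dim: 384
#     depth: 6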


def load_model():
    """Fetch the pretrained checkpoint from the Hugging Face Hub and build the model."""
    repo_id = "GaboxR67/MelBandRoformers"
    filename = "melbandroformers/instrumental/Inst_ExperimentalV1.ckpt"

    checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    # Minimal sketch of model construction: assumes lucidrains' pip-installable
    # BS-RoFormer package (which ships a MelBandRoformer class) and that
    # config['model'] holds constructor kwargs matching this checkpoint.
    from bs_roformer import MelBandRoformer
    model = MelBandRoformer(**config['model'])

    # Accept bare state dicts as well as checkpoints that nest one under 'state_dict'.
    model.load_state_dict(checkpoint.get('state_dict', checkpoint))
    model.eval()  # inference only
    return model


# Build the model once at import time so every request reuses the same weights.
model = load_model()


def separate_audio(audio_file):
    """Separate the instrumental stem from an uploaded audio file.

    Returns the path to the rendered instrumental WAV.
    """
    waveform, sample_rate = torchaudio.load(audio_file)

    # Resample to the rate the model expects if the upload differs.
    if sample_rate != config['sample_rate']:
        resampler = torchaudio.transforms.Resample(sample_rate, config['sample_rate'])
        waveform = resampler(waveform)

    # Inference only; no gradients needed.
    with torch.no_grad():
        # Assumes the model maps a (channels, samples) waveform directly to the
        # instrumental stem of the same shape; chunked processing of long files
        # is omitted in this sketch.
        instrumental = model(waveform)

    output_path = "output_instrumental.wav"
    torchaudio.save(output_path, instrumental.cpu(), config['sample_rate'])
    return output_path


iface = gr.Interface(
    fn=separate_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=gr.Audio(label="Instrumental Output"),
    title="MelBand Roformer Audio Separation",
    description="Separate the instrumental from the vocals with a MelBand Roformer model",
)


if __name__ == "__main__":
    iface.launch()
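
# Usage: running this file starts a local Gradio server (default
# http://127.0.0.1:7860); passing share=True to launch() would also create a
# temporary public link.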