# instgab / app.py
import gradio as gr
import torch
import torchaudio
import yaml
from huggingface_hub import hf_hub_download

# NOTE: assumed import path for the MelBandRoformer architecture; adjust it
# to wherever the model class is actually defined in this Space.
from mel_band_roformer import MelBandRoformer
# Load configuration
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)
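
# Assumed layout of config.yaml (not shown in this file); the code below only
# relies on the 'sample_rate' and 'model' keys. Example sketch:
#
#   sample_rate: 44100
#   model:
#     dim: 384        # hypothetical MelBandRoformer constructor arguments
#     depth: 6
#     stereo: true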
# Define model loading function
def load_model():
    # Repository and file path of the checkpoint on the Hugging Face Hub
    repo_id = "GaboxR67/MelBandRoformers"
    filename = "melbandroformers/instrumental/Inst_ExperimentalV1.ckpt"

    # Download the checkpoint from Hugging Face
    checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)

    # Load the checkpoint onto the CPU
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    # Instantiate the MelBandRoformer architecture. The constructor arguments
    # are assumed to live under config['model'], and the checkpoint is assumed
    # to hold either a bare state dict or one nested under 'state_dict';
    # adjust both to match the actual implementation.
    model = MelBandRoformer(**config['model'])
    state_dict = checkpoint.get('state_dict', checkpoint)
    model.load_state_dict(state_dict)
    model.eval()
    return model
# Initialize model
model = load_model()
def separate_audio(audio_file):
    """
    Process an audio file and return the separated instrumental stem.
    """
    # Load audio as a (channels, samples) tensor
    waveform, sample_rate = torchaudio.load(audio_file)

    # Resample if the input rate differs from the model's expected rate
    if sample_rate != config['sample_rate']:
        resampler = torchaudio.transforms.Resample(sample_rate, config['sample_rate'])
        waveform = resampler(waveform)

    # Run inference. A single forward pass over the full waveform is assumed
    # here; the actual MelBandRoformer implementation may expect a batch
    # dimension or chunked (overlap-add) processing for long inputs.
    with torch.no_grad():
        instrumental = model(waveform)

    # Save the separated instrumental and hand the file path back to Gradio
    output_path = "output_instrumental.wav"
    torchaudio.save(output_path, instrumental.cpu(), config['sample_rate'])
    return output_path
# Create Gradio interface
iface = gr.Interface(
    fn=separate_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=gr.Audio(label="Instrumental Output"),
    title="MelBand Roformer Audio Separation",
    description="Separate the instrumental from the vocals using a MelBand Roformer model",
)
if __name__ == "__main__":
    iface.launch()