import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import os


def _same_padding(kernel_size: int, dilation: int = 1) -> int:
    """Return the padding that keeps a stride-1 Conv1d output the input length.

    The result is exact for odd ``kernel_size``; an even kernel size cannot be
    padded symmetrically to preserve length.
    """
    return dilation * (kernel_size - 1) // 2


class ResidualBlock(nn.Module):
    """Stack of dilated Conv1d pairs, each wrapped in a residual connection.

    For every entry in ``dilation`` the block applies
    ``leaky_relu -> dilated conv -> leaky_relu -> plain conv`` and adds the
    result back onto its input.

    Args:
        channels: Number of input and output channels of every convolution.
        kernel_size: Kernel size shared by all convolutions. Should be odd so
            that padding can preserve the sequence length.
        dilation: Dilation of the first convolution in each pair; one pair is
            created per entry.
    """

    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
        super().__init__()
        # Padding is derived from kernel_size and dilation so the residual add
        # in forward() always sees matching lengths. With the default
        # kernel_size=3 this yields padding=d and padding=1 respectively.
        self.convs1 = nn.ModuleList([
            nn.Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=d,
                padding=_same_padding(kernel_size, d),
            )
            for d in dilation
        ])
        self.convs2 = nn.ModuleList([
            nn.Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                padding=_same_padding(kernel_size, 1),
            )
            for _ in dilation
        ])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through every conv pair, accumulating residuals.

        Args:
            x: Tensor in Conv1d layout whose channel dimension equals the
                ``channels`` the block was built with.

        Returns:
            A tensor with the same shape as ``x``.
        """
        for c1, c2 in zip(self.convs1, self.convs2):
            xt = F.leaky_relu(x, 0.1)
            xt = c1(xt)
            xt = F.leaky_relu(xt, 0.1)
            xt = c2(xt)
            x = xt + x
        return x


class RVCModel(nn.Module):
    """Convolutional encoder/decoder operating on 128-channel sequences.

    The encoder is a kernel-7 Conv1d from 128 channels to
    ``config["model"]["upsample_initial_channel"]`` followed by three
    ``ResidualBlock`` modules; the decoder is a kernel-7 Conv1d back to 128
    channels.

    Args:
        config: Mapping with a ``"model"`` entry that provides
            ``"upsample_initial_channel"``. It is stored on ``self.config``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        model_cfg = config["model"]
        hidden_channels = model_cfg["upsample_initial_channel"]

        self.encoder = nn.Sequential(
            nn.Conv1d(128, hidden_channels, 7, 1, 3),
            *[ResidualBlock(hidden_channels) for _ in range(3)],
        )
        self.decoder = nn.Sequential(
            nn.Conv1d(hidden_channels, 128, 7, 1, 3),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode then decode ``x``.

        Args:
            x: Tensor in Conv1d layout with 128 channels.

        Returns:
            A tensor with 128 channels and the same length as ``x``.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def convert_voice(self, audio_path):
        """Placeholder: currently returns ``audio_path`` unchanged."""
        return audio_path

    @classmethod
    def from_pretrained(cls, model_path):
        """Build a model from a directory containing ``config.json``.

        If ``model.pth`` exists in the same directory its state dict is loaded
        onto the CPU; otherwise the model keeps its freshly initialized
        weights.

        Args:
            model_path: Directory holding ``config.json`` and, optionally,
                ``model.pth``.

        Returns:
            The constructed model.

        Raises:
            FileNotFoundError: If ``config.json`` is missing.
            KeyError: If the config lacks the keys required by ``__init__``.
        """
        config_path = os.path.join(model_path, "config.json")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        model = cls(config)

        model_file = os.path.join(model_path, "model.pth")
        if os.path.exists(model_file):
            # SECURITY: torch.load unpickles the file, which can execute
            # arbitrary code. Only load checkpoints from trusted sources, or
            # pass weights_only=True on PyTorch versions that support it.
            model.load_state_dict(torch.load(model_file, map_location="cpu"))

        return model