import importlib
from inspect import isfunction
import os

import soundfile as sf


def seed_everything(seed):
    import random, os
    import numpy as np
    import torch

    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark must stay disabled for reproducibility; enabling it lets cuDNN
    # pick potentially non-deterministic kernels.
    torch.backends.cudnn.benchmark = False


def save_wave(waveform, savepath, name="outwav"):
    # waveform is expected to be a (batch, 1, num_samples) array sampled at 16 kHz.
    if type(name) is not list:
        name = [name] * waveform.shape[0]

    for i in range(waveform.shape[0]):
        path = os.path.join(
            savepath,
            "%s_%s.wav"
            % (
                os.path.basename(name[i])
                if ".wav" not in name[i]
                else os.path.basename(name[i]).split(".")[0],
                i,
            ),
        )
        sf.write(path, waveform[i, 0], samplerate=16000)


def exists(x):
    return x is not None


def default(val, d):
    if exists(val):
        return val
    return d() if isfunction(d) else d


def count_params(model, verbose=False):
    total_params = sum(p.numel() for p in model.parameters())
    if verbose:
        print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.")
    return total_params


def get_obj_from_str(string, reload=False):
    module, cls = string.rsplit(".", 1)
    if reload:
        module_imp = importlib.import_module(module)
        importlib.reload(module_imp)
    return getattr(importlib.import_module(module, package=None), cls)


def instantiate_from_config(config):
    if "target" not in config:
        if config == "__is_first_stage__":
            return None
        elif config == "__is_unconditional__":
            return None
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))
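

# Illustrative sketch (not part of the original module): `instantiate_from_config`
# builds any object from a {"target": "<dotted.path>", "params": {...}} dict.
# `collections.OrderedDict` is used below only because it is always importable;
# the helper name `_example_instantiate` is hypothetical.
def _example_instantiate():
    # Equivalent to: from collections import OrderedDict; OrderedDict()
    return instantiate_from_config({"target": "collections.OrderedDict", "params": {}})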


def default_audioldm_config():
    return {
        "wave_file_save_path": "./output",
        "id": {
            "version": "v1",
            "name": "default",
            "root": "/mnt/fast/nobackup/users/hl01486/projects/general_audio_generation/AudioLDM-python/config/default/latent_diffusion.yaml",
        },
        "model": {
            "device": "cuda",
            "reload_from_ckpt": "/mnt/fast/nobackup/scratch4weeks/hl01486/exps/audio_generation/stablediffusion/LDM/audioverse/2023_01_14_full_F4_B_spatial_v2_v1/checkpoints/last.ckpt",
            "target": "audioldm.pipline.LatentDiffusion",
            "params": {
                "base_learning_rate": 5e-06,
                "linear_start": 0.0015,
                "linear_end": 0.0195,
                "num_timesteps_cond": 1,
                "log_every_t": 200,
                "timesteps": 1000,
                "first_stage_key": "fbank",
                "cond_stage_key": "waveform",
                "latent_t_size": 256,
                "latent_f_size": 16,
                "channels": 8,
                "cond_stage_trainable": True,
                "conditioning_key": "film",
                "monitor": "val/loss_simple_ema",
                "scale_by_std": True,
                "unet_config": {
                    "target": "audioldm.latent_diffusion.openaimodel.UNetModel",
                    "params": {
                        "image_size": 64,
                        "extra_film_condition_dim": 512,
                        "extra_film_use_concat": True,
                        "in_channels": 8,
                        "out_channels": 8,
                        "model_channels": 128,
                        "attention_resolutions": [8, 4, 2],
                        "num_res_blocks": 2,
                        "channel_mult": [1, 2, 3, 5],
                        "num_head_channels": 32,
                        "use_spatial_transformer": True,
                    },
                },
                "first_stage_config": {
                    "base_learning_rate": 4.5e-05,
                    "target": "audioldm.variational_autoencoder.autoencoder.AutoencoderKL",
                    "params": {
                        "monitor": "val/rec_loss",
                        "image_key": "fbank",
                        "subband": 1,
                        "embed_dim": 8,
                        "time_shuffle": 1,
                        "ddconfig": {
                            "double_z": True,
                            "z_channels": 8,
                            "resolution": 256,
                            "downsample_time": False,
                            "in_channels": 1,
                            "out_ch": 1,
                            "ch": 128,
                            "ch_mult": [1, 2, 4],
                            "num_res_blocks": 2,
                            "attn_resolutions": [],
                            "dropout": 0.0,
                        },
                    },
                },
                "cond_stage_config": {
                    "target": "audioldm.clap.encoders.CLAPAudioEmbeddingClassifierFreev2",
                    "params": {
                        "key": "waveform",
                        "sampling_rate": 16000,
                        "embed_mode": "audio",
                        "unconditional_prob": 0.1,
                    },
                },
            },
        },
    }
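

# Minimal usage sketch (an assumption, not part of the original file): it only
# needs numpy, torch, and soundfile, which the functions above already depend on.
# Running the module directly writes two silent 16 kHz test clips to the default
# output directory from the config.
if __name__ == "__main__":
    import numpy as np

    seed_everything(42)

    config = default_audioldm_config()
    print("Model target:", config["model"]["target"])

    # save_wave expects a (batch, 1, num_samples) float array sampled at 16 kHz.
    out_dir = config["wave_file_save_path"]
    os.makedirs(out_dir, exist_ok=True)
    dummy = np.zeros((2, 1, 16000), dtype=np.float32)  # two one-second silent clips
    save_wave(dummy, out_dir, name=["silence_a", "silence_b"])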