Spaces:
Runtime error
Runtime error
| { | |
| "base_config": "egs/vocoder/gan/exp_config_base.json", | |
| "model_type": "GANVocoder", | |
| "dataset": [ | |
| "ljspeech", | |
| "vctk", | |
| "libritts", | |
| ], | |
| "dataset_path": { | |
| // TODO: Replace each "[dataset path]" placeholder below with the path to that dataset | |
| "ljspeech": "[dataset path]", | |
| "vctk": "[dataset path]", | |
| "libritts": "[dataset path]", | |
| }, | |
| // TODO: Fill in the output log path. The default, "ckpts/vocoder", corresponds to "Amphion/ckpts/vocoder" (presumably relative to the Amphion root directory) | |
| "log_dir": "ckpts/vocoder", | |
| "preprocess": { | |
| // TODO: Fill in the output data path. The default, "data", corresponds to "Amphion/data" (presumably relative to the Amphion root directory) | |
| "processed_dir": "data", | |
| // Acoustic features to extract during preprocessing | |
| "extract_mel": true, | |
| "extract_audio": true, | |
| "extract_pitch": false, | |
| "extract_uv": false, | |
| "extract_amplitude_phase": false, | |
| "pitch_extractor": "parselmouth", | |
| // Features used for model training | |
| "use_mel": true, | |
| "use_frame_pitch": false, | |
| "use_uv": false, | |
| "use_audio": true, | |
| "n_mel": 100, | |
| "sample_rate": 24000 | |
| }, | |
| "model": { | |
| "generator": "hifigan", | |
| "discriminators": [ | |
| "msd", | |
| "mpd", | |
| "mssbcqtd", | |
| "msstftd", | |
| ], | |
| "hifigan": { | |
| "resblock": "1", | |
| "upsample_rates": [ | |
| 8, | |
| 4, | |
| 2, | |
| 2, | |
| 2 | |
| ], | |
| "upsample_kernel_sizes": [ | |
| 16, | |
| 8, | |
| 4, | |
| 4, | |
| 4 | |
| ], | |
| "upsample_initial_channel": 768, | |
| "resblock_kernel_sizes": [ | |
| 3, | |
| 5, | |
| 7 | |
| ], | |
| "resblock_dilation_sizes": [ | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ], | |
| [ | |
| 1, | |
| 3, | |
| 5 | |
| ] | |
| ] | |
| }, | |
| "mpd": { | |
| "mpd_reshapes": [ | |
| 2, | |
| 3, | |
| 5, | |
| 7, | |
| 11, | |
| 17, | |
| 23, | |
| 37 | |
| ], | |
| "use_spectral_norm": false, | |
| "discriminator_channel_multi": 1 | |
| } | |
| }, | |
| "train": { | |
| "batch_size": 32, | |
| "adamw": { | |
| "lr": 2.0e-4, | |
| "adam_b1": 0.8, | |
| "adam_b2": 0.99 | |
| }, | |
| "exponential_lr": { | |
| "lr_decay": 0.999 | |
| }, | |
| "criterions": [ | |
| "feature", | |
| "discriminator", | |
| "generator", | |
| "mel", | |
| ] | |
| }, | |
| "inference": { | |
| "batch_size": 1, | |
| } | |
| } |