# Model registry.
# NOTE(review): nesting reconstructed from the value-less `models:` and
# `nextgpt:` keys (original indentation was lost) — confirm against the loader.
models:
  nextgpt:
    # Presumably the names of the model / agent classes to instantiate — confirm.
    model_name: NextGPTModel
    agent_name: DeepSpeedAgent
# General run settings (plain integers / path; no trailing table residue).
seed: 13
max_length: 512
logging_step: 5
num_clip_tokens: 77
gen_emb_dim: 768
# Relative path; resolved against whatever working directory the consumer uses.
pretrained_ckpt_path: ../ckpt/pretrained_ckpt/
# Backbone variant selectors.
# NOTE(review): presumably used to pick checkpoint sub-directories under
# pretrained_ckpt_path — confirm against the loading code.
vicuna_version: 7b_v0
imagebind_version: huge
# Image-generation token settings.
n_img_tokens: 4
# Single-element list; -1 presumably selects the last layer — confirm.
text_emb_to_img_layers: [-1]
num_gen_img_tokens: 4
text_fc_to_img_mode: transformer
# Video-generation token settings.
n_video_tokens: 24
# Single-element list; -1 presumably selects the last layer — confirm.
text_emb_to_video_layers: [-1]
num_gen_video_tokens: 24
text_fc_to_video_mode: transformer
# Audio-generation token settings.
n_audio_tokens: 8
# Single-element list; -1 presumably selects the last layer — confirm.
text_emb_to_audio_layers: [-1]
num_gen_audio_tokens: 8
text_fc_to_audio_mode: transformer
# Diffusion decoder identifiers, one per output modality.
# NOTE(review): the `org/name` form looks like model-hub repository ids — confirm.
image_diffusion: runwayml/stable-diffusion-v1-5
video_diffusion: cerspense/zeroscope_v2_576w
audio_diffusion: cvssp/audioldm-l-full