mrbear1024 committed on
Commit
874af4a
·
verified ·
1 Parent(s): 4abefb9

Upload 5 files

Browse files
checkpoints/240112_icl_audio2secc_vox2_cmlr/config.yaml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ amp: false
3
+ audio_type: hubert
4
+ base_config:
5
+ - ./audio2secc_vae.yaml
6
+ batch_size: 4
7
+ binarization_args:
8
+ with_coeff: true
9
+ with_hubert: true
10
+ with_mel: true
11
+ binary_data_dir: data/binary/voxceleb2_audio2motion
12
+ blink_mode: blink_unit
13
+ clip_grad_norm: 0.5
14
+ clip_grad_value: 0
15
+ debug: false
16
+ ds_name: Concat_voxceleb2_CMLR
17
+ eval_max_batches: 10
18
+ gen_dir_name: ''
19
+ hidden_size: 256
20
+ icl_model_type: icl_flow_matching
21
+ infer_audio_source_name: ''
22
+ infer_ckpt_steps: 40000
23
+ infer_out_npy_name: ''
24
+ init_from_ckpt: ''
25
+ lambda_kl: 0.02
26
+ lambda_kl_t1: 2000
27
+ lambda_kl_t2: 2000
28
+ lambda_l2_reg_exp: 0.0
29
+ lambda_lap_exp: 0.0
30
+ lambda_lap_exp_x1: 0.1
31
+ lambda_mse_exp: 0.0
32
+ lambda_mse_exp_x1: 0.1
33
+ lambda_mse_icl: 1.0
34
+ lambda_mse_lm2d: 0.0
35
+ lambda_mse_lm3d: 0.0
36
+ lambda_mse_lm3d_x1: 0.1
37
+ lambda_sync_lm3d: 0.05
38
+ load_ckpt: ''
39
+ load_db_to_memory: false
40
+ lr: 0.0005
41
+ max_sentences_per_batch: 512
42
+ max_tokens_per_batch: 20000
43
+ max_updates: 4000000
44
+ motion_type: exp
45
+ num_ckpt_keep: 100
46
+ num_sanity_val_steps: 5
47
+ num_valid_plots: 1
48
+ num_workers: 8
49
+ optimizer_adam_beta1: 0.9
50
+ optimizer_adam_beta2: 0.999
51
+ print_nan_grads: false
52
+ process_id: 0
53
+ raw_data_dir: /home/tiger/datasets/raw/TH1KH_512
54
+ ref_id_mode: first_frame
55
+ resume_from_checkpoint: 0
56
+ sample_min_length: 32
57
+ save_best: false
58
+ save_codes:
59
+ - tasks
60
+ - modules
61
+ - egs
62
+ save_gt: true
63
+ scheduler: exponential
64
+ seed: 9999
65
+ smo_win_size: 5
66
+ split_seed: 999
67
+ syncnet_ckpt_dir: checkpoints/0904_syncnet/syncnet_hubert_vox2
68
+ task_cls: tasks.os_avatar.icl_audio2secc_task.Audio2SECCTask
69
+ tb_log_interval: 100
70
+ total_process: 1
71
+ use_aux_features: true
72
+ use_aux_loss_on_x1: true
73
+ use_eye_amp_embed: false
74
+ use_flow: true
75
+ use_fork: true
76
+ use_kv_dataset: true
77
+ use_mouth_amp_embed: true
78
+ use_pitch: true
79
+ val_check_interval: 2000
80
+ valid_infer_interval: 2000
81
+ valid_monitor_key: val_loss
82
+ valid_monitor_mode: min
83
+ warmup_updates: 1000
84
+ weight_decay: 0
85
+ work_dir: checkpoints/240112_audio2secc/icl_audio2secc_vox2_cmlr
86
+ x_multiply: 16
checkpoints/240112_icl_audio2secc_vox2_cmlr/model_ckpt_steps_1856000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:367167db3b25fe07de9255871ce7813158551c4b00bfadd7334a671648924a2e
3
+ size 462941609
checkpoints/mimictalk_orig/os_secc2plane_torso/config.yaml ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ add_ffhq_singe_disc: false
3
+ also_update_decoder: false
4
+ amp: false
5
+ base_channel: 32768
6
+ base_config:
7
+ - ./secc_img2plane.yaml
8
+ batch_size: 1
9
+ binary_data_dir: data/binary/CelebV-HQ
10
+ blur_fade_kimg: 20
11
+ blur_init_sigma: 10
12
+ blur_raw_target: true
13
+ box_warp: 1
14
+ ckpt_milestone_interval: 50000
15
+ clip_grad_norm: 1.0
16
+ clip_grad_value: 0
17
+ cond_hid_dim: 32
18
+ cond_out_dim: 16
19
+ cond_type: idexp_lm3d_normalized
20
+ debug: false
21
+ density_reg_p_dist: 0.004
22
+ disable_highreso_at_stage1: true
23
+ disc_c_noise: 1.0
24
+ disc_cond_mode: none
25
+ ds_name: Concat_VFHQ_CelebVHQ_TH1KH_RAVDESS
26
+ ema_interval: 400
27
+ enable_rescale_plane_regulation: false
28
+ eval_max_batches: 100
29
+ ffhq_disc_inp_mode: eg3d_gen
30
+ final_resolution: 512
31
+ flipped_to_world_coord: true
32
+ fuse_with_deform_source: false
33
+ gen_cond_mode: none
34
+ generator_condition_on_pose: true
35
+ gpc_reg_fade_kimg: 1000
36
+ gpc_reg_prob: 0.8
37
+ group_size_for_mini_batch_std: 2
38
+ htbsr_head_threshold: 1.0
39
+ htbsr_head_weight_fuse_mode: v2
40
+ img2plane_backbone_mode: composite
41
+ img2plane_backbone_scale: standard
42
+ init_from_ckpt: checkpoints/240118_os_secc2planes/os_secc2plane_pertubeBlink0.05_pertubeSECC0.05
43
+ lam_occlusion_2_reg_l1: 0.0
44
+ lam_occlusion_reg_l1: 0.0
45
+ lam_occlusion_weights_entropy: 0.001
46
+ lambda_G_adversarial_adv: 1.0
47
+ lambda_G_supervise_adv: 1.0
48
+ lambda_G_supervise_mse: 1.0
49
+ lambda_G_supervise_mse_raw: 1.0
50
+ lambda_density_reg: 0.25
51
+ lambda_ffhq_mv_adv: 0.002
52
+ lambda_gradient_penalty: 1.0
53
+ lambda_mse: 1.0
54
+ lambda_mse_depth: 0.0
55
+ lambda_th1kh_mv_adv: 0.003
56
+ lambda_weights_entropy: 0.01
57
+ lambda_weights_l1: 0.1
58
+ load_ckpt: ''
59
+ lpips_mode: vgg19_v2
60
+ lr_d: 0.0002
61
+ lr_decay_interval: 5000
62
+ lr_decay_rate: 0.95
63
+ lr_g: 1.0e-05
64
+ lr_lambda_pertube_secc: 0.01
65
+ lr_mul_cano_img2plane: 1.0
66
+ mapping_network_depth: 2
67
+ max_channel: 512
68
+ max_updates: 100000
69
+ mimic_plane: false
70
+ min_rescale_factor: 0.25
71
+ motion_smo_win_size: 5
72
+ neural_rendering_resolution: 128
73
+ normalize_cond: false
74
+ normalize_radius: false
75
+ not_save_modules:
76
+ - criterion_lpips
77
+ - eg3d_model
78
+ num_ckpt_keep: 1
79
+ num_fp16_layers_in_discriminator: 4
80
+ num_fp16_layers_in_generator: 0
81
+ num_fp16_layers_in_super_resolution: 4
82
+ num_samples_coarse: 48
83
+ num_samples_fine: 48
84
+ num_sanity_val_steps: 1
85
+ num_valid_plots: 25
86
+ num_workers: 8
87
+ occlusion_fuse: true
88
+ ones_ws_for_sr: true
89
+ optimizer_adam_beta1_d: 0.0
90
+ optimizer_adam_beta1_g: 0.0
91
+ optimizer_adam_beta2_d: 0.99
92
+ optimizer_adam_beta2_g: 0.99
93
+ phase1_plane_fusion_mode: add
94
+ pncc_cond_mode: cano_src_tgt
95
+ pretrained_eg3d_ckpt: /mnt/bn/ailabrenyi/entries/yezhenhui/projects/GeneFace_private/checkpoints/0628_img2planes/eg3d_baseline_run2/model_ckpt_steps_100000.ckpt
96
+ print_nan_grads: false
97
+ process_id: 0
98
+ processed_data_dir: data/processed/videos
99
+ random_sample_pose: true
100
+ raw_data_dir: /home/tiger/datasets/raw/FFHQ
101
+ ray_far: auto
102
+ ray_near: auto
103
+ reg_interval_d: 16
104
+ reg_interval_g: 4
105
+ reg_interval_g_cond: 4
106
+ reload_head_ckpt: ''
107
+ resume_from_checkpoint: 0
108
+ save_best: true
109
+ save_codes:
110
+ - tasks
111
+ - modules
112
+ - egs
113
+ secc_pertube_mode: randn
114
+ secc_pertube_randn_scale: 0.01
115
+ secc_segformer_scale: b0
116
+ seed: 9999
117
+ seg_out_mode: head
118
+ smo_win_size: 5
119
+ split_seed: 999
120
+ sr_type: vanilla
121
+ start_adv_iters: 40000
122
+ target_pertube_blink_secc_loss: 0.05
123
+ target_pertube_secc_loss: 0.05
124
+ task_cls: tasks.os_avatar.secc_img2plane_torso_task.SECC_Img2PlaneEG3D_TorsoTask
125
+ tb_log_interval: 100
126
+ torch_compile: true
127
+ torso_kp_num: 4
128
+ torso_model_version: v2
129
+ torso_occlusion_reg_unmask_factor: 0.3
130
+ torso_ref_segout_mode: torso
131
+ total_process: 1
132
+ triplane_depth: 3
133
+ triplane_feature_type: trigrid_v2
134
+ triplane_hid_dim: 32
135
+ two_stage_training: true
136
+ update_on_th1kh_samples: false
137
+ update_src2src_interval: 4
138
+ use_kv_dataset: true
139
+ use_motion_smo_net: false
140
+ use_mse: false
141
+ use_th1kh_disc: false
142
+ use_th1kh_mv_adv: false
143
+ val_check_interval: 2000
144
+ valid_infer_interval: 2000
145
+ valid_monitor_key: val_loss
146
+ valid_monitor_mode: min
147
+ video_id: May
148
+ w_dim: 512
149
+ warmup_updates: 4000
150
+ weight_fuse: true
151
+ work_dir: checkpoints/240120_os_secc2planes_torso/os_secc2plane_torso_htbsrFusev2_htbsrThres1.0
152
+ z_dim: 512
checkpoints/mimictalk_orig/os_secc2plane_torso/model_ckpt_steps_100000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:757c1b73d938da0ec3fba555a155a31ac803ddc8d343bba2a3c69845844b213a
3
+ size 1414788463
checkpoints/pretrained_ckpts/mit_b0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df468f7f13c4186f25bd3e2caf09e4f927b5b5ac0abccac84011dae747d4c49c
3
+ size 14331578