{
  "model": "vitamin_large",
  "exp_name": "unitok_large_causal",
  "output_dir": "local_output",
  "resume_from": "",
  "lpips_path": "",
  "dino_path": "",
  "fid_eval_src": "",
  "fid_eval_dst": "",
  "vis_img_dir": "asset/vis_imgs/",
  "fid_feature_extractor": "",
  "clip_pretrain_path": "",
  "fp16": false,
  "bf16": true,
  "tf32": true,
  "compile_model": false,
  "ddp_static": false,
  "grad_ckpt": true,
  "grad_accu": 1,
  "train_data": "",
  "val_data": null,
  "dataset_type": "webdataset",
  "imagenet_val": "",
  "imagenet_v2": null,
  "subset_ratio": 1.0,
  "img_size": 256,
  "resize_ratio": 1.125,
  "hflip": false,
  "workers": 16,
  "train_num_samples": 1280000000,
  "train_data_upsampling_factors": null,
  "dataset_resampled": false,
  "use_aug": false,
  "vocab_size": 32768,
  "vocab_width": 64,
  "vocab_norm": true,
  "vq_beta": 0.25,
  "num_codebooks": 8,
  "quant_proj": "attn",
  "embed_dim": 768,
  "num_query": 0,
  "use_clip_pretrain": false,
  "patch_size": 16,
  "drop_path": 0.1,
  "text_width": 768,
  "text_heads": 12,
  "text_layers": 12,
  "text_vocab_size": 49408,
  "text_context_length": 77,
  "local_loss": true,
  "gather_with_grad": true,
  "pretrained_clip": null,
  "pretrained_clip_text": null,
  "lock_text": false,
  "lock_text_unlocked_layers": 0,
  "lock_text_freeze_layer_norm": false,
  "force_custom_text": false,
  "force_custom_vision": false,
  "zeroshot_eval_freq": 1,
  "dino_depth": 12,
  "dino_kernel_size": 9,
  "disc_norm": "gn",
  "disc_aug_prob": 1.0,
  "disc_specnorm": false,
  "step_disc_every": 1,
  "vae_init": -0.5,
  "vocab_init": -1,
  "disc_init": -0.5,
  "epoch": 1,
  "local_bs": 56,
  "vae_local_bs": 56,
  "global_bs": 16384,
  "lr": 0.0005,
  "wd": 0.02,
  "disc_lr": 2e-05,
  "disc_wd": 0.2,
  "grad_clip": 10,
  "ema": 0.9999,
  "warmup_iter": null,
  "warmup_ep": 0.01,
  "disc_start_ep": 0.375,
  "disc_warmup_ep": 0.03,
  "schedule": "cos",
  "lr_start_ratio": 0.0,
  "lr_end_ratio": 0.1,
  "disc_lr_end_ratio": 0.1,
  "custom_lr_multiplier": null,
  "optimizer": "adamw",
  "optim_eps": 1e-06,
  "fuse_opt": false,
  "optim_beta": "0.9_0.95",
  "disc_optim_beta": "0.5_0.9",
  "l1": 0.2,
  "l2": 1.0,
  "lp": 1.0,
  "lpr": 48,
  "ld": 0.4,
  "le": 0.0,
  "lq": 1.0,
  "lc": 1.0,
  "e_temp": 0.01,
  "gada": 1,
  "bcr": 4.0,
  "bcr_cut": 0.2,
  "dcrit": "hg",
  "report_wandb": true,
  "wandb_notes": null,
  "run_id": null,
  "eval_per_epoch": 8,
  "dbg_unused_param": false,
  "dbg_nan": false,
  "seed": null,
  "deterministic": false,
  "same_seed_for_all_ranks": 0
}