{
  "image_size": 256,
  "max_image_size": 256,
  "codebook_size": 8192,
  "codebook_embed_dim": 8,
  "codebook_l2_norm": true,
  "codebook_show_usage": true,
  "commit_loss_beta": 0.25,
  "entropy_loss_ratio": 0.0,
  "vq_loss_ratio": 1.0,
  "kl_loss_weight": 1e-06,
  "tau": 0.07,
  "num_codebooks": 1,
  "encoder_ch_mult": [
    1,
    1,
    2,
    2,
    4
  ],
  "decoder_ch_mult": [
    1,
    1,
    2,
    2,
    4
  ],
  "z_channels": 256,
  "dropout_p": 0.0,
  "enc_type": "siglip2",
  "dec_type": "siglip2",
  "encoder_model": "siglip2_base",
  "decoder_model": "siglip2_base",
  "num_latent_tokens": 128,
  "enc_tuning_method": "full",
  "dec_tuning_method": "full",
  "enc_pretrained": true,
  "dec_pretrained": false,
  "enc_patch_size": 16,
  "dec_patch_size": 16,
  "enc_drop_path_rate": 0.0,
  "dec_drop_path_rate": 0.0,
  "repa": false,
  "repa_patch_size": 16,
  "repa_model": "siglip2",
  "repa_proj_dim": 1024,
  "repa_layer_indices": 1,
  "repa_loss_weight": 0.5,
  "repa_align": "global",
  "vq_mean": 0.0,
  "vq_std": 1.0,
  "causal_encoder": true,
  "causal_decoder": false,
  "gradient_checkpointing_encoder": false,
  "gradient_checkpointing_decoder": false,
  "group_size": 8,
  "causal_num": null,
  "global_token_loss_weight": 1.0,
  "correction_training": true
}