File size: 1,429 Bytes
23507fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
    "image_size": 256,
    "max_image_size": 256,
    "codebook_size": 8192,
    "codebook_embed_dim": 8,
    "codebook_l2_norm": true,
    "codebook_show_usage": true,
    "commit_loss_beta": 0.25,
    "entropy_loss_ratio": 0.0,
    "vq_loss_ratio": 1.0,
    "kl_loss_weight": 1e-06,
    "tau": 0.07,
    "num_codebooks": 1,
    "encoder_ch_mult": [
        1,
        1,
        2,
        2,
        4
    ],
    "decoder_ch_mult": [
        1,
        1,
        2,
        2,
        4
    ],
    "z_channels": 256,
    "dropout_p": 0.0,
    "enc_type": "siglip2",
    "dec_type": "siglip2",
    "encoder_model": "siglip2_base",
    "decoder_model": "siglip2_base",
    "num_latent_tokens": 128,
    "enc_tuning_method": "full",
    "dec_tuning_method": "full",
    "enc_pretrained": true,
    "dec_pretrained": false,
    "enc_patch_size": 16,
    "dec_patch_size": 16,
    "enc_drop_path_rate": 0.0,
    "dec_drop_path_rate": 0.0,
    "repa": false,
    "repa_patch_size": 16,
    "repa_model": "siglip2",
    "repa_proj_dim": 1024,
    "repa_layer_indices": 1,
    "repa_loss_weight": 0.5,
    "repa_align": "global",
    "vq_mean": 0.0,
    "vq_std": 1.0,
    "causal_encoder": true,
    "causal_decoder": false,
    "gradient_checkpointing_encoder": false,
    "gradient_checkpointing_decoder": false,
    "group_size": 8,
    "causal_num": null,
    "global_token_loss_weight": 1.0,
    "correction_training": true
}