| 配置文件已生成: C:\Users\baby7\Desktop\fastAPI\model_config.json | |
| { | |
| "model_info": { | |
| "total_layers": 176, | |
| "layers": [ | |
| { | |
| "name": "image_encoder.encoder_layer.0.weight", | |
| "shape": [ | |
| 64, | |
| 3, | |
| 3, | |
| 3 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "image_encoder.encoder_layer.0.bias", | |
| "shape": [ | |
| 64 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "image_encoder.encoder_layer.4.weight", | |
| "shape": [ | |
| 768, | |
| 788544 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "image_encoder.encoder_layer.4.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_layer.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.0.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.1.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.2.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.3.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.4.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.5.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.6.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.7.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.8.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.9.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.10.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.self_attn.in_proj_weight", | |
| "shape": [ | |
| 2304, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.self_attn.in_proj_bias", | |
| "shape": [ | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.self_attn.out_proj.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.self_attn.out_proj.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.linear1.weight", | |
| "shape": [ | |
| 2048, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.linear1.bias", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.linear2.weight", | |
| "shape": [ | |
| 768, | |
| 2048 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.linear2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.norm1.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.norm1.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.norm2.weight", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "text_encoder.transformer_encoder.layers.11.norm2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "audio_encoder.encoder_layer.0.weight", | |
| "shape": [ | |
| 768, | |
| 16000 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "audio_encoder.encoder_layer.0.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "audio_encoder.encoder_layer.2.weight", | |
| "shape": [ | |
| 768, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "audio_encoder.encoder_layer.2.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "fusion_layer.fusion_layer.weight", | |
| "shape": [ | |
| 768, | |
| 2304 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "fusion_layer.fusion_layer.bias", | |
| "shape": [ | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "vqa_layer.vqa_layer.weight", | |
| "shape": [ | |
| 30522, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "vqa_layer.vqa_layer.bias", | |
| "shape": [ | |
| 30522 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "caption_layer.caption_layer.weight", | |
| "shape": [ | |
| 30522, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "caption_layer.caption_layer.bias", | |
| "shape": [ | |
| 30522 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "retrieval_layer.retrieval_layer.weight", | |
| "shape": [ | |
| 30522, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "retrieval_layer.retrieval_layer.bias", | |
| "shape": [ | |
| 30522 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "asr_layer.asr_layer.weight", | |
| "shape": [ | |
| 30522, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "asr_layer.asr_layer.bias", | |
| "shape": [ | |
| 30522 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "realtime_asr_layer.realtime_asr_layer.weight", | |
| "shape": [ | |
| 30522, | |
| 768 | |
| ], | |
| "dtype": "torch.float32" | |
| }, | |
| { | |
| "name": "realtime_asr_layer.realtime_asr_layer.bias", | |
| "shape": [ | |
| 30522 | |
| ], | |
| "dtype": "torch.float32" | |
| } | |
| ] | |
| }, | |
| "file_info": { | |
| "path": "C:\\Users\\baby7\\Desktop\\fastAPI\\AutoModel.pth", | |
| "size": 3237240570, | |
| "last_modified": 1735983514.6732724 | |
| } | |
| } | |