|
配置文件已生成: C:\Users\baby7\Desktop\fastAPI\model_config.json
|
|
{
|
|
"model_info": {
|
|
"total_layers": 176,
|
|
"layers": [
|
|
{
|
|
"name": "image_encoder.encoder_layer.0.weight",
|
|
"shape": [
|
|
64,
|
|
3,
|
|
3,
|
|
3
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "image_encoder.encoder_layer.0.bias",
|
|
"shape": [
|
|
64
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "image_encoder.encoder_layer.4.weight",
|
|
"shape": [
|
|
768,
|
|
788544
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "image_encoder.encoder_layer.4.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_layer.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.0.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.1.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.2.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.3.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.4.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.5.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.6.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.7.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.8.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.9.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.10.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.self_attn.in_proj_weight",
|
|
"shape": [
|
|
2304,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.self_attn.in_proj_bias",
|
|
"shape": [
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.self_attn.out_proj.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.self_attn.out_proj.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.linear1.weight",
|
|
"shape": [
|
|
2048,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.linear1.bias",
|
|
"shape": [
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.linear2.weight",
|
|
"shape": [
|
|
768,
|
|
2048
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.linear2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.norm1.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.norm1.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.norm2.weight",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "text_encoder.transformer_encoder.layers.11.norm2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "audio_encoder.encoder_layer.0.weight",
|
|
"shape": [
|
|
768,
|
|
16000
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "audio_encoder.encoder_layer.0.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "audio_encoder.encoder_layer.2.weight",
|
|
"shape": [
|
|
768,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "audio_encoder.encoder_layer.2.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "fusion_layer.fusion_layer.weight",
|
|
"shape": [
|
|
768,
|
|
2304
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "fusion_layer.fusion_layer.bias",
|
|
"shape": [
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "vqa_layer.vqa_layer.weight",
|
|
"shape": [
|
|
30522,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "vqa_layer.vqa_layer.bias",
|
|
"shape": [
|
|
30522
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "caption_layer.caption_layer.weight",
|
|
"shape": [
|
|
30522,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "caption_layer.caption_layer.bias",
|
|
"shape": [
|
|
30522
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "retrieval_layer.retrieval_layer.weight",
|
|
"shape": [
|
|
30522,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "retrieval_layer.retrieval_layer.bias",
|
|
"shape": [
|
|
30522
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "asr_layer.asr_layer.weight",
|
|
"shape": [
|
|
30522,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "asr_layer.asr_layer.bias",
|
|
"shape": [
|
|
30522
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "realtime_asr_layer.realtime_asr_layer.weight",
|
|
"shape": [
|
|
30522,
|
|
768
|
|
],
|
|
"dtype": "torch.float32"
|
|
},
|
|
{
|
|
"name": "realtime_asr_layer.realtime_asr_layer.bias",
|
|
"shape": [
|
|
30522
|
|
],
|
|
"dtype": "torch.float32"
|
|
}
|
|
]
|
|
},
|
|
"file_info": {
|
|
"path": "C:\\Users\\baby7\\Desktop\\fastAPI\\AutoModel.pth",
|
|
"size": 3237240570,
|
|
"last_modified": 1735983514.6732724
|
|
}
|
|
}
|
|
|