{ "architectures": [ "MultiModalClassificationModel" ], "audio_dim": 160000, "lip_dim": 12288, "model_type": "multimodal", "num_classes": 3, "torch_dtype": "float32", "transformers_version": "4.40.2" }