{
"_class_name": "AudioDiffusionPipeline",
"_diffusers_version": "0.24.0",
"feature_extractor": [
"transformers",
"ClapFeatureExtractor"
],
"language_model": [
"transformers",
"GPT2Model"
],
"projection_model": [
"audioldm2",
"AudioLDM2ProjectionModel"
],
"mel": [
"audio_diffusion",
"Mel"
],
"text_encoder": [
"transformers",
"ClapModel"
],
"text_encoder_2": [
"transformers",
"T5EncoderModel"
],
"scheduler": [
"diffusers",
"DDPMScheduler"
],
"tokenizer": [
"transformers",
"RobertaTokenizerFast"
],
"tokenizer_2": [
"transformers",
"T5TokenizerFast"
],
"unet": [
"diffusers",
"UNet2DModel"
],
"vqvae": [
"diffusers",
"AutoencoderKL"
]
}