{
  "_class_name": "AudioDiffusion",
  "_diffusers_version": "0.24.0",
  "feature_extractor": [
    "transformers",
    "ClapFeatureExtractor"
  ],
  "language_model": [
    "transformers",
    "GPT2Model"
  ],
  "projection_model": [
    "audioldm2",
    "AudioLDM2ProjectionModel"
  ],
  "mel": [
    "audio_diffusion",
    "Mel"
  ],
  "text_encoder": [
    "transformers",
    "ClapModel"
  ],
  "text_encoder_2": [
    "transformers",
    "T5EncoderModel"
  ],
  "scheduler": [
    "diffusers",
    "DDPMScheduler"
  ],
  "tokenizer": [
    "transformers",
    "RobertaTokenizerFast"
  ],
  "tokenizer_2": [
    "transformers",
    "T5TokenizerFast"
  ],
  "unet": [
    "diffusers",
    "UNet2DModel"
  ],
  "vqvae": [
    "diffusers",
    "AutoencoderKL"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ],
  "vocoder": [
    "transformers",
    "SpeechT5HifiGan"
  ]
}