"""Gradio demo that runs a pre-trained SGMSE model on an uploaded audio file."""

import tempfile

import gradio as gr
import torch
import torchaudio
from sgmse.model import ScoreModel

CHECKPOINT_PATH = (
    "pretrained_checkpoints/speech_enhancement/train_vb_29nqe0uh_epoch=115.ckpt"
)

# Load the pre-trained model once at start-up so every request reuses it.
model = ScoreModel.load_from_checkpoint(CHECKPOINT_PATH)
# Inference only: switch off training-time behaviour (dropout, norm updates).
model.eval()


def enhance_speech(audio_file: str) -> str:
    """Enhance the audio stored at *audio_file* and return the result's path.

    Args:
        audio_file: Path of the noisy input audio. The Gradio input component
            is configured with ``type="filepath"``, so it supplies a path.

    Returns:
        Path of a newly created ``.wav`` file holding the enhanced audio,
        saved at the sample rate of the input.
    """
    # torchaudio.load yields a (channels, frames) tensor and the sample rate.
    noisy, sr = torchaudio.load(audio_file)
    # A leading batch dimension is always added here (not conditionally).
    # NOTE(review): assumes model.predict expects a batched tensor and returns
    # one with the same leading batch dimension -- confirm against the
    # installed sgmse version.
    noisy = noisy.unsqueeze(0)

    # No gradients are needed for inference; disabling them saves memory and
    # guarantees the output can be written to disk without autograd state.
    with torch.no_grad():
        enhanced = model.predict(noisy)

    # Write each result to its own temporary file: a single fixed file name
    # would be overwritten when several requests are served concurrently.
    # The file is intentionally not deleted here because Gradio reads it after
    # this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    torchaudio.save(output_file, enhanced.detach().cpu().squeeze(0), sr)
    return output_file


# Gradio interface setup
inputs = gr.Audio(label="Input Audio", type="filepath")
outputs = gr.Audio(label="Output Audio", type="filepath")
title = "Speech Enhancement using SGMSE"
description = "This Gradio demo uses the SGMSE model for speech enhancement. Upload your audio file to enhance it."
# The original literal contained nothing but a line break; it is written as an
# escape sequence here because a quoted string cannot span physical lines.
article = "\n"

demo = gr.Interface(
    fn=enhance_speech,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
)

# Start the web server only when executed as a script, so importing this
# module (e.g. for reuse of enhance_speech) does not launch the UI.
if __name__ == "__main__":
    demo.launch()