# Spaces: eyov / LyricExtractor (status: Sleeping)
# File size: 4,421 Bytes

import os
import sys
import logging
import gradio as gr
import shutil
from demucs_handler import DemucsProcessor, check_dependencies, configure_model
from whisper_handler import WhisperTranscriber
import tempfile
import torch
import torchaudio
import soundfile as sf
import librosa
import numpy as np

# Root logger configuration: report INFO and above, with each record
# prefixed by its timestamp and severity.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

def validate_environment():
    """Confirm the audio/ML stack is importable and log what was found.

    Imports ``torch``, ``torchaudio`` and ``demucs``. On success the PyTorch
    and Torchaudio versions and the CUDA availability flag are logged at
    INFO level. If any import fails, the error is logged and the process
    exits with status 1.
    """
    try:
        import torch
        import torchaudio
        import demucs  # imported only to prove the package is installed

        torch_version = torch.__version__
        logging.info(f"PyTorch version: {torch_version}")

        torchaudio_version = torchaudio.__version__
        logging.info(f"Torchaudio version: {torchaudio_version}")

        has_cuda = torch.cuda.is_available()
        logging.info(f"CUDA available: {has_cuda}")
    except ImportError as missing:
        failure = f"Environment validation failed: {missing}"
        logging.error(failure)
        sys.exit(1)

def create_interface():
    """Build the Gradio interface for vocal isolation and lyric transcription.

    Runs ``validate_environment()`` first (which exits the process when a
    required package cannot be imported), then creates one ``DemucsProcessor``
    and one ``WhisperTranscriber`` that the request handler closes over.

    Returns:
        The configured ``gr.Interface``. It is not launched here; the caller
        decides when to call ``launch()``.
    """
    validate_environment()
    processor = DemucsProcessor()
    transcriber = WhisperTranscriber()

    def process_audio(audio_file, whisper_model="base", progress=gr.Progress()):
        """Isolate the vocals of an uploaded file and transcribe them.

        Args:
            audio_file: Path of the uploaded audio, or ``None`` when nothing
                was uploaded.
            whisper_model: Value of the "Whisper Model Size" dropdown.
            progress: Gradio progress tracker used to report each stage.

        Returns:
            ``((sample_rate, samples), lyrics)`` on success, or
            ``(None, message)`` when no file was given or processing failed.
        """
        # NOTE(review): `whisper_model` is never handed to `transcriber`, so
        # the dropdown selection has no effect in this function. Confirm
        # whether WhisperTranscriber is meant to receive it.
        if audio_file is None:
            return None, "Please upload an audio file."

        try:
            progress(0, desc="Starting processing")
            logging.info("Processing file: %s", audio_file)

            # Every intermediate file lives inside temp_dir, which the context
            # manager deletes on exit (including on early return or error),
            # so no separate per-file cleanup is required.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_audio_path = os.path.join(temp_dir, "input.wav")
                vocals_output_path = os.path.join(temp_dir, "vocals.wav")

                # Decode the upload at 44.1 kHz and re-encode it as WAV so the
                # separator always receives the same container format.
                audio, sr = librosa.load(audio_file, sr=44100)
                sf.write(temp_audio_path, audio, samplerate=sr)

                progress(0.1, desc="Separating vocals")
                try:
                    vocals_path = processor.separate_vocals(temp_audio_path)
                    # Copy the separated stem to a known path in temp_dir.
                    shutil.copy2(vocals_path, vocals_output_path)
                except RuntimeError as e:
                    # logging.exception keeps the traceback in the log.
                    logging.exception("Vocal separation failed: %s", e)
                    return None, f"Vocal separation failed: {e}"

                # Load the vocals into memory now: the file on disk vanishes
                # with temp_dir, but the returned array must outlive it.
                vocals_audio, vocals_sr = librosa.load(vocals_output_path, sr=None)

                progress(0.75, desc="Transcribing")
                lyrics = transcriber.transcribe(vocals_output_path)
                progress(1.0, desc="Processing complete")

                # gr.Audio(type="numpy") takes a (sample_rate, data) tuple.
                return (vocals_sr, vocals_audio), lyrics

        except Exception as e:
            # Top-level boundary for the UI: surface the message to the user
            # and keep the full traceback in the server log.
            error_message = f"Processing error: {e}"
            logging.exception(error_message)
            return None, error_message

    description = (
        "Upload an audio file to extract vocals and transcribe lyrics\n"
        " Created by Ever Olivares - Looking for Summer 2025 Internship Opportunities\n"
        " Connect with me: [LinkedIn](https://www.linkedin.com/in/everolivares/)"
        " Currently not working as intended on HF tested on LightningAI with T4 running largeV2"
    )

    interface = gr.Interface(
        fn=process_audio,
        inputs=[
            gr.Audio(label="Upload Audio File", type="filepath"),
            gr.Dropdown(
                choices=["tiny", "base", "small", "medium", "large-v2"],
                value="medium",
                label="Whisper Model Size",
            ),
        ],
        outputs=[
            gr.Audio(label="Isolated Vocals", type="numpy"),
            gr.Textbox(label="Transcribed Lyrics", lines=10, max_lines=20),
        ],
        title="Audio Lyrics Extractor",
        description=description,
        analytics_enabled=False,
    )
    return interface

if __name__ == "__main__":
    # Script entry point: refuse to start when check_dependencies() reports
    # a problem, otherwise build the interface and serve it.
    if not check_dependencies():
        print("Please install missing dependencies")
        # Use sys.exit: the bare exit() helper is injected by the `site`
        # module for interactive use and is not guaranteed to exist.
        sys.exit(1)
    interface = create_interface()
    interface.launch()