import whisper
import pydub
import os

whisper_model = "medium"


def load_model():
    print("Loading audio model...")
    return whisper.load_model(whisper_model)


def audio_to_text(model, audio_file):
    audio = pydub.AudioSegment.from_file(audio_file)
    # Export for loading later
    audio.export("audio_tmp")
    try:
        audio = whisper.load_audio("audio_tmp")
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        options = whisper.DecodingOptions()
        result = whisper.decode(model, mel, options)
    finally:
        os.remove("audio_tmp")
    return result.text