Spaces:
Runtime error
Runtime error
import os | |
from pathlib import Path | |
from typing import Any | |
from collections import OrderedDict | |
from pytube import YouTube | |
import whisper | |
from transforming.transform import Transform | |
from video import YoutubeVideo | |
from utils import accepts_types | |
class WhisperTransform(Transform): | |
""" | |
Transform a Video object using Whisper model. It's a | |
concrete Transform. | |
Args: | |
model (`str`): | |
Size of Whisper model. Can be tiny, base (default), small, medium, and large. | |
without_timestamps (`bool`, defaults to `False`): | |
To add phrase-level timestamps. | |
""" | |
def __init__(self, model: str="base", without_timestamps: bool=False) -> None: | |
self.model = whisper.load_model(model) | |
self.without_timestamps = without_timestamps | |
def apply(self, video: YoutubeVideo) -> YoutubeVideo: | |
"""Creates a new video with transcriptions created by Whisper. | |
""" | |
# Create a YouTube object | |
yt = YouTube(video.url) | |
print(f"Video title and url: {video.title} {video.url}") | |
audio_file = self._get_audio_from_video(yt) | |
result = self.model.transcribe(audio_file, | |
without_timestamps=self.without_timestamps) | |
transcription = result["text"] | |
data = [] | |
for seg in result['segments']: | |
data.append(OrderedDict({'start': seg['start'], 'end': seg['end'],'text': seg['text']})) | |
os.remove(audio_file) | |
return YoutubeVideo(channel_name = video.channel_name, | |
url = video.url, | |
title = video.title, | |
description = video.description, | |
transcription = transcription, | |
segments = data) | |
def _get_audio_from_video(self, yt: Any) -> Path: | |
# TODO: Add credits | |
try: | |
video = yt.streams.filter(only_audio=True).first() | |
except Exception as e: | |
print(f"StreamingData exception print: {e}") | |
pass | |
else: | |
out_file = video.download(output_path=".") | |
base, _ = os.path.splitext(out_file) | |
new_file = base + ".mp3" | |
os.rename(out_file, new_file) | |
return new_file |