import os
from pathlib import Path
from typing import Any, Optional
from collections import OrderedDict

from pytube import YouTube
import whisper

from transforming.transform import Transform
from video import YoutubeVideo
from utils import accepts_types


class WhisperTransform(Transform):
    """
    Transform a Video object using Whisper model. It's a
    concrete Transform.

    Args:
        model (`str`):
            Size of Whisper model. Can be tiny, base (default), small,
            medium, and large. Passed as-is to `whisper.load_model`.
        without_timestamps (`bool`, defaults to `False`):
            Forwarded unchanged to the model's `transcribe` call. With the
            default (`False`) Whisper is left free to predict phrase-level
            timestamps (NOTE(review): semantics are defined by Whisper's
            decoding options -- confirm against the installed version).
    """

    def __init__(self, model: str = "base", without_timestamps: bool = False) -> None:
        # Loading the model happens once per transform instance and is reused
        # for every `apply` call.
        self.model = whisper.load_model(model)
        self.without_timestamps = without_timestamps

    @accepts_types(YoutubeVideo)
    def apply(self, video: YoutubeVideo) -> YoutubeVideo:
        """Creates a new video with transcriptions created by Whisper.

        Downloads the audio track of `video.url` into the current working
        directory, transcribes it, deletes the downloaded file and returns a
        new `YoutubeVideo` carrying the full text and the per-segment data.

        Args:
            video (`YoutubeVideo`):
                Source video; its `url`, `title`, `channel_name` and
                `description` are read and copied to the result.

        Returns:
            `YoutubeVideo`: A new object with `transcription` set to the full
            text and `segments` set to a list of `OrderedDict`s with the keys
            `start`, `end` and `text`.

        Raises:
            RuntimeError: If no audio stream could be downloaded for the video.
        """
        # Create a YouTube object
        yt = YouTube(video.url)
        print(f"Video title and url: {video.title} {video.url}")

        audio_file = self._get_audio_from_video(yt)
        if audio_file is None:
            # Fail here with a clear message instead of handing `None` to
            # Whisper and crashing somewhere inside its audio loader.
            raise RuntimeError(
                f"Could not download an audio stream for {video.url}"
            )

        try:
            result = self.model.transcribe(
                audio_file, without_timestamps=self.without_timestamps
            )
        finally:
            # Always clean up the downloaded audio, even when transcription
            # fails, so repeated failures do not fill the working directory.
            os.remove(audio_file)

        transcription = result["text"]
        segments = [
            OrderedDict(
                {"start": seg["start"], "end": seg["end"], "text": seg["text"]}
            )
            for seg in result["segments"]
        ]

        return YoutubeVideo(
            channel_name=video.channel_name,
            url=video.url,
            title=video.title,
            description=video.description,
            transcription=transcription,
            segments=segments,
        )

    def _get_audio_from_video(self, yt: Any) -> Optional[str]:
        """Download the first audio-only stream of `yt` as an `.mp3`-named file.

        Args:
            yt (`Any`):
                Object exposing a pytube-like `streams` query
                (`streams.filter(only_audio=True).first()`).

        Returns:
            `Optional[str]`: Path of the downloaded file, or `None` when the
            stream lookup raised or no audio-only stream is available.
        """
        # TODO: Add credits
        try:
            stream = yt.streams.filter(only_audio=True).first()
        except Exception as e:
            # Best-effort lookup: report the problem and let the caller decide
            # what to do with the missing audio.
            print(f"StreamingData exception print: {e}")
            return None

        if stream is None:
            # `first()` yields None when the filter matches nothing.
            print("StreamingData exception print: no audio-only stream available")
            return None

        out_file = stream.download(output_path=".")
        base, _ = os.path.splitext(out_file)
        new_file = base + ".mp3"
        # Only the extension changes (no transcoding). `os.replace` overwrites
        # a stale file left by an earlier failed run, which `os.rename` refuses
        # to do on Windows.
        os.replace(out_file, new_file)
        return new_file