"""
YouTube Video Analysis and Interaction Module

This module provides a comprehensive set of tools for analyzing YouTube videos,
extracting information, and answering questions based on video content. It
leverages the LangChain library for natural language processing tasks and the
YouTube Transcript API for fetching video transcripts.

Classes:
    MainPointsExtractor: Extracts and formats main points from YouTube video
        transcripts. Timestamps are formatted for direct use in YouTube
        comments, enabling clickable links to specific video sections when
        pasted.
    SummaryExtractor: Handles the extraction and formatting of video summaries.
    QuestionAnswerExtractor: Processes user questions and extracts answers from
        video transcripts.
    YouTubeAgent: Manages the overall agent setup for interacting with YouTube
        videos and processing user queries.

Key Features:
    - Main points summarization in multiple formats
    - Video content summarization
    - Question answering based on video content
    - Flexible AI agent for handling various YouTube video-related tasks
"""

import os
from typing import List, Dict, Any, Union, Type

import openai
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.agents import tool, AgentExecutor
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser, JsonOutputFunctionsParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.memory import ConversationBufferWindowMemory
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())  # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']

# Module-wide sampling temperature shared by every ChatOpenAI instance created
# in this file. Stored as a value (rather than by rebinding the getter) so that
# references to get_temperature obtained before a set_temperature() call still
# observe the update.
_temperature = 0  # Default value


def get_temperature():
    """Return the temperature currently configured for newly created models."""
    return _temperature


def set_temperature(new_temperature):
    """
    Set the temperature used by models created after this call.

    Args:
        new_temperature: Value later returned by get_temperature().
    """
    global _temperature
    _temperature = new_temperature
    # print(f"Temperature set to: {get_temperature()}")


class Points_1(BaseModel):
    """Pydantic model for representing extracted points from Youtube-Transcript"""
    timestamp: float = Field(description="The timestamp (in floating-point number) of when main points are discussed or talked about in the video.")
    main_point: str = Field(description="A title for Main point.")
    summary: str = Field(description="A summary of main points discussed at that timestamp. End with fullstop.")
    emoji: str = Field(description="An emoji that matches the summary.")


class Points_2(BaseModel):
    """Pydantic model for representing extracted points."""
    main_point: str = Field(description="The main topic, theme, or subject extracted from the subtitle.")
    summary: str = Field(description="The context or brief explanation of the main point.")
    emoji: str = Field(description="An emoji that represents or summarizes the main point.")
    timestamp: float = Field(description="The timestamp (in floating-point number) from the video where the main point is mentioned.")


class MainPointsExtractor:
    """
    A tool for extracting and formatting main points from YouTube video transcripts.

    This class provides methods to process transcripts and identify key points
    using natural language processing techniques.
    """

    class Info_1(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_1]

    class Info_2(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_2]

    # NOTE: the docstring below is consumed by @tool as the tool description,
    # so its wording is part of the runtime behavior.
    @staticmethod
    @tool(return_direct=True)
    def get_youtube_video_main_points(youtube_video_id: str) -> str:
        """
        Extracts and formats main points with Timestamps from YouTube video transcripts.
        Timestamps are formatted for direct use in YouTube comments, enabling clickable links to specific video sections when pasted.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: Formatted string of main points extracted from the video.
        """
        # Any failure (transcript fetch, model call, parsing) propagates to the
        # caller unchanged; this tool does not convert errors into strings.
        transcript = MainPointsExtractor._get_youtube_video_transcript(youtube_video_id)
        main_points_1 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_1)
        main_points_2 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_2)
        # Style_1 shows the Info_2 extraction and Style_2 the Info_1 extraction.
        return f"""Main points extracted from YouTube video (ID: {youtube_video_id})\nStyle_1:\n```\n{main_points_2}\n```\nStyle_2:\n```\n{main_points_1}\n```\nChoose the style that best suits your needs for presenting the main points of the video."""

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript for a YouTube video, keeping start times.

        Each transcript entry is rendered as "<start with 2 decimals>: <text> "
        and the entries are concatenated without any extra separator.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The full transcript of the video with per-entry timestamps.

        Raises:
            Exception: Whatever YouTubeTranscriptApi.get_transcript raises is
                propagated unchanged.
        """
        entries = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return "".join(f"{entry['start']:.2f}: {entry['text']} " for entry in entries)

    @staticmethod
    def _extract_main_points(transcript: str, info_model: Union[Type[Info_1], Type[Info_2]]) -> str:
        """
        Extracts main points from the transcript and formats them as a comment.

        The transcript is split into chunks, each chunk is sent to the model
        with `info_model` bound as an OpenAI function, the per-chunk "points"
        lists are flattened, and the result is rendered by
        `_format_youtube_comment`.

        Args:
            transcript (str): The full, timestamped transcript of the video.
            info_model: The Pydantic model (Info_1 or Info_2) describing the
                structure the model should fill in.

        Returns:
            str: The extracted main points, one formatted line per point.
        """
        extraction_functions = [convert_to_openai_function(info_model)]
        llm = ChatOpenAI(temperature=get_temperature())
        extraction_model = llm.bind(functions=extraction_functions)

        system_message = """
        You are an AI assistant that extracts info from video transcripts.
        When extracting info, ensure that:
        1. Each point has a unique timestamp.

        In addition to these specific requirements, you have the authority to make other improvements as you see fit. This may include:
        - Refining the summaries for clarity and conciseness
        - Adjusting emoji choices to better represent the content
        - Reorganizing points for better logical flow
        - Removing redundant information
        - Adding context where necessary

        Your goal is to produce a refined and accurate representation of the main points from the video transcript. Use your judgment to balance adherence to the specific rules with overall improvement of the extracted information.
        """

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}")
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="points")

        # Split on " <digit>" so chunk boundaries fall where a transcript
        # entry's leading timestamp begins (see _get_youtube_video_transcript).
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=0,
            chunk_size=8192,
            separators=[f" {char}" for char in "123456789"],
        )
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])

        chain = (
            prep
            | extraction_chain.map()
            | MainPointsExtractor._flatten
            | MainPointsExtractor._format_youtube_comment
        )
        return chain.invoke(transcript)

    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_youtube_comment(json_data: List[Dict[str, Any]]) -> str:
        """
        Formats extracted main points into a YouTube-style comment.

        Each entry becomes "HH:MM:SS <emoji> <main_point>: <summary>"; a single
        trailing '.' on the main point is dropped before the colon is added.

        Args:
            json_data (List[Dict[str, Any]]): List of dictionaries with the
                keys 'timestamp', 'emoji', 'summary' and 'main_point'.

        Returns:
            str: Formatted string representing the main points as a YouTube
                comment (one line per point, surrounding whitespace stripped).
        """
        def _format_timestamp(seconds):
            # Convert a number of seconds into zero-padded HH:MM:SS.
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            secs = int(seconds % 60)
            return f"{hours:02}:{minutes:02}:{secs:02}"

        lines = []
        for entry in json_data:
            timestamp = _format_timestamp(entry['timestamp'])
            emoji = entry['emoji']
            summary = entry['summary']
            point = entry['main_point']
            if point.endswith('.'):
                point = point[:-1]
            lines.append(f"{timestamp} {emoji} {point}: {summary}")
        return "\n".join(lines).strip()


###############################################################################


class Summary(BaseModel):
    """Pydantic model for representing extracted summary."""
    summary: str = Field(description="Extract detailed information from the content.")


class SummaryExtractor:
    """
    A tool for extracting and formatting summaries from YouTube video transcripts.

    This class provides methods to process transcripts and generate concise
    summaries using natural language processing techniques.
    """

    class Info(BaseModel):
        """Pydantic model for representing a collection of summaries."""
        summary: List[Summary]

    # NOTE: the docstring below is consumed by @tool as the tool description,
    # so its wording is part of the runtime behavior.
    @staticmethod
    @tool(return_direct=False)
    def get_youtube_video_summary(youtube_video_id: str) -> str:
        """
        Extracts and formats a summary from a YouTube video transcript.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: Formatted string of the summary extracted from the video.
        """
        # Deliberate best-effort boundary: failures are reported to the agent
        # as text instead of being raised.
        try:
            transcript = SummaryExtractor._get_youtube_video_transcript(youtube_video_id)
            summary = SummaryExtractor._extract_summary(transcript)
            return SummaryExtractor._format_summary(summary)
        except Exception as e:
            return f"Error extracting summary: {str(e)}"

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript text for a YouTube video (without timestamps).

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The text of all transcript entries joined by single spaces.

        Raises:
            Exception: Whatever YouTubeTranscriptApi.get_transcript raises is
                propagated unchanged.
        """
        entries = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return " ".join(entry['text'] for entry in entries)

    @staticmethod
    def _extract_summary(transcript: str) -> List[Dict[str, Any]]:
        """
        Extracts summaries from a YouTube video transcript, chunk by chunk.

        Args:
            transcript (str): The full transcript of the video.

        Returns:
            List[Dict[str, Any]]: The flattened per-chunk "summary" items
                produced by the output parser; `_format_summary` reads the
                'summary' key of each item.
        """
        extraction_functions = [convert_to_openai_function(SummaryExtractor.Info)]
        llm = ChatOpenAI(temperature=get_temperature())
        extraction_model = llm.bind(functions=extraction_functions)

        prompt = ChatPromptTemplate.from_messages([("human", "{input}")])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="summary")

        # Split on " <uppercase letter>" separators.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=0,
            chunk_size=8192,
            separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"],
        )
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])

        chain = prep | extraction_chain.map() | MainPointsExtractor._flatten
        return chain.invoke(transcript)

    @staticmethod
    def _format_summary(summaries: List[Dict[str, Any]]) -> str:
        """
        Formats the list of summaries into a single string.

        Args:
            summaries (List[Dict[str, Any]]): Items each carrying a 'summary'
                key.

        Returns:
            str: All summaries joined by blank lines.
        """
        return "\n\n".join(s["summary"] for s in summaries)


###############################################################################


class Answer(BaseModel):
    """Pydantic model for representing an answer to a question."""
    answer: str = Field(description="The answer to the user's question based on the video transcript.")
    confidence: float = Field(description="A confidence score between 0 and 1 indicating how certain the model is about the answer.")


class QuestionAnswerExtractor:
    """
    A tool for answering questions about YouTube videos based on their transcripts.

    This class provides methods to process transcripts and generate answers to
    user questions using natural language processing techniques.
    """

    class Info(BaseModel):
        """Pydantic model for representing a collection of answers."""
        answers: List[Answer]

    # NOTE: the docstring below is consumed by @tool as the tool description,
    # so its wording is part of the runtime behavior.
    @staticmethod
    @tool(return_direct=True)
    def get_answer(youtube_video_id: str, question: str) -> str:
        """
        Answers a question about a YouTube video based on its transcript.

        Args:
            youtube_video_id (str): The ID of the YouTube video.
            question (str): The user's question about the video.

        Returns:
            str: Formatted string containing the answer to the user's question.
        """
        # Deliberate best-effort boundary: failures are reported as text
        # instead of being raised.
        try:
            transcript = QuestionAnswerExtractor._get_youtube_video_transcript(youtube_video_id)
            answer = QuestionAnswerExtractor._extract_answer(transcript, question)
            return QuestionAnswerExtractor._format_answer(answer)
        except Exception as e:
            return f"Error answering question: {str(e)}"

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetches the transcript text for a YouTube video (without timestamps).

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The text of all transcript entries joined by single spaces.

        Raises:
            Exception: Whatever YouTubeTranscriptApi.get_transcript raises is
                propagated unchanged.
        """
        entries = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return " ".join(entry['text'] for entry in entries)

    @staticmethod
    def _extract_answer(transcript: str, question: str) -> List[Dict[str, Any]]:
        """
        Extracts answers to the user's question from the video transcript.

        The transcript is split into overlapping chunks and the question is
        asked against every chunk; the per-chunk "answers" lists are flattened
        into one list.

        Args:
            transcript (str): The full transcript of the video.
            question (str): The user's question about the video.

        Returns:
            List[Dict[str, Any]]: Candidate answers; `_format_answer` reads the
                'answer' and 'confidence' keys of each item.
        """
        extraction_functions = [convert_to_openai_function(QuestionAnswerExtractor.Info)]
        llm = ChatOpenAI(temperature=get_temperature())
        # function_call forces the model to respond through the Info function.
        extraction_model = llm.bind(functions=extraction_functions, function_call={"name": "Info"})

        prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an AI assistant tasked with answering questions about a video based on its transcript."),
            ("human", "Transcript: {transcript}\n\nQuestion: {question}\n\nProvide an answer to the question based on the transcript, along with a confidence score.")
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="answers")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=192,
            chunk_size=8000,
            separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"],
        )

        def prepare_input(x):
            # Pair every transcript chunk with the same question.
            chunks = text_splitter.split_text(x['transcript'])
            return [{"transcript": chunk, "question": x['question']} for chunk in chunks]

        prep = RunnableLambda(prepare_input)
        chain = prep | extraction_chain.map() | QuestionAnswerExtractor._flatten
        return chain.invoke({"transcript": transcript, "question": question})

    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_answer(answers: List[Dict[str, Any]]) -> str:
        """
        Picks the highest-confidence answer and formats it.

        Args:
            answers (List[Dict[str, Any]]): Candidate answers, each with the
                keys 'answer' and 'confidence'.

        Returns:
            str: "<answer>(<confidence with 2 decimals>)" for the best answer,
                or a fallback message when `answers` is empty.
        """
        if not answers:
            return "I couldn't find an answer to your question based on the video transcript."

        # Take the single answer with the highest confidence score.
        best_answer = max(answers, key=lambda x: x['confidence'])
        return f"{best_answer['answer']}({best_answer['confidence']:.2f})"


###############################################################################


class YouTubeAgent:
    """
    An agent for interacting with YouTube videos and processing user queries.

    This class sets up the necessary components for an AI agent that can
    understand and respond to user queries about YouTube videos.
    """

    def __init__(self):
        """Initializes the YouTubeAgent with necessary tools and components."""
        self.tools = [
            MainPointsExtractor.get_youtube_video_main_points,
            SummaryExtractor.get_youtube_video_summary,
            QuestionAnswerExtractor.get_answer
        ]

        self.sys_message = "You are a helpful assistant."
        self.functions = [convert_to_openai_function(f) for f in self.tools]

        # The temperature is read once here; later set_temperature() calls do
        # not affect this already-constructed model.
        self.model = ChatOpenAI(temperature=get_temperature()).bind(functions=self.functions)

        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self.sys_message),
            MessagesPlaceholder(variable_name="history"),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad")
        ])

        # Convert the executor's intermediate steps into function-call
        # messages before they reach the prompt.
        self.agent_chain = RunnablePassthrough.assign(
            agent_scratchpad=lambda x: format_to_openai_functions(x["intermediate_steps"])
        ) | self.prompt | self.model | OpenAIFunctionsAgentOutputParser()

        # Keep a window of the last 3 exchanges under the "history" key.
        self.memory = ConversationBufferWindowMemory(k=3, return_messages=True, memory_key="history")
        self.agent_executor = AgentExecutor(agent=self.agent_chain, tools=self.tools, memory=self.memory)

    def invoke(self, input_text: str) -> str:
        """
        Processes a user input and returns the agent's response.

        Args:
            input_text (str): The user's input query.

        Returns:
            str: The agent's response to the user's query, or an
                "An error occurred: ..." message if the executor raised.
        """
        try:
            result = self.agent_executor.invoke({"input": input_text})
            return result['output']
        except Exception as e:
            return f"An error occurred: {str(e)}"


# Example usage:
# youtube_agent = YouTubeAgent()
# video_link = "https://www.youtube.com/watch?v=-OSxeoIAs2w"
# main_points = youtube_agent.invoke(f"The race involves which challenges in the following video {video_link}")