""" |
YouTube Video Analysis and Interaction Module |
This module provides a comprehensive set of tools for analyzing YouTube videos, |
extracting information, and answering questions based on video content. It leverages |
the LangChain library for natural language processing tasks and the YouTube Transcript |
API for fetching video transcripts. |
Classes: |
MainPointsExtractor: |
Extracts and formats main points from YouTube video transcripts. |
Timestamps are formatted for direct use in YouTube comments, enabling clickable |
links to specific video sections when pasted. |
SummaryExtractor: |
Handles the extraction and formatting of video summaries. |
QuestionAnswerExtractor: |
Processes user questions and extracts answers from video transcripts. |
YouTubeAgent: |
Manages the overall agent setup for interacting with YouTube videos and processing user queries. |
Key Features: |
- Main points summarization in multiple formats |
- Video content summarization |
- Question answering based on video content |
- Flexible AI agent for handling various YouTube video-related tasks |
""" |
import os |
import openai |
from typing import List, Dict, Any, Union, Type |
from youtube_transcript_api import YouTubeTranscriptApi |
from langchain_core.pydantic_v1 import BaseModel, Field |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
from langchain_openai import ChatOpenAI |
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough |
from langchain.agents import tool, AgentExecutor |
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser, JsonOutputFunctionsParser |
from langchain.text_splitter import RecursiveCharacterTextSplitter |
from langchain_core.utils.function_calling import convert_to_openai_function |
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser |
from langchain.agents.format_scratchpad import format_to_openai_functions |
from langchain.memory import ConversationBufferWindowMemory |
from dotenv import load_dotenv, find_dotenv |
# Load variables from the .env file located by find_dotenv() into the process
# environment before the API key is read below.
_ = load_dotenv(dotenv_path=find_dotenv())
# Subscript access (not .get): a missing OPENAI_API_KEY raises KeyError at import time.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Temperature handed to every ChatOpenAI(...) constructed in this module via
# get_temperature(). Starts at 0, matching the original default.
_temperature = 0


def get_temperature():
    """Return the sampling temperature currently configured for this module.

    Returns:
        The value most recently passed to set_temperature(), or 0 if
        set_temperature() has never been called.
    """
    return _temperature


def set_temperature(new_temperature):
    """Change the value reported by get_temperature().

    The value is kept in a module-level variable instead of rebinding the
    global ``get_temperature`` name. Rebinding left every reference obtained
    earlier (for example through ``from module import get_temperature``)
    pointing at the old function, which kept returning the stale value.

    Args:
        new_temperature: Temperature that subsequent get_temperature() calls
            return. Models that were already constructed are not affected.
    """
    global _temperature
    _temperature = new_temperature
class Points_1(BaseModel):
    """Pydantic model for representing extracted points from Youtube-Transcript"""

    # MainPointsExtractor.Info_1 holds a list of these and is passed to
    # convert_to_openai_function, so the docstring and the description strings
    # below are presumably model-facing text; they are kept verbatim.

    # Position in the video, as a float.
    timestamp: float = Field(
        description="The timestamp (in floating-point number) of when main points are discussed or talked about in the video.",
    )
    # Short title for the point.
    main_point: str = Field(
        description="A title for Main point.",
    )
    # Explanation of what is discussed at that timestamp.
    summary: str = Field(
        description="A summary of main points discussed at that timestamp. End with fullstop.",
    )
    # Emoji shown next to the point in the formatted comment.
    emoji: str = Field(
        description="An emoji that matches the summary.",
    )
class Points_2(BaseModel):
    """Pydantic model for representing extracted points."""

    # Same four fields as Points_1, declared in a different order and with
    # different description wording. MainPointsExtractor.Info_2 holds a list of
    # these and is passed to convert_to_openai_function, so the strings below
    # are presumably model-facing text; they are kept verbatim.

    # Topic or theme of the point.
    main_point: str = Field(
        description="The main topic, theme, or subject extracted from the subtitle.",
    )
    # Context for the point.
    summary: str = Field(
        description="The context or brief explanation of the main point.",
    )
    # Emoji shown next to the point in the formatted comment.
    emoji: str = Field(
        description="An emoji that represents or summarizes the main point.",
    )
    # Position in the video, as a float.
    timestamp: float = Field(
        description="The timestamp (in floating-point number) from the video where the main point is mentioned.",
    )
class MainPointsExtractor:
    """
    A tool for extracting and formatting main points from YouTube video transcripts.

    The transcript is fetched with per-caption start times, split into chunks,
    and each chunk is sent to a ChatOpenAI model bound to an extraction
    function. The extracted points are rendered as ``HH:MM:SS emoji title: summary``
    lines suitable for pasting into a YouTube comment.
    """

    # The docstrings of Info_1 / Info_2 are left verbatim: the classes are
    # passed to convert_to_openai_function, so the text is presumably part of
    # the function schema sent to the model.
    class Info_1(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_1]

    class Info_2(BaseModel):
        """Pydantic model for representing a collection of points."""
        points: List[Points_2]

    # NOTE: the docstring below is consumed by @tool (it describes the tool to
    # the agent model), so its wording is kept as-is.
    @staticmethod
    @tool(return_direct=True)
    def get_youtube_video_main_points(youtube_video_id: str) -> str:
        """
        Extracts and formats main points with Timestamps from YouTube video transcripts. Timestamps are formatted for direct use in YouTube comments, enabling clickable links to specific video sections when pasted.
        Args:
            youtube_video_id (str): The ID of the YouTube video.
        Returns:
            str: Formatted string of main points extracted from the video.
        """
        # Exceptions from the helpers propagate unchanged; the former
        # `except Exception as e: raise` wrapper was a no-op and has been removed.
        transcript = MainPointsExtractor._get_youtube_video_transcript(youtube_video_id)
        main_points_1 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_1)
        main_points_2 = MainPointsExtractor._extract_main_points(transcript, MainPointsExtractor.Info_2)
        # As in the original, "Style_1" shows the Info_2 result and "Style_2"
        # shows the Info_1 result.
        formatted_output = f"""Main points extracted from YouTube video (ID: {youtube_video_id})\nStyle_1:\n```\n{main_points_2}\n```\nStyle_2:\n```\n{main_points_1}\n```\nChoose the style that best suits your needs for presenting the main points of the video."""
        return formatted_output

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetch the transcript of a video with each caption prefixed by its start time.

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: All captions concatenated as ``"<start with 2 decimals>: <text> "``.

        Raises:
            Whatever YouTubeTranscriptApi.get_transcript raises; nothing is
            caught here.
        """
        transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return "".join(f"{entry['start']:.2f}: {entry['text']} " for entry in transcript_json)

    @staticmethod
    def _extract_main_points(transcript: str, info_model: Union[Type[Info_1], Type[Info_2]]) -> str:
        """
        Extract main points from a timestamped transcript and format them.

        The transcript is split into chunks, every chunk is run through the
        extraction chain independently, and the per-chunk results are flattened
        and passed to _format_youtube_comment.

        Args:
            transcript (str): The timestamped transcript of the video.
            info_model: Info_1 or Info_2; selects the function schema that the
                model is asked to fill in.

        Returns:
            str: The formatted comment text (one line per extracted point).
        """
        extraction_functions = [convert_to_openai_function(info_model)]
        model = ChatOpenAI(temperature=get_temperature())
        # Force the model to call the extraction function (the same approach
        # QuestionAnswerExtractor uses). Without this the model may reply in
        # plain text, which JsonKeyOutputFunctionsParser cannot parse.
        extraction_model = model.bind(
            functions=extraction_functions,
            function_call={"name": extraction_functions[0]["name"]},
        )
        # Plain string (no placeholders are interpolated); the text matches the
        # original prompt line for line.
        system_message = (
            "\n"
            "You are an AI assistant that extracts info from video transcripts.\n"
            "When extracting info, ensure that:\n"
            "1. Each point has a unique timestamp.\n"
            "In addition to these specific requirements, you have the authority to make other improvements as you see fit. This may include:\n"
            "- Refining the summaries for clarity and conciseness\n"
            "- Adjusting emoji choices to better represent the content\n"
            "- Reorganizing points for better logical flow\n"
            "- Removing redundant information\n"
            "- Adding context where necessary\n"
            "Your goal is to produce a refined and accurate representation of the main points from the video transcript. Use your judgment to balance adherence to the specific rules with overall improvement of the extracted information.\n"
        )
        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}"),
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="points")
        # Separators are a space followed by a digit 1-9 -- presumably so that
        # chunk boundaries fall at the start of a "<start>: <text>" entry.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=0,
            chunk_size=8192,
            separators=[f" {char}" for char in "123456789"],
        )
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])
        chain = (
            prep
            | extraction_chain.map()
            | MainPointsExtractor._flatten
            | MainPointsExtractor._format_youtube_comment
        )
        return chain.invoke(transcript)

    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_youtube_comment(json_data: List[Dict[str, Any]]) -> str:
        """
        Format extracted main points as YouTube-comment lines.

        Args:
            json_data (List[Dict[str, Any]]): Dicts with the keys 'timestamp',
                'emoji', 'summary' and 'main_point'.

        Returns:
            str: One ``HH:MM:SS emoji title: summary`` line per entry, joined
            with newlines and stripped of surrounding whitespace.

        Raises:
            KeyError: If an entry lacks one of the four keys.
        """
        def _format_timestamp(seconds):
            # Seconds -> zero-padded HH:MM:SS (fractions are truncated).
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            secs = int(seconds % 60)
            return f"{hours:02}:{minutes:02}:{secs:02}"

        lines = []
        for entry in json_data:
            timestamp = _format_timestamp(entry['timestamp'])
            emoji = entry['emoji']
            summary = entry['summary']
            point = entry['main_point']
            # Drop a single trailing period so the title reads "Title: summary".
            if point.endswith('.'):
                point = point[:-1]
            lines.append(f"{timestamp} {emoji} {point}: {summary}")
        return "\n".join(lines).strip()
class Summary(BaseModel):
    """Pydantic model for representing extracted summary."""

    # SummaryExtractor.Info holds a list of these and is passed to
    # convert_to_openai_function, so the docstring and description are
    # presumably model-facing text; they are kept verbatim.
    summary: str = Field(
        description="Extract detailed information from the content.",
    )
class SummaryExtractor:
    """
    A tool for extracting and formatting summaries from YouTube video transcripts.

    The plain-text transcript is split into chunks, each chunk is sent to a
    ChatOpenAI model bound to an extraction function, and the per-chunk
    summaries are joined into one string.
    """

    # Docstring left verbatim: the class is passed to
    # convert_to_openai_function, so the text is presumably part of the
    # function schema sent to the model.
    class Info(BaseModel):
        """Pydantic model for representing a collection of summaries."""
        summary: List[Summary]

    # NOTE: the docstring below is consumed by @tool (it describes the tool to
    # the agent model), so its wording is kept as-is.
    @staticmethod
    @tool(return_direct=False)
    def get_youtube_video_summary(youtube_video_id: str) -> str:
        """
        Extracts and formats a summary from a YouTube video transcript.
        Args:
            youtube_video_id (str): The ID of the YouTube video.
        Returns:
            str: Formatted string of the summary extracted from the video.
        """
        try:
            transcript = SummaryExtractor._get_youtube_video_transcript(youtube_video_id)
            summary = SummaryExtractor._extract_summary(transcript)
            return SummaryExtractor._format_summary(summary)
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error extracting summary: {str(e)}"

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetch the transcript of a video as plain text (no timestamps).

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The caption texts joined with single spaces.

        Raises:
            Whatever YouTubeTranscriptApi.get_transcript raises; nothing is
            caught here (the former `except ...: raise` wrapper was a no-op).
        """
        transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return " ".join(entry['text'] for entry in transcript_json)

    @staticmethod
    def _extract_summary(transcript: str) -> List[Dict[str, Any]]:
        """
        Extract summaries from a transcript, chunk by chunk.

        Args:
            transcript (str): The full transcript of the video.

        Returns:
            List[Dict[str, Any]]: The flattened per-chunk results of the
            extraction chain; _format_summary reads the "summary" key of each
            entry.
        """
        summary_functions = [convert_to_openai_function(SummaryExtractor.Info)]
        model = ChatOpenAI(temperature=get_temperature())
        # Force the model to call the extraction function (the same approach
        # QuestionAnswerExtractor uses). Without this the model may reply in
        # plain text, which JsonKeyOutputFunctionsParser cannot parse.
        extraction_model = model.bind(
            functions=summary_functions,
            function_call={"name": summary_functions[0]["name"]},
        )
        prompt = ChatPromptTemplate.from_messages([("human", "{input}")])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="summary")
        # Separators are a space followed by an uppercase ASCII letter.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=0,
            chunk_size=8192,
            separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"],
        )
        prep = RunnableLambda(lambda x: [{"input": doc} for doc in text_splitter.split_text(x)])
        # Reuses MainPointsExtractor._flatten, as the original did.
        chain = prep | extraction_chain.map() | MainPointsExtractor._flatten
        return chain.invoke(transcript)

    @staticmethod
    def _format_summary(summaries: List[Dict[str, Any]]) -> str:
        """
        Join the extracted summaries into a single string.

        Args:
            summaries (List[Dict[str, Any]]): Entries with a "summary" key.

        Returns:
            str: The summary texts separated by blank lines.
        """
        return "\n\n".join(s["summary"] for s in summaries)
class Answer(BaseModel):
    """Pydantic model for representing an answer to a question."""

    # QuestionAnswerExtractor.Info holds a list of these and is passed to
    # convert_to_openai_function, so the docstring and descriptions are
    # presumably model-facing text; they are kept verbatim.

    # Answer text produced for one transcript chunk.
    answer: str = Field(
        description="The answer to the user's question based on the video transcript.",
    )
    # Score used by QuestionAnswerExtractor._format_answer to pick the best answer.
    confidence: float = Field(
        description="A confidence score between 0 and 1 indicating how certain the model is about the answer.",
    )
class QuestionAnswerExtractor:
    """
    A tool for answering questions about YouTube videos based on their transcripts.

    The plain-text transcript is split into overlapping chunks, the question is
    asked against every chunk, and the answer with the highest confidence
    score is returned.
    """

    # Docstring left verbatim: the class is passed to
    # convert_to_openai_function, so the text is presumably part of the
    # function schema sent to the model.
    class Info(BaseModel):
        """Pydantic model for representing a collection of answers."""
        answers: List[Answer]

    # NOTE: the docstring below is consumed by @tool (it describes the tool to
    # the agent model), so its wording is kept as-is.
    @staticmethod
    @tool(return_direct=True)
    def get_answer(youtube_video_id: str, question: str) -> str:
        """
        Answers a question about a YouTube video based on its transcript.
        Args:
            youtube_video_id (str): The ID of the YouTube video.
            question (str): The user's question about the video.
        Returns:
            str: Formatted string containing the answer to the user's question.
        """
        try:
            transcript = QuestionAnswerExtractor._get_youtube_video_transcript(youtube_video_id)
            answer = QuestionAnswerExtractor._extract_answer(transcript, question)
            return QuestionAnswerExtractor._format_answer(answer)
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error answering question: {str(e)}"

    @staticmethod
    def _get_youtube_video_transcript(youtube_video_id: str) -> str:
        """
        Fetch the transcript of a video as plain text (no timestamps).

        Args:
            youtube_video_id (str): The ID of the YouTube video.

        Returns:
            str: The caption texts joined with single spaces.

        Raises:
            Whatever YouTubeTranscriptApi.get_transcript raises; nothing is
            caught here (the former `except ...: raise` wrapper was a no-op).
        """
        transcript_json = YouTubeTranscriptApi.get_transcript(youtube_video_id)
        return " ".join(entry['text'] for entry in transcript_json)

    @staticmethod
    def _extract_answer(transcript: str, question: str) -> List[Dict[str, Any]]:
        """
        Ask the question against every chunk of the transcript.

        Args:
            transcript (str): The full transcript of the video.
            question (str): The user's question about the video.

        Returns:
            List[Dict[str, Any]]: The flattened per-chunk results of the
            extraction chain; _format_answer reads the "answer" and
            "confidence" keys of each entry.
        """
        answer_extraction_function = [convert_to_openai_function(QuestionAnswerExtractor.Info)]
        model = ChatOpenAI(temperature=get_temperature())
        # The function call is forced so the reply is always parseable by
        # JsonKeyOutputFunctionsParser.
        extraction_model = model.bind(functions=answer_extraction_function, function_call={"name": "Info"})
        prompt = ChatPromptTemplate.from_messages([
            ("system", "You are an AI assistant tasked with answering questions about a video based on its transcript."),
            ("human", "Transcript: {transcript}\n\nQuestion: {question}\n\nProvide an answer to the question based on the transcript, along with a confidence score."),
        ])
        extraction_chain = prompt | extraction_model | JsonKeyOutputFunctionsParser(key_name="answers")
        # Separators are a space followed by an uppercase ASCII letter; chunks
        # overlap by 192 characters.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_overlap=192,
            chunk_size=8000,
            separators=[f" {char}" for char in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"],
        )

        def prepare_input(x):
            # One prompt input per chunk, each carrying the same question.
            chunks = text_splitter.split_text(x['transcript'])
            return [{"transcript": chunk, "question": x['question']} for chunk in chunks]

        prep = RunnableLambda(prepare_input)
        chain = prep | extraction_chain.map() | QuestionAnswerExtractor._flatten
        return chain.invoke({"transcript": transcript, "question": question})

    @staticmethod
    def _flatten(matrix):
        """Flattens a 2D list into a 1D list."""
        return [item for row in matrix for item in row]

    @staticmethod
    def _format_answer(answers: List[Dict[str, Any]]) -> str:
        """
        Pick the highest-confidence answer and format it.

        Args:
            answers (List[Dict[str, Any]]): Entries with "answer" and
                "confidence" keys.

        Returns:
            str: ``"<answer>(<confidence with 2 decimals>)"`` for the best
            entry, or a fixed fallback sentence when the list is empty.

        Raises:
            KeyError: If the selected entry has no "answer" key.
        """
        if not answers:
            return "I couldn't find an answer to your question based on the video transcript."

        def _confidence(entry):
            # The entries are unvalidated model output: a missing or
            # non-numeric score counts as 0 so that one malformed chunk result
            # does not discard the usable answers from other chunks.
            try:
                return float(entry.get('confidence', 0.0))
            except (TypeError, ValueError):
                return 0.0

        best_answer = max(answers, key=_confidence)
        return f"{best_answer['answer']}({_confidence(best_answer):.2f})"
class YouTubeAgent:
    """
    Conversational agent that answers user queries about YouTube videos.

    On construction it wires the three extractor tools, a function-calling
    ChatOpenAI model, a prompt with history and scratchpad slots, a windowed
    conversation memory, and an AgentExecutor that ties them together.
    """

    def __init__(self):
        """Build the tools, model, prompt, chain, memory and executor."""
        self.tools = [
            MainPointsExtractor.get_youtube_video_main_points,
            SummaryExtractor.get_youtube_video_summary,
            QuestionAnswerExtractor.get_answer,
        ]
        self.sys_message = "You are a helpful assistant."

        # Expose every tool to the model as an OpenAI function definition.
        self.functions = list(map(convert_to_openai_function, self.tools))
        llm = ChatOpenAI(temperature=get_temperature())
        self.model = llm.bind(functions=self.functions)

        prompt_messages = [
            ("system", self.sys_message),
            MessagesPlaceholder(variable_name="history"),
            ("user", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
        self.prompt = ChatPromptTemplate.from_messages(prompt_messages)

        def _scratchpad(state):
            # Convert the executor's intermediate steps into scratchpad messages.
            return format_to_openai_functions(state["intermediate_steps"])

        with_scratchpad = RunnablePassthrough.assign(agent_scratchpad=_scratchpad)
        self.agent_chain = with_scratchpad | self.prompt | self.model | OpenAIFunctionsAgentOutputParser()

        # memory_key matches the "history" placeholder in the prompt; k=3 is
        # the window size passed to ConversationBufferWindowMemory.
        self.memory = ConversationBufferWindowMemory(k=3, return_messages=True, memory_key="history")
        self.agent_executor = AgentExecutor(agent=self.agent_chain, tools=self.tools, memory=self.memory)

    def invoke(self, input_text: str) -> str:
        """
        Run one user query through the agent executor.

        Args:
            input_text (str): The user's input query.

        Returns:
            str: The executor result's 'output' value, or
            "An error occurred: <message>" if anything raised.
        """
        try:
            return self.agent_executor.invoke({"input": input_text})['output']
        except Exception as error:
            return f"An error occurred: {error!s}"