"""Streamlit app: upload a PDF, preview it, and generate slide content.

The PDF text is extracted page by page with pdfplumber, summarized with a
Hugging Face summarization pipeline, and sent to a Groq-hosted LLM (through
pydantic_ai) that returns a structured ``presentation.PPT`` object.
"""
import asyncio
import os

import nest_asyncio
import pdfplumber
import streamlit as st
import torch
from pydantic_ai import Agent
from pydantic_ai.models.groq import GroqModel
from pydantic_ai.result import ResultData, RunResult
from streamlit_pdf_viewer import pdf_viewer
from transformers import pipeline

import presentation as customClass

api_key = os.getenv("API_KEY")

# Text of the uploaded PDF, one entry per page (filled by extract_data).
data = []
last_message = ''

# Model used to generate the presentation content.
model = GroqModel('llama-3.1-70b-versatile', api_key=api_key)


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every widget interaction; caching
    avoids reloading the model each time.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Pipeline used to summarize the extracted PDF text.
summarizer = _load_summarizer()

# Instructions given to the slide-generating agent. The wording is kept
# exactly as it was; only the tuple structure was corrected (see below).
_SYSTEM_PROMPT = (
    "You are an expert in making power-point perssentation",
    "Create 6 sliders",
    "Title Slide: short into about the presentation",
    "Methodology Slide: Summarize the methodology in detail",
    "Results Slide: Present key findings in detail in simple text and bullet points.",
    "Discussion Slide: Summarize the implications and limitations.",
    "Conclusion Slide: State the overall conclusion.",
    "Reference Slide: Include all citations used.",
    "Each slide should be seperate",
    # A comma was missing after this item, which silently concatenated it
    # with the "1. Title" entry.
    "Each slide should have 4 parts :",
    "1. Title : title of the slide ",
    "2. Text: he detailed description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
    "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be detail, long, and highlight a specific aspect of the slide's topic. ideally, limit to 3-5 points.",
    "4. Image Suggestion: A prompt for generating an image to complement the slide content. "
    "Describe the desired visual in detail, including elements, style, and relevance to the topic. Ensure the prompt is actionable for AI tools.",
)


def _as_text(content, sep="\n"):
    """Return *content* as a single string.

    A ``str`` is returned unchanged. Any other iterable is treated as a
    sequence of text fragments (e.g. PDF pages) joined with *sep*; empty or
    ``None`` fragments are skipped.
    """
    if isinstance(content, str):
        return content
    return sep.join(part for part in content if part)


def split_long_string(long_string, chunk_size=3500):
    """Split text into chunks of at most *chunk_size* words.

    Args:
        long_string: A string, or an iterable of strings (e.g. page texts).
        chunk_size: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of strings, each holding up to *chunk_size* words joined by a
        single space. Empty input yields an empty list.
    """
    # Join fragments with a space so that the last word of one page is not
    # glued to the first word of the next.
    words = _as_text(long_string, sep=" ").split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


async def ppt_content(data):
    """Ask the LLM agent for structured slide content for the given text.

    Args:
        data: The document text, as a string or an iterable of page strings.

    Returns:
        The agent's result data (requested as ``customClass.PPT``).
    """
    agent = Agent(model, result_type=customClass.PPT, system_prompt=_SYSTEM_PROMPT)

    # Debug output: show how the document would be chunked.
    # NOTE: the chunks are only printed; the full text is sent in one request.
    for i, chunk in enumerate(split_long_string(data)):
        print(f"Chunk {i}:\n{chunk}\n")

    # We are already inside a coroutine, so await the async API instead of
    # calling the blocking run_sync(). The prompt must be one string, not the
    # list of pages.
    result = await agent.run(user_prompt=_as_text(data))
    print(result.data)
    return result.data


def ai_ppt(data):
    """Summarize the document text and generate the presentation content.

    Args:
        data: The document text, as a string or an iterable of page strings.

    Returns:
        The result of ``ppt_content``, or ``None`` when there is no text.
    """
    pages = [data] if isinstance(data, str) else [page for page in data if page]
    if not pages:
        print("No text available to summarize.")
        return None

    # truncation=True keeps over-long pages within the model's input limit
    # instead of failing inside the pipeline.
    summary = summarizer(
        pages, max_length=130, min_length=30, do_sample=False, truncation=True
    )
    print(summary)
    return asyncio.run(ppt_content(data=pages))


def extract_data(feed):
    """Append the text of every page of the PDF *feed* to the global ``data``.

    Args:
        feed: A path or file-like object accepted by ``pdfplumber.open``.
    """
    with pdfplumber.open(feed) as pdf:
        for page in pdf.pages:
            # extract_text() may return None for pages without text.
            data.append(page.extract_text() or "")
    return None


def main():
    """Render the upload widget, the PDF preview and the "Make PPT" button."""
    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
    if uploaded_file is None:
        return

    extract_data(uploaded_file)
    pdf_viewer(input=uploaded_file.getvalue(), width=700)

    if st.button("Make PPT"):
        ai_ppt(data)


if __name__ == '__main__':
    # Allow asyncio.run() even if an event loop is already running.
    nest_asyncio.apply()
    main()