Spaces:
Running
Running
File size: 3,469 Bytes
5d14cc6 50d1ce2 4ba3023 ec1c0d9 0e1f166 ec1c0d9 d553fab ec1c0d9 a8e9d4c ec1c0d9 03b4c19 ec1c0d9 a8e9d4c 0e1f166 ec1c0d9 0e1f166 1cd8371 c5224f2 04f5bb7 2afa0ec 5d14cc6 d553fab 3745704 d553fab 5d14cc6 a8e9d4c 75fbe45 bd94ab4 c276872 5d14cc6 bd94ab4 599d94c 823af88 0e1f166 5d14cc6 c5224f2 b417242 c5224f2 c276872 50d1ce2 2afa0ec 0e1f166 60e8e5d 0e1f166 60e8e5d 5d14cc6 0e1f166 5d14cc6 ec1c0d9 86551a1 ec1c0d9 4f86a6f c5224f2 ec1c0d9 a91875b 0e1f166 a91875b a5f868b ec1c0d9 4ba3023 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import asyncio
from pydantic_ai.result import ResultData, RunResult
import streamlit as st
from pydantic_ai import Agent
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
import pdfplumber
from transformers import pipeline
import torch
import os
import presentation as customClass
# API key for the Groq-hosted model, read from the API_KEY environment
# variable (os.getenv returns None when the variable is not set).
api_key = os.getenv("API_KEY")
# Module-level list that extract_data() appends each page's extracted text to.
data = []
# NOTE(review): not referenced anywhere in the visible code - confirm it is
# unused before removing.
last_message = ''
# Model handed to the pydantic_ai Agent that drafts the slide content.
model = GroqModel('llama-3.1-70b-versatile', api_key = api_key)
# Hugging Face summarization pipeline, constructed once when the module runs.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def split_long_string(long_string, chunk_size=3500):
    """Split text into chunks of at most ``chunk_size`` whitespace-separated words.

    Args:
        long_string: Either a single string or an iterable of strings (for
            example one entry per PDF page). Empty or ``None`` entries in an
            iterable are skipped.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of strings, each holding up to ``chunk_size`` words joined by
        single spaces. The list is empty when the input contains no words.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    if isinstance(long_string, str):
        text = long_string
    else:
        # Join pieces with a space: joining with "" would fuse the last word
        # of one piece with the first word of the next.
        text = " ".join(piece for piece in long_string if piece)

    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
async def ppt_content(data):
    """Ask the LLM agent to draft the slide content for the given document text.

    Args:
        data: The document text, either as one string or as an iterable of
            strings (for example one entry per PDF page). Empty or ``None``
            entries in an iterable are skipped.

    Returns:
        The agent's structured result (``result.data``, validated against
        ``customClass.PPT``). The same value is also printed to stdout.
    """
    agent = Agent(
        model,
        result_type=customClass.PPT,
        system_prompt=(
            "You are an expert in making power-point perssentation",
            "Create 6 sliders",
            "Title Slide: short into about the presentation",
            "Methodology Slide: Summarize the methodology in detail",
            "Results Slide: Present key findings in detail in simple text and bullet points.",
            "Discussion Slide: Summarize the implications and limitations.",
            "Conclusion Slide: State the overall conclusion.",
            "Reference Slide: Include all citations used.",
            "Each slide should be seperate",
            # NOTE: the next two literals are adjacent with no comma, so
            # Python concatenates them into a single prompt entry. Kept as in
            # the original so the prompt sent to the model is unchanged.
            "Each slide should have 4 parts :"
            "1. Title : title of the slide ",
            "2. Text: he detailed description or narrative content of the slide. This should include key information, explanations, or supporting arguments. Keep it concise yet informative to avoid overwhelming the audience.",
            "3. Bullet point: A list of bullet points summarizing key information on the slide. Each bullet point should be detail, long, and highlight a specific aspect of the slide's topic. ideally, limit to 3-5 points.",
            "4. Image Suggestion: A prompt for generating an image to complement the slide content. Describe the desired visual in detail, including elements, style, and relevance to the topic. Ensure the prompt is actionable for AI tools.",
        ),
    )

    # The agent takes one prompt string, so a list of page texts is merged
    # into a single string instead of being passed through as a list.
    if isinstance(data, str):
        prompt = data
    else:
        prompt = "\n".join(page for page in data if page)

    # Debug output: show how the text splits into word-limited chunks.
    for i, chunk in enumerate(split_long_string(prompt)):
        print(f"Chunk {i}:\n{chunk}\n")

    # TODO: send the chunks one at a time, chaining message_history between
    # calls, so that documents larger than the model context can be handled.

    # Await the async API: the blocking run_sync() wrapper does not belong
    # inside a coroutine whose event loop is already running.
    result = await agent.run(user_prompt=prompt)
    print(result.data)
    return result.data
def ai_ppt(data):
    """Summarize the extracted document text, then generate the slide content.

    Args:
        data: The document text, either as one string or as a list of strings
            (for example one entry per PDF page).

    Returns:
        None. The summary is printed to stdout; when ``data`` holds no
        non-blank text a notice is printed and nothing else is run.
    """
    pages = [data] if isinstance(data, str) else list(data)
    # Drop entries with no usable text (None, empty or whitespace-only).
    pages = [page for page in pages if page and page.strip()]
    if not pages:
        print("No text to summarize; skipping presentation generation.")
        return

    # truncation=True asks the pipeline to clip inputs that exceed the
    # model's maximum input length rather than failing on long pages.
    summary = summarizer(
        pages,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    print(summary)
    asyncio.run(ppt_content(data=data))
def extract_data(feed):
    """Append the text of every page of a PDF to the module-level ``data`` list.

    Args:
        feed: The PDF source handed to ``pdfplumber.open`` (the caller passes
            the object returned by the Streamlit file uploader).

    Returns:
        None. Results are collected in ``data``, which is also printed.
    """
    with pdfplumber.open(feed) as document:
        # One entry per page, in page order.
        data.extend(page.extract_text() for page in document.pages)
    print(data)
# if data is not None:
# st.caption(data)
# ai_ppt(data=data)
def main():
    """Render the Streamlit page: a PDF uploader and a "Make PPT" button.

    The uploaded PDF's text is extracted into the module-level ``data`` list;
    pressing the button passes that list to ``ai_ppt``.
    """
    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
    if uploaded_file is None:
        # No document yet: do not offer generation, so the summarizer and the
        # LLM can never be invoked on empty input.
        return

    extract_data(uploaded_file)
    if st.button("Make PPT"):
        ai_ppt(data)
if __name__ == '__main__':
    # asyncio is already imported at the top of the file, so it is not
    # re-imported here.
    # nest_asyncio patches asyncio to permit nested event-loop use, i.e.
    # running a loop while another one is already running.
    nest_asyncio.apply()
    main()
|