|
from pptx import Presentation |
|
import re |
|
import gradio as gr |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
import torch |
|
import torch.nn.functional as F |
|
from transformers import pipeline |
|
|
|
|
|
# Model identifiers passed to `from_pretrained` / `pipeline` below.
_CLASSIFIER_CHECKPOINT = "Ahmed235/roberta_classification"
_SUMMARIZER_CHECKPOINT = "Falconsai/text_summarization"

# Classification runs on the CPU; the model is moved there once at import time.
device = torch.device("cpu")

# The tokenizer and the sequence-classification model share one checkpoint.
tokenizer = AutoTokenizer.from_pretrained(_CLASSIFIER_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(
    _CLASSIFIER_CHECKPOINT
).to(device)

# Summarization pipeline applied to the extracted slide text.
summarizer = pipeline("summarization", model=_SUMMARIZER_CHECKPOINT)
|
|
|
def extract_text_from_pptx(file_path):
    """Gather the text of every text-bearing shape in a PowerPoint file.

    Args:
        file_path: Location of the .pptx file; passed straight to
            ``Presentation``.

    Returns:
        One string containing the ``text`` of each shape that exposes such
        an attribute, in slide order and then shape order, joined with
        newlines. The result is empty when no shape qualifies.
    """
    deck = Presentation(file_path)
    # Shapes without a ``text`` attribute are skipped.
    shape_texts = [
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts)
|
|
|
def predict_pptx_content(file_path):
    """Classify and summarize the text of a PowerPoint file.

    The slide text is extracted with ``extract_text_from_pptx``,
    whitespace-normalized, scored with the module-level ``tokenizer`` /
    ``model`` pair, and condensed with the module-level ``summarizer``.

    Args:
        file_path: Location of the .pptx file to analyze.

    Returns:
        On success, a dict with the keys ``"Predicted Label"``,
        ``"Evaluation"`` and ``"Summary"``. On failure, including a
        presentation that yields no text, a dict with the single key
        ``"error"`` holding a message. Exceptions are not propagated.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)

        # Collapse each whitespace run to one space and trim the ends, so a
        # deck whose shapes are all blank reduces to the empty string.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()

        # Nothing to classify or summarize: report it instead of feeding an
        # empty string to the models.
        if not cleaned_text:
            return {"error": "No text found in the presentation."}

        input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
        input_encoding = {key: val.to(device) for key, val in input_encoding.items()}

        # Inference only; no gradients are needed.
        with torch.no_grad():
            outputs = model(**input_encoding)
            logits = outputs.logits

        probabilities = F.softmax(logits, dim=1)

        predicted_label_id = torch.argmax(logits, dim=1).item()
        predicted_label = model.config.id2label[predicted_label_id]
        predicted_probability = probabilities[0][predicted_label_id].item()

        # truncation=True makes the pipeline cut over-long input instead of
        # passing the whole deck to the summarization model.
        summary = summarizer(
            cleaned_text,
            max_length=80,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']

        prediction = {
            "Predicted Label": predicted_label,
            "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
            "Summary": summary
        }

        return prediction

    except Exception as e:
        # UI boundary: log the failure and hand the message back to the
        # caller rather than raising.
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}
|
|
|
|
|
# `predict_pptx_content` returns one dict (either the prediction fields or an
# {"error": ...} payload), so it is bound to a single JSON output component.
# Declaring three text outputs would require the function to return three
# separate values.
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=gr.JSON(label="Analysis"),
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
)

# Start the web UI; share=True additionally requests a public share link.
iface.launch(share=True)
|
|