import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re
import json

# The pipelines are created lazily in load_models() so the app starts quickly;
# the flags below track whether each pipeline has been loaded yet.
classifier = None
summarizer = None

classification_model_loaded = False
summarization_model_loaded = False


def load_models():
    """Create the classification and summarization pipelines on first use."""
    global classifier, summarizer, classification_model_loaded, summarization_model_loaded
    if not classification_model_loaded:
        classifier = pipeline(
            "text-classification",
            model="Ahmed235/roberta_classification",
            tokenizer="Ahmed235/roberta_classification",
        )
        classification_model_loaded = True
    if not summarization_model_loaded:
        summarizer = pipeline("summarization", model="Falconsai/text_summarization")
        summarization_model_loaded = True


def extract_text_from_pptx(file_path):
    """Collect the text from every shape on every slide of a .pptx file."""
    try:
        presentation = Presentation(file_path)
        text = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    text.append(shape.text)
        return "\n".join(text)
    except Exception as e:
        print(f"Error extracting text from PowerPoint: {e}")
        return ""


def limit_text_length(text, max_length=512):
    """Trim the text to at most max_length characters before classification."""
    return text[:max_length]
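
# Note: the truncation above counts characters, not tokens, so long inputs may
# still exceed the classifier's token limit (512 for RoBERTa-style models).
# A token-aware sketch, assuming the classifier pipeline is already loaded
# (limit_text_tokens is illustrative, not part of the original app):
#
#   def limit_text_tokens(text, max_tokens=512):
#       ids = classifier.tokenizer(text, truncation=True, max_length=max_tokens)["input_ids"]
#       return classifier.tokenizer.decode(ids, skip_special_tokens=True)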


def predict_pptx_content(file_path):
    """Classify and summarize the text extracted from an uploaded .pptx file."""
    try:
        load_models()
        extracted_text = extract_text_from_pptx(file_path)
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # Classify a truncated copy of the text; the full cleaned text goes to the summarizer.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']

        summary = summarizer(cleaned_text, max_length=1000, min_length=30, do_sample=False)[0]['summary_text']

        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }
        return json.dumps(output, indent=3)
    except Exception as e:
        print(f"Error predicting content from PowerPoint: {e}")
        # Return JSON here too, so the Gradio textbox always receives a string.
        return json.dumps({"error": str(e)}, indent=3)


iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=gr.Textbox(label="Output"),
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)

iface.launch(share=True)