Spaces:

Ahmed235
/

final

Sleeping

App Files Files Community

final / app.py

Ahmed235

Update app.py

95d05cb verified 10 months ago

raw

history blame

2.74 kB

	from pptx import Presentation
	import re
	import gradio as gr
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	import torch.nn.functional as F
	from transformers import pipeline

	# The tokenizer and the sequence-classification model are both loaded from
	# the same checkpoint identifier, so it is named once here.
	_CLASSIFIER_CHECKPOINT = "Ahmed235/roberta_classification"

	# All inference in this script runs on the CPU.
	device = torch.device("cpu")

	tokenizer = AutoTokenizer.from_pretrained(_CLASSIFIER_CHECKPOINT)
	model = AutoModelForSequenceClassification.from_pretrained(_CLASSIFIER_CHECKPOINT).to(device)

	# Summarization pipeline used to condense the extracted slide text.
	summarizer = pipeline("summarization", model="Falconsai/text_summarization")

	def extract_text_from_pptx(file_path):
	    """Return the text of every shape in a presentation, one shape per line.

	    The file at ``file_path`` is opened with ``Presentation``. Slides are
	    visited in order and, within each slide, shapes in order. Shapes that
	    do not expose a ``text`` attribute are skipped. The collected strings
	    are joined with newline characters.
	    """
	    deck = Presentation(file_path)
	    return "\n".join(
	        shape.text
	        for slide in deck.slides
	        for shape in slide.shapes
	        if hasattr(shape, "text")
	    )

	def predict_pptx_content(file_path):
	    """Classify and summarize the text of a PowerPoint file.

	    Args:
	        file_path: Path to the ``.pptx`` file; forwarded unchanged to
	            ``extract_text_from_pptx``.

	    Returns:
	        On success, a dict with the keys ``"Predicted Label"``,
	        ``"Evaluation"`` and ``"Summary"``. If the presentation contains
	        no text, or if any step raises, a dict with the single key
	        ``"error"`` holding a message. Exceptions are not propagated.
	    """
	    try:
	        extracted_text = extract_text_from_pptx(file_path)
	        # Collapse every run of whitespace (including newlines) to one space.
	        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

	        # Without any text there is nothing meaningful to classify or
	        # summarize, so report that explicitly instead of running the models.
	        if not cleaned_text.strip():
	            return {"error": "No text found in the presentation."}

	        # Tokenize and encode the cleaned text, then move tensors to the
	        # same device as the model.
	        input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
	        input_encoding = {key: val.to(device) for key, val in input_encoding.items()}

	        # Perform inference; no gradients are needed for prediction.
	        with torch.no_grad():
	            logits = model(**input_encoding).logits
	            probabilities = F.softmax(logits, dim=1)

	        predicted_label_id = torch.argmax(logits, dim=1).item()
	        predicted_label = model.config.id2label[predicted_label_id]
	        predicted_probability = probabilities[0][predicted_label_id].item()

	        # Summarize the cleaned text
	        summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']

	        return {
	            "Predicted Label": predicted_label,
	            "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
	            "Summary": summary,
	        }

	    except Exception as e:
	        # This function is the UI callback boundary: log the failure and
	        # hand the message back as data rather than raising.
	        print(f"Error in predict_pptx_content: {e}")
	        return {"error": str(e)}

	# Define the Gradio interface.
	# predict_pptx_content returns a single dict (either the result fields or
	# {"error": ...}), so it is shown through ONE JSON output component. Three
	# separate text outputs would require the function to return three values.
	iface = gr.Interface(
	    fn=predict_pptx_content,
	    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
	    outputs=gr.JSON(label="Analysis"),
	    live=False,  # run only on submit; True would re-run whenever the input changes
	    title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
	)

	# Deploy the Gradio interface
	iface.launch(share=True)