Spaces:

SMD00
/

Image_Summarizer

Runtime error

App Files Community

Image_Summarizer / app.py

SMD00

Update app.py

1f0fa3b over 1 year ago

raw

history blame contribute delete

7.27 kB

	import gradio as gr
	from PIL import Image
	import pytesseract
	import torch
	import numpy as np
	import nltk
	nltk.download('stopwords')
	nltk.download('punkt')
	from nltk.corpus import stopwords
	from nltk.cluster.util import cosine_distance
	import networkx as nx
	from transformers import pipeline


	# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


	# Abstractive summarization model, loaded once at import time. The selected
	# device is passed through so the model actually runs on the GPU when one is
	# available (previously `device` was computed but never used).
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

	def read(filepath):
	    """Run Tesseract OCR on the image at ``filepath`` and return the text.

	    The image is opened inside a ``with`` block so its underlying file handle
	    is released as soon as OCR has finished instead of being left open.
	    """
	    with Image.open(filepath) as image:
	        return pytesseract.image_to_string(image)

	def clean_text(text):
	    """Split ``text`` into sentences, each returned as a list of word tokens.

	    Sentences are delimited by ".". Commas and apostrophes are padded with
	    spaces so that they become tokens of their own, then every sentence is
	    split on single spaces and empty tokens are discarded.

	    Fragments that contain no tokens at all (for example the blank left after
	    a trailing ". ") are dropped: an empty token list carries no content and
	    would break code that indexes the first word of each sentence.

	    Returns:
	        A list of sentences, each a non-empty list of string tokens.
	    """
	    sentences = []
	    for raw_sentence in text.split("."):
	        spaced = raw_sentence.replace(",", " , ").replace("'", " ' ")
	        words = [word for word in spaced.split(" ") if word != ""]
	        if words:
	            sentences.append(words)
	    return sentences

	def sentence_similarity(sent1, sent2, stopwords):
	    """Return the cosine similarity between two tokenised sentences.

	    Both sentences are lower-cased and turned into word-count vectors over
	    their combined vocabulary; words listed in ``stopwords`` are not counted.

	    Args:
	        sent1: First sentence as a list of word tokens.
	        sent2: Second sentence as a list of word tokens.
	        stopwords: Collection of lower-case words to ignore, or ``None`` to
	            ignore nothing.

	    Returns:
	        ``1 - cosine_distance`` of the two count vectors, or ``0`` when either
	        sentence has no countable words (the cosine is undefined there).
	    """
	    # A set gives O(1) membership tests regardless of how the caller stored them.
	    ignored = set(stopwords) if stopwords is not None else set()

	    words1 = [w.lower() for w in sent1]
	    words2 = [w.lower() for w in sent2]

	    # Map each distinct word to its vector slot once, instead of searching a
	    # list with .index() for every token.
	    position = {word: slot for slot, word in enumerate(set(words1 + words2))}

	    vector1 = [0] * len(position)
	    vector2 = [0] * len(position)

	    # build the vector for the first sentence
	    for w in words1:
	        if w not in ignored:
	            vector1[position[w]] += 1

	    # build the vector for the second sentence
	    for w in words2:
	        if w not in ignored:
	            vector2[position[w]] += 1

	    # An all-zero vector has no direction; return 0 explicitly rather than
	    # relying on a 0/0 division producing NaN.
	    if not any(vector1) or not any(vector2):
	        return 0
	    return 1 - cosine_distance(vector1, vector2)


	def build_similarity_matrix(sentences, stop_words):
	    """Return the square matrix of pairwise similarities between ``sentences``.

	    Entry ``[i][j]`` holds ``sentence_similarity(sentences[i], sentences[j],
	    stop_words)`` for every ``i != j``; the diagonal stays at zero because a
	    sentence is never compared with itself.
	    """
	    count = len(sentences)
	    matrix = np.zeros((count, count))

	    for row, first in enumerate(sentences):
	        for col, second in enumerate(sentences):
	            if row != col:
	                matrix[row][col] = sentence_similarity(first, second, stop_words)

	    return matrix

	def sentences(text, top_n="auto"):
	    """Build an extractive summary of ``text`` from its top-ranked sentences.

	    The text is split into tokenised sentences, a pairwise similarity matrix
	    is built, the sentences are scored with PageRank over that matrix, and the
	    best ``top_n`` sentences are joined (in rank order) into one string.

	    Args:
	        text: The text to summarise.
	        top_n: Number of sentences to keep, or ``"auto"`` to keep all of them.
	            Values larger than the number of sentences are clamped.

	    Returns:
	        The selected sentences joined with ". " and terminated by ".".
	    """
	    # Step 1 - Clean text to generate sentences. Token-less sentences are
	    # skipped because the first word of each sentence is indexed below.
	    sentence_list = [tokens for tokens in clean_text(text) if tokens]
	    stop_words = stopwords.words('english')
	    stop_words.extend([".", ","])

	    # Step 2 - Generate similarity matrix across sentences
	    similarity_matrix = build_similarity_matrix(sentence_list, stop_words)

	    # Step 3 - Rank sentences in the similarity matrix
	    similarity_graph = nx.from_numpy_array(similarity_matrix)
	    scores = nx.pagerank(similarity_graph)

	    # Step 4 - Sort by score (descending) and pick the top sentences
	    ranked = sorted(
	        ((scores[i], tokens) for i, tokens in enumerate(sentence_list)),
	        reverse=True,
	    )

	    if top_n == "auto":
	        top_n = len(ranked)
	    else:
	        # Clamp so that asking for more sentences than exist cannot index
	        # past the end of the ranking.
	        top_n = max(0, min(int(top_n), len(ranked)))

	    summarize_text = []
	    for _, tokens in ranked[:top_n]:
	        first_word = tokens[0]
	        # Upper-case only the first letter; str.capitalize() would also
	        # lower-case the rest of the word (e.g. "NASA" -> "Nasa").
	        first_word = first_word[:1].upper() + first_word[1:]
	        summarize_text.append(" ".join([first_word] + tokens[1:]))

	    # Step 5 - Output the summarized text, undoing the token padding that was
	    # added around commas and apostrophes.
	    extractive_summarized = ". ".join(summarize_text).replace(" , ", ", ").replace(" ' ", "'") + "."
	    return extractive_summarized

	def important_sentences(filepath, no_of_sentences=5):
	    """OCR the image at ``filepath`` and return its key sentences as a numbered list.

	    Args:
	        filepath: Path of the image to read.
	        no_of_sentences: How many top-ranked sentences to request.

	    Returns:
	        A 4-tuple of Gradio updates: the output textbox set to the numbered
	        sentences, followed by a button, a textbox and a dropdown update that
	        each set ``visible=False``.
	    """
	    extracted_text = " ".join(read(filepath).split("\n"))
	    try:
	        extractive_summary = sentences(extracted_text, no_of_sentences)
	    except Exception:
	        # Best-effort fallback: if ranking the requested number of sentences
	        # fails, retry keeping every sentence. Only Exception is caught so
	        # KeyboardInterrupt/SystemExit still propagate.
	        extractive_summary = sentences(extracted_text, "auto")

	    # Number only the non-blank sentences so the list has no gaps or empty items.
	    stripped = [part.strip() for part in extractive_summary.split(".")]
	    kept = [part for part in stripped if part]
	    text = "".join(
	        str(number) + ". " + part + ".\n\n"
	        for number, part in enumerate(kept, start=1)
	    )
	    return (gr.Textbox.update(text), gr.Button.update(visible=False), gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False))

	def summarize(filepath):
	    """OCR the image at ``filepath`` and return an abstractive summary of its text.

	    The summary length bounds are derived from the character count of the
	    extracted text (one sixth as the maximum, one tenth as the minimum).

	    Returns:
	        A 4-tuple of Gradio updates: the output textbox set to the summary,
	        followed by a button, a textbox and a dropdown update that each set
	        ``visible=False``.
	    """
	    extracted_text = " ".join(read(filepath).split("\n"))
	    hidden = (gr.Button.update(visible=False), gr.Textbox.update(visible=False), gr.Dropdown.update(visible=False))

	    # Nothing was recognised in the image: there is nothing to summarise, so
	    # skip the model instead of calling it with zero-length bounds.
	    if not extracted_text.strip():
	        return (gr.Textbox.update(""),) + hidden

	    text_length = len(extracted_text)
	    # Keep max_length at 1 or more so very short texts do not request a
	    # zero-length summary.
	    max_length = max(1, text_length // 6)
	    min_length = text_length // 10

	    # truncation=True cuts over-long input down to the model's maximum input
	    # size instead of failing on it.
	    abstractive_summary = summarizer(
	        extracted_text,
	        max_length=max_length,
	        min_length=min_length,
	        do_sample=False,
	        truncation=True,
	    )
	    return (gr.Textbox.update(abstractive_summary[0]["summary_text"]),) + hidden

	# Question-answering pipelines that have already been loaded, keyed by model id,
	# so each model is loaded at most once per process instead of on every click.
	_QA_PIPELINES = {}


	def _get_question_answerer(model_name):
	    """Return the question-answering pipeline for ``model_name``, loading it on first use."""
	    if model_name not in _QA_PIPELINES:
	        _QA_PIPELINES[model_name] = pipeline("question-answering", model=model_name)
	    return _QA_PIPELINES[model_name]


	def Question_Answer(filepath, question, mod):
	    """Answer ``question`` using the text OCR'd from the image at ``filepath``.

	    Args:
	        filepath: Path of the image to read.
	        question: The question to answer.
	        mod: ``"Roberta"`` selects the RoBERTa model; any other value selects
	            the DistilBERT model.

	    Returns:
	        The answer string produced by the model, or ``""`` when there is no
	        question or no extracted text to search.
	    """
	    extracted_text = " ".join(read(filepath).split("\n"))

	    # The question-answering pipeline rejects an empty question or context;
	    # return an empty answer instead of surfacing that as an error.
	    if not question or not extracted_text.strip():
	        return ""

	    if mod == "Roberta":
	        model_name = "SMD00/QA_model-roberta"
	    else:
	        model_name = "SMD00/QA_model-distilbert"

	    obj = _get_question_answerer(model_name)(question=question, context=extracted_text)
	    return obj['answer']

	def show_fn():
	    """Return the four component updates used when question answering is chosen.

	    In order: a textbox, a button and a dropdown update that each set
	    ``visible=True``, then a textbox update whose value is the empty string.
	    """
	    # NOTE(review): the click handler that uses this lists its outputs as
	    # [submit_btn, ques_box, mode, out_box], so the first two update types
	    # do not line up with the component types - confirm this is intended.
	    first_visible = gr.Textbox.update(visible=True)
	    second_visible = gr.Button.update(visible=True)
	    third_visible = gr.Dropdown.update(visible=True)
	    cleared_text = gr.Textbox.update("")
	    return (first_visible, second_visible, third_visible, cleared_text)
	def dummy_fn(x):
	    """Identity function: hand the argument back unchanged.

	    Used as the ``fn`` of the example galleries, whose input and output are
	    the same image component.
	    """
	    return x

	with gr.Blocks() as demo:
	    # Page header and the single image input shared by every action.
	    gr.Markdown("# PicSum")
	    gr.Markdown("Gradio demo for PicSum project. You can give an image as input and select any of the three buttons. It generates summary, important sentences and answers questions related to context.")
	    img = gr.components.Image(type="filepath", label="Input Image")

	    # One row holding the three action buttons.
	    with gr.Row():
	        summary_btn = gr.Button(value="Summary")
	        sentence_btn = gr.Button(value="Important Sentences")
	        quesAndAns_btn = gr.Button(value="Question and Answers")

	    # Question-answering controls are created hidden; show_fn reveals them.
	    mode = gr.Dropdown(
	        ["Roberta", "DistilBert"],
	        label="Model",
	        info="Choose a model",
	        visible=False,
	    )
	    ques_box = gr.Textbox(
	        label="Question",
	        info="Enter a Question",
	        interactive=True,
	        visible=False,
	    )
	    submit_btn = gr.Button(value="Submit", visible=False)
	    out_box = gr.Textbox(label="Generated Text")

	    # Wire the buttons to their handlers.
	    summary_btn.click(
	        fn=summarize,
	        inputs=[img],
	        outputs=[out_box, submit_btn, ques_box, mode],
	    )
	    sentence_btn.click(
	        fn=important_sentences,
	        inputs=[img],
	        outputs=[out_box, submit_btn, ques_box, mode],
	    )
	    quesAndAns_btn.click(
	        fn=show_fn,
	        outputs=[submit_btn, ques_box, mode, out_box],
	    )
	    submit_btn.click(
	        fn=Question_Answer,
	        inputs=[img, ques_box, mode],
	        outputs=[out_box],
	    )

	    # One single-image example gallery per bundled sample file.
	    gr.Markdown("## Image Examples")
	    with gr.Row():
	        for example_file in ("a.png", "b.png", "c.png", "d.png"):
	            gr.Examples(
	                examples=[example_file],
	                inputs=img,
	                outputs=img,
	                fn=dummy_fn,
	                cache_examples=True,
	            )

	demo.launch(debug=True)