# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
from PIL import Image | |
import pytesseract | |
import torch | |
import numpy as np | |
import nltk | |
nltk.download('stopwords') | |
nltk.download('punkt') | |
from nltk.corpus import stopwords | |
from nltk.cluster.util import cosine_distance | |
import networkx as nx | |
from transformers import pipeline | |
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Abstractive summarization pipeline, created once at import time.
# `device` was previously computed but never used, so it is now handed to the
# pipeline to place the model on the selected device.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=device,
)
def read(filepath):
    """Run OCR on an image file and return the recognised text.

    Args:
        filepath: Path of the image, passed to ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Use a context manager so the image's file handle is released as soon as
    # OCR finishes rather than whenever the object is garbage collected.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image)
def clean_text(text):
    """Split ``text`` into sentences, each returned as a list of tokens.

    Sentences are delimited by "."; commas and apostrophes are padded with
    spaces so they become tokens of their own, and tokens are separated on
    single space characters.

    Args:
        text: The raw text to tokenise.

    Returns:
        A list of sentences, where every sentence is a non-empty list of
        token strings. Fragments that contain no tokens (for example the
        whitespace left after a trailing ".") are dropped, because an empty
        token list has no first word to capitalise and no content to rank.
    """
    sentences = []
    for fragment in text.split("."):
        spaced = fragment.replace(",", " , ").replace("'", " ' ")
        words = [word for word in spaced.split(" ") if word]
        if words:
            sentences.append(words)
    return sentences
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two tokenised sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words listed in ``stopwords`` are not counted.

    Args:
        sent1: First sentence as a list of word strings.
        sent2: Second sentence as a list of word strings.
        stopwords: Iterable of lower-case words to ignore, or ``None`` for
            no filtering.

    Returns:
        The cosine similarity of the two count vectors as a float, or ``0``
        when either vector is all zeros (no countable words), where the
        cosine is undefined.
    """
    # A set gives O(1) membership tests for every word checked below.
    ignored = set(stopwords) if stopwords else set()

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map each vocabulary word to its vector position once, instead of
    # searching a list with .index() for every word.
    position = {word: i for i, word in enumerate(set(words1 + words2))}

    vector1 = np.zeros(len(position))
    vector2 = np.zeros(len(position))
    for word in words1:
        if word not in ignored:
            vector1[position[word]] += 1
    for word in words2:
        if word not in ignored:
            vector2[position[word]] += 1

    # Guard the zero-vector case explicitly rather than dividing by zero and
    # testing the result for NaN afterwards.
    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        return 0
    return float(np.dot(vector1, vector2) / norm_product)
def build_similarity_matrix(sentences, stop_words):
    """Compute the pairwise similarity matrix for a list of sentences.

    Args:
        sentences: List of tokenised sentences (lists of words).
        stop_words: Words forwarded to ``sentence_similarity`` to be ignored.

    Returns:
        A square numpy array in which entry ``[i][j]`` holds the similarity
        of sentence ``i`` and sentence ``j``; the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))
    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stop_words)
    return matrix
def sentences(text, top_n="auto"):
    """Build an extractive summary of ``text`` from its top-ranked sentences.

    The text is tokenised with ``clean_text``, a sentence-similarity graph is
    built from it and scored with ``nx.pagerank``, and the highest-scoring
    sentences are joined back into a single string.

    Args:
        text: The text to summarise.
        top_n: Number of sentences to keep, or ``"auto"`` to keep them all.
            Values larger than the number of available sentences are clamped
            to that number; negative values select nothing.

    Returns:
        The selected sentences in descending score order, joined with ". "
        and terminated with ".".

    Raises:
        ValueError: If ``top_n`` is neither ``"auto"`` nor convertible by
            ``int``.
    """
    # Step 1 - Tokenise. Token-less sentences are discarded because they
    # cannot be capitalised or contribute any similarity.
    tokenized = [words for words in clean_text(text) if words]
    stop_words = stopwords.words('english')
    stop_words.extend([".", ","])

    # Step 2 - Pairwise similarity between all sentences.
    similarity_matrix = build_similarity_matrix(tokenized, stop_words)

    # Step 3 - Score the sentences on the similarity graph.
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    # Step 4 - Order by descending score and keep the requested number.
    ranked = sorted(
        ((scores[i], words) for i, words in enumerate(tokenized)),
        reverse=True,
    )
    if top_n == "auto":
        top_n = len(ranked)
    else:
        # Clamp so that asking for more sentences than exist no longer
        # raises IndexError.
        top_n = max(0, min(int(top_n), len(ranked)))

    summary_parts = []
    for _, words in ranked[:top_n]:
        # NOTE: str.capitalize() also lower-cases the rest of the first word.
        words[0] = words[0].capitalize()
        summary_parts.append(" ".join(words))

    # Step 5 - Re-attach the punctuation that clean_text spaced out.
    return ". ".join(summary_parts).replace(" , ", ", ").replace(" ' ", "'") + "."
def important_sentences(filepath, no_of_sentences=5):
    """OCR an image and return its most important sentences as a numbered list.

    Args:
        filepath: Path of the image to read.
        no_of_sentences: How many top-ranked sentences to request. If that
            request cannot be satisfied, all sentences are used instead.

    Returns:
        A 4-tuple of Gradio updates: the numbered text for the output box,
        followed by three updates that hide the question-answering controls.
    """
    extracted_text = " ".join(read(filepath).split("\n"))
    try:
        extractive_summary = sentences(extracted_text, no_of_sentences)
    except (IndexError, TypeError, ValueError):
        # Too few sentences for the request, or a count that is not a valid
        # integer: fall back to ranking every sentence.
        extractive_summary = sentences(extracted_text, "auto")

    # Strip before filtering so whitespace-only fragments neither produce an
    # empty numbered entry nor leave a gap in the numbering.
    stripped = (piece.strip() for piece in extractive_summary.split("."))
    text = "".join(
        f"{number}. {piece}.\n\n"
        for number, piece in enumerate((p for p in stripped if p), start=1)
    )

    # gr.update() is the component-agnostic update helper; the per-component
    # `gr.Textbox.update(...)` style is not available in Gradio 4.
    return (
        gr.update(value=text),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
def summarize(filepath):
    """OCR an image and return an abstractive summary of its text.

    Args:
        filepath: Path of the image to read.

    Returns:
        A 4-tuple of Gradio updates: the summary text for the output box,
        followed by three updates that hide the question-answering controls.
        When OCR finds no text the output box is set to an empty string.
    """
    extracted_text = " ".join(read(filepath).split("\n"))

    if not extracted_text.strip():
        # Nothing was recognised, so there is nothing to summarise.
        summary_text = ""
    else:
        # The length bounds are derived from the character count (1/6 and
        # 1/10 of it). Clamp them so very short inputs never yield a zero
        # max_length or a min_length that is not below max_length.
        text_length = len(extracted_text)
        max_length = max(2, text_length // 6)
        min_length = min(max(1, text_length // 10), max_length - 1)
        abstractive_summary = summarizer(
            extracted_text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # Cut inputs that exceed the model's maximum input size instead
            # of failing on them.
            truncation=True,
        )
        summary_text = abstractive_summary[0]["summary_text"]

    # gr.update() is the component-agnostic update helper; the per-component
    # `gr.Textbox.update(...)` style is not available in Gradio 4.
    return (
        gr.update(value=summary_text),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
# Question-answering pipelines that have already been loaded, keyed by model
# id, so that each model is loaded at most once per process.
_qa_pipelines = {}


def _get_question_answerer(mod):
    """Return the (cached) question-answering pipeline for the chosen model."""
    # "Roberta" selects the RoBERTa model; any other value uses DistilBERT.
    if mod == "Roberta":
        model_id = "SMD00/QA_model-roberta"
    else:
        model_id = "SMD00/QA_model-distilbert"
    if model_id not in _qa_pipelines:
        _qa_pipelines[model_id] = pipeline("question-answering", model=model_id)
    return _qa_pipelines[model_id]


def Question_Answer(filepath, question, mod):
    """Answer a question using the text recognised in an image.

    Args:
        filepath: Path of the image to read.
        question: The question to answer.
        mod: Model choice; "Roberta" selects the RoBERTa model and any other
            value selects the DistilBERT model.

    Returns:
        The ``'answer'`` field of the pipeline's result.
    """
    extracted_text = " ".join(read(filepath).split("\n"))
    # Reuse a previously loaded pipeline instead of rebuilding the model on
    # every call.
    question_answerer = _get_question_answerer(mod)
    result = question_answerer(question=question, context=extracted_text)
    return result['answer']
def show_fn():
    """Reveal the question-answering controls and clear the output box.

    Returns:
        A 4-tuple of Gradio updates: three that make a component visible and
        a fourth that sets a component's value to the empty string. They are
        applied, in order, to whatever outputs the caller wires up.
    """
    # gr.update() is component-agnostic, so each update is valid whichever
    # component type it lands on; the per-component `gr.Textbox.update(...)`
    # style is not available in Gradio 4.
    return (
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        gr.update(value=""),
    )
def dummy_fn(x):
    """Identity callback: return the argument exactly as it was received."""
    return x
# PicSum user interface: an image input, three action buttons, the
# question-answering controls (hidden until requested) and one output box.
# NOTE(review): the nesting under each `with gr.Row():` was reconstructed
# from statement order - confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# **PicSum**")
    gr.Markdown("Gradio demo for PicSum project. You can give an image as input and select any of the three buttons. It generates summary, important sentences and answers questions related to context.")
    img = gr.components.Image(type="filepath", label="Input Image")

    with gr.Row():
        summary_btn = gr.Button(value="Summary")
        sentence_btn = gr.Button(value="Important Sentences")
        quesAndAns_btn = gr.Button(value="Question and Answers")

    # Question-answering controls start hidden; show_fn makes them visible.
    mode = gr.Dropdown(
        ["Roberta", "DistilBert"],
        label="Model",
        info="Choose a model",
        visible=False,
    )
    ques_box = gr.Textbox(
        label="Question",
        info="Enter a Question",
        interactive=True,
        visible=False,
    )
    submit_btn = gr.Button(value="Submit", visible=False)
    out_box = gr.Textbox(label="Generated Text")

    # Event wiring.
    summary_btn.click(
        fn=summarize,
        inputs=[img],
        outputs=[out_box, submit_btn, ques_box, mode],
    )
    sentence_btn.click(
        fn=important_sentences,
        inputs=[img],
        outputs=[out_box, submit_btn, ques_box, mode],
    )
    quesAndAns_btn.click(
        fn=show_fn,
        outputs=[submit_btn, ques_box, mode, out_box],
    )
    submit_btn.click(
        fn=Question_Answer,
        inputs=[img, ques_box, mode],
        outputs=[out_box],
    )

    gr.Markdown("## Image Examples")
    with gr.Row():
        # One single-image Examples widget per sample file, created in order.
        for example_image in ("a.png", "b.png", "c.png", "d.png"):
            gr.Examples(
                examples=[example_image],
                inputs=img,
                outputs=img,
                fn=dummy_fn,
                cache_examples=True,
            )

demo.launch(debug=True)