stefanbschneider
committed on
Create simple PDF qna interface
Browse files
app.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
from typing import Union
|
3 |
+
|
4 |
+
from pypdf import PdfReader
|
5 |
+
from transformers import pipeline
|
6 |
+
import gradio as gr
|
7 |
+
|
8 |
+
|
9 |
+
# Question-answering pipeline, built once at module load so that every
# request handled by the app reuses the same instance.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/tinyroberta-squad2",
)
|
10 |
+
|
11 |
+
|
12 |
+
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return the text of all its pages.

    Args:
        pdf_file: Location of the PDF; handed directly to ``PdfReader``.

    Returns:
        The extracted text of every page, in page order, with consecutive
        pages separated by a single newline. A document without pages yields
        an empty string.
    """
    reader = PdfReader(pdf_file)

    # Build the result with one join instead of growing a string page by
    # page. The newline separator keeps the last word of a page from being
    # fused with the first word of the following page. ``or ""`` is a
    # defensive guard in case a page yields no text at all.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
|
21 |
+
|
22 |
+
|
23 |
+
def answer_doc_question(pdf_file, question: str) -> str:
    """Answer a question about the content of an uploaded PDF document.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input; forwarded
            unchanged to ``get_text_from_pdf``. A falsy value (e.g. ``None``)
            is treated as "nothing uploaded".
        question: The user's question about the document.

    Returns:
        The answer span extracted by the question-answering pipeline, or a
        short human-readable message when the document or the question is
        missing, or when the PDF contains no extractable text.
    """
    # Guard clauses: reject incomplete input before doing any PDF parsing or
    # model inference, and report the problem in the text output instead of
    # failing with an exception from a lower layer.
    if not pdf_file:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please enter a question."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # E.g. image-only PDFs: there is no context for the model to search.
        return "No text could be extracted from the uploaded PDF."

    # Explicit keywords make it unambiguous which argument is the question
    # and which is the context the answer is extracted from.
    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
|
27 |
+
|
28 |
+
|
29 |
+
# Input widgets: a file upload restricted to PDFs and a free-text question box.
pdf_input = gr.File(file_types=[".pdf"])
question = gr.Textbox(label="Type a question regarding the uploaded document here.")

# Connect the widgets to the answering function, show the result as plain
# text, and start the app.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()
|