|
from pathlib import Path |
|
from typing import Union |
|
|
|
from pypdf import PdfReader |
|
from transformers import pipeline |
|
import gradio as gr |
|
|
|
|
|
# Question-answering pipeline, built once when the module is loaded and
# shared by every call to answer_doc_question().
question_answerer = pipeline(
    model="deepset/tinyroberta-squad2",
    task="question-answering",
)
|
|
|
|
|
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return a string with its entire content.

    Args:
        pdf_file: Location of the PDF document; handed unchanged to ``PdfReader``.

    Returns:
        The text extracted from every page, in page order, with a single
        newline between consecutive pages.
    """
    reader = PdfReader(pdf_file)

    # Build the result with one join instead of growing a string with ``+=``
    # per page. The newline separator keeps the last word of one page from
    # fusing with the first word of the next, and ``or ""`` tolerates a page
    # for which extraction yields nothing.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
|
|
|
|
|
def answer_doc_question(pdf_file, question):
    """Answer a question about a PDF document using the question-answering pipeline.

    Args:
        pdf_file: The PDF to read, as supplied by the file input; forwarded to
            ``get_text_from_pdf``. May be ``None`` when nothing was uploaded.
        question: The question text entered by the user.

    Returns:
        The ``"answer"`` field of the pipeline result, or a short hint for the
        user when the PDF, the question, or the extracted text is missing.
    """
    # Validate up front, before touching the PDF or the model, so that missing
    # input yields a readable hint rather than an opaque exception from the
    # PDF reader or the pipeline.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please enter a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # Nothing to search in (e.g. the PDF contains no extractable text).
        return "No text could be extracted from this PDF."

    # Explicit keywords make it unambiguous which string is the question and
    # which is the context.
    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
|
|
|
|
|
|
|
# Input widgets; both carry a default value (a PDF URL and a question) so the
# form is already filled in when the page opens.
pdf_input = gr.File(
    label="Upload a PDF document and ask a question about it.",
    file_types=[".pdf"],
    value="https://ris.uni-paderborn.de/download/30236/30237/author_version.pdf",
)
question = gr.Textbox(
    label="Type a question regarding the uploaded document here.",
    value="What is mobile-env?",
)

# Connect the two inputs to answer_doc_question(), show its return value as
# plain text, and start the app.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()