"""Gradio demo: upload a PDF and ask a question about its content.

The text of the PDF is extracted with pypdf and handed, together with the
question, to an extractive question-answering pipeline.
"""

from pathlib import Path
from typing import Union

from pypdf import PdfReader
from transformers import pipeline
import gradio as gr

# Created once at import time so every request reuses the same pipeline.
question_answerer = pipeline(task="question-answering", model="deepset/tinyroberta-squad2")


def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF from the given path and return a string with its entire content.

    The text of all pages is concatenated in page order without any separator.
    """
    reader = PdfReader(pdf_file)
    # Join once instead of growing a string with "+=" inside a loop.
    # "or ''" keeps the join safe if a page yields no text at all.
    return "".join(page.extract_text() or "" for page in reader.pages)


def answer_doc_question(pdf_file, question: str) -> str:
    """Answer ``question`` using the text of ``pdf_file`` as context.

    Args:
        pdf_file: Value delivered by the ``gr.File`` input; it is forwarded
            unchanged to ``get_text_from_pdf``. ``None`` means no file is
            currently selected.
        question: The question typed by the user.

    Returns:
        The ``"answer"`` field of the pipeline result, or a short hint when
        the file, the question, or the extractable text is missing.
    """
    # Guard the inputs up front: the pipeline cannot work with a missing
    # document or an empty question/context, and a plain hint is more useful
    # in the UI than a stack trace.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # E.g. scanned documents that only contain images.
        return "No text could be extracted from the uploaded PDF."

    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]


# Add a default file and question, so it's easy to try out the app.
pdf_input = gr.File(
    value="https://ris.uni-paderborn.de/download/30236/30237/author_version.pdf",
    file_types=[".pdf"],
    label="Upload a PDF document and ask a question about it.",
)
question = gr.Textbox(
    value="What is mobile-env?",
    label="Type a question regarding the uploaded document here.",
)

gr.Interface(fn=answer_doc_question, inputs=[pdf_input, question], outputs="text").launch()