Spaces:
Runtime error
Runtime error
from pdfminer.high_level import extract_text | |
from docx import Document | |
import pytesseract | |
from PIL import Image | |
def extract_text_from_image(file_path):
    """Run OCR on an image file and return the recognized text.

    Args:
        file_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        The value produced by ``pytesseract.image_to_string`` for the
        opened image.
    """
    # Image.open keeps the underlying file open lazily; the context manager
    # guarantees it is released even if OCR raises or the image is never
    # fully loaded (e.g. multi-frame formats).
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)
def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file as one newline-joined string.

    Args:
        file_path: Location of the document, passed straight to ``Document``.

    Returns:
        The ``text`` of each entry in ``doc.paragraphs``, in order, separated
        by ``'\\n'``. Only ``doc.paragraphs`` is read.
    """
    document = Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def extract_text_from_pdf(file_path):
    """Return the text that pdfminer's ``extract_text`` yields for a PDF.

    Args:
        file_path: Location of the PDF, passed straight to ``extract_text``.

    Returns:
        The result of ``extract_text(file_path)``, unmodified.
    """
    return extract_text(file_path)