Spaces:

jalvaroluna
/

rag

Runtime error

rag / extract_text.py

Jose Alvaro Luna G

feat: app init2

da6a7d2 3 months ago

528 Bytes

	from pdfminer.high_level import extract_text
	from docx import Document
	import pytesseract
	from PIL import Image

	def extract_text_from_image(file_path):
	    """Run OCR on an image and return the recognized text.

	    Args:
	        file_path: Image location, passed unchanged to ``PIL.Image.open``.

	    Returns:
	        The value produced by ``pytesseract.image_to_string`` for the image.
	    """
	    # Image.open keeps the underlying file open until the image is closed;
	    # the with-block releases it as soon as OCR finishes, even on error.
	    with Image.open(file_path) as image:
	        return pytesseract.image_to_string(image)

	def extract_text_from_docx(file_path):
	    """Return the text of a .docx document, one line per paragraph.

	    Loads ``file_path`` with ``docx.Document`` and joins the ``text`` of
	    every entry in the document's ``paragraphs`` with newline characters.
	    """
	    paragraphs = Document(file_path).paragraphs
	    return '\n'.join(paragraph.text for paragraph in paragraphs)

	def extract_text_from_pdf(file_path):
	    """Return the text pdfminer's ``extract_text`` produces for ``file_path``."""
	    return extract_text(file_path)