Spaces:
Running
Running
import os | |
from app.db import supabase | |
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
def load_docs():
    """Load every supported document from the ``data`` directory beside this file.

    Files are picked by extension (matched case-insensitively): ``.pdf`` is
    read with ``PyPDFLoader``, ``.docx``/``.doc`` with ``Docx2txtLoader`` and
    ``.txt`` with ``TextLoader``. Anything else is ignored. A file that fails
    to load is reported on stdout and skipped, so one bad file never aborts
    the whole run.

    If the directory does not exist it is created and an empty list is
    returned; if it cannot be listed, an empty list is returned as well.

    Returns:
        list: The concatenated results of each loader's ``load()`` call, in
        file-name order.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(base_dir, 'data')

    if not os.path.exists(data_dir):
        print(f"Directory not found: {data_dir}")
        os.makedirs(data_dir)
        print(f"Created directory: {data_dir}")
        return []

    try:
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # resulting document order stable between runs.
        files = sorted(os.listdir(data_dir))
    except PermissionError:
        print(f"Permission denied: {data_dir}")
        return []

    # (suffixes, label used in the error message, loader factory).
    # The factories are lambdas so a loader class is only looked up when a
    # file of that type is actually encountered.
    # NOTE(review): ".doc" is routed to Docx2txtLoader as in the original;
    # legacy binary .doc files will presumably fail there and be reported.
    loader_table = (
        (('.pdf',), 'PDF', lambda path: PyPDFLoader(path)),
        (('.docx', '.doc'), 'DOCX/DOC', lambda path: Docx2txtLoader(path)),
        (('.txt',), 'TXT', lambda path: TextLoader(path)),
    )

    documents = []
    for file in files:
        file_path = os.path.join(data_dir, file)
        lowered = file.lower()  # so "REPORT.PDF" is treated like "report.pdf"

        for suffixes, label, make_loader in loader_table:
            if lowered.endswith(suffixes):
                break
        else:
            continue  # unsupported extension

        try:
            documents.extend(make_loader(file_path).load())
        except Exception as e:
            print(f"Error loading {label} file {file}: {e}")

    return documents
def get_data():
    """Mirror the ``rag-data`` storage bucket into the local ``data`` directory.

    The local directory (``data`` beside this file) is compared by file name
    with the bucket listing:

    * local files that are not in the bucket are removed;
    * bucket files that are not present locally are downloaded.

    Files present on both sides are left untouched (contents are not
    compared). Every failure is reported on stdout; a failure on one file
    does not stop the remaining files from being processed.

    If the local directory is missing it is created, so a first run can
    still populate it.

    NOTE(review): only the entries returned by a single ``list()`` call are
    considered. If the client paginates or returns folder entries, local
    files could be removed or folder names "downloaded" — confirm against the
    Supabase storage client documentation.

    Returns:
        None
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(base_dir, 'data')

    try:
        contents = os.listdir(data_dir)
    except FileNotFoundError:
        print(f"Directory not found: {data_dir}")
        # Create the directory instead of giving up, matching load_docs();
        # otherwise a fresh checkout would never receive any files.
        try:
            os.makedirs(data_dir, exist_ok=True)
        except OSError as e:
            print(f"Error creating directory {data_dir}: {e}")
            return
        print(f"Created directory: {data_dir}")
        contents = []
    except PermissionError:
        print(f"Permission denied: {data_dir}")
        return

    # Only regular files take part in the comparison; sub-directories are
    # never deleted.
    local_names = {
        name for name in contents
        if os.path.isfile(os.path.join(data_dir, name))
    }

    try:
        bucket = supabase.storage.from_("rag-data")
        files = bucket.list()
    except Exception as e:
        print(f"Error fetching file list from storage: {e}")
        return

    remote_names = {entry['name'] for entry in files}

    for file in sorted(local_names - remote_names):
        try:
            os.remove(os.path.join(data_dir, file))
            print("Removed", file)
        except FileNotFoundError:
            print(f"File not found: {file}")
        except PermissionError:
            print(f"Permission denied when removing file: {file}")
        except Exception as e:
            print(f"Error removing file {file}: {e}")

    for file in sorted(remote_names - local_names):
        target = os.path.join(data_dir, file)
        try:
            # Fetch the bytes BEFORE opening the target: opening first would
            # leave an empty file behind on a failed download, and that file
            # would then look "already synced" on every later run.
            payload = bucket.download(file)
            with open(target, 'wb') as f:
                f.write(payload)
            print("Downloaded", file)
        except Exception as e:
            print(f"Error downloading file {file}: {e}")
            # Drop any partially written file so the next sync retries it.
            # The target was not a local regular file when listed, so this
            # cannot remove a previously valid copy.
            try:
                os.remove(target)
            except OSError:
                pass