# Spaces: Build error
import os | |
import pandas as pd | |
from transformers import pipeline | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_community.vectorstores import FAISS | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.docstore.document import Document | |
from utils.logger import setup_logger | |
from utils.model_loader import ModelLoader | |
# Module-level logger shared by every method of RAGSystem below.
logger = setup_logger(__name__)
class RAGSystem:
    """Answer questions over the titles stored in a CSV file.

    The ``Title`` column of the CSV is embedded into a FAISS vector store.
    For each query the retriever's matching titles are joined into a single
    context string, which is passed together with the query to a
    question-answering pipeline.

    Attributes set during construction:
        vector_store: FAISS store built from the title documents.
        retriever: Retriever obtained from ``vector_store.as_retriever()``.
        qa_pipeline: Question-answering pipeline returned by
            ``ModelLoader.load_model_with_retry``.
    """

    # Name of the CSV column whose values are indexed.
    TITLE_COLUMN = "Title"
    # Maximum number of characters of retrieved text passed as QA context.
    MAX_CONTEXT_CHARS = 1000
    # Sentence-embedding model used to build the vector store.
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    # Model handed to the question-answering pipeline.
    QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"

    def __init__(self, csv_path="apparel.csv"):
        """Build the retriever from ``csv_path`` and load the QA pipeline.

        Args:
            csv_path: Path to the CSV file to index.

        Raises:
            Exception: Any failure while building the index or loading the
                model is logged and re-raised unchanged.
        """
        try:
            self.setup_system(csv_path)
            # NOTE(review): presumably retries the model load on failure;
            # confirm against utils.model_loader.
            self.qa_pipeline = ModelLoader.load_model_with_retry(
                self.QA_MODEL_NAME,
                pipeline,
                task="question-answering",
            )
        except Exception as e:
            logger.error(f"Failed to initialize RAGSystem: {str(e)}")
            raise

    def setup_system(self, csv_path):
        """Create ``self.vector_store`` and ``self.retriever`` from a CSV file.

        Args:
            csv_path: Path to a CSV file that contains a ``Title`` column.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
            KeyError: If the CSV has no ``Title`` column.
            ValueError: If the CSV has no row with a non-missing title.
            Exception: Any other error raised while reading, splitting,
                embedding or indexing is logged and re-raised unchanged.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"CSV file not found at {csv_path}")

        try:
            frame = pd.read_csv(csv_path)
            if self.TITLE_COLUMN not in frame.columns:
                raise KeyError(
                    f"CSV file {csv_path} has no '{self.TITLE_COLUMN}' column"
                )

            # Drop missing titles first: str(NaN) would otherwise be indexed
            # as a document whose text is the literal string "nan".
            titles = frame[self.TITLE_COLUMN].dropna()
            if titles.empty:
                # Fail here with a clear message instead of letting an empty
                # document list reach FAISS.
                raise ValueError(
                    f"CSV file {csv_path} contains no usable "
                    f"'{self.TITLE_COLUMN}' values"
                )

            # Series.items() yields (row label, value), so the metadata keeps
            # the same index the rows had in the DataFrame.
            docs = [
                Document(page_content=str(title), metadata={"index": idx})
                for idx, title in titles.items()
            ]

            text_splitter = CharacterTextSplitter(
                chunk_size=1000, chunk_overlap=100
            )
            split_docs = text_splitter.split_documents(docs)

            embeddings = HuggingFaceEmbeddings(
                model_name=self.EMBEDDING_MODEL_NAME
            )
            self.vector_store = FAISS.from_documents(split_docs, embeddings)
            self.retriever = self.vector_store.as_retriever()
        except Exception as e:
            logger.error(f"Failed to setup RAG system: {str(e)}")
            raise

    def process_query(self, query):
        """Return the QA pipeline's answer to ``query``.

        Args:
            query: The question text.

        Returns:
            The ``'answer'`` field of the pipeline response, or a fixed
            failure message if retrieval or question answering raises.
        """
        try:
            # invoke() is the supported retriever entry point;
            # get_relevant_documents() is deprecated in LangChain.
            retrieved_docs = self.retriever.invoke(query)
            context = "\n".join(doc.page_content for doc in retrieved_docs)
            # Cap the context length handed to the QA model.
            context = context[: self.MAX_CONTEXT_CHARS]
            response = self.qa_pipeline(question=query, context=context)
            return response["answer"]
        except Exception as e:
            # Best-effort boundary: callers always get a string back.
            logger.error(f"Query processing error: {str(e)}")
            return "Failed to process query due to an error."