import os

import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import pipeline

from utils.logger import setup_logger
from utils.model_loader import ModelLoader

logger = setup_logger(__name__)


class RAGSystem:
    """Retrieval-augmented question answering over titles stored in a CSV file.

    On construction the ``Title`` column of the CSV is embedded and indexed in
    a FAISS vector store, and an extractive question-answering pipeline is
    loaded. ``process_query`` retrieves the titles most similar to a question
    and lets the QA pipeline extract an answer from them.
    """

    # Model identifiers and limits; override on a subclass to customise.
    QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    # CSV column whose values become the indexed documents.
    TITLE_COLUMN = "Title"
    # Maximum number of characters of retrieved text passed as QA context.
    MAX_CONTEXT_CHARS = 1000

    def __init__(self, csv_path="apparel.csv"):
        """Build the vector index and load the question-answering pipeline.

        Args:
            csv_path: Path to a CSV file that contains a ``Title`` column.

        Raises:
            Exception: Any error raised while building the index or loading
                the model is logged and re-raised unchanged.
        """
        try:
            self.setup_system(csv_path)
            # NOTE(review): ModelLoader is a project helper; presumably it
            # calls pipeline(task=..., model=...) with retries - confirm.
            self.qa_pipeline = ModelLoader.load_model_with_retry(
                self.QA_MODEL_NAME,
                pipeline,
                task="question-answering",
            )
        except Exception as e:
            logger.error(f"Failed to initialize RAGSystem: {str(e)}")
            raise

    def setup_system(self, csv_path):
        """Read the CSV, embed its titles and create the FAISS retriever.

        Sets ``self.vector_store`` and ``self.retriever``.

        Args:
            csv_path: Path to a CSV file that contains a ``Title`` column.

        Raises:
            FileNotFoundError: If ``csv_path`` does not exist.
            KeyError: If the CSV has no ``Title`` column.
            ValueError: If the CSV contains no non-missing titles.
            Exception: Any other error from pandas, the embedding model or
                FAISS is logged and re-raised unchanged.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"CSV file not found at {csv_path}")

        try:
            documents = pd.read_csv(csv_path)

            if self.TITLE_COLUMN not in documents.columns:
                raise KeyError(
                    f"Column '{self.TITLE_COLUMN}' not found in {csv_path}"
                )

            # Missing titles are read as NaN; str(NaN) is the text "nan",
            # which would otherwise be embedded and indexed as a real title.
            titles = documents[self.TITLE_COLUMN].dropna()
            skipped = len(documents) - len(titles)
            if skipped:
                logger.warning(
                    f"Skipped {skipped} row(s) with a missing "
                    f"'{self.TITLE_COLUMN}' value in {csv_path}"
                )
            if titles.empty:
                # Fail early with a clear message instead of handing an
                # empty document list to the vector store.
                raise ValueError(f"No titles found in {csv_path}")

            # Series.items() yields (index label, value) pairs, so the
            # metadata keeps the original row label of every title.
            docs = [
                Document(page_content=str(title), metadata={'index': idx})
                for idx, title in titles.items()
            ]

            text_splitter = CharacterTextSplitter(
                chunk_size=1000, chunk_overlap=100
            )
            split_docs = text_splitter.split_documents(docs)

            embeddings = HuggingFaceEmbeddings(
                model_name=self.EMBEDDING_MODEL_NAME
            )
            self.vector_store = FAISS.from_documents(split_docs, embeddings)
            self.retriever = self.vector_store.as_retriever()
        except Exception as e:
            logger.error(f"Failed to setup RAG system: {str(e)}")
            raise

    def process_query(self, query):
        """Answer ``query`` using the titles most similar to it.

        The retrieved titles are joined with newlines, truncated to
        ``MAX_CONTEXT_CHARS`` characters and passed as context to the QA
        pipeline.

        Args:
            query: The question to answer.

        Returns:
            The ``'answer'`` field of the QA pipeline response, or the string
            ``"Failed to process query due to an error."`` if anything fails.
            Errors are logged and never raised to the caller.
        """
        try:
            # invoke() replaces the deprecated get_relevant_documents().
            retrieved_docs = self.retriever.invoke(query)
            joined_text = "\n".join(
                doc.page_content for doc in retrieved_docs
            )
            retrieved_text = joined_text[:self.MAX_CONTEXT_CHARS]

            qa_input = {
                "question": query,
                "context": retrieved_text,
            }
            response = self.qa_pipeline(qa_input)
            return response['answer']
        except Exception as e:
            logger.error(f"Query processing error: {str(e)}")
            return "Failed to process query due to an error."