# Pepe_1 / rag_system.py
# Hugging Face file page header: nileshhanotia, "Create rag_system.py",
# commit 3a16d21 (verified), 2.41 kB
import os
import pandas as pd
from transformers import pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from utils.logger import setup_logger
from utils.model_loader import ModelLoader
logger = setup_logger(__name__)
class RAGSystem:
    """Retrieval-augmented question answering over one text column of a CSV.

    Construction builds a FAISS vector store from the CSV's ``Title`` column
    and loads a question-answering pipeline. :meth:`process_query` retrieves
    matching entries and asks the pipeline to answer from them.

    The class attributes below are the tunables; subclasses may override them.
    """

    QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    TEXT_COLUMN = "Title"
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 100
    # Upper bound (in characters) on the context handed to the QA pipeline.
    MAX_CONTEXT_CHARS = 1000
    ERROR_MESSAGE = "Failed to process query due to an error."

    def __init__(self, csv_path="apparel.csv"):
        """Build the retriever from *csv_path*, then load the QA model.

        Args:
            csv_path: Path to a CSV file containing a ``Title`` column.

        Raises:
            FileNotFoundError: If *csv_path* does not exist.
            Exception: Any other failure during indexing or model loading is
                logged and re-raised unchanged.
        """
        try:
            self.setup_system(csv_path)
            # ModelLoader is a project helper; its retry semantics are defined
            # elsewhere (NOTE(review): confirm against utils.model_loader).
            self.qa_pipeline = ModelLoader.load_model_with_retry(
                self.QA_MODEL_NAME,
                pipeline,
                task="question-answering",
            )
        except Exception as exc:
            logger.error(f"Failed to initialize RAGSystem: {exc}")
            raise

    def setup_system(self, csv_path):
        """Index the CSV's text column and expose it as ``self.retriever``.

        Sets ``self.vector_store`` and ``self.retriever`` on success.

        Args:
            csv_path: Path to the CSV file to index.

        Raises:
            FileNotFoundError: If *csv_path* does not exist.
            KeyError: If the CSV has no ``Title`` column.
            ValueError: If the column contains no non-missing values.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"CSV file not found at {csv_path}")
        try:
            frame = pd.read_csv(csv_path)
            if self.TEXT_COLUMN not in frame.columns:
                raise KeyError(
                    f"Column '{self.TEXT_COLUMN}' not found in {csv_path}"
                )

            # Drop missing values: str(NaN) would otherwise be indexed as the
            # literal text "nan" and could be returned as context.
            titles = frame[self.TEXT_COLUMN].dropna()
            if titles.empty:
                raise ValueError(
                    f"No usable '{self.TEXT_COLUMN}' values found in {csv_path}"
                )

            # Series.items() yields (index label, value), so each document
            # keeps the row label of the CSV row it came from.
            docs = [
                Document(page_content=str(title), metadata={"index": idx})
                for idx, title in titles.items()
            ]

            splitter = CharacterTextSplitter(
                chunk_size=self.CHUNK_SIZE,
                chunk_overlap=self.CHUNK_OVERLAP,
            )
            split_docs = splitter.split_documents(docs)

            embeddings = HuggingFaceEmbeddings(
                model_name=self.EMBEDDING_MODEL_NAME
            )
            self.vector_store = FAISS.from_documents(split_docs, embeddings)
            self.retriever = self.vector_store.as_retriever()
        except Exception as exc:
            logger.error(f"Failed to setup RAG system: {exc}")
            raise

    def process_query(self, query):
        """Answer *query* from the documents the retriever returns for it.

        The retrieved page contents are joined with newlines and truncated to
        ``MAX_CONTEXT_CHARS`` characters before being passed to the pipeline.

        Args:
            query: The question text.

        Returns:
            The ``answer`` field of the pipeline's response, or
            ``ERROR_MESSAGE`` if retrieval or answering raises.
        """
        try:
            # `invoke` is the current retriever entry point;
            # `get_relevant_documents` is deprecated in LangChain.
            retrieved_docs = self.retriever.invoke(query)
            context = "\n".join(doc.page_content for doc in retrieved_docs)
            qa_input = {
                "question": query,
                "context": context[: self.MAX_CONTEXT_CHARS],
            }
            response = self.qa_pipeline(qa_input)
            return response["answer"]
        except Exception as exc:
            # Best-effort API: the caller gets a fixed message, so keep the
            # traceback in the log rather than discarding it.
            logger.exception(f"Query processing error: {exc}")
            return self.ERROR_MESSAGE