# File size: 2,413 Bytes
# 3a16d21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import pandas as pd
from transformers import pipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from utils.logger import setup_logger
from utils.model_loader import ModelLoader

logger = setup_logger(__name__)

class RAGSystem:
    """Answer questions from one text column of a CSV file.

    The values of ``TEXT_COLUMN`` are embedded into a FAISS vector store.
    For each query the retriever's documents are joined into a context
    string, which is passed together with the question to a Hugging Face
    ``question-answering`` pipeline.

    The class-level constants below hold the model ids and tuning values;
    override them in a subclass to change them without touching the methods.
    """

    QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"
    EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
    TEXT_COLUMN = "Title"
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 100
    # Upper bound (in characters) on the context handed to the QA pipeline.
    MAX_CONTEXT_CHARS = 1000

    def __init__(self, csv_path="apparel.csv"):
        """Build the vector store from *csv_path* and load the QA pipeline.

        Args:
            csv_path: Path to the CSV file containing ``TEXT_COLUMN``.

        Raises:
            Exception: Any failure from ``setup_system`` or from loading the
                QA model is logged and re-raised unchanged.
        """
        try:
            self.setup_system(csv_path)
            self.qa_pipeline = ModelLoader.load_model_with_retry(
                self.QA_MODEL_NAME,
                pipeline,
                task="question-answering",
            )
        except Exception as e:
            logger.error(f"Failed to initialize RAGSystem: {str(e)}")
            raise

    @staticmethod
    def _read_texts(csv_path, column):
        """Return ``(row_index, text)`` pairs for non-missing values of *column*.

        Missing cells are dropped so they are not indexed as the literal
        string ``"nan"``. The row index is the label pandas assigned when
        reading the file, so it still points at the original CSV row.

        Raises:
            KeyError: If *column* is not present in the CSV.
        """
        frame = pd.read_csv(csv_path)
        return [(idx, str(value)) for idx, value in frame[column].dropna().items()]

    def _retrieve(self, query):
        """Fetch documents for *query* from ``self.retriever``.

        ``invoke`` is preferred because ``get_relevant_documents`` is
        deprecated in LangChain; the legacy method is kept as a fallback for
        retrievers that do not expose ``invoke``.
        """
        invoke = getattr(self.retriever, "invoke", None)
        if callable(invoke):
            return invoke(query)
        return self.retriever.get_relevant_documents(query)

    def setup_system(self, csv_path):
        """Read the CSV and create ``self.vector_store`` and ``self.retriever``.

        Args:
            csv_path: Path to the CSV file to index.

        Raises:
            FileNotFoundError: If *csv_path* does not exist.
            ValueError: If the CSV has no non-missing ``TEXT_COLUMN`` values.
            Exception: Any other failure while reading, splitting or
                embedding is logged and re-raised unchanged.
        """
        if not os.path.exists(csv_path):
            raise FileNotFoundError(f"CSV file not found at {csv_path}")

        try:
            texts = self._read_texts(csv_path, self.TEXT_COLUMN)
            if not texts:
                # Fail early with a readable message instead of letting the
                # vector store choke on an empty document list.
                raise ValueError(
                    f"No usable '{self.TEXT_COLUMN}' values found in {csv_path}"
                )

            docs = [
                Document(page_content=text, metadata={'index': idx})
                for idx, text in texts
            ]

            text_splitter = CharacterTextSplitter(
                chunk_size=self.CHUNK_SIZE,
                chunk_overlap=self.CHUNK_OVERLAP,
            )
            split_docs = text_splitter.split_documents(docs)

            embeddings = HuggingFaceEmbeddings(
                model_name=self.EMBEDDING_MODEL_NAME
            )
            self.vector_store = FAISS.from_documents(split_docs, embeddings)
            self.retriever = self.vector_store.as_retriever()
        except Exception as e:
            logger.error(f"Failed to setup RAG system: {str(e)}")
            raise

    def process_query(self, query):
        """Answer *query* using the retrieved documents as context.

        Args:
            query: The question text.

        Returns:
            The ``'answer'`` field of the QA pipeline's response, or a fixed
            failure message if retrieval or answering raises an exception
            (the error is logged, not propagated).
        """
        try:
            retrieved_docs = self._retrieve(query)
            # Join the hits and cap the length before handing it to the model.
            retrieved_text = "\n".join(
                doc.page_content for doc in retrieved_docs
            )[:self.MAX_CONTEXT_CHARS]

            qa_input = {
                "question": query,
                "context": retrieved_text,
            }
            response = self.qa_pipeline(qa_input)
            return response['answer']
        except Exception as e:
            logger.error(f"Query processing error: {str(e)}")
            return "Failed to process query due to an error."