Spaces:

nileshhanotia
/

Pepe_1

Build error

App Files Files Community

nileshhanotia committed on Nov 4, 2024

Commit

3a16d21

verified ·

1 Parent(s): bf23bc0

Create rag_system.py

Browse files

Files changed (1) hide show

rag_system.py +64 -0

rag_system.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import os
+import pandas as pd
+from transformers import pipeline
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.docstore.document import Document
+from utils.logger import setup_logger
+from utils.model_loader import ModelLoader
+# Module-level logger for this file, created by the project's setup_logger helper.
+logger = setup_logger(__name__)
+class RAGSystem:
+    def __init__(self, csv_path="apparel.csv"):
+        try:
+            self.setup_system(csv_path)
+            self.qa_pipeline = ModelLoader.load_model_with_retry(
+                "distilbert-base-cased-distilled-squad",
+                pipeline,
+                task="question-answering"
+            )
+        except Exception as e:
+            logger.error(f"Failed to initialize RAGSystem: {str(e)}")
+            raise
+    def setup_system(self, csv_path):
+        if not os.path.exists(csv_path):
+            raise FileNotFoundError(f"CSV file not found at {csv_path}")
+        try:
+            documents = pd.read_csv(csv_path)
+            docs = [
+                Document(
+                    page_content=str(row['Title']),
+                    metadata={'index': idx}
+                ) for idx, row in documents.iterrows()
+            ]
+            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+            split_docs = text_splitter.split_documents(docs)
+            embeddings = HuggingFaceEmbeddings(
+                model_name="sentence-transformers/all-MiniLM-L6-v2"
+            )
+            self.vector_store = FAISS.from_documents(split_docs, embeddings)
+            self.retriever = self.vector_store.as_retriever()
+        except Exception as e:
+            logger.error(f"Failed to setup RAG system: {str(e)}")
+            raise
+    def process_query(self, query):
+        try:
+            retrieved_docs = self.retriever.get_relevant_documents(query)
+            retrieved_text = "\n".join([doc.page_content for doc in retrieved_docs])[:1000]
+            qa_input = {
+                "question": query,
+                "context": retrieved_text
+            }
+            response = self.qa_pipeline(qa_input)
+            return response['answer']
+        except Exception as e:
+            logger.error(f"Query processing error: {str(e)}")
+            return "Failed to process query due to an error."