# rakeshkumar1812's picture
# Upload three files for url RAG
# 5103377 verified
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import OpenAIEmbeddings
from langchain_cohere import CohereEmbeddings
from langchain_openai import OpenAI
from langchain_community.document_loaders.web_base import WebBaseLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.vectorstores.faiss import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# API keys below can be read with os.getenv (which yields None when unset).
load_dotenv()
GEMINI_API_KEY = os.getenv("GOOGLE_AI_API_KEY")
HF_API_KEY = os.getenv("HF_API_KEY")

# Chat model shared by every helper in this module.
llm_gemini = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model="gemini-pro")

# Embedding client used to vectorise document chunks for FAISS.
# The checkpoint is selected through `model_name` (not `model`), and the
# sentence-transformers checkpoint id is "all-MiniLM-L6-v2" (letter L, digit 6).
embeddings_hf = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_API_KEY,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# OPEN_AI_API_KEY = os.getenv("OPEN_AI_API_KEY")
# COHERE_API_KEY = os.getenv("COHERE_API_KEY")
# llm_openai = OpenAI(api_key=OPEN_AI_API_KEY, model="gpt-3.5-turbo")
# embeddings_open_ai = OpenAIEmbeddings(api_key=OPEN_AI_API_KEY) # OPEN_AI
# embeddings_cohere = CohereEmbeddings(api_key=COHERE_API_KEY,model="embed-multilingual-v3.0") # embed-english-v3.0
def ask_gemini(prompt):
    """Send ``prompt`` to the module-level Gemini chat model and return its reply.

    Args:
        prompt: Input forwarded unchanged to ``llm_gemini.invoke``.

    Returns:
        The ``content`` attribute of the message returned by the model.
    """
    return llm_gemini.invoke(prompt).content
def rag_with_url(target_url, prompt):
    """Answer ``prompt`` with Gemini, using text retrieved from ``target_url``.

    The page is loaded, split into overlapping chunks, and embedded into a
    FAISS vector store. The documents the retriever returns for ``prompt``
    are appended to the prompt before it is sent to the chat model.

    Args:
        target_url: Address handed to ``WebBaseLoader``.
        prompt: The user's question; used both as the retrieval query and as
            the leading part of the final model input.

    Returns:
        The ``content`` attribute of Gemini's reply to the augmented prompt.
    """
    raw_documents = WebBaseLoader(target_url).load()

    # 1000-character chunks with a 200-character overlap, measured with len().
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = splitter.split_documents(raw_documents)

    # A fresh vector store is built from the chunks on every call.
    retriever = FAISS.from_documents(chunks, embeddings_hf).as_retriever()

    # Retrievers are Runnables: `invoke` is the supported entry point, while
    # `get_relevant_documents` is deprecated in langchain-core.
    relevant_documents = retriever.invoke(prompt)

    # Question first, then the retrieved passages, all separated by spaces.
    context = " ".join(doc.page_content for doc in relevant_documents)
    final_prompt = prompt + " " + context

    return llm_gemini.invoke(final_prompt).content
# def rag_with_pdf(file_path, prompt):
# loader = PyPDFLoader(file_path)
# raw_document = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200, length_function = len)
# splited_document = text_splitter.split_documents(raw_document)
# vector_store = FAISS.from_documents(splited_document, embeddings_hf)
# retriever = vector_store.as_retriever()
# relevant_documents = retriever.get_relevant_documents(prompt)
# final_prompt = prompt + " " + " ".join([doc.page_content for doc in relevant_documents])
# AI_Respose = llm_gemini.invoke(final_prompt)
# return AI_Respose.content, relevant_documents