# django_spaces/inference/rag_process.py
# Author: sachin — commit d83c996 ("add sarvam translate"), 2.56 kB
from mistralai import Mistral
import requests
import numpy as np
import faiss
import os
# Mistral API key comes from the environment; defaults to "" when unset,
# so misconfiguration surfaces at request time, not import time.
api_key=os.getenv("MISTRAL_API_KEY", "")
# Shared client used by the embedding and chat helpers below.
client = Mistral(api_key=api_key)
def get_data():
    """Download the Paul Graham essay, cache it to essay.txt, and return it.

    Returns:
        The essay as a single string.

    Raises:
        requests.HTTPError: if the download fails (e.g. 404/500).
    """
    response = requests.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt')
    # Fail fast on HTTP errors instead of silently caching an error page.
    response.raise_for_status()
    text = response.text
    # Context manager guarantees the handle is closed even on error; explicit
    # encoding avoids platform-dependent defaults for non-ASCII characters.
    with open('essay.txt', 'w', encoding='utf-8') as f:
        f.write(text)
    return text
def create_chunks(text, chunk_size=2048):
    """Split text into fixed-size character chunks.

    Args:
        text: Source string to split.
        chunk_size: Maximum characters per chunk. Defaults to 2048, the
            value that was previously hard-coded.

    Returns:
        List of substrings in order; the last chunk may be shorter.
        Empty input yields an empty list.
    """
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
def get_text_embedding(input, model="mistral-embed"):
    """Embed a single text with the Mistral embeddings API.

    Args:
        input: Text to embed. (Name shadows the builtin but is kept
            unchanged for backward compatibility with keyword callers.)
        model: Embedding model name; default preserves the original
            hard-coded "mistral-embed", parameterized for consistency
            with run_mistral().

    Returns:
        The embedding vector (list of floats) of the first result.
    """
    embeddings_batch_response = client.embeddings.create(
        model=model,
        inputs=input,
    )
    # Only the first embedding is returned, so pass one text per call.
    return embeddings_batch_response.data[0].embedding
def load_vectors(chunks):
    """Embed every chunk and build a FAISS L2 index over the vectors.

    Embeds one chunk per API call via get_text_embedding().

    Args:
        chunks: List of text chunks; their order defines the index ids.

    Returns:
        A populated faiss.IndexFlatL2 over the chunk embeddings.
    """
    # FAISS requires contiguous float32 input; np.array over Python floats
    # would otherwise default to float64 and be rejected by index.add().
    text_embeddings = np.array(
        [get_text_embedding(chunk) for chunk in chunks], dtype=np.float32
    )
    d = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(text_embeddings)
    return index
def create_embed_for_question(question):
    """Embed the query as a (1, d) array suitable for FAISS search.

    Args:
        question: Query text to embed.

    Returns:
        numpy array of shape (1, d); float32 because FAISS expects
        float32 query vectors with a leading batch dimension.
    """
    return np.array([get_text_embedding(question)], dtype=np.float32)
def get_similar_chunks(index, question_embeddings, chunks, k=2):
    """Return the k chunks nearest to the query embedding.

    Debug prints from the original were removed.

    Args:
        index: FAISS index built over the embeddings of `chunks`.
        question_embeddings: (1, d) query array.
        chunks: Texts in the same order they were added to the index.
        k: Number of neighbors; default 2, the previously hard-coded value.

    Returns:
        List of the k nearest chunk texts, closest first.
    """
    # Distances are not needed; I holds the row indices of the neighbors.
    _, I = index.search(question_embeddings, k=k)
    return [chunks[i] for i in I.tolist()[0]]
def create_prompt(retrieved_chunk, question):
    """Assemble the RAG prompt: retrieved context followed by the query."""
    template = """
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query}
Answer:
"""
    # str.format applies str() to the chunk list, exactly as the original
    # f-string interpolation did.
    return template.format(context=retrieved_chunk, query=question)
def run_mistral(user_message, model="mistral-large-latest"):
    """Send a single-turn user message to a Mistral chat model.

    Args:
        user_message: Prompt text for the model.
        model: Chat model name.

    Returns:
        The text content of the first completion choice.
    """
    conversation = [{"role": "user", "content": user_message}]
    completion = client.chat.complete(model=model, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content
def main():
    """End-to-end RAG demo: fetch the essay, index it, answer one question."""
    essay_text = get_data()
    essay_chunks = create_chunks(text=essay_text)
    query = "What were the two main things the author worked on before college?"
    vector_index = load_vectors(chunks=essay_chunks)
    query_embedding = create_embed_for_question(question=query)
    context = get_similar_chunks(vector_index, query_embedding, essay_chunks)
    answer = run_mistral(create_prompt(context, query))
    print(answer)
# Guard the entry point so importing this module does not trigger the
# network-dependent pipeline.
if __name__ == "__main__":
    main()