# RAG demo: retrieve relevant essay chunks with FAISS and answer with Mistral.
# Standard library
import os

# Third-party
import faiss
import numpy as np
import requests
from mistralai import Mistral

# API key comes from the environment; an empty default means a missing key
# fails at request time with an auth error, not at client construction.
api_key = os.getenv("MISTRAL_API_KEY", "")
client = Mistral(api_key=api_key)
def get_data():
    """Download the Paul Graham essay, cache it to essay.txt, and return it.

    Returns:
        str: the full essay text.

    Raises:
        requests.HTTPError: if the download returns a non-2xx status.
        requests.Timeout: if the server does not respond within 30s.
    """
    url = (
        'https://raw.githubusercontent.com/run-llama/llama_index/main/'
        'docs/docs/examples/data/paul_graham/paul_graham_essay.txt'
    )
    # Fail fast on network problems instead of silently caching an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    text = response.text
    # Context manager guarantees the file handle is closed even on error.
    with open('essay.txt', 'w', encoding='utf-8') as f:
        f.write(text)
    return text
def create_chunks(text, chunk_size=2048):
    """Split *text* into fixed-size character chunks.

    Args:
        text: the document to split.
        chunk_size: maximum characters per chunk (default 2048, the value
            previously hard-coded here).

    Returns:
        list[str]: consecutive, non-overlapping slices of *text*; the last
        chunk may be shorter. Empty input yields an empty list.
    """
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
def get_text_embedding(input):
    """Embed text with the mistral-embed model.

    Args:
        input: the text (or batch of texts) passed to the embeddings API;
            only the first embedding in the response is returned.

    Returns:
        The embedding vector of the first item in the API response.
    """
    response = client.embeddings.create(model="mistral-embed", inputs=input)
    return response.data[0].embedding
def load_vectors(chunks):
    """Embed every chunk and load the vectors into a FAISS L2 index.

    Args:
        chunks: list of text chunks to embed (one API call per chunk).

    Returns:
        faiss.IndexFlatL2: an exact (brute-force) L2 index whose row order
        matches *chunks*.
    """
    # Embeddings stack into an (n_chunks, d) matrix.
    text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
    # FAISS only accepts float32; np.array over Python floats defaults to
    # float64, which IndexFlatL2.add rejects.
    text_embeddings = text_embeddings.astype(np.float32)
    d = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(text_embeddings)
    return index
def create_embed_for_question(question):
    """Embed the user question as a 2-D array ready for FAISS search.

    Args:
        question: the query string.

    Returns:
        np.ndarray: shape (1, d) — FAISS search expects a batch dimension.
    """
    return np.array([get_text_embedding(question)])
def get_similar_chunks(index, question_embeddings, chunks, k=2):
    """Return the *k* chunks whose embeddings are closest to the question.

    Args:
        index: a FAISS index built over the chunk embeddings.
        question_embeddings: (1, d) query embedding batch.
        chunks: the original text chunks, aligned with the index rows.
        k: number of neighbours to retrieve (default 2, as before).

    Returns:
        list[str]: the retrieved chunks, nearest first.
    """
    # search returns (distances, indices), each shaped (n_queries, k);
    # distances are not needed for retrieval.
    distances, indices = index.search(question_embeddings, k=k)
    return [chunks[i] for i in indices.tolist()[0]]
def create_prompt(retrieved_chunk, question):
    """Build the RAG prompt: retrieved context followed by the query.

    Args:
        retrieved_chunk: the retrieved context chunks; interpolated via
            str(), so a list renders with its brackets (original behaviour).
        question: the user query.

    Returns:
        str: the prompt to send to the chat model.
    """
    prompt = f"""
    Context information is below.
    ---------------------
    {retrieved_chunk}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer:
    """
    return prompt
def run_mistral(user_message, model="mistral-large-latest"):
    """Send a single-turn user message to a Mistral chat model.

    Args:
        user_message: the prompt text.
        model: chat model name (defaults to "mistral-large-latest").

    Returns:
        str: the assistant's reply content.
    """
    conversation = [{"role": "user", "content": user_message}]
    reply = client.chat.complete(model=model, messages=conversation)
    return reply.choices[0].message.content
def main():
    """End-to-end RAG demo: fetch, chunk, embed, index, retrieve, answer."""
    text = get_data()
    chunks = create_chunks(text=text)
    question = "What were the two main things the author worked on before college?"
    index = load_vectors(chunks=chunks)
    question_embeddings = create_embed_for_question(question=question)
    retrieved_chunk = get_similar_chunks(index, question_embeddings, chunks)
    prompt = create_prompt(retrieved_chunk, question)
    answer = run_mistral(prompt)
    print(answer)


# Guard the entry point so importing this module doesn't trigger the
# network download and API calls above.
if __name__ == "__main__":
    main()