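"""Minimal retrieval-augmented generation (RAG) demo using the Mistral API.

Downloads Paul Graham's essay, splits it into fixed-size chunks, embeds the
chunks with mistral-embed, indexes them in FAISS, retrieves the chunks nearest
to a question, and asks mistral-large-latest to answer from that context.
Requires the MISTRAL_API_KEY environment variable to be set.
"""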
from mistralai import Mistral
import requests
import numpy as np
import faiss
import os

api_key = os.getenv("MISTRAL_API_KEY", "")
client = Mistral(api_key=api_key)

def get_data():
    # Download Paul Graham's essay and cache a local copy.
    response = requests.get(
        "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    )
    response.raise_for_status()
    text = response.text

    with open("essay.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text

def create_chunks(text):
    # Split the text into fixed-size character chunks.
    chunk_size = 2048
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    return chunks

def get_text_embedding(text):
    # Embed a single string with Mistral's embedding model.
    embeddings_batch_response = client.embeddings.create(
        model="mistral-embed",
        inputs=text,
    )
    return embeddings_batch_response.data[0].embedding


def load_vectors(chunks):
    # Embed every chunk and load the vectors into a flat L2 FAISS index.
    # FAISS expects a float32 array.
    text_embeddings = np.array(
        [get_text_embedding(chunk) for chunk in chunks], dtype=np.float32
    )

    d = text_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(text_embeddings)
    return index

def create_embed_for_question(question):
    # index.search expects a 2-D float32 array, hence the extra dimension.
    question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)
    return question_embeddings

def get_similar_chunks(index, question_embeddings, chunks):
    # Retrieve the two chunks nearest to the question by L2 distance.
    D, I = index.search(question_embeddings, k=2)
    retrieved_chunks = [chunks[i] for i in I.tolist()[0]]
    return retrieved_chunks

def create_prompt(retrieved_chunks, question):
    # Assemble a grounded prompt: retrieved context first, then the query.
    context = "\n\n".join(retrieved_chunks)
    prompt = f"""
    Context information is below.
    ---------------------
    {context}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer:
    """
    return prompt


def run_mistral(user_message, model="mistral-large-latest"):
    # Send the assembled prompt to a Mistral chat model and return its reply.
    messages = [
        {"role": "user", "content": user_message}
    ]
    chat_response = client.chat.complete(
        model=model,
        messages=messages,
    )
    return chat_response.choices[0].message.content

def main():
    text = get_data()
    chunks = create_chunks(text=text)
    question = "What were the two main things the author worked on before college?"

    index = load_vectors(chunks=chunks)
    question_embeddings = create_embed_for_question(question=question)
    retrieved_chunks = get_similar_chunks(index, question_embeddings, chunks)
    prompt = create_prompt(retrieved_chunks, question)
    answer = run_mistral(prompt)
    print(answer)

if __name__ == "__main__":
    main()
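
# Example invocation (assuming this file is saved as rag_demo.py; the
# filename is illustrative, not part of the original script):
#   export MISTRAL_API_KEY=your_key
#   python rag_demo.py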