File size: 1,518 Bytes
420fa8a
c59035e
420fa8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from sentence_transformers import SentenceTransformer
from setup.db_setup import get_mongo_client, get_mongo_url


_EMBEDDING_MODEL_NAME = "thenlper/gte-large"

# Lazily-created SentenceTransformer shared by every get_embedding() call.
_embedding_model = None


def _get_embedding_model():
    """Return the shared SentenceTransformer, constructing it on first use."""
    global _embedding_model
    if _embedding_model is None:
        _embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model


def get_embedding(text: str) -> list[float]:
    """Return the embedding of *text* as a plain list of floats.

    Args:
        text: The string to embed.

    Returns:
        The result of the model's ``encode(text)`` converted with
        ``.tolist()``, or an empty list when *text* is empty or contains
        only whitespace.
    """
    # Check for blank input before touching the model: constructing a
    # SentenceTransformer is far more expensive than this test, and the
    # blank-text path never needs it.
    if not text.strip():
        print("Attempted to get embedding for empty text.")
        return []

    # The model is built once and reused instead of being re-created on
    # every call.
    return _get_embedding_model().encode(text).tolist()


def query_results(query, mongo_url, num_candidates=150, limit=5):
    """Run a ``$vectorSearch`` aggregation for *query* on the EU cities data.

    Args:
        query: Text to search for; it is embedded with ``get_embedding``.
        mongo_url: Passed unchanged to ``get_mongo_client``.
        num_candidates: Value sent as the stage's ``numCandidates`` field.
            Defaults to 150, the value that used to be hard-coded.
        limit: Value sent as the stage's ``limit`` field. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        An iterable of result documents: whatever ``aggregate`` returns,
        or an empty list when *query* yields no embedding.
    """
    query_embedding = get_embedding(query)
    if not query_embedding:
        # get_embedding() returns [] for blank text. An empty queryVector
        # cannot match the index's vector dimensions, so skip the database
        # round trip instead of sending a search that cannot succeed.
        return []

    # NOTE(review): assumes get_mongo_client returns a pymongo MongoClient
    # (subscript -> database, attribute -> collection) -- confirm.
    mongo_client = get_mongo_client(mongo_url)
    collection = mongo_client["EU_Cities"].EU_cities_collection

    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": limit,
            }
        }
    ]
    return collection.aggregate(pipeline)


def get_search_result(query, mongo_url):
    """Render the search hits for *query* as a single block of text.

    Each document returned by ``query_results`` contributes one line of the
    form ``City: <city>, Abstract: <combined>`` terminated by a newline;
    ``'N/A'`` is used for a missing ``city`` or ``combined`` key.

    Args:
        query: Text to search for.
        mongo_url: Forwarded to ``query_results``.

    Returns:
        The concatenated lines, or an empty string when there are no hits.
    """
    hits = query_results(query, mongo_url)
    print(hits)

    return "".join(
        f"City: {result.get('city', 'N/A')}, Abstract: {result.get('combined', 'N/A')}\n"
        for result in hits
    )


def get_context(query: str) -> str:
    """Build the prompt text that pairs *query* with its search results.

    The Mongo URL is obtained from ``get_mongo_url`` and the formatted hits
    from ``get_search_result``; both are combined into one string.

    Args:
        query: The user's question.

    Returns:
        A string containing the query, an instruction line, and the
        formatted search results followed by a period.
    """
    source_information = get_search_result(query, get_mongo_url())
    return f"Query: {query}\nContinue to answer the query by using the Search Results:\n{source_information}."