Spaces:
Sleeping
Sleeping
File size: 1,518 Bytes
420fa8a c59035e 420fa8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from sentence_transformers import SentenceTransformer
from setup.db_setup import get_mongo_client, get_mongo_url
_EMBEDDING_MODEL_NAME = "thenlper/gte-large"

# Holds the single loaded model under the key "model" so repeated calls to
# get_embedding() do not re-instantiate it.
_embedding_model_cache = {}


def _get_embedding_model():
    """Return the shared SentenceTransformer instance, loading it on first use."""
    if "model" not in _embedding_model_cache:
        _embedding_model_cache["model"] = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model_cache["model"]


def get_embedding(text: str) -> list[float]:
    """Encode *text* into an embedding vector.

    Args:
        text: The string to embed.

    Returns:
        The embedding converted to a plain list via ``tolist()``, or an empty
        list when *text* is empty or contains only whitespace.
    """
    # Validate before touching the model: an empty query should not pay the
    # cost of constructing it.
    if not text.strip():
        print("Attempted to get embedding for empty text.")
        return []
    embedding = _get_embedding_model().encode(text)
    return embedding.tolist()
def query_results(query, mongo_url, *, limit=5, num_candidates=150):
    """Run a ``$vectorSearch`` aggregation for *query* on the cities collection.

    The query text is embedded with ``get_embedding`` and matched against the
    ``embedding`` field of ``EU_Cities.EU_cities_collection`` through the
    ``vector_index`` search index.

    Args:
        query: Text to search for.
        mongo_url: Connection string passed to ``get_mongo_client``.
        limit: Value sent as the stage's ``limit`` (defaults to 5).
        num_candidates: Value sent as the stage's ``numCandidates``
            (defaults to 150).

    Returns:
        An iterable of result documents: the aggregation cursor, or an empty
        list when *query* produces no embedding (for example, blank text).
    """
    query_embedding = get_embedding(query)
    # get_embedding() returns [] for blank input; sending an empty
    # queryVector to the server is pointless, so skip the round trip (and the
    # client connection) entirely.
    if not query_embedding:
        return []

    mongo_client = get_mongo_client(mongo_url)
    collection = mongo_client["EU_Cities"].EU_cities_collection
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "embedding",
                "queryVector": query_embedding,
                "numCandidates": num_candidates,
                "limit": limit,
            }
        }
    ]
    return collection.aggregate(pipeline)
def get_search_result(query, mongo_url):
    """Format the documents matching *query* as one text block.

    Args:
        query: Text forwarded to ``query_results``.
        mongo_url: Connection string forwarded to ``query_results``.

    Returns:
        A string with one ``City: ..., Abstract: ...`` line per result, each
        terminated by a newline. Missing ``city`` / ``combined`` fields are
        rendered as ``N/A``. The string is empty when there are no results.
    """
    matches = query_results(query, mongo_url)
    print(matches)
    formatted_lines = [
        f"City: {doc.get('city', 'N/A')}, Abstract: {doc.get('combined', 'N/A')}\n"
        for doc in matches
    ]
    return "".join(formatted_lines)
def get_context(query: str) -> str:
    """Build the prompt text that pairs *query* with its search results.

    Args:
        query: The user's question.

    Returns:
        A string containing the query, an instruction line, and the formatted
        search results obtained from ``get_search_result``, followed by a
        period.
    """
    search_results = get_search_result(query, get_mongo_url())
    return (
        f"Query: {query}\n"
        "Continue to answer the query by using the Search Results:\n"
        f"{search_results}."
    )
|