File size: 4,270 Bytes
7a37ee2 9451b58 7a37ee2 9451b58 7a37ee2 9451b58 7a37ee2 e795d36 7a37ee2 e795d36 7a37ee2 e795d36 98725f8 e795d36 98725f8 e795d36 98725f8 e795d36 7a37ee2 e795d36 01ec9c3 7a37ee2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import streamlit as st
import pymongo
import requests
import time
from dotenv import load_dotenv
import os
# file_path = '/content/free_courses.json'
# Load variables from a local .env file (if present) before reading them;
# `load_dotenv` was imported but never invoked.
load_dotenv()

# Connection settings come from the environment.
uri = os.getenv("URI")
hf_token = os.getenv("HF_TOKEN")

client = pymongo.MongoClient(uri)

# NOTE(review): the endpoint URL is empty in this copy of the file; it must be
# set to the embedding service URL before `generate_embedding` can succeed.
embedding_url = ""

db = client.av_courses
# NOTE(review): the collection name was missing in this copy of the file.
# `avcourses` is taken from the commented-out `db.avcourses.create_index(...)`
# call further down -- confirm it is the intended collection.
collection = db.avcourses

# Probe the collection once at start-up so connection/configuration problems
# are reported early. Only the query itself sits in the `try` so that
# `collection` stays defined for the rest of the module even if the probe fails.
try:
    # Just trying to fetch the first document (you can adjust the query)
    sample_doc = collection.find_one()
except Exception as e:
    print(f"Error accessing the database or collection: {e}")
# def generate_embedding(text: str) -> list[float]:
# response =
# embedding_url,
# headers={"Authorization": f"Bearer {hf_token}"},
# json={"inputs": text})
# if response.status_code != 200:
# raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")
# return response.json()
def generate_embedding(text: str) -> list[float]:
    """Request an embedding for *text* from the endpoint in ``embedding_url``.

    Sends a JSON POST of ``{"inputs": text}`` authorised with the module-level
    ``hf_token``. A 503 reply is retried after a 5-second pause, for at most
    five attempts in total; any other non-200 status fails immediately.

    Args:
        text: The string to embed.

    Returns:
        The decoded JSON body of the first 200 response. It is annotated as a
        list of floats; the exact shape depends on the endpoint -- TODO confirm.

    Raises:
        ValueError: If the endpoint answers with a status other than 200 or
            503, or if every attempt returned 503.
    """
    max_attempts = 5
    retry_delay_seconds = 5

    for _ in range(max_attempts):
        response = requests.post(
            embedding_url,
            headers={"Authorization": f"Bearer {hf_token}"},
            json={"inputs": text},
        )
        if response.status_code == 200:
            return response.json()
        if response.status_code != 503:
            # Not a retryable status: surface the server's reply right away.
            raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")
        # 503: presumably the service is temporarily unavailable; wait before retrying.
        time.sleep(retry_delay_seconds)

    raise ValueError("Failed to generate embedding after multiple retries.")
# for doc in collection.find({'title':{"$exists": True}}).limit(75):
# doc['course_embedding_hf'] = generate_embedding(doc['merged_summary'])
# collection.replace_one({'_id': doc['_id']}, doc)
# print(doc['_id'], end=" ")
# db.avcourses.create_index(
# [("course_embedding_hf", pymongo.GEOSPHERE)],
# name="CourseSemanticSearch"
# )
def getSearchResults(query: str, relevance_threshold: float) -> list[object]:
    """Run a vector search over the course collection for *query*.

    The query is embedded with ``generate_embedding`` and passed to an
    aggregation pipeline on the module-level ``collection`` that:

    1. runs ``$vectorSearch`` against the ``CourseSemanticSearch`` index on the
       ``course_embedding_hf`` field (100 candidates, top 10 kept),
    2. attaches the search score to each document as ``score``,
    3. keeps only documents whose score is at least ``relevance_threshold``,
    4. sorts them by descending score,
    5. excludes the ``vector`` field from the output.

    Args:
        query: Free-text search string.
        relevance_threshold: Minimum ``score`` a document needs to be returned.

    Returns:
        The matching documents as a list (empty when nothing passes the
        threshold).

    Raises:
        ValueError: Propagated from ``generate_embedding`` when the embedding
            request fails.
    """
    query_embedding = generate_embedding(query)

    pipeline = [
        {
            "$vectorSearch": {
                "queryVector": query_embedding,
                "path": "course_embedding_hf",
                "numCandidates": 100,
                "limit": 10,
                "index": "CourseSemanticSearch",
            }
        },
        {"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
        {"$match": {"score": {"$gte": relevance_threshold}}},
        {"$sort": {"score": -1}},
        # NOTE(review): the embedding is stored under `course_embedding_hf`,
        # yet the projection excludes `vector` -- confirm which field is meant.
        {"$project": {"vector": 0}},
    ]

    # Materialise the cursor: a cursor object is truthy even when it yields
    # nothing, so callers testing `if results:` need a real list, which also
    # matches the declared return type.
    return list(collection.aggregate(pipeline))
# Streamlit UI
# Page flow: a text box for the query, a slider that sets the minimum
# relevance score, and a button that runs the search and renders each hit.
st.title("Semantic Search Interface")
st.subheader("Find the best courses for your query")
query = st.text_input("Enter your query here:")

# Slider for adjusting relevance threshold
# NOTE(review): no min/max/initial value is visible in this copy of the file,
# so Streamlit's defaults for `st.slider` apply -- confirm the intended range.
slider_value = st.slider(
    "Adjust Relevance Threshold",
    help="Use this slider to adjust the minimum relevance score for results.",
)
# Scale the slider reading down to the score range used by the search.
# NOTE(review): the divisor 140 is unexplained in the source -- confirm.
relevance_threshold = slider_value/140

if st.button("Search"):
    if query:
        # Generate query embedding
        with st.spinner("Fetching results..."):
            # Materialise the results so the emptiness check below is
            # meaningful even if an iterator/cursor is returned.
            results = list(getSearchResults(query, relevance_threshold))
        if results:
            for course in results:
                # Built line by line so the markdown carries no stray
                # leading indentation.
                course_card = (
                    f"### {course['title']}\n"
                    f"- **Description:** {course['description']}\n"
                    f"- **Duration:** {course['duration']}\n"
                    f"- **Ratings:** {course['ratings']}\n"
                    f"- **Difficulty:** {course['difficulty']}\n"
                )
                st.markdown(course_card)
                # The badge image URL was empty in the source, which produced
                # malformed markdown (`[![Go to Course](](...)`); render a
                # plain link instead.
                st.markdown(
                    f"[Go to Course]({course['course_url']})",
                    unsafe_allow_html=True,
                )
        else:
            st.warning("No matches found! Try adjusting the relevance slider or using different keywords.")
        # except Exception as e:
        # st.markdown(f"rate limit for searching has been completed try after few minutes\n",e)
    else:
        st.error("Please enter a query.")