# Source: Hugging Face Space kartiksrma/SearchCourses
# (Space status when captured: Sleeping; file size: 4,270 bytes)

import streamlit as st
import pymongo
import requests
import time
from dotenv import load_dotenv
import os

# Pull configuration from a .env file (if present) into the process environment.
load_dotenv()

# Hosted feature-extraction endpoint used to turn text into an embedding.
embedding_url = (
    "https://api-inference.huggingface.co/pipeline/feature-extraction/"
    "sentence-transformers/all-MiniLM-L6-v2"
)

# Credentials and connection string come from the environment; either may be
# None when the corresponding variable is not set.
hf_token = os.environ.get("HF_TOKEN")
uri = os.environ.get("URI")
client = pymongo.MongoClient(uri)

try:
    db = client["av_courses"]
    collection = db["courses"]
    # Read one document as a connectivity smoke test; the value itself is not
    # used by the rest of this script.
    sample_doc = collection.find_one()
except Exception as exc:
    # Best-effort: report the problem on stdout and let the app keep loading.
    print(f"Error accessing the database or collection: {exc}")

# def generate_embedding(text: str) -> list[float]:

#   response = requests.post(
#     embedding_url,
#     headers={"Authorization": f"Bearer {hf_token}"},
#     json={"inputs": text})

#   if response.status_code != 200:
#     raise ValueError(f"Request failed with status code {response.status_code}: {response.text}")

#   return response.json()

def generate_embedding(
    text: str,
    max_retries: int = 5,
    retry_delay: float = 5.0,
    timeout: float = 30.0,
) -> list[float]:
    """Request an embedding for *text* from the hosted inference endpoint.

    The text is POSTed to ``embedding_url`` with the ``hf_token`` bearer
    token. A 503 response is treated as transient (presumably the hosted
    model is still loading) and retried; any other non-200 status fails
    immediately.

    Args:
        text: The text to embed.
        max_retries: Maximum number of requests to send before giving up.
        retry_delay: Seconds to wait between attempts after a 503.
        timeout: Seconds to wait for each HTTP request before aborting it.

    Returns:
        The decoded JSON body of the successful response (expected to be the
        embedding vector for *text*).

    Raises:
        ValueError: On a non-200/non-503 status, or when every attempt
            returned 503.
        requests.exceptions.RequestException: On network failures, including
            a request exceeding *timeout*.
    """
    for attempt in range(max_retries):
        response = requests.post(
            embedding_url,
            headers={"Authorization": f"Bearer {hf_token}"},
            json={"inputs": text},
            # Without a timeout a stalled connection would block forever.
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json()
        if response.status_code != 503:
            raise ValueError(
                f"Request failed with status code {response.status_code}: {response.text}"
            )
        # Only pause when another attempt will follow; sleeping after the
        # last attempt would just delay the error.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    raise ValueError("Failed to generate embedding after multiple retries.")


# for doc in collection.find({'title':{"$exists": True}}).limit(75):
#   doc['course_embedding_hf'] = generate_embedding(doc['merged_summary'])
#   collection.replace_one({'_id': doc['_id']}, doc)
#   print(doc['_id'], end=" ")


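# NOTE: one-off setup sketch, kept for reference only. The `$vectorSearch`
# stage in getSearchResults needs an Atlas Vector Search index named
# "CourseSemanticSearch" on `course_embedding_hf`; a GEOSPHERE index built with
# create_index, as below, cannot serve that stage. The snippet also targets
# `db.avcourses`, whereas the app reads from `db.courses`.
# db.avcourses.create_index(
#     [("course_embedding_hf", pymongo.GEOSPHERE)],
#     name="CourseSemanticSearch"
# )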


def getSearchResults(query: str, relevance_threshold: float) -> list[object]:
    """Return courses semantically similar to *query*, best match first.

    The query is embedded with ``generate_embedding`` and matched against the
    ``course_embedding_hf`` field through the ``CourseSemanticSearch`` vector
    index. Up to 10 hits are retrieved, then those scoring below
    *relevance_threshold* are dropped.

    Args:
        query: Free-text search query.
        relevance_threshold: Minimum ``vectorSearchScore`` a document must
            reach to be included.

    Returns:
        A list of matching course documents, each with an added ``score``
        field, sorted by descending score. The list is empty when nothing
        clears the threshold or when *query* is blank.
    """
    # A blank query has nothing meaningful to embed; skip the remote calls.
    if not query or not query.strip():
        return []

    query_embedding = generate_embedding(query)
    time.sleep(1)

    pipeline = [
        {
            "$vectorSearch": {
                "queryVector": query_embedding,
                "path": "course_embedding_hf",
                "numCandidates": 100,
                "limit": 10,
                "index": "CourseSemanticSearch",
            }
        },
        {"$addFields": {"score": {"$meta": "vectorSearchScore"}}},
        {"$match": {"score": {"$gte": relevance_threshold}}},
        {"$sort": {"score": -1}},
        # NOTE(review): the embedding is stored under `course_embedding_hf`,
        # so excluding `vector` may be a no-op -- confirm against the schema.
        {"$project": {"vector": 0}},
    ]

    # Materialize the cursor: a cursor object is always truthy, which makes
    # an `if results:` emptiness check in callers meaningless, and it can
    # only be iterated once.
    return list(collection.aggregate(pipeline))


# Streamlit UI: a query box, a relevance slider, and a result list.
st.title("Semantic Search Interface")
st.subheader("Find the best courses for your query")


def _render_course(course: dict) -> None:
    """Render one course document as a Markdown card with a link badge."""
    # Documents may lack some fields, so fall back instead of raising KeyError.
    missing = "N/A"
    # Built without leading indentation so multi-line field values cannot
    # push the text into a Markdown code block.
    st.markdown(
        f"### {course.get('title', 'Untitled course')}\n"
        f"- **Description:** {course.get('description', missing)}\n"
        f"- **Duration:** {course.get('duration', missing)}\n"
        f"- **Ratings:** {course.get('ratings', missing)}\n"
        f"- **Difficulty:** {course.get('difficulty', missing)}\n"
    )
    course_url = course.get("course_url")
    if course_url:
        # Plain Markdown image link; no raw HTML is involved, so
        # unsafe_allow_html is not needed.
        st.markdown(
            f"[![Go to Course](https://img.shields.io/badge/Go%20to%20Course-blue)]({course_url})"
        )
    st.markdown("---")


query = st.text_input("Enter your query here:")

# Slider for adjusting relevance threshold
slider_value = st.slider(
    "Adjust Relevance Threshold",
    min_value=0,
    max_value=100,
    value=60,
    step=1,
    help="Use this slider to adjust the minimum relevance score for results."
)
# Maps the 0-100 slider onto a score threshold of 0 to ~0.71.
# NOTE(review): the divisor 140 looks hand-tuned -- confirm the intended range.
relevance_threshold = slider_value / 140

if st.button("Search"):
    if not query or not query.strip():
        st.error("Please enter a query.")
    else:
        results = None
        with st.spinner("Fetching results..."):
            try:
                # list() makes the emptiness check below reliable even if the
                # search function hands back a lazy cursor.
                results = list(getSearchResults(query, relevance_threshold))
            except Exception as exc:
                # UI boundary: show the failure (API errors, rate limits,
                # database problems) instead of a raw traceback.
                st.error(f"Search failed, please try again in a few minutes: {exc}")

        if results:
            for course in results:
                _render_course(course)
        elif results is not None:
            st.warning("No matches found! Try adjusting the relevance slider or using different keywords.")