Shreyas094 committed on
Commit
d221460
·
verified ·
1 Parent(s): 7ccb084

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -39,8 +39,6 @@ from typing import List, Dict, Tuple
39
  import datetime
40
  from abc import ABC, abstractmethod
41
  from typing import List, Dict, Any
42
- from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
43
-
44
 
45
  # Automatically get the current year
46
  CURRENT_YEAR = datetime.datetime.now().year
@@ -107,9 +105,8 @@ groq_client = Groq(api_key=GROQ_API_KEY)
107
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
108
  mistral_client = Mistral(api_key=MISTRAL_API_KEY)
109
 
110
- similarity_model = HuggingFaceInferenceAPIEmbeddings(
111
- api_key=HF_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
112
- )
113
 
114
  # Step 1: Create a base class for AI models
115
  class AIModel(ABC):
@@ -648,9 +645,9 @@ def rerank_documents(query: str, documents: List[Dict],
648
  bm25_scores = bm25.get_scores(query)
649
 
650
  # Step 4: Get semantic similarity scores
651
- query_embedding = similarity_model.embed_query(query)
652
  doc_summaries = [doc['summary'] for doc in valid_docs]
653
- doc_embeddings = similarity_model.embed_query(doc_summaries)
654
  semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
655
 
656
  # Step 5: Combine scores (normalize first)
@@ -682,11 +679,11 @@ def rerank_documents(query: str, documents: List[Dict],
682
  continue
683
 
684
  # Check similarity with already selected documents
685
- doc_embedding = similarity_model.embed_query(doc['summary'])
686
  is_similar = False
687
 
688
  for content in added_contents:
689
- content_embedding = similarity_model.embed_query(content)
690
  similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
691
  if similarity > similarity_threshold:
692
  is_similar = True
@@ -708,8 +705,8 @@ def rerank_documents(query: str, documents: List[Dict],
708
 
709
  def compute_similarity(text1, text2):
710
  # Encode the texts
711
- embedding1 = similarity_model.embed_query(text1)
712
- embedding2 = similarity_model.embed_query(text2)
713
 
714
  # Compute cosine similarity
715
  cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)
 
39
  import datetime
40
  from abc import ABC, abstractmethod
41
  from typing import List, Dict, Any
 
 
42
 
43
  # Automatically get the current year
44
  CURRENT_YEAR = datetime.datetime.now().year
 
105
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
106
  mistral_client = Mistral(api_key=MISTRAL_API_KEY)
107
 
108
+ # Initialize the similarity model
109
+ similarity_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
 
110
 
111
  # Step 1: Create a base class for AI models
112
  class AIModel(ABC):
 
645
  bm25_scores = bm25.get_scores(query)
646
 
647
  # Step 4: Get semantic similarity scores
648
+ query_embedding = similarity_model.encode(query)
649
  doc_summaries = [doc['summary'] for doc in valid_docs]
650
+ doc_embeddings = similarity_model.encode(doc_summaries)
651
  semantic_scores = util.cos_sim(query_embedding, doc_embeddings)[0]
652
 
653
  # Step 5: Combine scores (normalize first)
 
679
  continue
680
 
681
  # Check similarity with already selected documents
682
+ doc_embedding = similarity_model.encode(doc['summary'])
683
  is_similar = False
684
 
685
  for content in added_contents:
686
+ content_embedding = similarity_model.encode(content)
687
  similarity = util.pytorch_cos_sim(doc_embedding, content_embedding)
688
  if similarity > similarity_threshold:
689
  is_similar = True
 
705
 
706
  def compute_similarity(text1, text2):
707
  # Encode the texts
708
+ embedding1 = similarity_model.encode(text1)
709
+ embedding2 = similarity_model.encode(text2)
710
 
711
  # Compute cosine similarity
712
  cosine_similarity = util.pytorch_cos_sim(embedding1, embedding2)