# Source: Hugging Face Space file app.py
# Author: Sharathhebbar24
# Commit: ec1a337 ("Update app.py", verified)
import os
import requests
import streamlit as st
import torch
# from transformers import AutoTokenizer, AutoModel
# from sentence_transformers import util
class SentenceSimiliarity():
    """Score how similar two sentences are using the Hugging Face Inference API.

    The instance stores the two sentences and the endpoint URL of the chosen
    model; ``results()`` posts them to that endpoint and returns a
    human-readable statement.

    Attributes:
        KEY: API token read from the ``HF_KEY`` environment variable, or
            ``None`` when the variable is not set.
        headers: HTTP headers sent with every request. Contains the
            ``Authorization`` header only when a token is available.
        model_name: Model identifier the endpoint URL was built from.
        sentence1: Source sentence the other one is compared against.
        sentence2: Sentence compared with ``sentence1``.
        api_url: Inference endpoint for ``model_name``.
    """

    API_ROOT = "https://api-inference.huggingface.co/models"

    # Models offered by ``model_selection``.
    AVAILABLE_MODELS = (
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
    )

    def __init__(self, model_name, sentence1, sentence2):
        """Store the sentences and build the request settings.

        Args:
            model_name: Hub model identifier, e.g.
                ``"sentence-transformers/all-MiniLM-L6-v2"``.
            sentence1: Source sentence.
            sentence2: Sentence to compare with ``sentence1``.
        """
        self.KEY = os.getenv("HF_KEY")
        # Without a token, skip the header instead of sending "Bearer None".
        self.headers = {"Authorization": f"Bearer {self.KEY}"} if self.KEY else {}
        self.model_name = model_name
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.api_url = f"{self.API_ROOT}/{model_name}"

    def model_selection(self):
        """Let the user pick a model in the Streamlit sidebar.

        Updates ``model_name`` and rebuilds ``api_url`` so later requests go
        to the newly selected model.
        """
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )
        self.api_url = f"{self.API_ROOT}/{self.model_name}"

    def query(self, payload, timeout=30):
        """POST ``payload`` to the model endpoint and return the decoded JSON.

        Args:
            payload: JSON-serialisable request body.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The decoded JSON response. If the request fails or the body is
            not valid JSON, a dict of the form ``{"error": "<description>"}``
            is returned instead of raising.
        """
        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
                timeout=timeout,
            )
            # The status code is deliberately not checked: the body is
            # decoded either way so an error payload reaches the caller.
            return response.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decoding failures on older `requests`.
            return {"error": f"{type(exc).__name__}: {exc}"}

    def results(self):
        """Query the API and describe the similarity of the two sentences.

        Returns:
            A sentence stating the similarity as a percentage, or a message
            explaining why the score could not be computed.
        """
        scores = self.query({
            "inputs": {
                "source_sentence": self.sentence1,
                "sentences": [
                    self.sentence2,
                ]
            },
        })
        return self._format_scores(scores)

    @staticmethod
    def _format_scores(scores):
        """Turn a raw API response into the statement shown to the user."""
        if (
            isinstance(scores, list)
            and scores
            and isinstance(scores[0], (int, float))
        ):
            return f"The sentence has {scores[0] * 100:.2f}% similarity"
        # Anything else is a failure; surface the "error" field if present.
        if isinstance(scores, dict) and "error" in scores:
            detail = scores["error"]
        else:
            detail = f"unexpected response: {scores!r}"
        return f"Could not compute similarity: {detail}"
class UI():
    """Streamlit front end for the sentence similarity checker.

    Creating an instance writes the page title and caption; ``result()``
    draws the input widgets and, once the button is pressed, shows the
    similarity statement.
    """

    # Models offered in the sidebar selector.
    AVAILABLE_MODELS = (
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
    )

    def __init__(self):
        """Write the page title and caption."""
        st.title("Sentence Similiarity Checker")
        st.caption("You can use this for checking similarity between resume and job description")

    def get(self):
        """Draw the input widgets and store their current values on ``self``.

        Sets ``model_name``, ``sentence1``, ``sentence2`` and ``button``.
        """
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )
        self.sentence1 = st.text_area(
            label="Sentence 1",
            help="This is a parent text the next text will be compared with this text"
        )
        self.sentence2 = st.text_area(
            label="Sentence 2",
            help="This is a child text"
        )
        self.button = st.button(
            label="Check",
            help='Check Sentence Similarity'
        )

    def result(self):
        """Draw the widgets and show the similarity once "Check" is pressed.

        Nothing is sent to the backend until the button is pressed, and a
        warning is shown instead when either text area is blank.
        """
        self.get()
        if not self.button:
            return
        # Skip the API call when there is nothing to compare.
        if not self.sentence1.strip() or not self.sentence2.strip():
            st.warning("Please enter text in both Sentence 1 and Sentence 2.")
            return
        ss = SentenceSimiliarity(self.model_name, self.sentence1, self.sentence2)
        st.text(ss.results())
# Script entry point: build the page (title and caption), then draw the
# input widgets and show the similarity result when the button is pressed.
ui = UI()
ui.result()