File size: 4,567 Bytes
ec1a337 f652b33 ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 30933bd ec1a337 e228ec7 ec1a337 30933bd ec1a337 30933bd ec1a337 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import os
import requests
import streamlit as st
import torch
# from transformers import AutoTokenizer, AutoModel
# from sentence_transformers import util
class SentenceSimiliarity():
    """Score how similar two sentences are via the Hugging Face Inference API.

    The first sentence is sent as the source sentence and the second as the
    only candidate; the first score in the response is reported as a
    percentage.

    Args:
        model_name: Hub id of the model to query, e.g.
            "sentence-transformers/all-MiniLM-L6-v2".
        sentence1: Reference ("parent") text.
        sentence2: Text compared against ``sentence1``.
    """

    API_ROOT = "https://api-inference.huggingface.co/models"

    # Seconds to wait for the API. requests applies no timeout by default,
    # so a stalled connection would otherwise hang the app.
    REQUEST_TIMEOUT = 30

    AVAILABLE_MODELS = (
        # "distilbert-base-uncased",
        # "bert-base-uncased",
        "sentence-transformers/all-MiniLM-L6-v2",
        "sentence-transformers/all-mpnet-base-v2",
        "sentence-transformers/distiluse-base-multilingual-cased-v2",
        "intfloat/e5-small",
        "intfloat/e5-base",
        "intfloat/e5-large-v2",
        "intfloat/multilingual-e5-base",
        # "togethercomputer/m2-bert-80M-32k-retrieval",
        # "togethercomputer/m2-bert-80M-8k-retrieval",
        # "togethercomputer/m2-bert-80M-2k-retrieval",
    )

    def __init__(self, model_name, sentence1, sentence2):
        self.KEY = os.getenv("HF_KEY")
        # Send the Authorization header only when a key is configured, so an
        # unset HF_KEY does not become the literal token "Bearer None".
        self.headers = {"Authorization": f"Bearer {self.KEY}"} if self.KEY else {}
        self.model_name = model_name
        self.sentence1 = sentence1
        self.sentence2 = sentence2
        self.api_url = f"{self.API_ROOT}/{model_name}"

    def model_selection(self):
        """Let the user pick a model in the Streamlit sidebar.

        Stores the choice in ``self.model_name`` and points ``self.api_url``
        at it, so later queries go to the selected model.
        """
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=self.AVAILABLE_MODELS,
        )
        # Keep the endpoint in sync; otherwise the selection would have no
        # effect on which model query() talks to.
        self.api_url = f"{self.API_ROOT}/{self.model_name}"

    def query(self, payload):
        """POST ``payload`` to the model endpoint and return the decoded JSON.

        Args:
            payload: JSON-serialisable request body.

        Returns:
            The decoded JSON response body.

        Raises:
            requests.RequestException: On network failure or timeout.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        # raise_for_status() is deliberately not called: the body of a failed
        # request is still decoded so results() can report its "error" text.
        return response.json()

    def results(self):
        """Return a human-readable similarity statement for the two sentences.

        Returns:
            "The sentence has NN.NN% similarity" on success, otherwise a
            message starting with "Could not compute similarity" that
            describes what went wrong.
        """
        payload = {
            "inputs": {
                "source_sentence": self.sentence1,
                "sentences": [
                    self.sentence2,
                ]
            },
        }
        try:
            scores = self.query(payload)
        except (requests.RequestException, ValueError) as err:
            return f"Could not compute similarity: {err}"

        # A JSON object instead of a list of scores is treated as an error
        # report (e.g. {"error": "..."}).
        if isinstance(scores, dict):
            return f"Could not compute similarity: {scores.get('error', scores)}"

        try:
            score = float(scores[0])
        except (IndexError, KeyError, TypeError, ValueError):
            return f"Could not compute similarity: unexpected response {scores!r}"
        return f"The sentence has {score * 100:.2f}% similarity"
class UI():
    """Streamlit front end for the sentence similarity checker.

    Creating an instance draws the page title and caption; ``result`` draws
    the input widgets and, once "Check" is pressed, the similarity statement.
    """

    def __init__(self):
        st.title("Sentence Similiarity Checker")
        st.caption("You can use this for checking similarity between resume and job description")

    def get(self):
        """Render the input widgets and store their current values on ``self``.

        Sets ``model_name`` (sidebar select box), ``sentence1`` and
        ``sentence2`` (text areas) and ``button`` (the "Check" button's
        return value).
        """
        available_models = [
            # "distilbert-base-uncased",
            # "bert-base-uncased",
            "sentence-transformers/all-MiniLM-L6-v2",
            "sentence-transformers/all-mpnet-base-v2",
            "sentence-transformers/distiluse-base-multilingual-cased-v2",
            "intfloat/e5-small",
            "intfloat/e5-base",
            "intfloat/e5-large-v2",
            "intfloat/multilingual-e5-base",
            # "togethercomputer/m2-bert-80M-32k-retrieval",
            # "togethercomputer/m2-bert-80M-8k-retrieval",
            # "togethercomputer/m2-bert-80M-2k-retrieval",
        ]
        self.model_name = st.sidebar.selectbox(
            label="Select Your Models",
            options=available_models,
        )
        self.sentence1 = st.text_area(
            label="Sentence 1",
            help="This is a parent text the next text will be compared with this text"
        )
        self.sentence2 = st.text_area(
            label="Sentence 2",
            help="This is a child text"
        )
        self.button = st.button(
            label="Check",
            help='Check Sentence Similarity'
        )

    def result(self):
        """Draw the widgets and, when "Check" is pressed, show the score.

        Nothing is sent to the API unless the button was pressed and both
        text areas contain non-blank text.
        """
        self.get()
        if not self.button:
            return

        # Blank input cannot produce a meaningful score, so ask for text
        # instead of spending an API call on it.
        if not self.sentence1.strip() or not self.sentence2.strip():
            st.warning("Please enter both sentences before checking.")
            return

        ss = SentenceSimiliarity(self.model_name, self.sentence1, self.sentence2)
        st.text(ss.results())
# Script entry point: constructing UI draws the title and caption, and
# result() draws the input widgets and shows the similarity statement once
# the "Check" button has been pressed.
ui = UI()
ui.result()
|