Spaces:
Sleeping
Sleeping
File size: 1,736 Bytes
991df01 6d4a490 991df01 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import pandas as pd
import openai
from data import data as df
import numpy as np
import os
# Configure the OpenAI client with the key read from the environment variable
# named "openai"; os.environ.get yields None when that variable is not set.
openai.api_key = os.environ.get("openai")
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Args:
        a: Array-like numeric vector.
        b: Array-like numeric vector with the same length as *a*.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when the product of
        the norms is zero (a zero or empty vector), where the ratio is
        undefined.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if norm_product == 0:
        # Avoid the 0/0 division, which would otherwise produce NaN (plus a
        # RuntimeWarning) and leak into any ranking built on this score.
        return 0.0
    return dot_product / norm_product
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of *text* from ``openai.embeddings.create``.

    Newlines in *text* are replaced by spaces before the request. Input that
    does not support ``replace("\\n", " ")`` (for example ``None`` or a
    number) is sent unchanged.

    Args:
        text: Text to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list.
    """
    try:
        text = text.replace("\n", " ")
    except (AttributeError, TypeError):
        # Best-effort normalization only: non-string input passes through
        # as-is instead of aborting the request.
        pass
    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
def get_embedding2(text, model="text-embedding-ada-002"):
    """Request an embedding via ``openai.Embedding.create``, pausing on failure.

    Newlines in *text* are replaced by spaces before the request. Input that
    does not support ``replace("\\n", " ")`` is sent unchanged.

    Args:
        text: Text to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``['data'][0]['embedding']`` entry of the response, or ``None``
        when the request (or reading the response) raised; in that case the
        function first sleeps for two seconds.
    """
    # Imported locally because the failure path needs `time`, which the
    # module's import block does not provide.
    import time

    try:
        text = text.replace("\n", " ")
    except (AttributeError, TypeError):
        # Best-effort normalization only: non-string input passes through.
        pass
    try:
        response = openai.Embedding.create(input=[text], model=model)
        return response['data'][0]['embedding']
    except Exception:
        # NOTE(review): there is no retry after the pause, so the caller gets
        # None. Also, this uses `openai.Embedding.create` while get_embedding
        # uses `openai.embeddings.create` -- confirm which SDK interface the
        # installed openai package actually exposes.
        time.sleep(2)
        return None
def search_cv(search, nb=3, pprint=True):
    """Return the rows of ``df`` whose embeddings best match *search*.

    Args:
        search: Query text; it is embedded with ``get_embedding``.
        nb: Number of rows to keep; coerced with ``int``.
        pprint: Unused by this function; kept so existing callers that pass
            it keep working.

    Returns:
        A copy of ``df`` with an added ``similarities`` column, sorted by
        descending similarity and truncated to the first *nb* rows.
    """
    query_embedding = get_embedding(search, model='text-embedding-ada-002')

    def safe_similarity(candidate):
        # A row whose embedding cannot be compared scores 0 instead of
        # aborting the whole search. Only ordinary errors are absorbed, so
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            return cosine_similarity(candidate, query_embedding)
        except Exception:
            return 0

    ranked = df.copy()
    ranked['similarities'] = ranked.embedding.apply(safe_similarity)
    return ranked.sort_values('similarities', ascending=False).head(int(nb))
def get_cv(text, nb):
    """Format the best matches for *text* as a single Markdown string.

    Each record returned by ``search_cv(text, nb)`` becomes one section: a
    heading with the similarity as a percentage rounded to two decimals, the
    record's summary with ``#`` characters removed, a link built from the
    record's ``url``, and a dashed separator. Backticks are removed from the
    final text.

    Args:
        text: Query text forwarded to ``search_cv``.
        nb: Number of matches forwarded to ``search_cv``.

    Returns:
        The concatenated sections, or an empty string when there are no
        records.
    """
    separator = "\n\n-----------------------------------------------------------------------------------------------------\n\n"
    sections = []
    for record in search_cv(text, nb).to_dict(orient="records"):
        percentage = str(round(record["similarities"] * 100, 2))
        summary = str(record["summary"]).replace("#", "")
        link = "\n\n[-> Lien vers le CV complet](" + str(record["url"]) + ')'
        sections.append(
            "#### Candidat avec " + percentage + "% de similarité :\n"
            + summary + link + separator
        )
    return "".join(sections).replace("`", "")