Spaces:
Runtime error
Runtime error
clean up code
Browse files
- main.py +5 -3
- match_utils.py +9 -16
main.py
CHANGED
@@ -13,7 +13,8 @@ from fastapi.staticfiles import StaticFiles
|
|
13 |
from fastapi.responses import HTMLResponse
|
14 |
import pandas as pd
|
15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
16 |
-
from match_utils import neighborhoods, get_resume,
|
|
|
17 |
|
18 |
# APP SETUP
|
19 |
app = FastAPI()
|
@@ -60,8 +61,9 @@ def get_matches(request: Request):
|
|
60 |
# POST
|
61 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
62 |
async def post_matches(request: Request, resume: UploadFile = File(...)):
|
|
|
63 |
resume = get_resume(resume)
|
64 |
-
embeds = await coSkillEmbed(resume)
|
65 |
-
simResults = await sim_result_loop(embeds)
|
66 |
skills = await skillNER(resume)
|
|
|
|
|
67 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
|
|
|
13 |
from fastapi.responses import HTMLResponse
|
14 |
import pandas as pd
|
15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
16 |
+
from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop
|
17 |
+
import time
|
18 |
|
19 |
# APP SETUP
|
20 |
app = FastAPI()
|
|
|
61 |
# POST
|
62 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
63 |
async def post_matches(request: Request, resume: UploadFile = File(...)):
|
64 |
+
t = time.time()
|
65 |
resume = get_resume(resume)
|
|
|
|
|
66 |
skills = await skillNER(resume)
|
67 |
+
simResults = await sim_result_loop(resume)
|
68 |
+
print(time.time() - t)
|
69 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
|
match_utils.py
CHANGED
@@ -46,7 +46,6 @@ async def neighborhoods(jobtitle=None):
|
|
46 |
subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
|
47 |
title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
|
48 |
return f'{logo}{title}<br>{subtitle}'
|
49 |
-
|
50 |
fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
|
51 |
title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
|
52 |
fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
|
@@ -63,7 +62,7 @@ def get_resume(resume):
|
|
63 |
resume = "\n".join(text)
|
64 |
return resume
|
65 |
|
66 |
-
async def coSkillEmbed(text):
|
67 |
try:
|
68 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
69 |
response = co.embed(
|
@@ -73,12 +72,14 @@ async def coSkillEmbed(text):
|
|
73 |
except CohereError as e:
|
74 |
return e
|
75 |
|
76 |
-
async def sim_result_loop(embeds):
|
|
|
77 |
def cosine(A, B):
|
78 |
return np.dot(A,B)/(norm(A)*norm(B))
|
|
|
|
|
79 |
simResults = []
|
80 |
-
for i in range(len(simdat)):
|
81 |
-
simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
|
82 |
simResults = pd.DataFrame(simResults)
|
83 |
simResults['JobTitle'] = simdat['Title']
|
84 |
simResults = simResults.iloc[:,[1,0]]
|
@@ -88,7 +89,7 @@ async def sim_result_loop(embeds):
|
|
88 |
simResults = simResults.iloc[1:,:]
|
89 |
simResults.reset_index(drop=True, inplace=True)
|
90 |
for x in range(len(simResults)):
|
91 |
-
simResults.iloc[x,1] =
|
92 |
return simResults
|
93 |
|
94 |
async def skillNER(resume):
|
@@ -97,7 +98,6 @@ async def skillNER(resume):
|
|
97 |
clean_text = clean_text.replace('-', " ").replace("/"," ")
|
98 |
clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
|
99 |
return clean_text
|
100 |
-
|
101 |
resume = clean_my_text(resume)
|
102 |
stops = set(nltk.corpus.stopwords.words('english'))
|
103 |
stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
|
@@ -107,13 +107,6 @@ async def skillNER(resume):
|
|
107 |
resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
|
108 |
resume = [word for word in resume if ")" not in word]
|
109 |
resume = [word for word in resume if "(" not in word]
|
110 |
-
|
111 |
-
|
112 |
-
for i in range(len(resume)):
|
113 |
-
classification = classifier(resume[i])[0]['label']
|
114 |
-
if classification == 'LABEL_1':
|
115 |
-
labels.append("Skill")
|
116 |
-
else:
|
117 |
-
labels.append("Not Skill")
|
118 |
-
skills = dict(zip(resume, labels))
|
119 |
return skills
|
|
|
46 |
subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
|
47 |
title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
|
48 |
return f'{logo}{title}<br>{subtitle}'
|
|
|
49 |
fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
|
50 |
title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
|
51 |
fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
|
|
|
62 |
resume = "\n".join(text)
|
63 |
return resume
|
64 |
|
65 |
+
def coSkillEmbed(text):
|
66 |
try:
|
67 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
68 |
response = co.embed(
|
|
|
72 |
except CohereError as e:
|
73 |
return e
|
74 |
|
75 |
+
async def sim_result_loop(resume):
|
76 |
+
embeds = coSkillEmbed(resume)
|
77 |
def cosine(A, B):
|
78 |
return np.dot(A,B)/(norm(A)*norm(B))
|
79 |
+
def format_sim(sim):
|
80 |
+
return "{:0.2f}".format(sim)
|
81 |
simResults = []
|
82 |
+
[simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:]))) for i in range(len(simdat))]
|
|
|
83 |
simResults = pd.DataFrame(simResults)
|
84 |
simResults['JobTitle'] = simdat['Title']
|
85 |
simResults = simResults.iloc[:,[1,0]]
|
|
|
89 |
simResults = simResults.iloc[1:,:]
|
90 |
simResults.reset_index(drop=True, inplace=True)
|
91 |
for x in range(len(simResults)):
|
92 |
+
simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
|
93 |
return simResults
|
94 |
|
95 |
async def skillNER(resume):
|
|
|
98 |
clean_text = clean_text.replace('-', " ").replace("/"," ")
|
99 |
clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
|
100 |
return clean_text
|
|
|
101 |
resume = clean_my_text(resume)
|
102 |
stops = set(nltk.corpus.stopwords.words('english'))
|
103 |
stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
|
|
|
107 |
resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
|
108 |
resume = [word for word in resume if ")" not in word]
|
109 |
resume = [word for word in resume if "(" not in word]
|
110 |
+
skills = {}
|
111 |
+
[skills.update({word : "Skill"}) if classifier(word)[0]['label'] == 'LABEL_1' else skills.update({word: "Not Skill"}) for word in resume]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
return skills
|