celise88 committed on
Commit
793cdd0
·
1 Parent(s): d8f6ecc

clean up code

Browse files
Files changed (2) hide show
  1. main.py +5 -3
  2. match_utils.py +9 -16
main.py CHANGED
@@ -13,7 +13,8 @@ from fastapi.staticfiles import StaticFiles
13
  from fastapi.responses import HTMLResponse
14
  import pandas as pd
15
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
16
- from match_utils import neighborhoods, get_resume, coSkillEmbed, sim_result_loop, skillNER
 
17
 
18
  # APP SETUP
19
  app = FastAPI()
@@ -60,8 +61,9 @@ def get_matches(request: Request):
60
  # POST
61
  @app.post('/find-my-match/', response_class=HTMLResponse)
62
  async def post_matches(request: Request, resume: UploadFile = File(...)):
 
63
  resume = get_resume(resume)
64
- embeds = await coSkillEmbed(resume)
65
- simResults = await sim_result_loop(embeds)
66
  skills = await skillNER(resume)
 
 
67
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
 
13
  from fastapi.responses import HTMLResponse
14
  import pandas as pd
15
  from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
16
+ from match_utils import neighborhoods, get_resume, skillNER, sim_result_loop
17
+ import time
18
 
19
  # APP SETUP
20
  app = FastAPI()
 
61
  # POST
62
  @app.post('/find-my-match/', response_class=HTMLResponse)
63
  async def post_matches(request: Request, resume: UploadFile = File(...)):
64
+ t = time.time()
65
  resume = get_resume(resume)
 
 
66
  skills = await skillNER(resume)
67
+ simResults = await sim_result_loop(resume)
68
+ print(time.time() - t)
69
  return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
match_utils.py CHANGED
@@ -46,7 +46,6 @@ async def neighborhoods(jobtitle=None):
46
  subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
47
  title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
48
  return f'{logo}{title}<br>{subtitle}'
49
-
50
  fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
51
  title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
52
  fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
@@ -63,7 +62,7 @@ def get_resume(resume):
63
  resume = "\n".join(text)
64
  return resume
65
 
66
- async def coSkillEmbed(text):
67
  try:
68
  co = cohere.Client(os.getenv("COHERE_TOKEN"))
69
  response = co.embed(
@@ -73,12 +72,14 @@ async def coSkillEmbed(text):
73
  except CohereError as e:
74
  return e
75
 
76
- async def sim_result_loop(embeds):
 
77
  def cosine(A, B):
78
  return np.dot(A,B)/(norm(A)*norm(B))
 
 
79
  simResults = []
80
- for i in range(len(simdat)):
81
- simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
82
  simResults = pd.DataFrame(simResults)
83
  simResults['JobTitle'] = simdat['Title']
84
  simResults = simResults.iloc[:,[1,0]]
@@ -88,7 +89,7 @@ async def sim_result_loop(embeds):
88
  simResults = simResults.iloc[1:,:]
89
  simResults.reset_index(drop=True, inplace=True)
90
  for x in range(len(simResults)):
91
- simResults.iloc[x,1] = "{:0.2f}".format(simResults.iloc[x,1])
92
  return simResults
93
 
94
  async def skillNER(resume):
@@ -97,7 +98,6 @@ async def skillNER(resume):
97
  clean_text = clean_text.replace('-', " ").replace("/"," ")
98
  clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
99
  return clean_text
100
-
101
  resume = clean_my_text(resume)
102
  stops = set(nltk.corpus.stopwords.words('english'))
103
  stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
@@ -107,13 +107,6 @@ async def skillNER(resume):
107
  resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
108
  resume = [word for word in resume if ")" not in word]
109
  resume = [word for word in resume if "(" not in word]
110
-
111
- labels = []
112
- for i in range(len(resume)):
113
- classification = classifier(resume[i])[0]['label']
114
- if classification == 'LABEL_1':
115
- labels.append("Skill")
116
- else:
117
- labels.append("Not Skill")
118
- skills = dict(zip(resume, labels))
119
  return skills
 
46
  subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
47
  title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
48
  return f'{logo}{title}<br>{subtitle}'
 
49
  fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
50
  title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
51
  fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
 
62
  resume = "\n".join(text)
63
  return resume
64
 
65
+ def coSkillEmbed(text):
66
  try:
67
  co = cohere.Client(os.getenv("COHERE_TOKEN"))
68
  response = co.embed(
 
72
  except CohereError as e:
73
  return e
74
 
75
+ async def sim_result_loop(resume):
76
+ embeds = coSkillEmbed(resume)
77
  def cosine(A, B):
78
  return np.dot(A,B)/(norm(A)*norm(B))
79
+ def format_sim(sim):
80
+ return "{:0.2f}".format(sim)
81
  simResults = []
82
+ [simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:]))) for i in range(len(simdat))]
 
83
  simResults = pd.DataFrame(simResults)
84
  simResults['JobTitle'] = simdat['Title']
85
  simResults = simResults.iloc[:,[1,0]]
 
89
  simResults = simResults.iloc[1:,:]
90
  simResults.reset_index(drop=True, inplace=True)
91
  for x in range(len(simResults)):
92
+ simResults.iloc[x,1] = format_sim(simResults.iloc[x,1])
93
  return simResults
94
 
95
  async def skillNER(resume):
 
98
  clean_text = clean_text.replace('-', " ").replace("/"," ")
99
  clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
100
  return clean_text
 
101
  resume = clean_my_text(resume)
102
  stops = set(nltk.corpus.stopwords.words('english'))
103
  stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
 
107
  resume = [word for word in SpaceTokenizer().tokenize(resume) if word not in stops]
108
  resume = [word for word in resume if ")" not in word]
109
  resume = [word for word in resume if "(" not in word]
110
+ skills = {}
111
+ [skills.update({word : "Skill"}) if classifier(word)[0]['label'] == 'LABEL_1' else skills.update({word: "Not Skill"}) for word in resume]
 
 
 
 
 
 
 
112
  return skills