asimokby committed on
Commit
5f11666
1 Parent(s): f19f36a

update parser

Browse files
Files changed (1) hide show
  1. ResumeParser.py +14 -10
ResumeParser.py CHANGED
@@ -84,26 +84,30 @@ class ResumeParser:
84
  self.parsed_cv["Job History"] = job_history
85
 
86
  def get_job_titles(self, resume_segment):
87
- classes = ["organization", "institution", "job title", "role"]
88
  idx_line = []
89
  for idx, line in enumerate(resume_segment):
90
  has_verb = False
91
- sentence = self.models.get_flair_sentence(line)
 
92
  self.tagger.predict(sentence)
 
93
  for entity in sentence.get_spans('pos'):
 
94
  if entity.tag.startswith("V"):
95
  has_verb = True
96
- break
97
- if not has_verb:
98
- out = self.zero_shot_classifier(line, classes)
99
- class_score = zip(out["labels"], out["scores"])
100
- highest = sorted(class_score, key=lambda x: x[1])[-1]
101
 
102
- if highest[0] == "job title":
103
- idx_line.append((idx, line))
 
 
 
 
 
 
 
104
 
105
  return idx_line
106
-
107
 
108
  def get_job_dates(self, st, end, resume_segment):
109
  search_span = resume_segment[st:end]
 
84
  self.parsed_cv["Job History"] = job_history
85
 
86
  def get_job_titles(self, resume_segment):
87
+ classes = ["organization", "institution", "company", "job title", "work details"]
88
  idx_line = []
89
  for idx, line in enumerate(resume_segment):
90
  has_verb = False
91
+ line_modifed = ''.join(i for i in line if not i.isdigit())
92
+ sentence = self.models.get_flair_sentence(line_modifed)
93
  self.tagger.predict(sentence)
94
+ tags = []
95
  for entity in sentence.get_spans('pos'):
96
+ tags.append(entity.tag)
97
  if entity.tag.startswith("V"):
98
  has_verb = True
 
 
 
 
 
99
 
100
+ most_common_tag = max(set(tags), key=tags.count)
101
+ if most_common_tag == "NNP":
102
+ if not has_verb:
103
+ out = self.zero_shot_classifier(line, classes)
104
+ class_score = zip(out["labels"], out["scores"])
105
+ highest = sorted(class_score, key=lambda x: x[1])[-1]
106
+
107
+ if highest[0] == "job title":
108
+ idx_line.append((idx, line))
109
 
110
  return idx_line
 
111
 
112
  def get_job_dates(self, st, end, resume_segment):
113
  search_span = resume_segment[st:end]