Emanuela Boros committed on
Commit
0ed70a2
·
1 Parent(s): f9cb0bf

confidence doesn't work properly

Browse files
Files changed (1) hide show
  1. generic_nel.py +21 -27
generic_nel.py CHANGED
@@ -115,9 +115,9 @@ class NelPipeline(Pipeline):
115
  output_scores=True,
116
  )
117
  # Decode the predictions into readable text
118
- wikipedia_predictions = self.tokenizer.batch_decode(
119
  outputs.sequences, skip_special_tokens=True
120
- )
121
  # Process the scores for each token
122
 
123
  transition_scores = self.model.compute_transition_scores(
@@ -127,12 +127,12 @@ class NelPipeline(Pipeline):
127
 
128
  # Calculate the probability for the entire sequence by exponentiating the sum of log probabilities
129
  sequence_confidence = torch.exp(log_prob_sum)
130
- percentages = sequence_confidence.cpu().numpy() * 100.0
131
 
132
- print(wikipedia_predictions, enclosed_entity, lOffset, rOffset, [percentages])
133
 
134
  # Return the predictions along with the extracted entity, lOffset, and rOffset
135
- return wikipedia_predictions, enclosed_entity, lOffset, rOffset, [percentages]
136
 
137
  def _forward(self, inputs):
138
  return inputs
@@ -157,27 +157,21 @@ class NelPipeline(Pipeline):
157
  # ], # This can be improved with a real API call to get the QID
158
  # "confidence_nel": np.round(percentages[i], 2),
159
  # }
160
- wikipedia_predictions, enclosed_entity, lOffset, rOffset, percentages = outputs
161
- results = []
162
- for idx, wikipedia_name in enumerate(wikipedia_predictions):
163
- # Get QID
164
- qid, language = get_wikipedia_page_props(wikipedia_name)
165
- # print(f"{wikipedia_name} -- QID: {qid}")
166
-
167
- # Get Wikipedia title and URL
168
- wkpedia_pagename, url = get_wikipedia_title(qid, language)
169
- results.append(
170
- {
171
- # "id": f"{lOffset}:{rOffset}:{enclosed_entity}:{NEL_MODEL}",
172
- "surface": enclosed_entity,
173
- "wkd_id": qid,
174
- "wkpedia_pagename": wkpedia_pagename,
175
- "wkpedia_url": url,
176
- "type": "UNK",
177
- "confidence_nel": round(percentages[idx], 2),
178
- "lOffset": lOffset,
179
- "rOffset": rOffset,
180
- }
181
- )
182
  print(results)
183
  return results
 
115
  output_scores=True,
116
  )
117
  # Decode the predictions into readable text
118
+ wikipedia_prediction = self.tokenizer.batch_decode(
119
  outputs.sequences, skip_special_tokens=True
120
+ )[0]
121
  # Process the scores for each token
122
 
123
  transition_scores = self.model.compute_transition_scores(
 
127
 
128
  # Calculate the probability for the entire sequence by exponentiating the sum of log probabilities
129
  sequence_confidence = torch.exp(log_prob_sum)
130
+ percentage = sequence_confidence.cpu().numpy() * 100.0
131
 
132
+ # print(wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage)
133
 
134
  # Return the predictions along with the extracted entity, lOffset, and rOffset
135
+ return wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage
136
 
137
  def _forward(self, inputs):
138
  return inputs
 
157
  # ], # This can be improved with a real API call to get the QID
158
  # "confidence_nel": np.round(percentages[i], 2),
159
  # }
160
+ wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage = outputs
161
+ qid, language = get_wikipedia_page_props(wikipedia_prediction)
162
+ title, url = get_wikipedia_title(qid, language="en")
163
+ results = [
164
+ {
165
+ # "id": f"{lOffset}:{rOffset}:{enclosed_entity}:{NEL_MODEL}",
166
+ "surface": enclosed_entity,
167
+ "wkd_id": qid,
168
+ "wkpedia_pagename": title,
169
+ "wkpedia_url": url,
170
+ "type": "UNK",
171
+ "confidence_nel": round(percentage, 2),
172
+ "lOffset": lOffset,
173
+ "rOffset": rOffset,
174
+ }
175
+ ]
 
 
 
 
 
 
176
  print(results)
177
  return results