Emanuela Boros
committed on
Commit
·
0ed70a2
1
Parent(s):
f9cb0bf
confidence doesn't work properly
Browse files- generic_nel.py +21 -27
generic_nel.py
CHANGED
@@ -115,9 +115,9 @@ class NelPipeline(Pipeline):
|
|
115 |
output_scores=True,
|
116 |
)
|
117 |
# Decode the predictions into readable text
|
118 |
-
|
119 |
outputs.sequences, skip_special_tokens=True
|
120 |
-
)
|
121 |
# Process the scores for each token
|
122 |
|
123 |
transition_scores = self.model.compute_transition_scores(
|
@@ -127,12 +127,12 @@ class NelPipeline(Pipeline):
|
|
127 |
|
128 |
# Calculate the probability for the entire sequence by exponentiating the sum of log probabilities
|
129 |
sequence_confidence = torch.exp(log_prob_sum)
|
130 |
-
|
131 |
|
132 |
-
print(
|
133 |
|
134 |
# Return the predictions along with the extracted entity, lOffset, and rOffset
|
135 |
-
return
|
136 |
|
137 |
def _forward(self, inputs):
|
138 |
return inputs
|
@@ -157,27 +157,21 @@ class NelPipeline(Pipeline):
|
|
157 |
# ], # This can be improved with a real API call to get the QID
|
158 |
# "confidence_nel": np.round(percentages[i], 2),
|
159 |
# }
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
"type": "UNK",
|
177 |
-
"confidence_nel": round(percentages[idx], 2),
|
178 |
-
"lOffset": lOffset,
|
179 |
-
"rOffset": rOffset,
|
180 |
-
}
|
181 |
-
)
|
182 |
print(results)
|
183 |
return results
|
|
|
115 |
output_scores=True,
|
116 |
)
|
117 |
# Decode the predictions into readable text
|
118 |
+
wikipedia_prediction = self.tokenizer.batch_decode(
|
119 |
outputs.sequences, skip_special_tokens=True
|
120 |
+
)[0]
|
121 |
# Process the scores for each token
|
122 |
|
123 |
transition_scores = self.model.compute_transition_scores(
|
|
|
127 |
|
128 |
# Calculate the probability for the entire sequence by exponentiating the sum of log probabilities
|
129 |
sequence_confidence = torch.exp(log_prob_sum)
|
130 |
+
percentage = sequence_confidence.cpu().numpy() * 100.0
|
131 |
|
132 |
+
# print(wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage)
|
133 |
|
134 |
# Return the predictions along with the extracted entity, lOffset, and rOffset
|
135 |
+
return wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage
|
136 |
|
137 |
def _forward(self, inputs):
|
138 |
return inputs
|
|
|
157 |
# ], # This can be improved with a real API call to get the QID
|
158 |
# "confidence_nel": np.round(percentages[i], 2),
|
159 |
# }
|
160 |
+
wikipedia_prediction, enclosed_entity, lOffset, rOffset, percentage = outputs
|
161 |
+
qid, language = get_wikipedia_page_props(wikipedia_prediction)
|
162 |
+
title, url = get_wikipedia_title(qid, language="en")
|
163 |
+
results = [
|
164 |
+
{
|
165 |
+
# "id": f"{lOffset}:{rOffset}:{enclosed_entity}:{NEL_MODEL}",
|
166 |
+
"surface": enclosed_entity,
|
167 |
+
"wkd_id": qid,
|
168 |
+
"wkpedia_pagename": title,
|
169 |
+
"wkpedia_url": url,
|
170 |
+
"type": "UNK",
|
171 |
+
"confidence_nel": round(percentage, 2),
|
172 |
+
"lOffset": lOffset,
|
173 |
+
"rOffset": rOffset,
|
174 |
+
}
|
175 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
print(results)
|
177 |
return results
|