Emanuela Boros committed on
Commit
3df5bff
·
1 Parent(s): 8cda137

added confidence

Browse files
Files changed (1) hide show
  1. generic_nel.py +22 -10
generic_nel.py CHANGED
@@ -120,16 +120,28 @@ class NelPipeline(Pipeline):
120
  wikipedia_predictions = self.tokenizer.batch_decode(
121
  outputs.sequences, skip_special_tokens=True
122
  )
123
- print(len(outputs.sequences))
124
- print(outputs.sequences.shape)
125
- print(wikipedia_predictions)
126
- print(len(scores))
127
- for score in scores:
128
- print(score.shape)
129
- # Process scores and normalize
130
- scores_tensor = scores.clone().detach()
131
- probabilities = torch.exp(scores_tensor)
132
- percentages = (probabilities * 100.0).cpu().numpy().tolist()
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  # Return the predictions along with the extracted entity, lOffset, and rOffset
135
  return wikipedia_predictions, enclosed_entity, lOffset, rOffset, percentages
 
120
  wikipedia_predictions = self.tokenizer.batch_decode(
121
  outputs.sequences, skip_special_tokens=True
122
  )
123
+ # Initialize confidence list for each token
124
+ token_confidences = []
125
+
126
+ # Process the scores for each token
127
+ all_probabilities = []
128
+ import torch.nn.functional as F
129
+
130
+ # Process each score (logits for the generated tokens)
131
+ for i, score in enumerate(scores):
132
+ # Apply softmax to convert logits into probabilities
133
+ probabilities = F.softmax(score, dim=-1)
134
+
135
+ # Get the probabilities for the top tokens
136
+ top_probabilities = (
137
+ probabilities.cpu().numpy()
138
+ ) # Move to CPU and convert to NumPy
139
+
140
+ # Store the probabilities
141
+ all_probabilities.append(top_probabilities)
142
+
143
+ # Convert probabilities into percentages if needed
144
+ percentages = [(prob * 100.0).tolist() for prob in all_probabilities]
145
 
146
  # Return the predictions along with the extracted entity, lOffset, and rOffset
147
  return wikipedia_predictions, enclosed_entity, lOffset, rOffset, percentages