Ankur Goyal committed on
Commit
3dc6de3
·
1 Parent(s): e5251a8

Remove custom pipeline

Browse files
config.json CHANGED
@@ -6,12 +6,6 @@
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
- "custom_pipelines": {
10
- "document-question-answering": {
11
- "impl": "pipeline_document_question_answering.DocumentQuestionAnsweringPipeline",
12
- "pt": "AutoModelForQuestionAnswering"
13
- }
14
- },
15
  "eos_token_id": 2,
16
  "gradient_checkpointing": false,
17
  "hidden_act": "gelu",
 
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
 
 
 
 
 
 
9
  "eos_token_id": 2,
10
  "gradient_checkpointing": false,
11
  "hidden_act": "gelu",
pipeline_document_question_answering.py DELETED
@@ -1,377 +0,0 @@
1
- # NOTE: This code is currently under review for inclusion in the main
2
- # huggingface/transformers repository:
3
- # https://github.com/huggingface/transformers/pull/18414
4
- from typing import List, Optional, Tuple, Union
5
-
6
- import numpy as np
7
-
8
- from transformers.utils import add_end_docstrings, is_torch_available, logging
9
- from transformers.pipelines.base import PIPELINE_INIT_ARGS, Pipeline
10
- from .qa_helpers import select_starts_ends, Image, load_image, VISION_LOADED, pytesseract, TESSERACT_LOADED
11
-
12
-
13
- if is_torch_available():
14
- import torch
15
-
16
- # We do not perform the check in this version of the pipeline code
17
- # from transformers.models.auto.modeling_auto import MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING
18
-
19
- logger = logging.get_logger(__name__)
20
-
21
-
22
- # normalize_box() and apply_tesseract() are derived from apply_tesseract in models/layoutlmv3/feature_extraction_layoutlmv3.py.
23
- # However, because the pipeline may evolve from what layoutlmv3 currently does, it's copied (vs. imported) to avoid creating an
24
- # unnecessary dependency.
25
def normalize_box(box, width, height):
    """Rescale a pixel-space box onto a 0-1000 grid.

    Args:
        box: Indexable of at least four numbers; entries 0 and 2 are divided by
            `width`, entries 1 and 3 by `height`.
        width: Divisor for the horizontal coordinates.
        height: Divisor for the vertical coordinates.

    Returns:
        A list of four ints, each `int(1000 * (coordinate / dimension))`.
    """
    # Horizontal and vertical coordinates alternate, so the divisors do as well.
    divisors = (width, height, width, height)
    return [int(1000 * (box[position] / divisor)) for position, divisor in enumerate(divisors)]
32
-
33
-
34
def apply_tesseract(image: "Image.Image", lang: Optional[str], tesseract_config: Optional[str]):
    """Applies Tesseract OCR on a document image, and returns recognized words + normalized bounding boxes.

    Args:
        image: The document image; its `size` attribute supplies the (width, height) used for normalization.
        lang: Forwarded to `pytesseract.image_to_data` as `lang`.
        tesseract_config: Forwarded to `pytesseract.image_to_data` as `config`.

    Returns:
        A `(words, normalized_boxes)` tuple of equal-length lists. Entries whose text is empty or
        whitespace-only are dropped; each box is `[left, top, left + width, top + height]` passed
        through `normalize_box()`.
    """
    # apply OCR
    data = pytesseract.image_to_data(image, lang=lang, output_type="dict", config=tesseract_config)
    words, left, top, width, height = data["text"], data["left"], data["top"], data["width"], data["height"]

    # Collect the indices worth keeping once. The previous implementation tested
    # `idx not in <list>` for every element of five lists, which is quadratic in the
    # number of OCR entries.
    kept_indices = [idx for idx, word in enumerate(words) if word.strip()]

    image_width, image_height = image.size

    # Turn coordinates into (left, top, left+width, top+height) format and normalize them.
    # Words and boxes are built from the same index list, so they always line up.
    normalized_boxes = [
        normalize_box(
            [left[idx], top[idx], left[idx] + width[idx], top[idx] + height[idx]],
            image_width,
            image_height,
        )
        for idx in kept_indices
    ]
    words = [words[idx] for idx in kept_indices]

    return words, normalized_boxes
64
-
65
-
66
@add_end_docstrings(PIPELINE_INIT_ARGS)
class DocumentQuestionAnsweringPipeline(Pipeline):
    # TODO: Update task_summary docs to include an example with document QA and then update the first sentence
    """
    Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`. See the [question answering
    examples](../task_summary#question-answering) for more information.

    This document question answering pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"document-question-answering"`.

    The models that this pipeline can use are models that have been fine-tuned on a document question answering task.
    See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=document-question-answering).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The model-type check is intentionally skipped in this version of the pipeline code.
        # self.check_model_type(MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING)

    def _sanitize_parameters(
        self,
        padding=None,
        doc_stride=None,
        max_question_len=None,
        lang: Optional[str] = None,
        tesseract_config: Optional[str] = None,
        max_answer_len=None,
        max_seq_len=None,
        top_k=None,
        handle_impossible_answer=None,
        **kwargs,
    ):
        """
        Split the call-time keyword arguments into per-stage parameter dicts.

        Only arguments that are not `None` are forwarded, so the defaults declared on `preprocess()` and
        `postprocess()` apply otherwise.

        Returns:
            A `(preprocess_params, forward_params, postprocess_params)` tuple; `forward_params` is always empty.

        Raises:
            ValueError: If `top_k` or `max_answer_len` is provided and smaller than 1.
        """
        preprocess_params, postprocess_params = {}, {}
        if padding is not None:
            preprocess_params["padding"] = padding
        if doc_stride is not None:
            preprocess_params["doc_stride"] = doc_stride
        if max_question_len is not None:
            preprocess_params["max_question_len"] = max_question_len
        if max_seq_len is not None:
            preprocess_params["max_seq_len"] = max_seq_len
        if lang is not None:
            preprocess_params["lang"] = lang
        if tesseract_config is not None:
            preprocess_params["tesseract_config"] = tesseract_config

        if top_k is not None:
            if top_k < 1:
                raise ValueError(f"top_k parameter should be >= 1 (got {top_k})")
            postprocess_params["top_k"] = top_k
        if max_answer_len is not None:
            if max_answer_len < 1:
                # The message previously lacked its closing parenthesis.
                raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len})")
            postprocess_params["max_answer_len"] = max_answer_len
        if handle_impossible_answer is not None:
            postprocess_params["handle_impossible_answer"] = handle_impossible_answer

        return preprocess_params, {}, postprocess_params

    def __call__(
        self,
        image: Union["Image.Image", str],
        question: Optional[str] = None,
        word_boxes: Optional[List[Tuple[str, List[float]]]] = None,
        **kwargs,
    ):
        """
        Answer the question(s) given as inputs by using the document(s). A document is defined as an image and an
        optional list of (word, box) tuples which represent the text in the document. If the `word_boxes` are not
        provided, it will use the Tesseract OCR engine (if available) to extract the words and boxes automatically.

        You can invoke the pipeline several ways:

        - `pipeline(image=image, question=question)`
        - `pipeline(image=image, question=question, word_boxes=word_boxes)`
        - `pipeline([{"image": image, "question": question}])`
        - `pipeline([{"image": image, "question": question, "word_boxes": word_boxes}])`

        Args:
            image (`str` or `PIL.Image`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. If given a single image, it can be
                broadcasted to multiple questions.
            question (`str`):
                A question to ask of the document.
            word_boxes (`List[Tuple[str, Tuple[float, float, float, float]]]`, *optional*):
                A list of (word, bounding box) pairs, with boxes normalized 0->1000. If you provide this optional
                input, then the pipeline will use these words and boxes instead of running OCR on the image to
                derive them. This allows you to reuse OCR'd results across many invocations of the pipeline without
                having to re-run it each time.
            top_k (`int`, *optional*, defaults to 1):
                The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                top_k answers if there are not enough options available within the context.
            doc_stride (`int`, *optional*):
                Striding is not implemented yet: passing any value raises a `ValueError` during preprocessing.
            max_answer_len (`int`, *optional*, defaults to 15):
                The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
            max_seq_len (`int`, *optional*):
                Forwarded to the tokenizer as `max_length`.
            max_question_len (`int`, *optional*, defaults to 64):
                Accepted for parity with the question answering pipeline; currently unused by `preprocess()`.
            handle_impossible_answer (`bool`, *optional*, defaults to `False`):
                Whether or not we accept impossible as an answer.
            lang (`str`, *optional*):
                Language to use while running OCR. Defaults to english.
            tesseract_config (`str`, *optional*):
                Additional flags to pass to tesseract while running OCR.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **score** (`float`) -- The probability associated to the answer.
            - **start** (`int`) -- The start word index of the answer (in the OCR'd version of the input or provided
              `word_boxes`).
            - **end** (`int`) -- The end word index of the answer (in the OCR'd version of the input or provided
              `word_boxes`).
            - **answer** (`str`) -- The answer to the question.
        """
        if isinstance(question, str):
            inputs = {"question": question, "image": image}
            if word_boxes is not None:
                inputs["word_boxes"] = word_boxes
        else:
            # No string question: `image` is expected to already carry the full input
            # (e.g. a list of {"image": ..., "question": ...} dicts).
            inputs = image
        return super().__call__(inputs, **kwargs)

    def preprocess(
        self,
        input,
        padding="do_not_pad",
        doc_stride=None,
        max_question_len=64,
        max_seq_len=None,
        word_boxes: Optional[List[Tuple[str, List[float]]]] = None,
        lang=None,
        tesseract_config="",
    ):
        """
        Turn one `{"question", "image", "word_boxes"}` input into model-ready features.

        Words and boxes are taken, in order of precedence, from `input["word_boxes"]`, from the feature extractor
        output (when it returns `words` and `boxes`), or from Tesseract OCR on the image.

        Returns:
            A dict with the tokenizer encoding (plus image features and `bbox`) and the bookkeeping entries
            `p_mask`, `word_ids` and `words` consumed by `_forward()` / `postprocess()`.

        Raises:
            ValueError: If `doc_stride` is set, if a required optional dependency (PIL, pytesseract) is missing,
                if neither an image nor `word_boxes` is provided, if the tokenizer does not pad on the right, or
                if the framework is TensorFlow.
        """
        # NOTE: This code mirrors the code in question answering and will be implemented in a follow up PR
        # to support documents with enough tokens that overflow the model's window
        # if max_seq_len is None:
        #     # TODO: LayoutLM's stride is 512 by default. Is it ok to use that as the min
        #     # instead of 384 (which the QA model uses)?
        #     max_seq_len = min(self.tokenizer.model_max_length, 512)

        if doc_stride is not None:
            # TODO implement
            # doc_stride = min(max_seq_len // 2, 128)
            raise ValueError("Unsupported: striding inputs")

        image = None
        image_features = {}
        if input.get("image", None) is not None:
            if not VISION_LOADED:
                raise ValueError(
                    "If you provide an image, then the pipeline will run process it with PIL (Pillow), but"
                    " PIL is not available. Install it with pip install Pillow."
                )
            image = load_image(input["image"])
            if self.feature_extractor is not None:
                image_features.update(self.feature_extractor(images=image, return_tensors=self.framework))

        words, boxes = None, None
        if "word_boxes" in input:
            words = [x[0] for x in input["word_boxes"]]
            boxes = [x[1] for x in input["word_boxes"]]
        elif "words" in image_features and "boxes" in image_features:
            words = image_features.pop("words")
            boxes = image_features.pop("boxes")
        elif image is not None:
            if not TESSERACT_LOADED:
                raise ValueError(
                    "If you provide an image without word_boxes, then the pipeline will run OCR using Tesseract, but"
                    " pytesseract is not available. Install it with pip install pytesseract."
                )
            words, boxes = apply_tesseract(image, lang=lang, tesseract_config=tesseract_config)
        else:
            raise ValueError(
                "You must provide an image or word_boxes. If you provide an image, the pipeline will automatically run"
                " OCR to derive words and boxes"
            )

        if self.tokenizer.padding_side != "right":
            raise ValueError(
                "Document question answering only supports tokenizers whose padding side is 'right', not"
                f" {self.tokenizer.padding_side}"
            )

        encoding = self.tokenizer(
            text=input["question"].split(),
            text_pair=words,
            padding=padding,
            max_length=max_seq_len,
            stride=doc_stride,
            return_token_type_ids=True,
            is_split_into_words=True,
            return_tensors=self.framework,
            # TODO: In a future PR, use these feature to handle sequences whose length is longer than
            # the maximum allowed by the model. Currently, the tokenizer will produce a sequence that
            # may be too long for the model to handle.
            # truncation="only_second",
            # return_overflowing_tokens=True,
        )
        encoding.update(image_features)

        # TODO: For now, this should always be num_spans == 1 given the flags we've passed in above, but the
        # code is written to naturally handle multiple spans at the right time.
        num_spans = len(encoding["input_ids"])

        # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
        # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
        # This logic mirrors the logic in the question_answering pipeline
        p_mask = [[tok != 1 for tok in encoding.sequence_ids(span_id)] for span_id in range(num_spans)]
        for span_idx in range(num_spans):
            input_ids_span_idx = encoding["input_ids"][span_idx]
            # keep the cls_token unmasked (some models use it to indicate unanswerable questions)
            if self.tokenizer.cls_token_id is not None:
                cls_indices = np.nonzero(np.array(input_ids_span_idx) == self.tokenizer.cls_token_id)[0]
                for cls_index in cls_indices:
                    p_mask[span_idx][cls_index] = 0

        # For each span, place a bounding box [0,0,0,0] for question and CLS tokens, [1000,1000,1000,1000]
        # for SEP tokens, and the word's bounding box for words in the original document.
        # One list is built per span so the tensor is (num_spans, seq_len, 4). The previous code
        # appended every span into a single flat list and wrapped it in `[bbox]`, which only gave
        # the right shape when num_spans == 1 (the result is unchanged in that case).
        bbox = []
        for span_idx in range(num_spans):
            span_boxes = []
            for token_id, sequence_id, word_id in zip(
                encoding.input_ids[span_idx],
                encoding.sequence_ids(span_idx),
                encoding.word_ids(span_idx),
            ):
                if sequence_id == 1:
                    span_boxes.append(boxes[word_id])
                elif token_id == self.tokenizer.sep_token_id:
                    span_boxes.append([1000] * 4)
                else:
                    span_boxes.append([0] * 4)
            bbox.append(span_boxes)

        if self.framework == "tf":
            raise ValueError("Unsupported: Tensorflow preprocessing for DocumentQuestionAnsweringPipeline")
        elif self.framework == "pt":
            encoding["bbox"] = torch.tensor(bbox)

        word_ids = [encoding.word_ids(i) for i in range(num_spans)]

        # TODO This will be necessary when we implement overflow support
        # encoding.pop("overflow_to_sample_mapping", None)

        return {
            **encoding,
            "p_mask": p_mask,
            "word_ids": word_ids,
            "words": words,
        }

    def _forward(self, model_inputs):
        """
        Run the model, carrying the non-tensor bookkeeping entries around it.

        `p_mask`, `word_ids` and `words` are removed before the model call (they are not model arguments) and
        re-attached to the outputs, together with the attention mask, for `postprocess()`.
        """
        p_mask = model_inputs.pop("p_mask", None)
        word_ids = model_inputs.pop("word_ids", None)
        words = model_inputs.pop("words", None)

        model_outputs = self.model(**model_inputs)

        model_outputs["p_mask"] = p_mask
        model_outputs["word_ids"] = word_ids
        model_outputs["words"] = words
        model_outputs["attention_mask"] = model_inputs["attention_mask"]
        return model_outputs

    def postprocess(self, model_outputs, top_k=1, handle_impossible_answer=False, max_answer_len=15):
        """
        Convert start/end logits into word-level answers.

        Returns:
            A single answer dict when exactly one answer remains, otherwise a list of answer dicts sorted by
            descending score and truncated to `top_k`. Each dict has `score`, `answer`, `start` and `end` keys,
            where `start`/`end` are word indices into the document words.
        """
        min_null_score = 1000000  # large and positive
        answers = []
        words = model_outputs["words"]

        # TODO: Currently, we expect the length of model_outputs to be 1, because we do not stride
        # in the preprocessor code. When we implement that, we'll either need to handle tensors of size
        # > 1 or use the ChunkPipeline and handle multiple outputs (each of size = 1).
        starts, ends, scores, min_null_score = select_starts_ends(
            model_outputs["start_logits"],
            model_outputs["end_logits"],
            model_outputs["p_mask"],
            model_outputs["attention_mask"].numpy() if model_outputs.get("attention_mask", None) is not None else None,
            min_null_score,
            top_k,
            handle_impossible_answer,
            max_answer_len,
        )

        word_ids = model_outputs["word_ids"][0]
        for s, e, score in zip(starts, ends, scores):
            # Token positions map to None when they do not belong to a word (special tokens);
            # such spans cannot be expressed as a word range and are skipped.
            word_start, word_end = word_ids[s], word_ids[e]
            if word_start is not None and word_end is not None:
                answers.append(
                    {
                        "score": score,
                        "answer": " ".join(words[word_start : word_end + 1]),
                        "start": word_start,
                        "end": word_end,
                    }
                )

        if handle_impossible_answer:
            answers.append({"score": min_null_score, "answer": "", "start": 0, "end": 0})

        answers = sorted(answers, key=lambda x: x["score"], reverse=True)[:top_k]
        if len(answers) == 1:
            return answers[0]
        return answers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
qa_helpers.py DELETED
@@ -1,135 +0,0 @@
1
- # NOTE: This code is currently under review for inclusion in the main
2
- # huggingface/transformers repository:
3
- # https://github.com/huggingface/transformers/pull/18414
4
-
5
- import warnings
6
- from collections.abc import Iterable
7
- from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
8
-
9
- import numpy as np
10
-
11
- from transformers.utils import is_pytesseract_available, is_vision_available
12
-
13
- VISION_LOADED = False
14
- if is_vision_available():
15
- from PIL import Image
16
-
17
- from transformers.image_utils import load_image
18
-
19
- VISION_LOADED = True
20
- else:
21
- Image = None
22
- load_image = None
23
-
24
-
25
- TESSERACT_LOADED = False
26
- if is_pytesseract_available():
27
- import pytesseract
28
-
29
- TESSERACT_LOADED = True
30
- else:
31
- pytesseract = None
32
-
33
-
34
def decode_spans(
    start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int, undesired_tokens: np.ndarray
) -> Tuple:
    """
    Take the output of any `ModelForQuestionAnswering` and will generate probabilities for each span to be the actual
    answer.

    In addition, it filters out some unwanted/impossible cases like answer len being greater than max_answer_len or
    answer end position being before the starting position. The method supports output the k-best answer through the
    topk argument.

    Args:
        start (`np.ndarray`): Individual start probabilities for each token.
        end (`np.ndarray`): Individual end probabilities for each token.
        topk (`int`): Indicates how many possible answer span(s) to extract from the model output.
        max_answer_len (`int`): Maximum size of the answer to extract from the model's output.
        undesired_tokens (`np.ndarray`): Mask determining tokens that can be part of the answer (non-zero entries
            mark allowed positions along the last axis).

    Returns:
        A `(starts, ends, scores)` tuple of arrays holding the start position, end position and score of every
        retained span, ordered by descending score. Scores are read from the first batch row.
    """
    # Ensure we have batch axis
    if start.ndim == 1:
        start = start[None]

    if end.ndim == 1:
        end = end[None]

    # Compute the score of each tuple(start, end) to be the real answer
    outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))

    # Remove candidates with end < start (triu) and with end - start >= max_answer_len (tril)
    candidates = np.tril(np.triu(outer), max_answer_len - 1)

    # Inspired by Chen & al. (https://github.com/facebookresearch/DrQA)
    scores_flat = candidates.flatten()
    if topk == 1:
        idx_sort = [np.argmax(scores_flat)]
    elif len(scores_flat) <= topk:
        # `<=` rather than `<`: np.argpartition needs kth < len, so asking for exactly as many
        # answers as there are candidates used to raise ValueError in the branch below.
        idx_sort = np.argsort(-scores_flat)
    else:
        idx = np.argpartition(-scores_flat, topk)[0:topk]
        idx_sort = idx[np.argsort(-scores_flat[idx])]

    starts, ends = np.unravel_index(idx_sort, candidates.shape)[1:]

    # Only the last-axis indices are token positions. Passing the whole `nonzero()` tuple to
    # np.isin also mixed in the batch-row indices, which made position 0 look allowed.
    allowed_positions = undesired_tokens.nonzero()[-1]
    desired_spans = np.isin(starts, allowed_positions) & np.isin(ends, allowed_positions)
    starts = starts[desired_spans]
    ends = ends[desired_spans]
    scores = candidates[0, starts, ends]

    return starts, ends, scores
82
-
83
-
84
def select_starts_ends(
    start,
    end,
    p_mask,
    attention_mask,
    min_null_score=1000000,
    top_k=1,
    handle_impossible_answer=False,
    max_answer_len=15,
):
    """
    Takes the raw output of any `ModelForQuestionAnswering` and first normalizes its outputs and then uses
    `decode_spans()` to generate probabilities for each span to be the actual answer.

    Args:
        start (`np.ndarray`): Individual start logits for each token.
        end (`np.ndarray`): Individual end logits for each token.
        p_mask (`np.ndarray`): A mask with 1 for values that cannot be in the answer
        attention_mask (`np.ndarray`): The attention mask generated by the tokenizer
        min_null_score(`float`): The minimum null (empty) answer score seen so far.
        top_k (`int`): Indicates how many possible answer span(s) to extract from the model output.
        handle_impossible_answer(`bool`): Whether to allow null (empty) answers
        max_answer_len (`int`): Maximum size of the answer to extract from the model's output.

    Returns:
        A `(starts, ends, scores, min_null_score)` tuple: the spans selected by `decode_spans()` and the
        (possibly lowered) null-answer score.
    """
    # Ensure padded tokens & question tokens cannot belong to the set of candidate answers.
    # Despite its name, `undesired_tokens` is the inverse of p_mask: 1 marks tokens that MAY be in the answer.
    undesired_tokens = np.abs(np.array(p_mask) - 1)

    if attention_mask is not None:
        undesired_tokens = undesired_tokens & attention_mask

    # Generate mask
    undesired_tokens_mask = undesired_tokens == 0.0

    # Make sure non-context indexes in the tensor cannot contribute to the softmax
    start = np.where(undesired_tokens_mask, -10000.0, start)
    end = np.where(undesired_tokens_mask, -10000.0, end)

    # Normalize logits and spans to retrieve the answer.
    # The sum is taken per row so each row is a proper softmax. The previous grand-total sum
    # was only correct for a single row (for which this is equivalent).
    # NOTE: the CLS handling below and decode_spans() still only look at the first row.
    start = np.exp(start - start.max(axis=-1, keepdims=True))
    start = start / start.sum(axis=-1, keepdims=True)

    end = np.exp(end - end.max(axis=-1, keepdims=True))
    end = end / end.sum(axis=-1, keepdims=True)

    if handle_impossible_answer:
        # Position 0 stands for the null answer; track the lowest score seen for it.
        min_null_score = min(min_null_score, (start[0, 0] * end[0, 0]).item())

    # Mask CLS
    start[0, 0] = end[0, 0] = 0.0

    starts, ends, scores = decode_spans(start, end, top_k, max_answer_len, undesired_tokens)
    return starts, ends, scores, min_null_score