Update app.py
Browse files
app.py
CHANGED
@@ -19,12 +19,12 @@ logging.basicConfig(
|
|
19 |
|
20 |
def proc_submission(
|
21 |
input_text: str,
|
22 |
-
model_type: str,
|
23 |
summary_type: str,
|
|
|
24 |
num_beams,
|
25 |
token_batch_length,
|
26 |
length_penalty,
|
27 |
-
max_input_length: int =
|
28 |
):
|
29 |
"""
|
30 |
proc_submission - a helper function for the gradio module to process submissions
|
@@ -96,12 +96,12 @@ def proc_submission(
|
|
96 |
#**settings_det,
|
97 |
#)
|
98 |
|
99 |
-
settings = settings_tldr if summary_type == '
|
100 |
|
101 |
_summaries = summarize_via_tokenbatches(
|
102 |
tr_in,
|
103 |
-
model_tldr if (summary_type == "
|
104 |
-
tokenizer_tldr if (summary_type == "
|
105 |
batch_length=token_batch_length,
|
106 |
**settings,
|
107 |
)
|
@@ -195,7 +195,7 @@ if __name__ == "__main__":
|
|
195 |
)
|
196 |
with gr.Row():
|
197 |
summary_type = gr.Radio(
|
198 |
-
choices=["
|
199 |
)
|
200 |
model_type = gr.Radio(
|
201 |
choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
|
@@ -276,7 +276,7 @@ if __name__ == "__main__":
|
|
276 |
"- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
|
277 |
)
|
278 |
gr.Markdown(
|
279 |
-
"- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary.
|
280 |
)
|
281 |
gr.Markdown("---")
|
282 |
|
@@ -294,9 +294,9 @@ if __name__ == "__main__":
|
|
294 |
input_text,
|
295 |
summary_type,
|
296 |
model_type,
|
|
|
297 |
num_beams,
|
298 |
token_batch_length,
|
299 |
-
length_penalty,
|
300 |
],
|
301 |
outputs=[output_text, summary_text, compression_rate],
|
302 |
)
|
|
|
19 |
|
20 |
def proc_submission(
|
21 |
input_text: str,
|
|
|
22 |
summary_type: str,
|
23 |
+
model_type: str,
|
24 |
num_beams,
|
25 |
token_batch_length,
|
26 |
length_penalty,
|
27 |
+
max_input_length: int = 2000,
|
28 |
):
|
29 |
"""
|
30 |
proc_submission - a helper function for the gradio module to process submissions
|
|
|
96 |
#**settings_det,
|
97 |
#)
|
98 |
|
99 |
+
settings = settings_tldr if summary_type == 'tldr' else settings_det
|
100 |
|
101 |
_summaries = summarize_via_tokenbatches(
|
102 |
tr_in,
|
103 |
+
model_tldr if (summary_type == "tldr") else model_det,
|
104 |
+
tokenizer_tldr if (summary_type == "tldr") else tokenizer_det,
|
105 |
batch_length=token_batch_length,
|
106 |
**settings,
|
107 |
)
|
|
|
195 |
)
|
196 |
with gr.Row():
|
197 |
summary_type = gr.Radio(
|
198 |
+
choices=["tldr", "detailed"], label="Summary Type", value="tldr"
|
199 |
)
|
200 |
model_type = gr.Radio(
|
201 |
choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
|
|
|
276 |
"- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
|
277 |
)
|
278 |
gr.Markdown(
|
279 |
+
"- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary."
|
280 |
)
|
281 |
gr.Markdown("---")
|
282 |
|
|
|
294 |
input_text,
|
295 |
summary_type,
|
296 |
model_type,
|
297 |
+
length_penalty,
|
298 |
num_beams,
|
299 |
token_batch_length,
|
|
|
300 |
],
|
301 |
outputs=[output_text, summary_text, compression_rate],
|
302 |
)
|