Blaise-g committed on
Commit
2e0b08a
Β·
1 Parent(s): 691b011

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -19,12 +19,12 @@ logging.basicConfig(
19
 
20
  def proc_submission(
21
  input_text: str,
22
- model_type: str,
23
  summary_type: str,
 
24
  num_beams,
25
  token_batch_length,
26
  length_penalty,
27
- max_input_length: int = 1000,
28
  ):
29
  """
30
  proc_submission - a helper function for the gradio module to process submissions
@@ -96,12 +96,12 @@ def proc_submission(
96
  #**settings_det,
97
  #)
98
 
99
- settings = settings_tldr if summary_type == 'TLDR' else settings_det
100
 
101
  _summaries = summarize_via_tokenbatches(
102
  tr_in,
103
- model_tldr if (summary_type == "TLDR") else model_det,
104
- tokenizer_tldr if (summary_type == "TLDR") else tokenizer_det,
105
  batch_length=token_batch_length,
106
  **settings,
107
  )
@@ -195,7 +195,7 @@ if __name__ == "__main__":
195
  )
196
  with gr.Row():
197
  summary_type = gr.Radio(
198
- choices=["TLDR", "Detailed"], label="Summary Type", value="TLDR"
199
  )
200
  model_type = gr.Radio(
201
  choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
@@ -276,7 +276,7 @@ if __name__ == "__main__":
276
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
277
  )
278
  gr.Markdown(
279
- "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
280
  )
281
  gr.Markdown("---")
282
 
@@ -294,9 +294,9 @@ if __name__ == "__main__":
294
  input_text,
295
  summary_type,
296
  model_type,
 
297
  num_beams,
298
  token_batch_length,
299
- length_penalty,
300
  ],
301
  outputs=[output_text, summary_text, compression_rate],
302
  )
 
19
 
20
  def proc_submission(
21
  input_text: str,
 
22
  summary_type: str,
23
+ model_type: str,
24
  num_beams,
25
  token_batch_length,
26
  length_penalty,
27
+ max_input_length: int = 2000,
28
  ):
29
  """
30
  proc_submission - a helper function for the gradio module to process submissions
 
96
  #**settings_det,
97
  #)
98
 
99
+ settings = settings_tldr if summary_type == 'tldr' else settings_det
100
 
101
  _summaries = summarize_via_tokenbatches(
102
  tr_in,
103
+ model_tldr if (summary_type == "tldr") else model_det,
104
+ tokenizer_tldr if (summary_type == "tldr") else tokenizer_det,
105
  batch_length=token_batch_length,
106
  **settings,
107
  )
 
195
  )
196
  with gr.Row():
197
  summary_type = gr.Radio(
198
+ choices=["tldr", "detailed"], label="Summary Type", value="tldr"
199
  )
200
  model_type = gr.Radio(
201
  choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
 
276
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
277
  )
278
  gr.Markdown(
279
+ "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary."
280
  )
281
  gr.Markdown("---")
282
 
 
294
  input_text,
295
  summary_type,
296
  model_type,
297
+ length_penalty,
298
  num_beams,
299
  token_batch_length,
 
300
  ],
301
  outputs=[output_text, summary_text, compression_rate],
302
  )