Blaise-g committed on
Commit
2e0b08a
Β·
1 Parent(s): 691b011

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -19,12 +19,12 @@ logging.basicConfig(
19
 
20
  def proc_submission(
21
  input_text: str,
22
- model_type: str,
23
  summary_type: str,
 
24
  num_beams,
25
  token_batch_length,
26
  length_penalty,
27
- max_input_length: int = 1000,
28
  ):
29
  """
30
  proc_submission - a helper function for the gradio module to process submissions
@@ -96,12 +96,12 @@ def proc_submission(
96
  #**settings_det,
97
  #)
98
 
99
- settings = settings_tldr if summary_type == 'TLDR' else settings_det
100
 
101
  _summaries = summarize_via_tokenbatches(
102
  tr_in,
103
- model_tldr if (summary_type == "TLDR") else model_det,
104
- tokenizer_tldr if (summary_type == "TLDR") else tokenizer_det,
105
  batch_length=token_batch_length,
106
  **settings,
107
  )
@@ -195,7 +195,7 @@ if __name__ == "__main__":
195
  )
196
  with gr.Row():
197
  summary_type = gr.Radio(
198
- choices=["TLDR", "Detailed"], label="Summary Type", value="TLDR"
199
  )
200
  model_type = gr.Radio(
201
  choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
@@ -276,7 +276,7 @@ if __name__ == "__main__":
276
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
277
  )
278
  gr.Markdown(
279
- "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
280
  )
281
  gr.Markdown("---")
282
 
@@ -294,9 +294,9 @@ if __name__ == "__main__":
294
  input_text,
295
  summary_type,
296
  model_type,
 
297
  num_beams,
298
  token_batch_length,
299
- length_penalty,
300
  ],
301
  outputs=[output_text, summary_text, compression_rate],
302
  )
 
19
 
20
  def proc_submission(
21
  input_text: str,
 
22
  summary_type: str,
23
+ model_type: str,
24
  num_beams,
25
  token_batch_length,
26
  length_penalty,
27
+ max_input_length: int = 2000,
28
  ):
29
  """
30
  proc_submission - a helper function for the gradio module to process submissions
 
96
  #**settings_det,
97
  #)
98
 
99
+ settings = settings_tldr if summary_type == 'tldr' else settings_det
100
 
101
  _summaries = summarize_via_tokenbatches(
102
  tr_in,
103
+ model_tldr if (summary_type == "tldr") else model_det,
104
+ tokenizer_tldr if (summary_type == "tldr") else tokenizer_det,
105
  batch_length=token_batch_length,
106
  **settings,
107
  )
 
195
  )
196
  with gr.Row():
197
  summary_type = gr.Radio(
198
+ choices=["tldr", "detailed"], label="Summary Type", value="tldr"
199
  )
200
  model_type = gr.Radio(
201
  choices=["LongT5", "LED"], label="Model Architecture", value="LongT5"
 
276
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
277
  )
278
  gr.Markdown(
279
+ "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary."
280
  )
281
  gr.Markdown("---")
282
 
 
294
  input_text,
295
  summary_type,
296
  model_type,
297
+ length_penalty,
298
  num_beams,
299
  token_batch_length,
 
300
  ],
301
  outputs=[output_text, summary_text, compression_rate],
302
  )