pritamdeka committed on
Commit
5b24777
·
1 Parent(s): 1e0cc9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -55,13 +55,7 @@ all_stopwords = sp.Defaults.stop_words
55
 
56
 
57
 
58
- word_embedding_model = models.Transformer('cambridgeltl/SapBERT-from-PubMedBERT-fulltext')
59
- pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
60
- pooling_mode_mean_tokens=True,
61
- pooling_mode_cls_token=False,
62
- pooling_mode_max_tokens=False)
63
 
64
- embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
65
 
66
 
67
  def remove_stopwords(sen):
@@ -70,7 +64,14 @@ def remove_stopwords(sen):
70
 
71
 
72
 
73
- def keyphrase_generator(article_link, model_1, model_2, max_num_keywords):
 
 
 
 
 
 
 
74
 
75
  element=[]
76
  cluster_list_final=[]
@@ -270,13 +271,18 @@ igen_pubmed = gr.Interface(keyphrase_generator,
270
  type="value",
271
  default='sentence-transformers/all-mpnet-base-v1',
272
  label="Select any SBERT model for keyphrases from the list below"),
273
- gr.inputs.Slider(minimum=5, maximum=30, step=1, default=10, label="Max Keywords")],
 
 
 
 
 
274
  outputs=gr.outputs.Dataframe(type="auto", label="Retrieved Results from PubMed",max_cols=None, overflow_row_behaviour="paginate"),
275
  theme="dark-peach",
276
  title="PubMed Abstract Retriever", description="Retrieves relevant PubMed abstracts for an online article which can be used as further references.",
277
  article= "This work is based on the paper <a href=https://dl.acm.org/doi/10.1145/3487664.3487701>provided here</a>."
278
  "\t It uses the TextRank algorithm with SBERT to first find the top sentences and then extracts the keyphrases from those sentences using scispaCy and SBERT."
279
- "\t The application then uses a UMLS based BERT model, <a href=https://arxiv.org/abs/2010.11784>SapBERT</a> to cluster the keyphrases using K-means clustering method and finally create a boolean query. After that the top 10 titles and abstracts are retrieved from PubMed database and displayed according to relevancy. "
280
  "\t The list of SBERT models required in the textboxes can be found in <a href=www.sbert.net/docs/pretrained_models.html>SBERT Pre-trained models hub</a>."
281
  "\t The default model names are provided which can be changed from the list of pretrained models. "
282
  "\t The value of keyphrases can be changed. The default value is 10, minimum is 5 and a maximum value of 30.")
 
55
 
56
 
57
 
 
 
 
 
 
58
 
 
59
 
60
 
61
  def remove_stopwords(sen):
 
64
 
65
 
66
 
67
+ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_3):
68
+ word_embedding_model = models.Transformer(model_3)
69
+ pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
70
+ pooling_mode_mean_tokens=True,
71
+ pooling_mode_cls_token=False,
72
+ pooling_mode_max_tokens=False)
73
+
74
+ embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
75
 
76
  element=[]
77
  cluster_list_final=[]
 
271
  type="value",
272
  default='sentence-transformers/all-mpnet-base-v1',
273
  label="Select any SBERT model for keyphrases from the list below"),
274
+ gr.inputs.Slider(minimum=5, maximum=30, step=1, default=10, label="Max Keywords")
275
+ gr.inputs.Dropdown(choices=['cambridgeltl/SapBERT-from-PubMedBERT-fulltext',
276
+ 'cambridgeltl/SapBERT-from-PubMedBERT-fulltext-mean-token'],
277
+ type="value",
278
+ default='cambridgeltl/SapBERT-from-PubMedBERT-fulltext',
279
+ label="Select any SapBERT model for clustering from the list below")],
280
  outputs=gr.outputs.Dataframe(type="auto", label="Retrieved Results from PubMed",max_cols=None, overflow_row_behaviour="paginate"),
281
  theme="dark-peach",
282
  title="PubMed Abstract Retriever", description="Retrieves relevant PubMed abstracts for an online article which can be used as further references.",
283
  article= "This work is based on the paper <a href=https://dl.acm.org/doi/10.1145/3487664.3487701>provided here</a>."
284
  "\t It uses the TextRank algorithm with SBERT to first find the top sentences and then extracts the keyphrases from those sentences using scispaCy and SBERT."
285
+ "\t The application then uses a UMLS based BERT model, <a href=https://arxiv.org/abs/2010.11784>SapBERT</a> to cluster the keyphrases using K-means clustering method and finally create a boolean query. After that the top 10 titles and abstracts are retrieved from PubMed database and displayed according to relevancy. The SapBERT models can be changed as per the list provided. "
286
  "\t The list of SBERT models required in the textboxes can be found in <a href=www.sbert.net/docs/pretrained_models.html>SBERT Pre-trained models hub</a>."
287
  "\t The default model names are provided which can be changed from the list of pretrained models. "
288
  "\t The value of keyphrases can be changed. The default value is 10, minimum is 5 and a maximum value of 30.")