pritamdeka committed on
Commit
1b1ed3e
·
1 Parent(s): 012ca65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -13,6 +13,8 @@ from newspaper import Article
13
  from newspaper import fulltext
14
  import requests
15
  import itertools
 
 
16
 
17
  from nltk.tokenize import word_tokenize
18
  from sentence_transformers import SentenceTransformer
@@ -53,7 +55,7 @@ nlp = en_core_sci_lg.load()
53
  sp = en_core_sci_lg.load()
54
  all_stopwords = sp.Defaults.stop_words
55
 
56
-
57
 
58
  def remove_stopwords(sen):
59
  sen_new = " ".join([i for i in sen if i not in stop_words])
@@ -109,7 +111,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
109
  count_dict[l]=0
110
  for sent, score in count_dict.items():
111
  score_list.append(score)
112
- clean_sentences_new = pd.Series(corpus).str.replace("[^a-zA-Z]", " ").tolist()
113
  corpus_embeddings = model_1.encode(clean_sentences_new)
114
  sim_mat = np.zeros([len(clean_sentences_new), len(clean_sentences_new)])
115
  for i in range(len(clean_sentences_new)):
@@ -212,8 +214,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
212
  ncbi_url='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
213
 
214
  last_url='esearch.fcgi?db=pubmed'+'&term='+f_1
215
- search_rettype = '&rettype=json'
216
- overall_url=ncbi_url+last_url+search_rettype+'&sort=relevance'
217
  pubmed_search_request = requests.get(overall_url)
218
 
219
  root = ET.fromstring(pubmed_search_request.text)
@@ -225,11 +226,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
225
  all_search_ids = ','.join(search_id_list)
226
  fetch_url='efetch.fcgi?db=pubmed'
227
  search_id='&id='+all_search_ids
228
- ret_type='&rettype=text'
229
- ret_mode='&retmode=xml'
230
- ret_max='&retmax=500'
231
- ret_sort='&sort=relevance'
232
- return_url=ncbi_url+fetch_url+search_id+ret_type+ret_mode+ret_max+ret_sort
233
  pubmed_abstract_request = requests.get(return_url)
234
  root_1 = ET.fromstring(pubmed_abstract_request.text)
235
  article_title = root_1.findall('.//ArticleTitle')
 
13
  from newspaper import fulltext
14
  import requests
15
  import itertools
16
+ import os
17
+
18
 
19
  from nltk.tokenize import word_tokenize
20
  from sentence_transformers import SentenceTransformer
 
55
  sp = en_core_sci_lg.load()
56
  all_stopwords = sp.Defaults.stop_words
57
 
58
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
59
 
60
  def remove_stopwords(sen):
61
  sen_new = " ".join([i for i in sen if i not in stop_words])
 
111
  count_dict[l]=0
112
  for sent, score in count_dict.items():
113
  score_list.append(score)
114
+ clean_sentences_new = pd.Series(corpus).str.replace("[^a-zA-Z]", " ", regex = True).tolist()
115
  corpus_embeddings = model_1.encode(clean_sentences_new)
116
  sim_mat = np.zeros([len(clean_sentences_new), len(clean_sentences_new)])
117
  for i in range(len(clean_sentences_new)):
 
214
  ncbi_url='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
215
 
216
  last_url='esearch.fcgi?db=pubmed'+'&term='+f_1
217
+ overall_url=ncbi_url+last_url+'&rettype=json'+'&sort=relevance'
 
218
  pubmed_search_request = requests.get(overall_url)
219
 
220
  root = ET.fromstring(pubmed_search_request.text)
 
226
  all_search_ids = ','.join(search_id_list)
227
  fetch_url='efetch.fcgi?db=pubmed'
228
  search_id='&id='+all_search_ids
229
+ return_url=ncbi_url+fetch_url+search_id+'&rettype=text'+'&retmode=xml'+'&retmax=500'+'&sort=relevance'
 
 
 
 
230
  pubmed_abstract_request = requests.get(return_url)
231
  root_1 = ET.fromstring(pubmed_abstract_request.text)
232
  article_title = root_1.findall('.//ArticleTitle')