Spaces:
Runtime error
Runtime error
pritamdeka
committed on
Commit
·
1b1ed3e
1
Parent(s):
012ca65
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ from newspaper import Article
|
|
13 |
from newspaper import fulltext
|
14 |
import requests
|
15 |
import itertools
|
|
|
|
|
16 |
|
17 |
from nltk.tokenize import word_tokenize
|
18 |
from sentence_transformers import SentenceTransformer
|
@@ -53,7 +55,7 @@ nlp = en_core_sci_lg.load()
|
|
53 |
sp = en_core_sci_lg.load()
|
54 |
all_stopwords = sp.Defaults.stop_words
|
55 |
|
56 |
-
|
57 |
|
58 |
def remove_stopwords(sen):
|
59 |
sen_new = " ".join([i for i in sen if i not in stop_words])
|
@@ -109,7 +111,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
|
|
109 |
count_dict[l]=0
|
110 |
for sent, score in count_dict.items():
|
111 |
score_list.append(score)
|
112 |
-
clean_sentences_new = pd.Series(corpus).str.replace("[^a-zA-Z]", " ").tolist()
|
113 |
corpus_embeddings = model_1.encode(clean_sentences_new)
|
114 |
sim_mat = np.zeros([len(clean_sentences_new), len(clean_sentences_new)])
|
115 |
for i in range(len(clean_sentences_new)):
|
@@ -212,8 +214,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
|
|
212 |
ncbi_url='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
213 |
|
214 |
last_url='esearch.fcgi?db=pubmed'+'&term='+f_1
|
215 |
-
|
216 |
-
overall_url=ncbi_url+last_url+search_rettype+'&sort=relevance'
|
217 |
pubmed_search_request = requests.get(overall_url)
|
218 |
|
219 |
root = ET.fromstring(pubmed_search_request.text)
|
@@ -225,11 +226,7 @@ def keyphrase_generator(article_link, model_1, model_2, max_num_keywords, model_
|
|
225 |
all_search_ids = ','.join(search_id_list)
|
226 |
fetch_url='efetch.fcgi?db=pubmed'
|
227 |
search_id='&id='+all_search_ids
|
228 |
-
|
229 |
-
ret_mode='&retmode=xml'
|
230 |
-
ret_max='&retmax=500'
|
231 |
-
ret_sort='&sort=relevance'
|
232 |
-
return_url=ncbi_url+fetch_url+search_id+ret_type+ret_mode+ret_max+ret_sort
|
233 |
pubmed_abstract_request = requests.get(return_url)
|
234 |
root_1 = ET.fromstring(pubmed_abstract_request.text)
|
235 |
article_title = root_1.findall('.//ArticleTitle')
|
|
|
13 |
from newspaper import fulltext
|
14 |
import requests
|
15 |
import itertools
|
16 |
+
import os
|
17 |
+
|
18 |
|
19 |
from nltk.tokenize import word_tokenize
|
20 |
from sentence_transformers import SentenceTransformer
|
|
|
55 |
sp = en_core_sci_lg.load()
|
56 |
all_stopwords = sp.Defaults.stop_words
|
57 |
|
58 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
59 |
|
60 |
def remove_stopwords(sen):
|
61 |
sen_new = " ".join([i for i in sen if i not in stop_words])
|
|
|
111 |
count_dict[l]=0
|
112 |
for sent, score in count_dict.items():
|
113 |
score_list.append(score)
|
114 |
+
clean_sentences_new = pd.Series(corpus).str.replace("[^a-zA-Z]", " ", regex = True).tolist()
|
115 |
corpus_embeddings = model_1.encode(clean_sentences_new)
|
116 |
sim_mat = np.zeros([len(clean_sentences_new), len(clean_sentences_new)])
|
117 |
for i in range(len(clean_sentences_new)):
|
|
|
214 |
ncbi_url='https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
215 |
|
216 |
last_url='esearch.fcgi?db=pubmed'+'&term='+f_1
|
217 |
+
overall_url=ncbi_url+last_url+'&rettype=json'+'&sort=relevance'
|
|
|
218 |
pubmed_search_request = requests.get(overall_url)
|
219 |
|
220 |
root = ET.fromstring(pubmed_search_request.text)
|
|
|
226 |
all_search_ids = ','.join(search_id_list)
|
227 |
fetch_url='efetch.fcgi?db=pubmed'
|
228 |
search_id='&id='+all_search_ids
|
229 |
+
return_url=ncbi_url+fetch_url+search_id+'&rettype=text'+'&retmode=xml'+'&retmax=500'+'&sort=relevance'
|
|
|
|
|
|
|
|
|
230 |
pubmed_abstract_request = requests.get(return_url)
|
231 |
root_1 = ET.fromstring(pubmed_abstract_request.text)
|
232 |
article_title = root_1.findall('.//ArticleTitle')
|