Sa-m committed on
Commit
1726132
·
1 Parent(s): a82e853

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -32
app.py CHANGED
@@ -1,19 +1,9 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
  # MANIFESTO ANALYSIS
4
-
5
- ## IMPORTING LIBRARIES
6
  """
7
 
8
- # Commented out IPython magic to ensure Python compatibility.
9
- # %%capture
10
- # !pip install tika
11
- # !pip install clean-text
12
- # !pip install gradio
13
-
14
- # Commented out IPython magic to ensure Python compatibility.
15
-
16
-
17
  import random
18
  import matplotlib.pyplot as plt
19
  import nltk
@@ -21,14 +11,11 @@ from nltk.tokenize import word_tokenize,sent_tokenize
21
  from nltk.corpus import stopwords
22
  from nltk.stem.porter import PorterStemmer
23
  from nltk.stem import WordNetLemmatizer
24
- #import tika
25
- #from tika import parser
26
  from nltk.corpus import stopwords
27
  from nltk.tokenize import word_tokenize
28
  from nltk.probability import FreqDist
29
  from cleantext import clean
30
  import textract
31
-
32
  import urllib.request
33
  import nltk.corpus
34
  from nltk.text import Text
@@ -38,7 +25,6 @@ import sys
38
  import pandas as pd
39
  import cv2
40
  import re
41
-
42
  from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
43
  from textblob import TextBlob
44
  from PIL import Image
@@ -52,7 +38,6 @@ import unidecode
52
  nltk.download('stopwords')
53
  nltk.download('punkt')
54
  nltk.download('wordnet')
55
- nltk.download('averaged_perceptron_tagger')
56
  nltk.download('words')
57
 
58
 
@@ -111,10 +96,11 @@ def Preprocess(textParty):
111
 
112
 
113
 
114
- # Using Concordance,you can see each time a word is used, along with its
115
- # immediate context. It can give you a peek into how a word is being used
116
- # at the sentence level and what words are used with it.
117
-
 
118
  def concordance(text_Party,strng):
119
  word_tokens_party = word_tokenize(text_Party)
120
  moby = Text(word_tokens_party)
@@ -136,7 +122,7 @@ def normalize(d, target=1.0):
136
 
137
  def fDistance(text2Party):
138
  '''
139
- most frequent words search
140
  '''
141
  word_tokens_party = word_tokenize(text2Party) #Tokenizing
142
  fdistance = FreqDist(word_tokens_party).most_common(10)
@@ -188,7 +174,6 @@ def getAnalysis(score):
188
  else:
189
  return 'Positive'
190
 
191
- #http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf
192
  url = "http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf"
193
  path_input = "./Bjp_Manifesto_2019.pdf'"
194
  urllib.request.urlretrieve(url, filename=path_input)
@@ -216,8 +201,6 @@ def analysis(Manifesto,Search):
216
  plt.ylabel('Counts')
217
  plt.figure(figsize=(4,3))
218
  df['Analysis on Polarity'].value_counts().plot(kind ='bar')
219
- #plt.savefig('./sentimentAnalysis.png')
220
- #plt.clf()
221
  plt.tight_layout()
222
  buf = BytesIO()
223
  plt.savefig(buf)
@@ -227,8 +210,6 @@ def analysis(Manifesto,Search):
227
 
228
  plt.figure(figsize=(4,3))
229
  df['Analysis on Subjectivity'].value_counts().plot(kind ='bar')
230
- #plt.savefig('sentimentAnalysis2.png')
231
- #plt.clf()
232
  plt.tight_layout()
233
  buf = BytesIO()
234
  plt.savefig(buf)
@@ -249,11 +230,6 @@ def analysis(Manifesto,Search):
249
 
250
  fdist_Party=fDistance(text_Party)
251
  img4=fDistancePlot(text_Party)
252
-
253
- #img1=cv2.imread('/sentimentAnalysis.png')
254
- #img2=cv2.imread('/wordcloud.png')
255
- #img3=cv2.imread('/wordcloud.png')
256
- #img4=cv2.imread('/distplot.png')
257
 
258
  searchRes=concordance(text_Party,Search)
259
  searChRes=clean(searchRes)
@@ -265,7 +241,6 @@ Search_txt=gr.inputs.Textbox()
265
  filePdf = gr.inputs.File()
266
  text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
267
  mfw=gr.outputs.Label(label="Most Relevant Topics")
268
- # mfw2=gr.outputs.Image(label="Most Relevant Topics Plot")
269
  plot1=gr.outputs. Image(label='Sentiment Analysis')
270
  plot2=gr.outputs.Image(label='Subjectivity Analysis')
271
  plot3=gr.outputs.Image(label='Word Cloud')
 
1
  # -*- coding: utf-8 -*-
2
  """
3
  # MANIFESTO ANALYSIS
 
 
4
  """
5
 
6
+ ## IMPORTING LIBRARIES
 
 
 
 
 
 
 
 
7
  import random
8
  import matplotlib.pyplot as plt
9
  import nltk
 
11
  from nltk.corpus import stopwords
12
  from nltk.stem.porter import PorterStemmer
13
  from nltk.stem import WordNetLemmatizer
 
 
14
  from nltk.corpus import stopwords
15
  from nltk.tokenize import word_tokenize
16
  from nltk.probability import FreqDist
17
  from cleantext import clean
18
  import textract
 
19
  import urllib.request
20
  import nltk.corpus
21
  from nltk.text import Text
 
25
  import pandas as pd
26
  import cv2
27
  import re
 
28
  from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
29
  from textblob import TextBlob
30
  from PIL import Image
 
38
  nltk.download('stopwords')
39
  nltk.download('punkt')
40
  nltk.download('wordnet')
 
41
  nltk.download('words')
42
 
43
 
 
96
 
97
 
98
 
99
+ '''
100
+ Using Concordance, you can see each time a word is used, along with its
101
+ immediate context. It can give you a peek into how a word is being used
102
+ at the sentence level and what words are used with it.
103
+ '''
104
  def concordance(text_Party,strng):
105
  word_tokens_party = word_tokenize(text_Party)
106
  moby = Text(word_tokens_party)
 
122
 
123
  def fDistance(text2Party):
124
  '''
125
+ Most frequent words search
126
  '''
127
  word_tokens_party = word_tokenize(text2Party) #Tokenizing
128
  fdistance = FreqDist(word_tokens_party).most_common(10)
 
174
  else:
175
  return 'Positive'
176
 
 
177
  url = "http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf"
178
  path_input = "./Bjp_Manifesto_2019.pdf'"
179
  urllib.request.urlretrieve(url, filename=path_input)
 
201
  plt.ylabel('Counts')
202
  plt.figure(figsize=(4,3))
203
  df['Analysis on Polarity'].value_counts().plot(kind ='bar')
 
 
204
  plt.tight_layout()
205
  buf = BytesIO()
206
  plt.savefig(buf)
 
210
 
211
  plt.figure(figsize=(4,3))
212
  df['Analysis on Subjectivity'].value_counts().plot(kind ='bar')
 
 
213
  plt.tight_layout()
214
  buf = BytesIO()
215
  plt.savefig(buf)
 
230
 
231
  fdist_Party=fDistance(text_Party)
232
  img4=fDistancePlot(text_Party)
 
 
 
 
 
233
 
234
  searchRes=concordance(text_Party,Search)
235
  searChRes=clean(searchRes)
 
241
  filePdf = gr.inputs.File()
242
  text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
243
  mfw=gr.outputs.Label(label="Most Relevant Topics")
 
244
  plot1=gr.outputs. Image(label='Sentiment Analysis')
245
  plot2=gr.outputs.Image(label='Subjectivity Analysis')
246
  plot3=gr.outputs.Image(label='Word Cloud')