Jan90 committed on
Commit
22b6f1a
·
verified ·
1 Parent(s): ab2f9e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -105
app.py CHANGED
@@ -1,107 +1,191 @@
 
 
1
  import gradio as gr
 
 
2
  import pandas as pd
3
- import numpy as np
4
- import gradio as gr
5
- import random
6
-
7
- def flip_text(x):
8
- return x[::-1]
9
-
10
- def flip_image(x):
11
- return np.fliplr(x)
12
-
13
-
14
- df = pd.DataFrame({
15
- 'Year': np.random.randint(2000, 2024, 25),
16
- 'Reviews': np.random.randint(120, 320, 25),
17
- 'age': np.random.randint(18, 30, 25),
18
- 'ethnicity': [random.choice(["white", "black", "asian"]) for _ in range(25)]
19
- })
20
-
21
- theme = gr.themes.Soft(
22
- primary_hue="yellow",
23
- secondary_hue="amber",
24
- spacing_size="sm",
25
- radius_size="lg",
26
-
27
- )
28
-
29
- with gr.Blocks(theme=theme) as demo:
30
-
31
- gr.ScatterPlot(df, x="Reviews", y="age", color="age")
32
- gr.LinePlot(df, x="Year", y="Reviews")
33
- gr.Slider(2000, 2024, value=2024, label="Count", info="Choose between 2000 and 2024"),
34
- gr.Markdown("Flip text or image files using this demo.")
35
- with gr.Tab("User Interface"):
36
- text_input = gr.Textbox()
37
- text_output = gr.Textbox()
38
- text_button = gr.Button("Flip")
39
- with gr.Tab("Testing Area"):
40
- with gr.Row():
41
- image_input = gr.Image()
42
- image_output = gr.Image()
43
- image_button = gr.Button("Flip")
44
- with gr.Row("Flip Text"):
45
- text_input = gr.Textbox()
46
- text_output = gr.Textbox()
47
- text_button = gr.Button("Flip")
48
- with gr.Column(visible=False) as output_col:
49
- text_input = gr.Textbox()
50
- text_output = gr.Textbox()
51
- text_button = gr.Button("Flip")
52
-
53
-
54
- with gr.Accordion("Open for More!", open=False):
55
- gr.Markdown("Look at me...")
56
- temp_slider = gr.Slider(
57
- 0, 1,
58
- value=0.1,
59
- step=0.1,
60
- interactive=True,
61
- label="Slide me",
62
- )
63
-
64
- text_button.click(flip_text, inputs=text_input, outputs=text_output)
65
- image_button.click(flip_image, inputs=image_input, outputs=image_output)
66
-
67
- track_count = gr.State(1)
68
- add_track_btn = gr.Button("Add Track")
69
-
70
- add_track_btn.click(lambda count: count + 1, track_count, track_count)
71
-
72
- @gr.render(inputs=track_count)
73
- def render_tracks(count):
74
- audios = []
75
- volumes = []
76
- with gr.Row():
77
- for i in range(count):
78
- with gr.Column(variant="panel", min_width=200):
79
- gr.Textbox(placeholder="Data Name", key=f"name-{i}", show_label=False)
80
- track_audio = gr.Audio(label=f"Data {i}", key=f"track-{i}")
81
- track_volume = gr.Slider(0, 100, value=100, label="Volume", key=f"volume-{i}")
82
- audios.append(track_audio)
83
- volumes.append(track_volume)
84
-
85
- def merge(data):
86
- sr, output = None, None
87
- for audio, volume in zip(audios, volumes):
88
- sr, audio_val = data[audio]
89
- volume_val = data[volume]
90
- final_track = audio_val * (volume_val / 100)
91
- if output is None:
92
- output = final_track
93
- else:
94
- min_shape = tuple(min(s1, s2) for s1, s2 in zip(output.shape, final_track.shape))
95
- trimmed_output = output[:min_shape[0], ...][:, :min_shape[1], ...] if output.ndim > 1 else output[:min_shape[0]]
96
- trimmed_final = final_track[:min_shape[0], ...][:, :min_shape[1], ...] if final_track.ndim > 1 else final_track[:min_shape[0]]
97
- output += trimmed_output + trimmed_final
98
- return (sr, output)
99
-
100
- merge_btn.click(merge, set(audios + volumes), output_audio)
101
-
102
- merge_btn = gr.Button("Merge Tracks")
103
- output_audio = gr.Audio(label="Output", interactive=False)
104
-
105
-
106
-
107
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, TextClassificationPipeline
2
+ import torch
3
  import gradio as gr
4
+ from openpyxl import load_workbook
5
+ from numpy import mean
6
  import pandas as pd
7
+ import matplotlib.pyplot as plt
8
+
9
# Checkpoint identifiers for the three models used by the app.
SUMMARIZER_CHECKPOINT = "suriya7/bart-finetuned-text-summarization"
KEYWORD_CHECKPOINT = "transformer3/H2-keywordextractor"
RATING_CHECKPOINT = 'roberta-rating'

# Summarisation model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_CHECKPOINT)

# Keyword-extraction model and its tokenizer.
tokenizer_keywords = AutoTokenizer.from_pretrained(KEYWORD_CHECKPOINT)
model_keywords = AutoModelForSeq2SeqLM.from_pretrained(KEYWORD_CHECKPOINT)

# The rating classifier runs on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model = AutoModelForSequenceClassification.from_pretrained(RATING_CHECKPOINT)
new_tokenizer = AutoTokenizer.from_pretrained(RATING_CHECKPOINT)

# The pipeline emits labels of the form "LABEL_<n>", which the functions
# below split on "_" to obtain the numeric rating.
classifier = TextClassificationPipeline(
    model=new_model,
    tokenizer=new_tokenizer,
    device=device,
)

# Display form of each numeric rating.
label_mapping = {1: '1/5', 2: '2/5', 3: '3/5', 4: '4/5', 5: '5/5'}
23
+
24
+ # Function to parse Excel file
25
def parse_xl(file_path):
    """Collect every non-empty cell value from all sheets of a workbook.

    Args:
        file_path: Path of the Excel file, passed to
            ``openpyxl.load_workbook``.

    Returns:
        list: Cell values in sheet order, row by row and left to right.
        Cells whose value is ``None`` are skipped.
    """
    cells = []

    workbook = load_workbook(filename=file_path)
    for sheet in workbook.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                # Identity check: only truly empty cells are dropped, so
                # falsy values such as 0 or "" are still collected.
                if cell.value is not None:
                    cells.append(cell.value)

    return cells
36
+
37
+ # Function to display and filter the Excel workbook
38
def filter_xl(file, keywords):
    """Load the active sheet of a workbook and filter its rows by keyword.

    The first row of the sheet is used as the column header. Each
    comma-separated keyword narrows the result further: a row is kept only
    if, for every keyword, at least one of its cells contains that keyword
    (case-insensitive, literal match).

    Args:
        file: Path of the Excel file, passed to ``openpyxl.load_workbook``.
        keywords: Comma-separated keywords; empty or ``None`` disables
            filtering.

    Returns:
        pandas.DataFrame: The (possibly filtered) sheet contents. An empty
        frame is returned when the sheet has no rows at all.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was uploaded).
    """
    if file is None:
        raise ValueError("No Excel file provided")

    # Load the workbook and convert the active sheet to a DataFrame.
    workbook = load_workbook(filename=file)
    rows = workbook.active.values
    header = next(rows, None)
    if header is None:
        # Completely empty sheet: there is no header row to consume.
        return pd.DataFrame()
    df = pd.DataFrame(rows, columns=header)

    if keywords:
        for keyword in keywords.split(','):
            needle = keyword.strip()
            if not needle:
                # Blank entries (e.g. a trailing comma) match every row.
                continue
            if df.empty:
                # Nothing left to filter; a row-wise apply on an empty
                # frame does not produce a usable boolean mask.
                break
            # regex=False: keywords are plain text, so characters such as
            # "+", "(" or "." must not be interpreted as regex syntax.
            mask = df.apply(
                lambda row: row.astype(str)
                .str.contains(needle, case=False, regex=False)
                .any(),
                axis=1,
            )
            df = df[mask]

    return df
52
+
53
+ # Function to calculate overall rating from filtered data
54
def calculate_rating(filtered_df):
    """Rate every text cell of a DataFrame and average the ratings.

    Each non-blank string cell is passed to the module-level ``classifier``;
    the numeric part of the returned ``LABEL_<n>`` label is the rating.

    Args:
        filtered_df: DataFrame whose cells hold review texts.

    Returns:
        tuple: ``(overall, ratings)`` where ``ratings`` is the list of
        per-review integer ratings and ``overall`` is their mean rounded to
        two decimals, or ``0.0`` when there is nothing to rate.
    """
    ratings = []
    for review in filtered_df.to_numpy().flatten():
        # Only text can be classified; empty, numeric or date cells would
        # make the pipeline raise, so they are skipped.
        if not isinstance(review, str) or not review.strip():
            continue
        # truncation=True keeps over-long reviews within the model's
        # maximum input length instead of failing on them.
        label = classifier(review, truncation=True)[0]['label']
        ratings.append(int(label.split('_')[1]))

    if not ratings:
        # mean([]) would produce NaN together with a RuntimeWarning.
        return 0.0, ratings

    return round(mean(ratings), 2), ratings
63
+
64
+ # Function to calculate results including summary, keywords, and sentiment
65
def calculate_results(file, keywords):
    """Compute rating, summary, keywords and sentiment for filtered reviews.

    Args:
        file: Excel file, forwarded to ``filter_xl``.
        keywords: Comma-separated filter keywords, forwarded to
            ``filter_xl``.

    Returns:
        tuple: ``(overall_rating, summary, keywords, overall_sentiment,
        ratings, sentiments)``. ``ratings`` and ``sentiments`` are
        per-review lists. When no review text is left after filtering,
        ``summary`` and ``keywords`` are empty strings.
    """
    import re  # local import keeps this block self-contained

    filtered_df = filter_xl(file, keywords)
    overall_rating, ratings = calculate_rating(filtered_df)

    # Sentiment is a pure function of the rating label, so it is derived
    # from the ratings already computed instead of classifying every
    # review a second time.
    sentiments = [
        "Positive" if rating in (4, 5)
        else "Negative" if rating in (1, 2)
        else "Neutral"
        for rating in ratings
    ]
    positives = sentiments.count("Positive")
    negatives = sentiments.count("Negative")
    if positives > negatives:
        overall_sentiment = "Positive"
    elif negatives > positives:
        overall_sentiment = "Negative"
    else:
        overall_sentiment = "Neutral"

    # Only text cells can be joined; None or numeric cells would make
    # str.join raise a TypeError.
    review_texts = [
        value
        for value in filtered_df.to_numpy().flatten()
        if isinstance(value, str) and value.strip()
    ]
    if not review_texts:
        # Nothing to summarise: do not ask the models to describe "".
        return overall_rating, "", "", overall_sentiment, ratings, sentiments
    text = " ".join(review_texts)

    # Summarize the filtered reviews.
    inputs = tokenizer([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=10, max_length=50)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    # Rewrite first-person pronouns as whole words only, in a single pass;
    # plain str.replace would also corrupt words such as "It" or "some".
    third_person = {"I": "They", "my": "their", "me": "them"}
    summary = re.sub(
        r"\b(?:I|my|me)\b",
        lambda match: third_person[match.group(0)],
        summary,
    )

    # Extract keywords from the same text.
    inputs_keywords = tokenizer_keywords([text], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    extracted_keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    return overall_rating, summary, extracted_keywords, overall_sentiment, ratings, sentiments
91
+
92
+ # Function to analyze a single review
93
def analyze_review(review):
    """Analyze a single review text.

    Args:
        review: The review text typed by the user.

    Returns:
        tuple: ``(rating, summary, keywords, sentiment_label)``. When
        ``review`` is ``None`` or blank, every element is the string
        ``"Error: No text provided"``.
    """
    import re  # local import keeps this block self-contained

    if review is None or not review.strip():
        error = "Error: No text provided"
        return error, error, error, error

    # Classify once; both the rating and the sentiment come from the same
    # label. truncation=True keeps over-long reviews within the model's
    # maximum input length.
    label = classifier(review, truncation=True)[0]['label']
    rating = int(label.split('_')[1])

    # Summarize review
    inputs = tokenizer([review], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=10, max_length=50)
    summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    # Rewrite first-person pronouns as whole words only, in a single pass;
    # plain str.replace would also corrupt words such as "It" or "some".
    third_person = {"I": "he/she", "my": "his/her", "me": "him/her"}
    summary = re.sub(
        r"\b(?:I|my|me)\b",
        lambda match: third_person[match.group(0)],
        summary,
    )

    # Extract keywords
    inputs_keywords = tokenizer_keywords([review], max_length=1024, truncation=True, return_tensors="pt")
    summary_ids_keywords = model_keywords.generate(inputs_keywords["input_ids"], num_beams=2, min_length=0, max_length=100)
    keywords = tokenizer_keywords.batch_decode(summary_ids_keywords, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    # Determine sentiment
    if label in ("LABEL_4", "LABEL_5"):
        sentiment_label = "Positive"
    elif label in ("LABEL_1", "LABEL_2"):
        sentiment_label = "Negative"
    else:
        sentiment_label = "Neutral"

    return rating, summary, keywords, sentiment_label
116
+
117
+ # Function to count rows in the filtered DataFrame
118
def count_rows(filtered_df):
    """Return the number of rows in ``filtered_df`` (its ``len``)."""
    row_total = len(filtered_df)
    return row_total
120
+
121
+ # Function to plot ratings
122
def plot_ratings(ratings):
    """Save a histogram of ratings and return the image path.

    Args:
        ratings: Iterable of integer ratings.

    Returns:
        str: ``'ratings_distribution.png'``, the file the plot was saved to.
    """
    output_path = 'ratings_distribution.png'
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Bin edges 1..6 with align='left' centre each bar on its rating.
        ax.hist(ratings, bins=range(1, 7), edgecolor='black', align='left')
        ax.set_xlabel('Rating')
        ax.set_ylabel('Frequency')
        ax.set_title('Distribution of Ratings')
        ax.set_xticks(range(1, 6))
        ax.grid(True)
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each button click would leak one figure in the running app.
        plt.close(fig)
    return output_path
132
+
133
+ # Function to plot sentiments
134
def plot_sentiments(sentiments):
    """Save a bar chart of sentiment counts and return the image path.

    Args:
        sentiments: Iterable of sentiment labels ("Positive", "Negative",
            "Neutral").

    Returns:
        str: ``'sentiments_distribution.png'``, the file the plot was
        saved to.
    """
    output_path = 'sentiments_distribution.png'
    # Colours are tied to the label, not to frequency order, so "Positive"
    # is always green no matter which sentiment is most common.
    palette = {"Positive": "green", "Negative": "red", "Neutral": "blue"}

    counts = pd.Series(list(sentiments), dtype="object").value_counts()
    # Always show the three known categories (zero-filled) so that empty
    # input still yields a valid chart; unexpected labels are appended.
    labels = list(palette) + [label for label in counts.index if label not in palette]
    counts = counts.reindex(labels, fill_value=0)
    colors = [palette.get(label, "gray") for label in labels]

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.bar([str(label) for label in labels], counts.to_numpy(), color=colors)
        ax.set_xlabel('Sentiment')
        ax.set_ylabel('Frequency')
        ax.set_title('Distribution of Sentiments')
        ax.grid(True)
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each button click would leak one figure in the running app.
        plt.close(fig)
    return output_path
144
+
145
+ # Gradio interface
146
def _display_filtered(file, keywords):
    """Filter the workbook once and return ``(table, row count)``."""
    filtered = filter_xl(file, keywords)
    return filtered, count_rows(filtered)


def _text_results(file, keywords):
    """Return only the four textual results (rating, summary, keywords, sentiment)."""
    return calculate_results(file, keywords)[:4]


def _results_with_graphs(file, keywords):
    """Run the analysis once and return the textual results plus both plot paths."""
    rating, summary_text, keyword_text, sentiment, ratings, sentiments = calculate_results(file, keywords)
    return (
        rating,
        summary_text,
        keyword_text,
        sentiment,
        plot_ratings(ratings),
        plot_sentiments(sentiments),
    )


# Gradio interface
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Upload and Filter"):
            with gr.Row():
                with gr.Column(scale=1):
                    excel_file = gr.File(label="Upload Excel File")
                    keywords_input = gr.Textbox(label="Filter by Keywords (comma-separated)")
                    display_button = gr.Button("Display and Filter Excel Data")
                    clear_button_upload = gr.Button("Clear")
                    row_count = gr.Textbox(label="Number of Rows", interactive=False)
                with gr.Column(scale=3):
                    filtered_data = gr.Dataframe(label="Filtered Excel Contents")

        with gr.TabItem("Calculate Results"):
            with gr.Row():
                with gr.Column():
                    overall_rating = gr.Textbox(label="Overall Rating")
                    summary = gr.Textbox(label="Summary")
                    keywords_output = gr.Textbox(label="Keywords")
                    overall_sentiment = gr.Textbox(label="Overall Sentiment")
                    calculate_button = gr.Button("Calculate Results")
                with gr.Column():
                    ratings_graph = gr.Image(label="Ratings Distribution")
                    sentiments_graph = gr.Image(label="Sentiments Distribution")
                    calculate_graph_button = gr.Button("Calculate Graph Results")

        with gr.TabItem("Testing Area / Write a Review"):
            with gr.Row():
                with gr.Column(scale=2):
                    review_input = gr.Textbox(label="Write your review here")
                    analyze_button = gr.Button("Analyze Review")
                    clear_button_review = gr.Button("Clear")
                with gr.Column(scale=2):
                    review_rating = gr.Textbox(label="Rating")
                    review_summary = gr.Textbox(label="Summary")
                    review_keywords = gr.Textbox(label="Keywords")
                    review_sentiment = gr.Textbox(label="Sentiment")

    # Each handler runs the expensive pipeline exactly once per click and
    # returns exactly as many values as it has output components.
    display_button.click(
        _display_filtered,
        inputs=[excel_file, keywords_input],
        outputs=[filtered_data, row_count],
    )
    calculate_graph_button.click(
        _results_with_graphs,
        inputs=[excel_file, keywords_input],
        outputs=[overall_rating, summary, keywords_output, overall_sentiment, ratings_graph, sentiments_graph],
    )
    calculate_button.click(
        _text_results,
        inputs=[excel_file, keywords_input],
        outputs=[overall_rating, summary, keywords_output, overall_sentiment],
    )
    analyze_button.click(
        analyze_review,
        inputs=review_input,
        outputs=[review_rating, review_summary, review_keywords, review_sentiment],
    )
    clear_button_upload.click(lambda: "", outputs=[keywords_input])
    clear_button_review.click(
        lambda: ("", "", "", "", ""),
        outputs=[review_input, review_rating, review_summary, review_keywords, review_sentiment],
    )

demo.launch(share=True)