ASokirka committed on
Commit
7c5a194
·
verified ·
1 Parent(s): 03bc94b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Untitled19.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1GJzV4fIIiYUDFdGIyDG8QzzluC6Fy_aS
8
+ """
9
+
10
+ import os
11
+ import re
12
+ import streamlit as st
13
+ import googleapiclient.discovery
14
+ import pandas as pd
15
+ from transformers import pipeline
16
+ import matplotlib.pyplot as plt
17
+ import seaborn as sns
18
+
19
# Page header; the :red[...] and :sunglasses: parts are Streamlit markdown
# directives embedded in the title text.
st.title('Анализатор комментариев :red[YouTube] :sunglasses:')
20
+
21
+
22
+ # Инициализируем модель Hugging Face для анализа тональности текста
23
+ # Кэшируем ресурс для одной загрузки модели на все сессии
24
+ #@st.cache_resource
25
def load_model():
    """
    Build the Hugging Face sentiment-analysis pipeline used by the app.

    The pipeline wraps the 'blanchefort/rubert-base-cased-sentiment'
    checkpoint.

    NOTE: the ``st.cache_resource`` decorator above this function is
    commented out, so a new pipeline object is constructed on every call.

    Returns: the object created by ``transformers.pipeline``.
    """
    return pipeline(
        "sentiment-analysis",
        "blanchefort/rubert-base-cased-sentiment",
    )
34
+
35
+
36
def extract_video_id(url: str) -> str:
    """
    Extract the video ID from a YouTube video URL.

    Recognised forms:
      * ``...watch?v=<id>`` (the ``v`` parameter may appear anywhere in the
        query string),
      * ``youtu.be/<id>`` short links,
      * ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>`` paths.

    Args: url (str): The YouTube video URL.
    Returns: str: The extracted video ID,
    or an empty string if no ID could be found in the URL.
    """
    # `\bv=` keeps parameters that merely end in "v" (e.g. "rev=") from being
    # mistaken for the video parameter. The ID character class is spelled out
    # so that IDs ending in "-" or "_" are captured in full.
    pattern = (
        r"(?:\bv=|youtu\.be/|/(?:shorts|embed|live)/)"
        r"([A-Za-z0-9_-]+)"
    )
    match = re.search(pattern, url)
    return match.group(1) if match else ""
49
+
50
+
51
def download_comments(video_id: str, max_comments: int = 100) -> pd.DataFrame:
    """
    Download top-level comments of a YouTube video and return them
    as a DataFrame.

    The API key is read from the ``API_KEY_YOUTUBE`` environment variable.
    Result pages are followed until ``max_comments`` rows were collected or
    the API reports no further page.

    Args:
        video_id (str): The video ID of the YouTube video.
        max_comments (int): Upper bound on the number of comments returned.
            Defaults to 100, the size of a single API result page.
    Returns: DataFrame: One row per top-level comment with the columns
    'author', 'published_at', 'updated_at', 'like_count' and 'text'.
    """
    api_key = os.getenv('API_KEY_YOUTUBE')
    youtube = googleapiclient.discovery.build("youtube",
                                              "v3",
                                              developerKey=api_key)
    threads = youtube.commentThreads()
    # The API accepts page sizes between 1 and 100.
    page_size = min(max(max_comments, 1), 100)
    request = threads.list(part="snippet",
                           videoId=video_id,
                           maxResults=page_size)
    rows = []
    while request is not None and len(rows) < max_comments:
        response = request.execute()
        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            rows.append([snippet['authorDisplayName'],
                         snippet['publishedAt'],
                         snippet['updatedAt'],
                         snippet['likeCount'],
                         snippet['textDisplay']])
        # list_next returns None once there is no further result page.
        request = threads.list_next(request, response)
    # The last page may overshoot the requested amount.
    return pd.DataFrame(rows[:max(max_comments, 0)],
                        columns=['author',
                                 'published_at',
                                 'updated_at',
                                 'like_count',
                                 'text'])
80
+
81
+
82
def analyze_emotions_in_comments(df: pd.DataFrame) -> tuple:
    """
    Run the sentiment model over the comment texts of a DataFrame.

    Only the 'text', 'author' and 'published_at' columns of the input are
    kept. At most the first 513 comments are sent to the model; any rows
    beyond that stay in the result without a prediction.

    Args: df (pandas.DataFrame): DataFrame containing comments to analyze.
    Returns: tuple: (DataFrame, int) - the model output columns (including
    'label') placed next to the kept comment columns, and the number of
    comments that were actually processed by the model.
    """
    max_comments = 513
    # Reset the index so the positional model output lines up row-by-row with
    # the comments when concatenated along the column axis.
    comments = df[['text', 'author', 'published_at']].reset_index(drop=True)
    texts = comments['text'][:max_comments].to_list()
    if texts:
        model = load_model()
        # Truncate over-long comments to the model's input limit instead of
        # letting the pipeline fail on them.
        predictions = model(texts, truncation=True, max_length=512)
        results = pd.DataFrame(predictions)
    else:
        # Keep the expected result columns present for an empty input so
        # downstream code can still access them.
        results = pd.DataFrame(columns=['label', 'score'])
    full_df = pd.concat([results, comments], axis=1)
    return (full_df, len(results))
97
+
98
+
99
def plot_heatmap_from_dataframe(df: pd.DataFrame) -> plt:
    """
    Draw a heatmap of comment counts per hour of day and calendar date.

    The input DataFrame is not modified; the helper 'Date' and 'Hour'
    columns are computed on a copy.

    Args: df (DataFrame): Must contain 'published_at' (parseable by
    ``pd.to_datetime``) and 'text' columns.
    Returns: plt: The ``matplotlib.pyplot`` module, whose current figure is
    the heatmap that was just drawn.
    """
    published = pd.to_datetime(df['published_at'])
    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    counts = df.assign(Date=published.dt.date,
                       Hour=published.dt.hour).pivot_table(index='Hour',
                                                           columns='Date',
                                                           values='text',
                                                           aggfunc='count')
    plt.figure(figsize=(10, 6))
    # seaborn cannot draw an empty table; leave the axes blank in that case.
    if not counts.empty:
        sns.heatmap(counts,
                    cmap='YlGnBu')
    plt.title('Количество комментариев по часам и датам')
    plt.xlabel('Дата')
    plt.ylabel('Час')
    return plt
119
+
120
+
121
def visualize_data(df: pd.DataFrame):
    """
    Draw a pie chart of how often each value occurs in the 'label' column.

    Args: df (DataFrame): The input DataFrame; must contain a 'label' column.
    Returns: fig: The matplotlib figure holding the pie chart.
    """
    figure, axes = plt.subplots()
    label_counts = df['label'].value_counts()
    axes.set_title("Эмоциональная окраска комментариев на YouTube")
    axes.pie(label_counts, labels=label_counts.index, autopct='%1.1f%%')
    return figure
133
+
134
+
135
def change_url():
    """
    Callback for the URL text input: clear the ``start`` flag in the
    session state so results are not shown until the button is pressed again.
    """
    st.session_state["start"] = False
137
+
138
+
139
if "start" not in st.session_state:
    st.session_state.start = False

# Extract the video id from the URL so it can be sent with the API request.
url = st.text_input(label="Enter URL from YouTube", on_change=change_url)
video_id = extract_video_id(url)
# The button is only offered once a video id could be extracted.
if video_id != "" and st.button('Загрузить комментарии'):
    st.session_state.start = True

if st.session_state.start:
    comments_df = download_comments(video_id)
    if comments_df.empty:
        # Nothing to analyze or plot; the charts below need at least one row.
        st.warning('Комментарии к этому видео не найдены.')
    else:
        # Show the table with the results on the page.
        with st.spinner('Analyzing comments...'):
            full_df, num_comments = analyze_emotions_in_comments(comments_df)
        st.success(f'Готово! Обработано {num_comments} комментариев.')
        st.write(full_df)
        st.markdown('***')

        # Show the heatmap of comments by hour and date. The figure is closed
        # after rendering so figures do not pile up across script reruns.
        heatmap_fig = plot_heatmap_from_dataframe(full_df).gcf()
        st.pyplot(heatmap_fig)
        plt.close(heatmap_fig)
        st.markdown('***')

        # Show the pie chart of sentiment labels.
        pie_fig = visualize_data(full_df)
        st.pyplot(pie_fig)
        plt.close(pie_fig)