kittendev committed on
Commit
5366a00
·
verified ·
1 Parent(s): 4808764

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -200
app.py CHANGED
@@ -1,200 +1,200 @@
1
- import ast
2
- import os
3
- import pickle
4
- import random
5
- from datetime import datetime, timedelta
6
-
7
- import gradio as gr
8
- import pandas as pd
9
- from langchain_google_genai import ChatGoogleGenerativeAI
10
- from langchain_core.messages import HumanMessage, SystemMessage
11
- from pytrends.request import TrendReq
12
-
13
- from mlxtend.preprocessing import TransactionEncoder
14
-
15
-
16
def convert_keywords_to_list(keywords_str):
    """Parse the text form of a keyword list into a real Python list.

    Args:
        keywords_str: A Python-literal string such as ``"['a', 'b']"``.
            Values that are not strings (for example a NaN float) are
            tolerated and treated as unparseable.

    Returns:
        The parsed keywords as a list. An empty list is returned when the
        value cannot be parsed or does not evaluate to a list, tuple or set.
    """
    try:
        parsed = ast.literal_eval(keywords_str)
    except (SyntaxError, ValueError, TypeError):
        # TypeError covers text that parses but cannot be built, such as a
        # dict literal with an unhashable key.
        return []

    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)

    # A scalar or dict literal is not a keyword list; returning it as-is
    # would break callers that expect a list of keywords.
    return []
21
-
22
-
23
def convert_scores_to_list(scores_float):
    """Parse the text form of a list of trend scores into a Python list.

    Args:
        scores_float: A Python-literal string such as ``"[10.5, 20]"``.
            Values that are not strings (for example a NaN float) are
            tolerated and treated as unparseable.

    Returns:
        The parsed scores as a list. An empty list is returned when the
        value cannot be parsed or does not evaluate to a list, tuple or set.
    """
    try:
        parsed = ast.literal_eval(scores_float)
    except (SyntaxError, ValueError, TypeError):
        # TypeError covers text that parses but cannot be built, such as a
        # dict literal with an unhashable key.
        return []

    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)

    # A bare number such as "3.5" is not a score list; callers take len()
    # of the result, so hand back an empty list instead.
    return []
28
-
29
# --- Data and model loading (runs once, when the module is executed) --------

# Video metadata. The `keywords` and `trend_scores` columns are parsed from
# their text form into lists by the converter helpers.
video_df = pd.read_csv('video_df_complete.csv')
video_df['keywords'] = video_df['keywords'].apply(convert_keywords_to_list)
video_df['trend_scores'] = video_df['trend_scores'].apply(convert_scores_to_list)

# Mean trend score per video; rows without any score get 0 instead of a
# division by zero.
video_df['total_score'] = video_df['trend_scores'].apply(
    lambda scores: sum(scores) / len(scores) if len(scores) > 0 else 0
)

# One transaction (keyword list) per video, in row order.
transactions = video_df['keywords'].tolist()

# One boolean column per distinct keyword.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Keyword indicator columns followed by total_score and engagement_rate.
merged_df = pd.concat([df, video_df['total_score'], video_df['engagement_rate']], axis=1)

rules = pd.read_csv('association_rules.csv')
# SECURITY NOTE(review): eval() executes whatever text the CSV contains.
# Only acceptable while association_rules.csv is a trusted, locally produced
# file; the cell format is not visible here, so it is left in place.
rules['antecedents'] = rules['antecedents'].apply(lambda x: list(eval(x)))
rules['consequents'] = rules['consequents'].apply(lambda x: list(eval(x)))

model_filename = 'regression_model_final.pkl'

# SECURITY NOTE(review): pickle.load can run arbitrary code from the file;
# only load model files that come from a trusted source.
with open(model_filename, 'rb') as file:
    model = pickle.load(file)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", convert_system_message_to_human=True)
55
-
56
def custom_predict(keywords, total_score):
    """Predict the engagement rate for a keyword set with the loaded model.

    A single feature row is built with the columns of ``merged_df`` minus
    ``engagement_rate``: 1 for every column named in ``keywords``, 0 for the
    remaining columns, and ``total_score`` in the ``total_score`` column.

    Args:
        keywords: A list of keywords. Keywords that are not feature columns
            are ignored.
        total_score: The total trend score.

    Returns:
        The predicted engagement rate (the first value produced by
        ``model.predict``).
    """
    feature_columns = [col for col in merged_df.columns if col != 'engagement_rate']

    # Assemble the row as a plain dict so each column receives its final
    # value before the DataFrame exists. Writing a float into an integer
    # column of an existing frame triggers a dtype warning in recent pandas.
    features = dict.fromkeys(feature_columns, 0)
    for keyword in keywords:
        if keyword in features:
            features[keyword] = 1
    features['total_score'] = total_score

    new_data = pd.DataFrame([features])

    prediction = model.predict(new_data)

    # The original code assumed a 2-D result (prediction[0][0]). Also accept
    # a 1-D result, whose first element is already the scalar.
    first = prediction[0]
    try:
        return first[0]
    except (TypeError, IndexError):
        return first
80
-
81
-
82
def generate_keyword_scores(keywords):
    """Produce one random placeholder score for each keyword.

    Each score is drawn uniformly between 70% and 120% (capped at 100) of a
    fixed base rate of ``min(100, 4.5 * 10)`` and rounded to two decimals.

    Args:
        keywords: Iterable of keywords; only the number of items matters.

    Returns:
        A list with one float per keyword.
    """
    base_rate = min(100, 4.5 * 10)
    lower_bound = base_rate * 0.7
    upper_bound = min(100, base_rate * 1.2)

    scores = []
    for _keyword in keywords:
        scores.append(round(random.uniform(lower_bound, upper_bound), 2))
    return scores
89
-
90
-
91
def get_google_trends_score(keywords, end_date, days_back=7):
    """Fetch Google Trends scores for the given keywords over a time window.

    The request uses ``geo='ID'`` and ``gprop='youtube'`` and covers the
    period from ``end_date - days_back`` days up to ``end_date``. If the
    request fails for any reason, random placeholder scores from
    ``generate_keyword_scores`` are returned instead so the caller can
    continue.

    Parameters:
        keywords (list): Keywords to analyse.
        end_date (datetime): Last day of the trend window.
        days_back (int): Number of days before ``end_date`` at which the
            window starts (default: 7).

    Returns:
        pd.DataFrame: Trend data as returned by ``interest_over_time()``
        with the ``isPartial`` column removed. On failure, a single row of
        placeholder scores with one column per keyword. An empty DataFrame
        is returned when ``keywords`` is empty.
    """
    import logging  # local import: only this function needs it

    logger = logging.getLogger(__name__)

    if not keywords:
        logger.warning("Daftar kata kunci tidak boleh kosong.")
        return pd.DataFrame()

    try:
        pytrends = TrendReq()
        start_date = end_date - timedelta(days=days_back)
        timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

        # NOTE(review): Google Trends presumably limits how many terms one
        # request may compare; longer lists would land in the fallback below.
        pytrends.build_payload(keywords, timeframe=timeframe, geo='ID', gprop='youtube')
        trends_df = pytrends.interest_over_time()

        if 'isPartial' in trends_df.columns:
            trends_df = trends_df.drop(columns=['isPartial'])

        return trends_df
    except Exception:
        # Deliberately best-effort: any failure degrades to placeholder
        # scores, but the cause is logged instead of being discarded.
        logger.exception("Google Trends request failed; using placeholder scores")
        # One column per keyword, so callers that look scores up by keyword
        # name find them. A bare list would produce a single column named 0.
        return pd.DataFrame([generate_keyword_scores(keywords)], columns=list(keywords))
120
-
121
-
122
def generate_title(keyword, category):
    """Generate a YouTube video title suggestion for a keyword.

    Only the ``'Gaming'`` category is handled. The keyword is expanded into
    a recommended keyword set by ``recommend_keyword`` and the language
    model is asked to write one title that uses those keywords.

    Args:
        keyword: Seed keyword entered by the user.
        category: Selected video category.

    Returns:
        str: The generated title, or a fixed message when the category is
        not supported or no keyword recommendation was found.
    """
    if category != 'Gaming':
        return "Category belum supported."

    recommendation = recommend_keyword(keyword)
    if not recommendation:
        return "No recommendations found."

    # The instruction sentences are joined with explicit ". " separators.
    # Plain adjacent string literals would glue them together without any
    # space ("...youtubeKamu akan..."). The newline example is escaped so the
    # model sees the two characters backslash-n, not an actual line break.
    system_prompt = ". ".join([
        "Kamu adalah seorang penulis judul video youtube",
        "Kamu akan diberikan beberapa buah keyword yang wajib digunakan untuk judul",
        "Buat judul yang semenarik mungkin untuk memberikan viewer rasa suka",
        "Cukup keluarkan satu judul saja dalam satu kalimat",
        "Jangan gunnakan formatting seperti '\\n' atau hal lainnya. Gunakan saja raw string",
        "Boleh pake emoji",
    ]) + "."

    human_prompt = ". ".join([
        f"keyword yang digunakan adalah sebagai berikut: {recommendation}",
        f"Total jumlah keyword adalah: {len(recommendation)}",
        f"Video memiliki kategori: {category}",
    ])

    # invoke() is the supported entry point; calling the model object
    # directly is deprecated in LangChain.
    result = llm.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt),
        ]
    )

    return result.content
150
-
151
-
152
def recommend_keyword(keyword):
    """Pick the keyword combination with the best predicted engagement.

    The five highest-lift association rules that mention ``keyword`` in
    their antecedents or consequents are turned into candidate keyword lists
    (``keyword`` followed by the rule's consequents). Each candidate is
    scored with ``get_google_trends_score`` and ``custom_predict``, and the
    candidate with the highest predicted engagement rate is returned.

    Args:
        keyword: Seed keyword entered by the user. Surrounding whitespace is
            ignored.

    Returns:
        list: The winning keyword list without duplicate entries, or an
        empty list when the keyword is blank or no rule mentions it.
    """
    if not keyword or not keyword.strip():
        # A blank keyword would match every rule in the substring search.
        return []
    keyword = keyword.strip()

    # regex=False: the keyword is user input and must be matched literally,
    # otherwise characters such as "+" or "(" are interpreted as a pattern.
    in_antecedents = rules['antecedents'].astype(str).str.contains(keyword, regex=False)
    in_consequents = rules['consequents'].astype(str).str.contains(keyword, regex=False)
    keyword_rules = rules[in_antecedents | in_consequents]

    top_5_rules = keyword_rules.sort_values(by='lift', ascending=False).head(5)

    candidates = []
    for consequents in top_5_rules['consequents']:
        # dict.fromkeys drops repeated keywords while keeping their order
        # (the seed keyword may itself be one of the consequents).
        candidate = list(dict.fromkeys([keyword, *consequents]))
        if candidate not in candidates:
            candidates.append(candidate)

    if not candidates:
        return []

    now = datetime.now()
    best_candidate = None
    best_engagement = None

    for candidate in candidates:
        trends_df = get_google_trends_score(candidate, now)

        # Mean trend value per keyword; keywords without a column count as 0.
        keyword_scores = [
            round(trends_df[term].mean(), 2) if term in trends_df.columns else 0
            for term in candidate
        ]
        mean_score = sum(keyword_scores) / len(keyword_scores)

        engagement_rate = custom_predict(candidate, mean_score)

        # Strict comparison keeps the first candidate on ties.
        if best_engagement is None or engagement_rate > best_engagement:
            best_candidate = candidate
            best_engagement = engagement_rate

    return best_candidate
185
-
186
-
187
# Category choices are the distinct values found in the video data.
# NOTE(review): 'catergory' looks like a misspelling of 'category'; it has to
# match the column name in the CSV, so confirm before renaming it.
distinct_categories = video_df['catergory'].unique()

# Build the widgets first, then wire them into the interface.
keyword_box = gr.Textbox(label="Enter a keyword")
category_dropdown = gr.Dropdown(
    choices=list(distinct_categories),
    label="Select a category",
)
result_box = gr.Textbox(label="Recommendations")

iface = gr.Interface(
    title="Title Recommendation",
    description="Do'akan saya langgeng sm Ei",
    fn=generate_title,
    inputs=[keyword_box, category_dropdown],
    outputs=result_box,
)

# Start the web UI when the script is executed.
iface.launch()
 
1
+ import ast
2
+ import os
3
+ import pickle
4
+ import random
5
+ from datetime import datetime, timedelta
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain_core.messages import HumanMessage, SystemMessage
11
+ from pytrends.request import TrendReq
12
+
13
+ from mlxtend.preprocessing import TransactionEncoder
14
+
15
+
16
def convert_keywords_to_list(keywords_str):
    """Parse the text form of a keyword list into a real Python list.

    Args:
        keywords_str: A Python-literal string such as ``"['a', 'b']"``.
            Values that are not strings (for example a NaN float) are
            tolerated and treated as unparseable.

    Returns:
        The parsed keywords as a list. An empty list is returned when the
        value cannot be parsed or does not evaluate to a list, tuple or set.
    """
    try:
        parsed = ast.literal_eval(keywords_str)
    except (SyntaxError, ValueError, TypeError):
        # TypeError covers text that parses but cannot be built, such as a
        # dict literal with an unhashable key.
        return []

    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)

    # A scalar or dict literal is not a keyword list; returning it as-is
    # would break callers that expect a list of keywords.
    return []
21
+
22
+
23
def convert_scores_to_list(scores_float):
    """Parse the text form of a list of trend scores into a Python list.

    Args:
        scores_float: A Python-literal string such as ``"[10.5, 20]"``.
            Values that are not strings (for example a NaN float) are
            tolerated and treated as unparseable.

    Returns:
        The parsed scores as a list. An empty list is returned when the
        value cannot be parsed or does not evaluate to a list, tuple or set.
    """
    try:
        parsed = ast.literal_eval(scores_float)
    except (SyntaxError, ValueError, TypeError):
        # TypeError covers text that parses but cannot be built, such as a
        # dict literal with an unhashable key.
        return []

    if isinstance(parsed, (list, tuple, set)):
        return list(parsed)

    # A bare number such as "3.5" is not a score list; callers take len()
    # of the result, so hand back an empty list instead.
    return []
28
+
29
# --- Data and model loading (runs once, when the module is executed) --------

# Video metadata. The `keywords` and `trend_scores` columns are parsed from
# their text form into lists by the converter helpers.
video_df = pd.read_csv('video_df_complete.csv')
video_df['keywords'] = video_df['keywords'].apply(convert_keywords_to_list)
video_df['trend_scores'] = video_df['trend_scores'].apply(convert_scores_to_list)

# Mean trend score per video; rows without any score get 0 instead of a
# division by zero.
video_df['total_score'] = video_df['trend_scores'].apply(
    lambda scores: sum(scores) / len(scores) if len(scores) > 0 else 0
)

# One transaction (keyword list) per video, in row order.
transactions = video_df['keywords'].tolist()

# One boolean column per distinct keyword.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Keyword indicator columns followed by total_score and engagement_rate.
merged_df = pd.concat([df, video_df['total_score'], video_df['engagement_rate']], axis=1)

rules = pd.read_csv('association_rules.csv')
# SECURITY NOTE(review): eval() executes whatever text the CSV contains.
# Only acceptable while association_rules.csv is a trusted, locally produced
# file; the cell format is not visible here, so it is left in place.
rules['antecedents'] = rules['antecedents'].apply(lambda x: list(eval(x)))
rules['consequents'] = rules['consequents'].apply(lambda x: list(eval(x)))

model_filename = 'regression_model_final.pkl'

# SECURITY NOTE(review): pickle.load can run arbitrary code from the file;
# only load model files that come from a trusted source.
with open(model_filename, 'rb') as file:
    model = pickle.load(file)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", convert_system_message_to_human=True)
55
+
56
def custom_predict(keywords, total_score):
    """Predict the engagement rate for a keyword set with the loaded model.

    A single feature row is built with the columns of ``merged_df`` minus
    ``engagement_rate``: 1 for every column named in ``keywords``, 0 for the
    remaining columns, and ``total_score`` in the ``total_score`` column.

    Args:
        keywords: A list of keywords. Keywords that are not feature columns
            are ignored.
        total_score: The total trend score.

    Returns:
        The predicted engagement rate (the first value produced by
        ``model.predict``).
    """
    feature_columns = [col for col in merged_df.columns if col != 'engagement_rate']

    # Assemble the row as a plain dict so each column receives its final
    # value before the DataFrame exists. Writing a float into an integer
    # column of an existing frame triggers a dtype warning in recent pandas.
    features = dict.fromkeys(feature_columns, 0)
    for keyword in keywords:
        if keyword in features:
            features[keyword] = 1
    features['total_score'] = total_score

    new_data = pd.DataFrame([features])

    prediction = model.predict(new_data)

    # The original code assumed a 2-D result (prediction[0][0]). Also accept
    # a 1-D result, whose first element is already the scalar.
    first = prediction[0]
    try:
        return first[0]
    except (TypeError, IndexError):
        return first
80
+
81
+
82
def generate_keyword_scores(keywords):
    """Produce one random placeholder score for each keyword.

    Each score is drawn uniformly between 70% and 120% (capped at 100) of a
    fixed base rate of ``min(100, 4.5 * 10)`` and rounded to two decimals.

    Args:
        keywords: Iterable of keywords; only the number of items matters.

    Returns:
        A list with one float per keyword.
    """
    base_rate = min(100, 4.5 * 10)
    lower_bound = base_rate * 0.7
    upper_bound = min(100, base_rate * 1.2)

    scores = []
    for _keyword in keywords:
        scores.append(round(random.uniform(lower_bound, upper_bound), 2))
    return scores
89
+
90
+
91
def get_google_trends_score(keywords, end_date, days_back=7):
    """Fetch Google Trends scores for the given keywords over a time window.

    The request uses ``geo='ID'`` and ``gprop='youtube'`` and covers the
    period from ``end_date - days_back`` days up to ``end_date``. If the
    request fails for any reason, random placeholder scores from
    ``generate_keyword_scores`` are returned instead so the caller can
    continue.

    Parameters:
        keywords (list): Keywords to analyse.
        end_date (datetime): Last day of the trend window.
        days_back (int): Number of days before ``end_date`` at which the
            window starts (default: 7).

    Returns:
        pd.DataFrame: Trend data as returned by ``interest_over_time()``
        with the ``isPartial`` column removed. On failure, a single row of
        placeholder scores with one column per keyword. An empty DataFrame
        is returned when ``keywords`` is empty.
    """
    import logging  # local import: only this function needs it

    logger = logging.getLogger(__name__)

    if not keywords:
        logger.warning("Daftar kata kunci tidak boleh kosong.")
        return pd.DataFrame()

    try:
        pytrends = TrendReq()
        start_date = end_date - timedelta(days=days_back)
        timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

        # NOTE(review): Google Trends presumably limits how many terms one
        # request may compare; longer lists would land in the fallback below.
        pytrends.build_payload(keywords, timeframe=timeframe, geo='ID', gprop='youtube')
        trends_df = pytrends.interest_over_time()

        if 'isPartial' in trends_df.columns:
            trends_df = trends_df.drop(columns=['isPartial'])

        return trends_df
    except Exception:
        # Deliberately best-effort: any failure degrades to placeholder
        # scores, but the cause is logged instead of being discarded.
        logger.exception("Google Trends request failed; using placeholder scores")
        # One column per keyword, so callers that look scores up by keyword
        # name find them. A bare list would produce a single column named 0.
        return pd.DataFrame([generate_keyword_scores(keywords)], columns=list(keywords))
120
+
121
+
122
def generate_title(keyword, category):
    """Generate a YouTube video title suggestion for a keyword.

    Only the ``'Gaming'`` category is handled. The keyword is expanded into
    a recommended keyword set by ``recommend_keyword`` and the language
    model is asked to write one title that uses those keywords.

    Args:
        keyword: Seed keyword entered by the user.
        category: Selected video category.

    Returns:
        str: The generated title, or a fixed message when the category is
        not supported or no keyword recommendation was found.
    """
    if category != 'Gaming':
        return "Category belum supported."

    recommendation = recommend_keyword(keyword)
    if not recommendation:
        return "No recommendations found."

    # The instruction sentences are joined with explicit ". " separators.
    # Plain adjacent string literals would glue them together without any
    # space ("...youtubeKamu akan..."). The newline example is escaped so the
    # model sees the two characters backslash-n, not an actual line break.
    system_prompt = ". ".join([
        "Kamu adalah seorang penulis judul video youtube",
        "Kamu akan diberikan beberapa buah keyword yang wajib digunakan untuk judul",
        "Buat judul yang semenarik mungkin untuk memberikan viewer rasa suka",
        "Cukup keluarkan satu judul saja dalam satu kalimat",
        "Jangan gunnakan formatting seperti '\\n' atau hal lainnya. Gunakan saja raw string",
        "Boleh pake emoji",
    ]) + "."

    human_prompt = ". ".join([
        f"keyword yang digunakan adalah sebagai berikut: {recommendation}",
        f"Total jumlah keyword adalah: {len(recommendation)}",
        f"Video memiliki kategori: {category}",
    ])

    # invoke() is the supported entry point; calling the model object
    # directly is deprecated in LangChain.
    result = llm.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt),
        ]
    )

    return result.content
150
+
151
+
152
def recommend_keyword(keyword):
    """Pick the keyword combination with the best predicted engagement.

    The five highest-lift association rules that mention ``keyword`` in
    their antecedents or consequents are turned into candidate keyword lists
    (``keyword`` followed by the rule's consequents). Each candidate is
    scored with ``get_google_trends_score`` and ``custom_predict``, and the
    candidate with the highest predicted engagement rate is returned.

    Args:
        keyword: Seed keyword entered by the user. Surrounding whitespace is
            ignored.

    Returns:
        list: The winning keyword list without duplicate entries, or an
        empty list when the keyword is blank or no rule mentions it.
    """
    if not keyword or not keyword.strip():
        # A blank keyword would match every rule in the substring search.
        return []
    keyword = keyword.strip()

    # regex=False: the keyword is user input and must be matched literally,
    # otherwise characters such as "+" or "(" are interpreted as a pattern.
    in_antecedents = rules['antecedents'].astype(str).str.contains(keyword, regex=False)
    in_consequents = rules['consequents'].astype(str).str.contains(keyword, regex=False)
    keyword_rules = rules[in_antecedents | in_consequents]

    top_5_rules = keyword_rules.sort_values(by='lift', ascending=False).head(5)

    candidates = []
    for consequents in top_5_rules['consequents']:
        # dict.fromkeys drops repeated keywords while keeping their order
        # (the seed keyword may itself be one of the consequents).
        candidate = list(dict.fromkeys([keyword, *consequents]))
        if candidate not in candidates:
            candidates.append(candidate)

    if not candidates:
        return []

    now = datetime.now()
    best_candidate = None
    best_engagement = None

    for candidate in candidates:
        trends_df = get_google_trends_score(candidate, now)

        # Mean trend value per keyword; keywords without a column count as 0.
        keyword_scores = [
            round(trends_df[term].mean(), 2) if term in trends_df.columns else 0
            for term in candidate
        ]
        mean_score = sum(keyword_scores) / len(keyword_scores)

        engagement_rate = custom_predict(candidate, mean_score)

        # Strict comparison keeps the first candidate on ties.
        if best_engagement is None or engagement_rate > best_engagement:
            best_candidate = candidate
            best_engagement = engagement_rate

    return best_candidate
185
+
186
+
187
# Category choices are the distinct values found in the video data.
# NOTE(review): 'catergory' looks like a misspelling of 'category'; it has to
# match the column name in the CSV, so confirm before renaming it.
distinct_categories = video_df['catergory'].unique()

# Build the widgets first, then wire them into the interface.
keyword_box = gr.Textbox(label="Enter a keyword")
category_dropdown = gr.Dropdown(
    choices=list(distinct_categories),
    label="Select a category",
)
result_box = gr.Textbox(label="Recommendations")

iface = gr.Interface(
    title="Title Recommendation",
    description="Do'akan saya langgeng sm Ei",
    fn=generate_title,
    inputs=[keyword_box, category_dropdown],
    outputs=result_box,
)

# Start the web UI when the script is executed.
iface.launch()