kittendev committed on
Commit
a61302b
·
verified ·
1 Parent(s): 63bdb7d

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +200 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import os
3
+ import pickle
4
+ import random
5
+ from datetime import datetime, timedelta
6
+
7
+ import gradio as gr
8
+ import pandas as pd
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from langchain_core.messages import HumanMessage, SystemMessage
11
+ from pytrends.request import TrendReq
12
+
13
+ from mlxtend.preprocessing import TransactionEncoder
14
+
15
+
16
def convert_keywords_to_list(keywords_str):
    """Parse the textual form of a keyword list into a real list.

    Args:
        keywords_str: Text holding a Python literal, e.g. "['a', 'b']".

    Returns:
        The parsed keywords as a list. An empty list is returned when the
        value cannot be parsed or does not describe a sequence, so callers
        can always treat the result as a list.
    """
    try:
        parsed = ast.literal_eval(keywords_str)
    except (SyntaxError, ValueError, TypeError):
        # TypeError: literals such as "{[1]: 2}" fail while being built.
        # ValueError also covers non-string cells (e.g. NaN for a blank cell).
        return []

    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, (tuple, set, frozenset)):
        return list(parsed)
    # Scalars, dicts, None, ... are not keyword lists.
    return []
21
+
22
+
23
def convert_scores_to_list(scores_float):
    """Parse the textual form of a score list into a real list.

    Args:
        scores_float: Text holding a Python literal, e.g. "[12.5, 40]".

    Returns:
        The parsed scores as a list. An empty list is returned when the
        value cannot be parsed or is not a list/tuple, so callers can safely
        apply ``sum()`` and ``len()`` to the result.
    """
    try:
        value = ast.literal_eval(scores_float)
    except (SyntaxError, ValueError, TypeError):
        # ValueError also covers non-string cells (e.g. NaN for a blank cell);
        # TypeError covers literals that fail while being constructed.
        return []

    # Anything that is not an ordered sequence (a bare number, a dict, ...)
    # cannot be averaged downstream, so treat it as "no scores".
    return list(value) if isinstance(value, (list, tuple)) else []
28
+
29
# --- Video dataset ---------------------------------------------------------
# The 'keywords' and 'trend_scores' columns are run through the converter
# helpers so each cell becomes a Python list.
video_df = pd.read_csv('video_df_complete.csv')
for column, parser in (
    ('keywords', convert_keywords_to_list),
    ('trend_scores', convert_scores_to_list),
):
    video_df[column] = video_df[column].apply(parser)

# Mean trend score per video; rows without any score get 0.
video_df['total_score'] = video_df['trend_scores'].apply(
    lambda scores: sum(scores) / len(scores) if len(scores) > 0 else 0
)

# --- One-hot keyword matrix --------------------------------------------------
# Each video's keyword list is one "transaction" for the encoder.
transactions = video_df['keywords'].tolist()

te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Keyword indicator columns followed by total_score and engagement_rate.
merged_df = pd.concat(
    [df, video_df[['total_score', 'engagement_rate']]],
    axis=1,
)

# --- Association rules -------------------------------------------------------
rules = pd.read_csv('association_rules.csv')
# NOTE(security): eval() executes whatever expression the CSV cell contains.
# Only load an association_rules.csv that you generated yourself.
for side in ('antecedents', 'consequents'):
    rules[side] = rules[side].apply(lambda cell: list(eval(cell)))

# --- Regression model --------------------------------------------------------
model_filename = 'regression_model_final.pkl'

# NOTE(security): pickle.load can run arbitrary code from the file; the
# pickle must come from a trusted source.
with open(model_filename, 'rb') as file:
    model = pickle.load(file)

# --- LLM client --------------------------------------------------------------
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", convert_system_message_to_human=True)
55
+
56
def custom_predict(keywords, total_score):
    """Predict the engagement rate for a keyword set and a trend score.

    A single feature row is built with one column per column of
    ``merged_df`` (minus ``engagement_rate``): every keyword that matches a
    column is set to 1, all others stay 0, and ``total_score`` holds the
    supplied score. The row is then passed to the loaded ``model``.

    Args:
        keywords: A list of keywords.
        total_score: The total trend score.

    Returns:
        The predicted engagement rate (element ``[0][0]`` of the model
        output, i.e. the model is assumed to return a 2-D array).
    """
    # Assemble the row as a plain dict first. Writing a float into an
    # all-integer DataFrame cell with ``.at`` relies on an implicit dtype
    # upcast that pandas has deprecated.
    feature_row = {column: 0 for column in merged_df.columns}

    for keyword in keywords:
        # Keywords without a matching column are ignored.
        if keyword in feature_row:
            feature_row[keyword] = 1

    feature_row['total_score'] = total_score

    # The target column must not be fed to the model as a feature.
    del feature_row['engagement_rate']

    new_data = pd.DataFrame([feature_row])
    prediction = model.predict(new_data)

    return prediction[0][0]
80
+
81
+
82
def generate_keyword_scores(keywords):
    """Produce one random placeholder score per keyword.

    Each score is drawn uniformly between 70% and 120% of a fixed base rate
    (``4.5 * 10``, capped at 100) and rounded to two decimals.

    Args:
        keywords: Iterable of keywords; only its length matters.

    Returns:
        A list of floats, one per keyword.
    """
    scaled_rate = min(100, 4.5 * 10)
    lower_bound = scaled_rate * 0.7
    upper_bound = min(100, scaled_rate * 1.2)

    scores = []
    for _ in keywords:
        scores.append(round(random.uniform(lower_bound, upper_bound), 2))
    return scores
89
+
90
+
91
def get_google_trends_score(keywords, end_date, days_back=7):
    """Fetch Google Trends interest for the given keywords over a time window.

    The request is made for YouTube search (``gprop='youtube'``) in
    Indonesia (``geo='ID'``).

    Args:
        keywords (list): Keywords to analyse.
        end_date (datetime): Last day of the trend window.
        days_back (int): Number of days before ``end_date`` at which the
            window starts (default: 7).

    Returns:
        pd.DataFrame: Trend data with one column per keyword. If the request
        fails, a single-row frame of randomly generated placeholder scores
        (again one column per keyword) is returned. An empty frame is
        returned when no keywords are given.
    """
    import logging  # local import: the module header does not import logging

    # Drop duplicates (order preserved) so every keyword maps to one column.
    keywords = list(dict.fromkeys(keywords or []))
    if not keywords:
        return pd.DataFrame()

    try:
        pytrends = TrendReq()
        start_date = end_date - timedelta(days=days_back)
        timeframe = f"{start_date.strftime('%Y-%m-%d')} {end_date.strftime('%Y-%m-%d')}"

        pytrends.build_payload(keywords, timeframe=timeframe, geo='ID', gprop='youtube')
        trends_df = pytrends.interest_over_time()

        # 'isPartial' is not a keyword column; drop it when present.
        if 'isPartial' in trends_df.columns:
            trends_df = trends_df.drop(columns=['isPartial'])

        return trends_df
    except Exception as exc:
        # Deliberate best effort: any failure of the trends request falls
        # back to placeholder scores instead of aborting the recommendation.
        logging.getLogger(__name__).warning(
            "Google Trends request failed (%s); using generated scores.", exc
        )
        # Key the fallback by keyword so callers that look up
        # trends_df[keyword] find their column.
        return pd.DataFrame([generate_keyword_scores(keywords)], columns=keywords)
120
+
121
+
122
def generate_title(keyword, category):
    """Generate a YouTube video title for a keyword via the LLM.

    The keyword is expanded into a recommended keyword set by
    ``recommend_keyword`` and that set is sent to ``llm`` together with the
    category.

    Args:
        keyword: Seed keyword entered by the user.
        category: Selected video category; only 'Gaming' is handled.

    Returns:
        The generated title text, or a short message when the category is
        not supported or no recommendation could be produced.
    """
    if category != 'Gaming':
        return "Category belum supported."

    # An empty keyword would match every association rule, so reject it.
    keyword = (keyword or "").strip()
    if not keyword:
        return "No recommendations found."

    recommendation = recommend_keyword(keyword)
    if not recommendation:
        return "No recommendations found."

    # Join the instructions with explicit separators; adjacent string
    # literals would otherwise run the sentences together. '\\n' keeps the
    # two characters backslash-n in the prompt instead of a real line break.
    system_prompt = ". ".join(
        (
            "Kamu adalah seorang penulis judul video youtube",
            "Kamu akan diberikan beberapa buah keyword yang wajib digunakan untuk judul",
            "Buat judul yang semenarik mungkin untuk memberikan viewer rasa suka",
            "Cukup keluarkan satu judul saja dalam satu kalimat",
            "Jangan gunnakan formatting seperti '\\n' atau hal lainnya. Gunakan saja raw string",
            "Boleh pake emoji",
        )
    )
    human_prompt = ". ".join(
        (
            f"keyword yang digunakan adalah sebagai berikut: {recommendation}",
            f"Total jumlah keyword adalah: {len(recommendation)}",
            f"Video memiliki kategori: {category}",
        )
    )

    # invoke() replaces the deprecated practice of calling the model object.
    result = llm.invoke(
        [
            SystemMessage(content=system_prompt),
            HumanMessage(content=human_prompt),
        ]
    )

    return result.content
150
+
151
+
152
def recommend_keyword(keyword):
    """Pick the keyword set with the highest predicted engagement rate.

    Rules whose antecedents or consequents mention ``keyword`` are ranked by
    lift; the top five each yield a candidate set made of the keyword plus
    the rule's consequents. Every candidate is scored with its mean Google
    Trends value and passed to ``custom_predict``.

    Args:
        keyword: Seed keyword to look up in the association rules.

    Returns:
        The candidate keyword list with the highest predicted engagement
        rate, or an empty list when no rule mentions the keyword.
    """
    # regex=False: the keyword is user input and must be matched literally
    # (characters such as '+' or '(' would otherwise be read as a pattern).
    in_antecedents = rules['antecedents'].astype(str).str.contains(keyword, regex=False)
    in_consequents = rules['consequents'].astype(str).str.contains(keyword, regex=False)
    keyword_rules = rules[in_antecedents | in_consequents]

    top_5_rules = keyword_rules.sort_values(by='lift', ascending=False).head(5)

    # dict.fromkeys removes repeated terms (e.g. the keyword also being a
    # consequent) while keeping their order.
    recommendation = [
        list(dict.fromkeys([keyword] + list(row['consequents'])))
        for _, row in top_5_rules.iterrows()
    ]

    if not recommendation:
        return []

    engages = []
    for rec in recommendation:
        trends_df = get_google_trends_score(rec, datetime.now())

        # Score the terms of this candidate; terms without a column in the
        # trends frame count as 0.
        term_scores = [
            round(trends_df[term].mean(), 2) if term in trends_df.columns else 0
            for term in rec
        ]
        mean_score = sum(term_scores) / len(term_scores)

        engages.append(custom_predict(rec, mean_score))

    # First candidate with the maximum predicted engagement rate.
    return recommendation[engages.index(max(engages))]
185
+
186
+
187
# Categories offered in the dropdown are read from the dataset.
# NOTE(review): 'catergory' is spelled exactly as the code had it; presumably
# it matches the CSV header -- confirm before renaming.
distinct_categories = video_df['catergory'].unique()

# Input and output widgets for the Gradio form.
keyword_input = gr.Textbox(label="Enter a keyword")
category_input = gr.Dropdown(
    label="Select a category",
    choices=list(distinct_categories),
)
title_output = gr.Textbox(label="Recommendations")

iface = gr.Interface(
    fn=generate_title,
    inputs=[keyword_input, category_input],
    outputs=title_output,
    title="Title Recommendation",
    description="Do'akan saya langgeng sm Ei",
)

# Start the web app.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pytrends
2
+ pandas
3
+ matplotlib
4
+ scikit-learn
5
+ mlxtend
6
+ gradio
7
+ langchain-google-genai