Spaces:
Sleeping
Sleeping
amirhosseinkarami
committed on
Commit
·
2518257
1
Parent(s):
abed76f
Create Recommender.py
Browse files- Recommender.py +86 -0
Recommender.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
|
4 |
+
|
5 |
+
class Recommender:
    """Recommend items whose title or content vectors resemble a given item.

    The table handed to :meth:`recommend_k` is a sequence of rows. The
    constructor stores the column positions that the recommender reads
    from that table.
    """

    def __init__(self, title_vec_col, content_vec_col, id_col):
        """Remember which table columns hold the vectors and the ids.

        Args:
            title_vec_col: Position of the column with JSON-encoded title
                vectors.
            content_vec_col: Position of the column with JSON-encoded
                content vectors.
            id_col: Position of the column with the item ids.
        """
        self.title_vec_col = title_vec_col
        self.content_vec_col = content_vec_col
        self.id_col = id_col

    def calculate_recom_scores(self, k, similarities, exclude=None):
        """Return the ``k`` best ``(position, score)`` pairs, best first.

        Args:
            k: Maximum number of pairs to return. Values ``<= 0`` yield an
                empty list.
            similarities: Two-dimensional, row-major similarity data; only
                the first row (``similarities[0]``) is ranked.
            exclude: Position of the query item. When given, that position
                is removed from the ranking explicitly. When ``None`` the
                top-ranked entry is dropped instead, on the assumption that
                the query item is its own best match.

        Returns:
            A list of ``(position, score)`` tuples sorted by descending
            score.
        """
        if k <= 0:
            return []

        ranked = sorted(
            enumerate(similarities[0]),
            key=lambda pair: pair[1],
            reverse=True,
        )

        if exclude is None:
            # Legacy behaviour: assume the query itself is ranked first.
            return ranked[1:k + 1]

        # Dropping by position stays correct when another row ties with the
        # query (e.g. a duplicate vector) and the stable sort ranks that row
        # ahead of the query itself.
        return [pair for pair in ranked if pair[0] != exclude][:k]

    def str2arr(self, arr):
        """Decode an iterable of JSON-encoded lists into one NumPy array.

        Args:
            arr: Iterable of strings, each a JSON document (e.g.
                ``"[0.1, 0.2]"``).

        Returns:
            ``np.ndarray`` stacking the decoded entries in input order.
        """
        return np.array([np.array(json.loads(text)) for text in arr])

    def recommend_k(self, table, k, id):
        """Recommend items similar to the one identified by ``id``.

        The ``k`` nearest neighbours by title similarity and the ``k``
        nearest by content similarity are merged; a position that appears
        in both rankings is kept once, with its higher score.

        Args:
            table: Sequence of rows. It is transposed internally so that
                ``data[c]`` is column ``c``.
            k: Number of neighbours taken from each of the two rankings.
            id: Value looked up in the id column.

        Returns:
            A tuple ``(indices, union_scores, titles_scores)`` where
            ``union_scores`` is the merged list of ``(position, score)``
            pairs sorted by descending score, ``indices`` lists the
            positions of ``union_scores`` in the same order, and
            ``titles_scores`` is the title-only ranking.

        Raises:
            ValueError: If ``id`` does not match exactly one row.
        """
        # Transpose rows into columns so a column can be picked by position.
        data = np.array(list(zip(*table)))

        # NOTE(review): numpy coerces a table that mixes strings and numbers
        # to a string array, so ``id`` presumably has to be supplied in the
        # same form as the stored ids - confirm against the caller.
        matches = data[0][data[self.id_col] == id]
        if matches.size != 1:
            raise ValueError(
                f"expected exactly one row with id {id!r}, "
                f"found {matches.size}"
            )
        # NOTE(review): assumes column 0 holds each row's zero-based
        # position in the table - TODO confirm.
        idx = int(matches.item())

        titles = self.str2arr(data[self.title_vec_col, :])
        contents = self.str2arr(data[self.content_vec_col, :])

        titles_sim = cosine_similarity(titles[idx].reshape(1, -1), titles)
        contents_sim = cosine_similarity(
            contents[idx].reshape(1, -1), contents
        )

        titles_scores = self.calculate_recom_scores(
            k, titles_sim, exclude=idx
        )
        contents_scores = self.calculate_recom_scores(
            k, contents_sim, exclude=idx
        )

        # Keep one entry per position, preferring the higher of the title
        # and content scores.
        best_by_position = {}
        for pair in titles_scores + contents_scores:
            position, score = pair
            current = best_by_position.get(position)
            if current is None or score > current[1]:
                best_by_position[position] = pair

        union_scores = sorted(
            best_by_position.values(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        indices = [position for position, _ in union_scores]

        return indices, union_scores, titles_scores
|