amirhosseinkarami committed on
Commit
2518257
·
1 Parent(s): abed76f

Create Recommender.py

Browse files
Files changed (1) hide show
  1. Recommender.py +86 -0
Recommender.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
4
+
5
class Recommender:
    """Recommend similar rows of a table using pre-computed embedding vectors.

    Every row of the table is expected to carry an identifier, a title
    vector and a content vector. Both vectors are stored as JSON-encoded
    arrays (they are decoded with ``json.loads``). Similarity is the cosine
    similarity between the query row's vector and every row's vector.
    """

    def __init__(self, title_vec_col, content_vec_col, id_col):
        """Remember which column position holds which field.

        Args:
            title_vec_col: Position of the JSON-encoded title vector in a row.
            content_vec_col: Position of the JSON-encoded content vector.
            id_col: Position of the row identifier.
        """
        self.title_vec_col = title_vec_col
        self.content_vec_col = content_vec_col
        self.id_col = id_col

    def calculate_recom_scores(self, k, similarities, exclude=None):
        """Return the ``k`` best ``(row_position, score)`` pairs.

        Args:
            k: Maximum number of pairs to return. Values ``<= 0`` yield an
                empty list.
            similarities: 2-D sequence whose first row holds one similarity
                score per table row.
            exclude: Row position of the query item. When given, exactly that
                position is left out of the result. When ``None`` (legacy
                behaviour) the single top-ranked entry is dropped instead,
                on the assumption that it is the query item itself.

        Returns:
            A list of ``(row_position, score)`` tuples, best score first.
        """
        if k <= 0:
            # A negative ``k`` would otherwise turn into a "from the end"
            # slice bound and return almost every row.
            return []

        ranked = sorted(
            enumerate(similarities[0]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        if exclude is None:
            return ranked[1:k + 1]
        # Dropping by position stays correct even when another row ties with
        # the query item at the top of the ranking (e.g. duplicate vectors).
        return [pair for pair in ranked if pair[0] != exclude][:k]

    def str2arr(self, arr):
        """Decode an iterable of JSON-encoded arrays into one NumPy array.

        Args:
            arr: Iterable of strings, each a JSON array of numbers.

        Returns:
            ``np.ndarray`` with one decoded vector per input string.
        """
        return np.array([np.array(json.loads(text)) for text in arr])

    @staticmethod
    def _find_row(ids, wanted_id):
        """Return the position of the single row whose id matches ``wanted_id``.

        Ids are also compared by their string form so that, for instance, an
        integer id matches a table whose ids were read as strings.

        Raises:
            ValueError: If no row, or more than one row, matches.
        """
        wanted_text = str(wanted_id)
        matches = [
            position
            for position, row_id in enumerate(ids)
            if row_id == wanted_id or str(row_id) == wanted_text
        ]
        if len(matches) != 1:
            raise ValueError(
                f"expected exactly one row with id {wanted_id!r}, "
                f"found {len(matches)}"
            )
        return matches[0]

    @staticmethod
    def _merge_scores(*score_lists):
        """Union several score lists, keeping the best score per row position.

        Returns:
            ``(row_position, score)`` tuples sorted by descending score; ties
            are broken by ascending row position so the order is deterministic.
        """
        best = {}
        for scores in score_lists:
            for position, score in scores:
                if position not in best or score > best[position]:
                    best[position] = score
        return sorted(best.items(), key=lambda pair: (-pair[1], pair[0]))

    def recommend_k(self, table, k, id):
        """Recommend rows similar to the row identified by ``id``.

        The ``k`` nearest rows by title similarity and the ``k`` nearest rows
        by content similarity are computed separately and then unioned, so
        the result holds between ``k`` and ``2 * k`` entries when enough rows
        exist.

        Args:
            table: Iterable of rows; each row is indexable by the column
                positions given to the constructor.
            k: Number of neighbours to take from each similarity ranking.
            id: Identifier of the query row.

        Returns:
            A tuple ``(indices, union_scores, titles_scores)`` where
            ``union_scores`` is the merged ``(row_position, score)`` list
            sorted by descending score, ``indices`` are its row positions in
            the same order, and ``titles_scores`` is the title-only ranking.

        Raises:
            ValueError: If ``id`` does not identify exactly one row.
        """
        # Transpose rows into columns without going through a NumPy array:
        # a mixed-type table would be coerced to strings, which made
        # non-string ids impossible to match.
        columns = list(zip(*table))

        # The query row is addressed by its position in the table, which is
        # what the similarity matrices and the returned indices are based on.
        idx = self._find_row(columns[self.id_col], id)

        titles = self.str2arr(columns[self.title_vec_col])
        contents = self.str2arr(columns[self.content_vec_col])

        titles_sim = cosine_similarity(titles[idx].reshape(1, -1), titles)
        contents_sim = cosine_similarity(contents[idx].reshape(1, -1), contents)

        titles_scores = self.calculate_recom_scores(k, titles_sim, exclude=idx)
        contents_scores = self.calculate_recom_scores(k, contents_sim, exclude=idx)

        union_scores = self._merge_scores(titles_scores, contents_scores)
        indices = [position for position, _ in union_scores]

        return indices, union_scores, titles_scores