File size: 3,037 Bytes
d19f826
 
 
 
 
 
 
 
 
 
 
61faab2
 
 
 
d19f826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415bdb0
db0d1d3
 
 
d19f826
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415bdb0
61faab2
d19f826
 
 
61faab2
 
 
d19f826
5261ce7
d19f826
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pickle
import numpy as np 
import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree

addr = './'

user_recomms_file = open(addr + 'user_recomms.pkl', "rb")
user_recomms = pickle.load(user_recomms_file)
user_recomms_file.close()

final_movies_file = open(addr + 'final_movies.pkl', "rb")
final_movies = pickle.load(final_movies_file)
final_movies_file.close()

movie_embeds_file = open(addr + 'movie_embeds.pkl', "rb")
movie_embeds = pickle.load(movie_embeds_file)
movie_embeds_file.close()

btree_file = open(addr + 'btree.pkl', "rb")
btree = pickle.load(btree_file)
btree_file.close()

user_embeds_file = open(addr + 'user_embeds.pkl', "rb")
user_embeds = pickle.load(user_embeds_file)
user_embeds_file.close()

user_mapping_file = open(addr + 'user_mapping.pkl', "rb")
user_mapping = pickle.load(user_mapping_file)
user_mapping_file.close()

movie_mapping_file = open(addr + 'movie_mapping.pkl', "rb")
movie_mapping = pickle.load(movie_mapping_file)
movie_mapping_file.close()

user_pos_items_file = open(addr + 'user_pos_items.pkl', "rb")
user_pos_items = pickle.load(user_pos_items_file)
user_pos_items_file.close()

def create_user_embedding(movie_ratings, movies_df):
    # Convert the movie_ratings dictionary to a dataframe
    user_ratings_df = pd.DataFrame.from_dict(movie_ratings, orient='index', columns=['rating'])
    user_ratings_df['movieId'] = user_ratings_df.index
    # Merge the user_ratings_df with the movies_df to get the movie embeddings
    user_movie_embeddings = user_ratings_df.merge(movies_df, on='movieId', how='left')
    
    print(user_ratings_df)
    print(user_movie_embeddings)

    # Multiply the ratings with the movie embeddings
    user_movie_embeddings = user_movie_embeddings.iloc[:, 2:].values * user_movie_embeddings['rating'].values[:, np.newaxis]

    # Calculate the user embedding as the sum of the movie embeddings
    user_embedding = np.sum(user_movie_embeddings, axis=0)
    np.nan_to_num(user_embedding, 0)
    print(user_movie_embeddings.shape)
    return user_embedding

def find_closest_user(user_embedding, tree, user_embeddings):
    # Query the BallTree to find the closest user to the given user_embedding
    _, closest_user_index = tree.query([user_embedding], k=1)

    # Get the embedding of the closest user
    closest_user_embedding = user_embeddings.iloc[closest_user_index[0][0]]

    return closest_user_embedding


def output_list(movie_ratings, movies_df = movie_embeds, tree = btree, user_embeddings = user_embeds, movies = final_movies):
    user_embed = create_user_embedding(movie_ratings, movie_embeds)
    # Call the find_closest_user function with the pre-built BallTree
    closest_user_embed = find_closest_user(user_embed, tree, user_embeds)
    recomms = user_recomms[int(closest_user_embed['userId'])]
    out = [movies['title'].iloc[movie_id] for movie_id in recomms]
    return out

# output_list({1:1,2:2,3:3,4:4,5:5})