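# NOTE: non-code header text captured together with this file (presumably the
# uploader avatar caption, a commit label and a short commit hash):
#   AmirShabani's picture / Sooti / 26cc869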
import pickle
import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree
# Path prefix for the pickled artifacts; joined to each file name by plain
# string concatenation, so it must end with a path separator.
addr = './'

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts that come from a trusted source.
#
# Each artifact is read inside a `with` block so the file handle is closed even
# when unpickling raises. The `*_file` names stay bound at module level (to the
# closed handles), exactly as before.

# Looked up by integer user id in output_list().
with open(addr + 'user_recomms.pkl', "rb") as user_recomms_file:
    user_recomms = pickle.load(user_recomms_file)

# Default `movies` table of output_list(); its 'title' column is read there.
with open(addr + 'final_movies.pkl', "rb") as final_movies_file:
    final_movies = pickle.load(final_movies_file)

# Default `movies_df` of output_list(); merged on 'movieId' when building a
# user embedding.
with open(addr + 'movie_embeds.pkl', "rb") as movie_embeds_file:
    movie_embeds = pickle.load(movie_embeds_file)

# Default `tree` of output_list(); queried with .query(points, k=1).
# Presumably a fitted sklearn BallTree over the user embeddings -- TODO confirm.
with open(addr + 'btree.pkl', "rb") as btree_file:
    btree = pickle.load(btree_file)

# Default `user_embeddings` of output_list(); rows are selected positionally
# and expose a 'userId' field.
with open(addr + 'user_embeds.pkl', "rb") as user_embeds_file:
    user_embeds = pickle.load(user_embeds_file)

# The three artifacts below are loaded but not used by the functions visible
# in this part of the file.
with open(addr + 'user_mapping.pkl', "rb") as user_mapping_file:
    user_mapping = pickle.load(user_mapping_file)

with open(addr + 'movie_mapping.pkl', "rb") as movie_mapping_file:
    movie_mapping = pickle.load(movie_mapping_file)

with open(addr + 'user_pos_items.pkl', "rb") as user_pos_items_file:
    user_pos_items = pickle.load(user_pos_items_file)
def create_user_embedding(movie_ratings, movies_df):
    """Build a user embedding as the rating-weighted sum of movie embeddings.

    Args:
        movie_ratings: Mapping of movie id -> rating given by the user.
        movies_df: DataFrame with a ``movieId`` column. After the merge, every
            column past the first two (``rating``, ``movieId``) is treated as
            one embedding dimension, so the remaining columns are assumed to
            be numeric and not to include another ``rating`` column.

    Returns:
        1-D NumPy array with one entry per embedding column. Rated movies
        that do not appear in ``movies_df`` contribute nothing; if none of
        the rated movies is known the result is all zeros.
    """
    # Convert the movie_ratings dictionary to a dataframe
    user_ratings_df = pd.DataFrame.from_dict(movie_ratings, orient='index', columns=['rating'])
    user_ratings_df['movieId'] = user_ratings_df.index

    # Left merge keeps every rated movie; ids missing from movies_df end up
    # with NaN in all embedding columns.
    merged = user_ratings_df.merge(movies_df, on='movieId', how='left')

    # Columns 0 and 1 are 'rating' and 'movieId'; scale the remaining
    # (embedding) columns row-wise by the rating.
    weighted = merged.iloc[:, 2:].values * merged['rating'].values[:, np.newaxis]

    # nansum skips the NaN rows of unknown movies. Summing first and zeroing
    # NaNs afterwards would wipe the whole embedding as soon as a single rated
    # movie is unknown.
    return np.nansum(weighted, axis=0)
def find_closest_user(user_embedding, tree, user_embeddings):
    """Return the row of ``user_embeddings`` nearest to ``user_embedding``.

    Args:
        user_embedding: Query vector for the new user.
        tree: Object whose ``query(points, k=1)`` returns a
            ``(distances, indices)`` pair of 2-D arrays.
        user_embeddings: DataFrame addressed positionally by the index the
            tree returns (presumably the rows the tree was built from --
            verify against the code that created it).

    Returns:
        The selected row of ``user_embeddings`` as returned by ``.iloc``.
    """
    # The tree works on batches, so wrap the single vector in a list.
    distances, indices = tree.query([user_embedding], k=1)
    # One query point, one neighbour: the answer is the first cell.
    nearest_position = indices[0][0]
    return user_embeddings.iloc[nearest_position]
def output_list(movie_ratings, movies_df=movie_embeds, tree=btree, user_embeddings=user_embeds, movies=final_movies):
    """Recommend movie titles for a user described by a few movie ratings.

    The ratings are turned into a user embedding, the closest known user is
    looked up in ``tree``, and that user's precomputed recommendations are
    translated to titles.

    Args:
        movie_ratings: Mapping of movie id -> rating.
        movies_df: Movie embedding table passed to create_user_embedding().
        tree: Nearest-neighbour index passed to find_closest_user().
        user_embeddings: Table the closest user's row is taken from; the row
            must expose a ``userId`` field.
        movies: Table with a ``title`` column, addressed positionally by the
            recommended ids.

    Returns:
        List of titles for the closest user's recommendations. These are read
        from the module-level ``user_recomms``, keyed by integer user id.
    """
    # Use the arguments rather than the module-level defaults so callers can
    # actually supply their own embedding tables.
    user_embed = create_user_embedding(movie_ratings, movies_df)
    # Call the find_closest_user function with the pre-built tree
    closest_user = find_closest_user(user_embed, tree, user_embeddings)
    recomms = user_recomms[int(closest_user['userId'])]
    return [movies['title'].iloc[movie_id] for movie_id in recomms]
# output_list({1:1,2:2,3:3,4:4,5:5})