|
import pickle |
|
import numpy as np |
|
import pandas as pd |
|
import random |
|
from tqdm import tqdm |
|
import matplotlib.pyplot as plt |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.neighbors import BallTree |
|
|
|
# Directory (with trailing separator) that holds the pickled artifacts.
addr = './'

# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only point `addr` at artifact files produced by a trusted pipeline.
#
# Each file is opened in a `with` block so the handle is closed even when
# unpickling raises.

with open(addr + 'user_recomms.pkl', "rb") as user_recomms_file:
    user_recomms = pickle.load(user_recomms_file)

with open(addr + 'final_movies.pkl', "rb") as final_movies_file:
    final_movies = pickle.load(final_movies_file)

with open(addr + 'movie_embeds.pkl', "rb") as movie_embeds_file:
    movie_embeds = pickle.load(movie_embeds_file)

with open(addr + 'btree.pkl', "rb") as btree_file:
    btree = pickle.load(btree_file)

with open(addr + 'user_embeds.pkl', "rb") as user_embeds_file:
    user_embeds = pickle.load(user_embeds_file)

with open(addr + 'user_mapping.pkl', "rb") as user_mapping_file:
    user_mapping = pickle.load(user_mapping_file)

with open(addr + 'movie_mapping.pkl', "rb") as movie_mapping_file:
    movie_mapping = pickle.load(movie_mapping_file)

with open(addr + 'user_pos_items.pkl', "rb") as user_pos_items_file:
    user_pos_items = pickle.load(user_pos_items_file)
|
|
|
def create_user_embedding(movie_ratings, movies_df):
    """Build a user embedding as the rating-weighted sum of movie embeddings.

    Args:
        movie_ratings: Mapping of movieId -> rating for the movies the user
            has rated.
        movies_df: DataFrame with a ``movieId`` column; every other column is
            treated as one embedding dimension (assumed numeric).

    Returns:
        A 1-D ``numpy.ndarray`` with one entry per embedding column of
        ``movies_df``. Rated movies that are absent from ``movies_df``
        contribute nothing to the sum.
    """
    user_ratings_df = pd.DataFrame.from_dict(
        movie_ratings, orient='index', columns=['rating']
    )
    user_ratings_df['movieId'] = user_ratings_df.index

    # Left merge keeps every rated movie; movies missing from `movies_df`
    # end up with NaN in all embedding columns.
    merged = user_ratings_df.merge(movies_df, on='movieId', how='left')

    # After the merge the first two columns are 'rating' and 'movieId';
    # everything after them comes from `movies_df` (the embedding columns).
    embeddings = merged.iloc[:, 2:].values.astype(float)
    weights = merged['rating'].values.astype(float)[:, np.newaxis]
    weighted_embeddings = embeddings * weights

    # nansum skips the NaN rows of unmatched movies. A plain sum would turn
    # every component into NaN, and the embedding would collapse to zeros.
    user_embedding = np.nansum(weighted_embeddings, axis=0)

    # Replace any remaining non-finite values (e.g. inf) with finite numbers.
    return np.nan_to_num(user_embedding)
|
|
|
def find_closest_user(user_embedding, tree, user_embeddings):
    """Return the row of ``user_embeddings`` nearest to ``user_embedding``.

    Args:
        user_embedding: Query vector for the user.
        tree: Object whose ``query(points, k=...)`` returns a
            ``(distances, indices)`` pair (presumably the sklearn BallTree
            loaded from ``btree.pkl`` -- confirm against the caller).
        user_embeddings: DataFrame indexed positionally by the indices that
            ``tree.query`` returns.

    Returns:
        The single row (a pandas Series) of ``user_embeddings`` at the
        position of the nearest neighbour.
    """
    # The tree is queried with a batch of points, so wrap the one vector in
    # a list; only the index half of the result is needed.
    neighbour_indices = tree.query([user_embedding], k=1)[1]

    # First (and only) query point, first (and only) neighbour.
    nearest_position = neighbour_indices[0][0]
    return user_embeddings.iloc[nearest_position]
|
|
|
|
|
def output_list(movie_ratings, movies_df=movie_embeds, tree=btree, user_embeddings=user_embeds, movies=final_movies):
    """Recommend movie titles for a user described by their movie ratings.

    The ratings are turned into a user embedding, the nearest known user is
    looked up in ``tree``, and that user's precomputed recommendations (from
    the module-level ``user_recomms``) are mapped to titles.

    Args:
        movie_ratings: Mapping of movieId -> rating.
        movies_df: Movie-embedding DataFrame passed to
            ``create_user_embedding``.
        tree: Nearest-neighbour index passed to ``find_closest_user``.
        user_embeddings: DataFrame of known users; the selected row must
            expose a ``userId`` entry.
        movies: DataFrame with a ``title`` column, indexed positionally by
            the entries of the recommendation list.

    Returns:
        A list of movie titles recommended for the closest known user.
    """
    # Use the arguments, not the module globals, so that callers who pass
    # their own embeddings actually affect the result.
    user_embed = create_user_embedding(movie_ratings, movies_df)
    closest_user_embed = find_closest_user(user_embed, tree, user_embeddings)

    # A row pulled from a DataFrame may carry userId as a float; cast it back
    # to int before using it as the lookup key.
    recomms = user_recomms[int(closest_user_embed['userId'])]

    # Recommendation entries are used as positional indices into the titles.
    return [movies['title'].iloc[movie_id] for movie_id in recomms]
|
|
|
|
|
|
|
|