-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathuserBased_filtering_recommend.py
More file actions
73 lines (56 loc) · 3.14 KB
/
userBased_filtering_recommend.py
File metadata and controls
73 lines (56 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import sklearn.utils
from helper_functions import movieId_to_title
def user_based_filtering_recommend(new_user,user_movies_ids,movies_num,n_neighbor,movies_ratings):
""" This function return number of recommended movies based on user based filtering using
cosine similarity to find the most similar users to the new user
it returns movies_num of movies from the top ranked movies of n_neighbour users
who are the most similar to the new user"""
#pivot the dataframe
users_inDB = movies_ratings.pivot_table(index='userId', columns='movieId', values='rating')
list_id_movies = movies_ratings['movieId'].unique()
new_user_vector = pd.DataFrame(new_user, index=list_id_movies).T
#fill Nans with 3 rating
users_inDB = users_inDB.fillna(3.0)
new_user_vector_filled = new_user_vector.fillna(3.0)
#for cosine similarity we have to center the data in order to have a magnitude(0-1)
users_inDB = (users_inDB - 3.0)/2.0
new_user = (new_user_vector_filled - 3.0)/2.0
#label the new user that we want to recommend for:
new_user.index=['new_user']
#add the new use to the original df
users_matrix = pd.concat([users_inDB,new_user])
#calculate cosine similarity
users_similarity_matrix = cosine_similarity(users_matrix)
users_similarity_matrix = pd.DataFrame(users_similarity_matrix,index=users_matrix.index,columns=users_matrix.index)
#we get here (users_num*users_num) similarity matrix
#print(users_matrix_similarity)
# get the new user similarities row: except the last column value(similarity with himself=1)
new_user_similarity = users_similarity_matrix['new_user'].iloc[:-1]
# take the n_neighbors nearest users (N users who have the most similarity with the new user)
similar_users = new_user_similarity.nlargest(n_neighbor).index.values
#print(similar_users)
#we will get (movies_num*n_neighbor*2) movies to choose
recommended_movieIds = []
scores = []
for user in similar_users:
recommended_movieIds.extend(users_inDB.loc[user].nlargest(movies_num*2).index)
scores.extend(users_inDB.loc[user].nlargest(movies_num*2).values)
recommended_movies_dic = {'movie_id':recommended_movieIds,'score':scores}
recommended_movies_df = pd.DataFrame(recommended_movies_dic)
#print(recommended_movies_df)
#Shuffle the movies
recommended_movies_df = sklearn.utils.shuffle(recommended_movies_df)
#Order movies by score
recommended_movies_df = recommended_movies_df.sort_values(by='score',ascending=False)
recommended_movies_ids = recommended_movies_df['movie_id'].unique()
#get the final recommendation: retrn movies_num of movies which the user hasn't rated
top_recommended_movies = []
for movie_id in recommended_movies_ids:
if (movie_id not in user_movies_ids) and (len(top_recommended_movies) < movies_num) :
top_recommended_movies.append(movie_id)
#finally return the movies titles
top_recommended_movies = movieId_to_title(top_recommended_movies,movies_ratings)
return top_recommended_movies