







import numpy as np
import pandas as pd
# Load the rating table. Later code indexes it as ratings[user][movie],
# i.e. one column per user and one row per movie.
ratings = pd.read_json('ratings.json')
ratings

# User-to-user similarity matrix: pairwise correlation between the user
# columns (Pearson is the pandas default, spelled out here for clarity).
simmat = ratings.corr(method='pearson')
simmat

# Simple user-based collaborative filtering (UBCF) recommender built on the
# simmat similarity matrix.

# The logged-in user we are producing recommendations for.
login_user = 'Michael Henry'

# Similar users: drop the logged-in user themself and keep only users whose
# similarity is strictly positive (negatively correlated users are discarded).
sim_users = simmat[login_user]
sim_users = sim_users.drop(login_user)
sim_users = sim_users[sim_users > 0]

# Ratings column of the logged-in user, looked up once instead of on every
# inner-loop iteration. A NaN entry means the user has not rated that movie.
login_ratings = ratings[login_user]

# movie_list maps each candidate movie to two parallel lists:
#   [scores given by the similar users, similarity of each of those users]
# e.g. {"Movie A": [[4, 5, 3], [0.8, 0.7, 0.2]], "Movie B": [[3, 4], [0.1, 0.4]]}
movie_list = {}
for sim_user, sim_score in sim_users.items():
    movies = ratings[sim_user]
    # Only movies the similar user rated and the logged-in user has not.
    for movie, score in movies.dropna().items():
        if np.isnan(login_ratings[movie]):
            # setdefault creates the [scores, weights] pair on first sight.
            scores, weights = movie_list.setdefault(movie, [[], []])
            scores.append(score)
            weights.append(sim_score)
print(movie_list)

# Rank candidates by similarity-weighted average score, best first. The weights
# are all > 0 because of the filter on sim_users, so the average is defined.
ml = sorted(
    movie_list.items(),
    key=lambda item: np.average(item[1][0], weights=item[1][1]),
    reverse=True,
)
# Build the array from the titles only. Converting ml itself would mix strings
# with ragged nested lists, which NumPy >= 1.24 rejects with ValueError, and
# column-slicing an empty result would raise IndexError.
print(np.array([movie for movie, _ in ml]))
"""
{'Inception': [[2.5, 3.0, 3, 3.0], [0.9912407071619304, 0.3812464258315117, 0.924473451641905, 0.6628489803598702]],
'Anger Management': [[3.0, 1.5, 3.0, 2], [0.9912407071619304, 0.3812464258315117, 0.8934051474415644, 0.924473451641905]],
'Jerry Maguire': [[3.0, 3.0, 4.5, 3, 3.0], [0.9912407071619304, 0.3812464258315117, 0.8934051474415644, 0.924473451641905, 0.6628489803598702]]}
['Jerry Maguire' 'Inception' 'Anger Management']
"""