A Movie Recommendation System Based on Spring Boot + Vue + a Web Crawler


    Author homepage: 编程指南针

    About the author: high-quality creator in the Java field, CSDN blog expert, invited author on Juejin, many years of experience in architecture design, resident lecturer on Tencent Classroom

    Main content: Java projects, graduation projects, resume templates, study materials, interview question banks, technical mutual help

    Get the source code at the end of the article

    Project number: BS-XX-136

    1. Project Overview

       This is a movie management and recommendation system with a separated front end and back end, built on a Vue.js + Spring Boot stack. The movie data comes from Douban: a Python crawler scrapes the relevant movie information and inserts it into a MySQL database, and the front end then displays that data. The admin back end handles the basic management of the movie data. The movie recommendations generated for each user are written to a Redis database for storage. The recommendation algorithm is collaborative filtering, combining ItemCF (item-based) and UserCF (user-based) approaches.
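
    The post does not include the recommendation code itself, so the following is a minimal, self-contained sketch of the item-based half (ItemCF) of such a recommender. All class and method names here are illustrative assumptions rather than the project's actual API; in the described system the resulting movie ids would then be written to Redis (for example via Spring Data Redis) rather than printed.

    import java.util.*;
    import java.util.stream.Collectors;

    /** Minimal ItemCF sketch (hypothetical class, not the project's actual code). */
    public class ItemCfSketch {

        /** Cosine similarity between two movies over the users who rated them. */
        static double cosine(Map<Integer, Double> a, Map<Integer, Double> b) {
            double dot = 0, na = 0, nb = 0;
            for (Map.Entry<Integer, Double> e : a.entrySet()) {
                Double r = b.get(e.getKey());
                if (r != null) dot += e.getValue() * r;   // shared raters contribute to the dot product
                na += e.getValue() * e.getValue();
            }
            for (double r : b.values()) nb += r * r;
            return (na == 0 || nb == 0) ? 0 : dot / (Math.sqrt(na) * Math.sqrt(nb));
        }

        /** Score unseen movies for one user by their similarity to the movies the user rated. */
        static List<Integer> recommend(int userId,
                                       Map<Integer, Map<Integer, Double>> ratingsByMovie, // movieId -> (userId -> rating)
                                       int topN) {
            Map<Integer, Double> scores = new HashMap<>();
            for (Map.Entry<Integer, Map<Integer, Double>> seen : ratingsByMovie.entrySet()) {
                Double userRating = seen.getValue().get(userId);
                if (userRating == null) continue;                        // user has not rated this movie
                for (Map.Entry<Integer, Map<Integer, Double>> cand : ratingsByMovie.entrySet()) {
                    if (cand.getValue().containsKey(userId)) continue;   // skip movies the user already rated
                    double sim = cosine(seen.getValue(), cand.getValue());
                    scores.merge(cand.getKey(), sim * userRating, Double::sum);
                }
            }
            // In the described system the top-N ids would be written to Redis for the front end to read.
            return scores.entrySet().stream()
                    .sorted(Map.Entry.<Integer, Double>comparingByValue().reversed())
                    .limit(topN)
                    .map(Map.Entry::getKey)
                    .collect(Collectors.toList());
        }

        public static void main(String[] args) {
            Map<Integer, Map<Integer, Double>> ratings = new HashMap<>();
            Map<Integer, Double> m101 = new HashMap<>(); m101.put(1, 5.0); m101.put(2, 4.0);
            Map<Integer, Double> m102 = new HashMap<>(); m102.put(1, 4.0); m102.put(3, 5.0);
            Map<Integer, Double> m103 = new HashMap<>(); m103.put(2, 5.0); m103.put(3, 4.0);
            ratings.put(101, m101); ratings.put(102, m102); ratings.put(103, m103);
            System.out.println(recommend(1, ratings, 2)); // -> [103]
        }
    }

    A UserCF score can be computed the same way with the roles of users and movies swapped; how the two scores are combined in this project is not described in the post.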

    2. Environment

    Language: Java, JDK 1.8

    Database: MySQL 5.7 + Redis (a sample configuration sketch follows the stack lists below)

    Application server: Tomcat 8.5.31

    IDE: IntelliJ IDEA or Eclipse

    Front-end stack:

    - Vue.js

    - ElementUI

    - axios

    Back-end stack:

    - Spring Boot

    - MyBatis

    - Apache Shiro

    - Spring Data Redis
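
    For reference, a minimal application.yml sketch for this stack might look as follows. The database name, credentials, Redis settings, and MyBatis paths are placeholders and assumptions; only the 8443 port is suggested by the cover-upload code shown later, and the project's real configuration is not included in this post.

    server:
      port: 8443                     # matches the URL returned by the cover-upload endpoint below

    spring:
      datasource:
        # "movie" is a placeholder schema name, credentials are placeholders
        url: jdbc:mysql://localhost:3306/movie?useUnicode=true&characterEncoding=utf8&useSSL=false
        username: root
        password: root
      redis:
        host: localhost              # default Redis host/port
        port: 6379

    mybatis:
      mapper-locations: classpath:mapper/*.xml                # placeholder mapper location
      type-aliases-package: com.fivesix.fivesixserver.entity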

    3. System Screenshots

    Front-end category list

    Movie detail view

    Full-text search

    Movie recommendations

    Admin console

    Movie management

    Registration

    Login

    4. Core Code

    package com.fivesix.fivesixserver.controller;

    import com.fivesix.fivesixserver.entity.Menu;
    import com.fivesix.fivesixserver.service.MenuService;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.web.bind.annotation.*;

    import java.util.List;

    @RestController
    public class MenuController {

        @Autowired
        MenuService menuService;

        // Menu items visible to the currently logged-in user
        @GetMapping("/api/menu")
        public List<Menu> getMenusOfUser() {
            return menuService.listAllByCurrUser();
        }

        // Menu items granted to a role (role id 1 here)
        @GetMapping("/api/admin/role/menu")
        public List<Menu> getMenusOfRole() {
            return menuService.listAllByRole(1);
        }
    }

    package com.fivesix.fivesixserver.controller;

    import com.fivesix.fivesixserver.entity.Movie;
    import com.fivesix.fivesixserver.service.MovieService;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.web.bind.annotation.*;
    import org.springframework.web.multipart.MultipartFile;

    import java.io.File;
    import java.io.IOException;
    import java.util.Comparator;
    import java.util.List;
    import java.util.UUID;
    import java.util.stream.Collectors;

    @RestController
    public class MovieController {

        @Autowired
        MovieService movieService;

        /*
         * Public (front-end) endpoints
         */
        @GetMapping("/api/movies")
        public List<Movie> list() throws Exception {
            System.out.println("load all movies sorted by rate successfully.");
            return movieService.list().stream()
                    .sorted(Comparator.comparingDouble(Movie::getRate).reversed())
                    .collect(Collectors.toList());
        }

        @GetMapping("/api/movies/page/{pageIndex}")
        public List<Movie> listByPageIndex(@PathVariable("pageIndex") int pageIndex) {
            System.out.printf("requesting the 21 movies starting at %d\n", pageIndex);
            return movieService.listByPageIndex(pageIndex).stream()
                    .sorted(Comparator.comparingDouble(Movie::getRate).reversed())
                    .collect(Collectors.toList());
        }

        @GetMapping("/api/movies/category/{cid}/{dateOrRate}")
        public Object listByCategory(@PathVariable("cid") int cid,
                                     @PathVariable("dateOrRate") int dateOrRate) throws Exception {
            List<Movie> res;
            if (cid == 0) {
                res = movieService.list();
            } else {
                res = movieService.listByCategory(cid);
            }
            // dateOrRate == 1: sort by rating; otherwise sort by release date
            if (dateOrRate == 1) {
                return res.stream().sorted(Comparator.comparingDouble(Movie::getRate).reversed()).collect(Collectors.toList());
            } else {
                return res.stream().sorted(Comparator.comparing(Movie::getDate).reversed()).collect(Collectors.toList());
            }
        }

        @GetMapping("/api/search")
        public List<Movie> listByKeywords(@RequestParam("keywords") String keywords) {
            if (!keywords.equals("")) {
                System.out.println("search result returned.");
                return movieService.listByKeywords(keywords).stream()
                        .sorted(Comparator.comparing(Movie::getDate).reversed())
                        .collect(Collectors.toList());
            } else {
                return null;
            }
        }

        /*
         * Admin endpoints
         */
        @PostMapping("/api/admin/content/movie/update")
        public Movie save(@RequestBody Movie movie,
                          @RequestParam(value = "changeCategories") String categoriesIsChanged) throws Exception {
            Movie movie1 = movieService.getByMovieName(movie.getTitle());
            if (movie1 != null) {
                if (categoriesIsChanged.equals("true")) {
                    movieService.updateMovieAndCategories(movie);
                    System.out.println("update movie and categories.");
                } else {
                    movieService.update(movie);
                    System.out.println("update movie.");
                }
            } else {
                movieService.save(movie);
                System.out.println("add new movie.");
            }
            return movie;
        }

        @PostMapping("/api/admin/content/movie/delete")
        public void delete(@RequestBody Movie movie) throws Exception {
            movieService.deleteById(movie.getId());
            System.out.println("delete movie by id successfully.");
        }

        /*
         * Movie cover upload
         */
        @PostMapping("/api/admin/content/movie/cover")
        public String coversUpload(MultipartFile file) throws Exception {
            String folder = "D:/workspace/fivesix/img/full";
            File imageFolder = new File(folder);
            // Rename the file with a UUID, keeping the original extension (png/jpg)
            String newName = UUID.randomUUID().toString();
            File f = new File(imageFolder, newName + file.getOriginalFilename()
                    .substring(file.getOriginalFilename().length() - 4));
            if (!f.getParentFile().exists())
                f.getParentFile().mkdirs();
            try {
                file.transferTo(f);
                String imgURL = "http://localhost:8443/api/file/" + f.getName();
                return imgURL;
            } catch (IOException e) {
                e.printStackTrace();
                return "";
            }
        }
    }
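
    The upload method above returns a URL under /api/file/, which implies a mapping from that URL prefix to the upload folder. The project's own configuration for this is not shown in the post; a sketch of what such a mapping could look like (hypothetical package and class name) is:

    package com.fivesix.fivesixserver.config; // hypothetical package

    import org.springframework.context.annotation.Configuration;
    import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
    import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;

    @Configuration
    public class StaticResourceConfig implements WebMvcConfigurer {

        @Override
        public void addResourceHandlers(ResourceHandlerRegistry registry) {
            // Serve the cover images saved by coversUpload() at the /api/file/** URLs
            // that the controller returns to the front end.
            registry.addResourceHandler("/api/file/**")
                    .addResourceLocations("file:D:/workspace/fivesix/img/full/");
        }
    }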

    package com.fivesix.fivesixserver.controller;

    import com.fivesix.fivesixserver.entity.User;
    import com.fivesix.fivesixserver.result.Result;
    import com.fivesix.fivesixserver.service.UserService;
    import org.apache.shiro.SecurityUtils;
    import org.apache.shiro.authc.AuthenticationException;
    import org.apache.shiro.authc.UsernamePasswordToken;
    import org.apache.shiro.crypto.SecureRandomNumberGenerator;
    import org.apache.shiro.crypto.hash.SimpleHash;
    import org.apache.shiro.subject.Subject;
    import org.springframework.web.bind.annotation.*;
    import org.springframework.web.util.HtmlUtils;

    import java.util.List;

    @RestController
    public class UserController {

        private final UserService userService;

        public UserController(UserService userService) {
            this.userService = userService;
        }

        @PostMapping(value = "/api/login")
        public Result login(@RequestBody User requestUser,
                            @RequestParam(value = "rememberMe") boolean rememberMe) {
            String requestUserName = HtmlUtils.htmlEscape(requestUser.getUsername());
            Subject subject = SecurityUtils.getSubject();
            UsernamePasswordToken usernamePasswordToken =
                    new UsernamePasswordToken(requestUserName, requestUser.getPassword());
            usernamePasswordToken.setRememberMe(rememberMe);
            try {
                subject.login(usernamePasswordToken);
                return new Result(200, "login successfully");
            } catch (AuthenticationException e) {
                e.printStackTrace();
                return new Result(400, "账号或密码错误"); // wrong username or password
            }
        }

        @PostMapping("/api/register")
        public Result register(@RequestBody User user) {
            String username = HtmlUtils.htmlEscape(user.getUsername());
            user.setUsername(username);
            // Generate a random salt
            String salt = new SecureRandomNumberGenerator().nextBytes().toString();
            user.setSalt(salt);
            // Number of hash iterations
            int times = 2;
            // Hash the password with MD5 + salt
            String encodedPassword = new SimpleHash("md5", user.getPassword(), salt, times).toString();
            user.setPassword(encodedPassword);
            try {
                userService.register(user);
                return new Result(200, "register successfully.");
            } catch (Exception e) {
                e.printStackTrace();
                return new Result(400, e.getMessage());
            }
        }

        @GetMapping("api/logout")
        public Result logout() {
            Subject subject = SecurityUtils.getSubject();
            subject.logout();
            return new Result(200, "登出成功"); // logout succeeded
        }

        @GetMapping("api/authentication")
        public Result authenticate() {
            return new Result(200, "认证成功"); // authenticated
        }

        @GetMapping("api/admin/user")
        public List<User> getAllUsers() {
            return userService.listAll();
        }

        @PostMapping("api/admin/user/delete")
        public Result delete(@RequestBody User user) {
            try {
                userService.deleteUser(user);
                return new Result(200, "删除用户成功"); // user deleted
            } catch (Exception e) {
                return new Result(400, "删除用户失败"); // delete failed
            }
        }

        @PutMapping("api/admin/user/update")
        public Result update(@RequestBody User user) {
            try {
                userService.updateUser(user);
                return new Result(200, "更新用户成功"); // user updated
            } catch (Exception e) {
                return new Result(400, "更新用户失败"); // update failed
            }
        }
    }
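
    register() stores a salted MD5 hash with two iterations, and login() delegates the check to Shiro, so somewhere in the project a Realm must re-hash the submitted password the same way. That Realm is not shown in the post; the sketch below illustrates what a matching one could look like. The package, class name, and the userService.getByUsername lookup are assumptions.

    package com.fivesix.fivesixserver.config; // hypothetical package

    import com.fivesix.fivesixserver.entity.User;
    import com.fivesix.fivesixserver.service.UserService;
    import org.apache.shiro.authc.*;
    import org.apache.shiro.authc.credential.HashedCredentialsMatcher;
    import org.apache.shiro.authz.AuthorizationInfo;
    import org.apache.shiro.authz.SimpleAuthorizationInfo;
    import org.apache.shiro.realm.AuthorizingRealm;
    import org.apache.shiro.subject.PrincipalCollection;
    import org.apache.shiro.util.ByteSource;

    public class UserRealm extends AuthorizingRealm {

        private final UserService userService;

        public UserRealm(UserService userService) {
            this.userService = userService;
            // Must mirror register(): MD5, 2 iterations, hex-encoded stored hash (SimpleHash.toString()).
            HashedCredentialsMatcher matcher = new HashedCredentialsMatcher("md5");
            matcher.setHashIterations(2);
            setCredentialsMatcher(matcher);
        }

        @Override
        protected AuthenticationInfo doGetAuthenticationInfo(AuthenticationToken token)
                throws AuthenticationException {
            String username = (String) token.getPrincipal();
            User user = userService.getByUsername(username); // hypothetical lookup method
            if (user == null) {
                throw new UnknownAccountException("user not found: " + username);
            }
            // Hand Shiro the stored hash plus the per-user salt; the matcher re-hashes the
            // submitted password exactly as register() did and compares the two results.
            return new SimpleAuthenticationInfo(
                    user.getUsername(),
                    user.getPassword(),
                    ByteSource.Util.bytes(user.getSalt()),
                    getName());
        }

        @Override
        protected AuthorizationInfo doGetAuthorizationInfo(PrincipalCollection principals) {
            // Role/permission wiring is omitted; the real project resolves menus per role elsewhere.
            return new SimpleAuthorizationInfo();
        }
    }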

    Core Python code used to crawl the movie data:

    # -*- coding: utf-8 -*-
    import scrapy
    import json
    import re
    import time
    from douban.items import DoubanItem
    from fake_useragent import UserAgent
    import random


    class MovieHotSpider(scrapy.Spider):
        # Spider name, used to run it from the command line
        name = "movie_hot"
        allowed_domains = ["movie.douban.com"]
        # pro = ['139.224.37.83','115.223.7.110','221.122.91.75']

        # Template for the Douban movie list API URL
        BASE_URL = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%s&sort=recommend&page_limit=%s&page_start=%s'
        MOVIE_TAG = '华语'
        PAGE_LIMIT = 20
        page_start = 0
        domains = BASE_URL % (MOVIE_TAG, PAGE_LIMIT, page_start)

        # Pretend to be a regular browser
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
            # A "Cookie" header with a logged-in Douban session can be added here if needed (omitted).
        }

        # Total number of list pages to crawl
        pages = 100

        # Entry point of the spider
        def start_requests(self):
            print('~~~~ crawling list page: ' + self.domains)
            yield scrapy.Request(
                url=self.domains,
                headers=self.headers,
                callback=self.request_movies
            )

        # Parse a list page
        def request_movies(self, response):
            infos = response.text
            # Parse the JSON response
            infos = json.loads(infos)
            # Iterate over the movies on this page
            for movie_info in infos['subjects']:
                print('~~~ crawling movie: ' + movie_info['title'] + '/' + movie_info['rate'])
                # Request each movie's detail page
                yield scrapy.Request(
                    url=str(movie_info['url']),
                    headers=self.headers,
                    callback=self.request_movie,
                    dont_filter=True
                )
            # Stop once all pages are crawled or the current tag has no more movies
            if self.pages > 0 and len(infos['subjects']) == self.PAGE_LIMIT:
                self.pages -= 1
                self.page_start += self.PAGE_LIMIT
                url = self.BASE_URL % (self.MOVIE_TAG, self.PAGE_LIMIT, self.page_start)
                time.sleep(5)
                print('----- crawling list page: ' + url)
                yield scrapy.Request(
                    url=url,
                    headers=self.headers,
                    callback=self.request_movies,
                    dont_filter=True
                )

        # Parse a movie detail page
        def request_movie(self, response):
            # Assemble the item
            movie_item = DoubanItem()
            title = response.css('div#content>h1>span:nth-child(1)::text').extract_first()
            # Keep only Chinese characters, digits and common punctuation in the title
            t = re.findall('[\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b\u4e00-\u9fa5_0-9]', title)
            # Fields outside the "info" block
            movie_item['title'] = ''.join(t)
            movie_item['date'] = response.css('div#content>h1>span.year::text').extract_first()[1:-1]
            movie_item['rate'] = response.css('strong.rating_num::text').extract_first()
            # movie_item['commentCount'] = response.css('div.rating_sum>a.rating_people>span::text').extract_first()
            # movie_item['start'] = '/'.join(response.css('span.rating_per::text').extract())
            # movie_item['better'] = '/'.join(response.css('div.rating_betterthan>a::text').extract())
            movie_item['abs'] = response.css('#link-report>span::text').extract_first().strip()
            movie_item['cover'] = response.css('#mainpic>a>img::attr(src)').extract_first()
            # The whole "info" block as a single string
            info = response.css('div.subject div#info').xpath('string(.)').extract_first()
            # Extract all field names
            fields = [s.strip().replace(':', '') for s in response.css('div#info span.pl::text').extract()]
            # Extract all field values
            values = [re.sub('\s+', '', s.strip()) for s in re.split('\s*(?:%s):\s*' % '|'.join(fields), info)][1:]
            # Map Chinese field names to item keys
            for i in range(len(fields)):
                if '导演' == fields[i]:
                    fields[i] = 'director'
                if '编剧' == fields[i]:
                    fields[i] = 'scriptwriter'
                if '主演' == fields[i]:
                    fields[i] = 'actors'
                if '类型' == fields[i]:
                    fields[i] = 'categories'
                if '制片国家/地区' == fields[i]:
                    fields[i] = 'district'
                if '语言' == fields[i]:
                    fields[i] = 'language'
                if '片长' == fields[i]:
                    fields[i] = 'duration'
            # Drop the fields we do not store (a filter is used here instead of the original
            # remove-while-iterating loop, which skips elements)
            other_info = [(field, value) for field, value in zip(fields, values)
                          if field not in ['IMDb链接', '上映日期', '官方网站', '又名']]
            final_info = dict(other_info[:-1])
            movie_item.update(final_info)
            # Fill in any missing fields with a placeholder
            if 'director' not in movie_item.keys():
                movie_item['director'] = '/'
            if 'scriptwriter' not in movie_item.keys():
                movie_item['scriptwriter'] = '/'
            if 'actors' not in movie_item.keys():
                movie_item['actors'] = '/'
            if 'categories' not in movie_item.keys():
                movie_item['categories'] = '/'
            if 'district' not in movie_item.keys():
                movie_item['district'] = '/'
            if 'language' not in movie_item.keys():
                movie_item['language'] = '/'
            if 'duration' not in movie_item.keys():
                movie_item['duration'] = '/'
            print('~ finished crawling movie: ' + movie_item['title'] + '/' + movie_item['rate'])
            # Hand the item to the pipeline
            yield movie_item

    5. Project Summary

       The project is implemented with a separated front end and back end and includes the data-crawling work described above. It has a fair number of technical highlights, but the business functionality is relatively small: the main features are crawling the movie data, displaying it, and producing the related recommendations.
