实现效果如图:
源码如下:
- import requests
- import json
- # Excel自动化第三方库
- import xlwt
# Spoof a desktop browser User-Agent on outgoing requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/94.0.4606.71 Safari/537.36'
    ),
}
def Creat_Excel():
    """Create the workbook and worksheet and write the header row.

    The new workbook and its single sheet are stored in the module-level
    globals ``excel`` and ``sheet`` so other functions can write to them.
    """
    global excel, sheet

    excel = xlwt.Workbook()
    sheet = excel.add_sheet('纪录片')

    # Row 0 holds the column titles, one per column from left to right.
    column_titles = (
        '评分',
        '排行',
        '网址',
        '类型',
        '国籍',
        '电影名',
        '发布日期',
        '演员',
    )
    for column, title in enumerate(column_titles):
        sheet.write(0, column, title)
def Crawl(page, line):
    """Fetch one page of the Douban chart and write its rows to the sheet.

    Requests the ``top_list`` JSON endpoint with ``page`` as the ``start``
    offset, writes one spreadsheet row per returned entry beginning at row
    ``line``, prints each row, and then saves the workbook.

    Relies on the module-level ``headers`` dict and on the ``sheet`` and
    ``excel`` globals assigned by ``Creat_Excel()``, which must therefore
    be called first.

    Args:
        page: Value sent as the ``start`` query parameter.
        line: Index of the first sheet row to write.

    Returns:
        The row index immediately after the last row written (equal to
        ``line`` when the response contained no entries).

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    api_url = 'https://movie.douban.com/j/chart/top_list?'
    # Query-string parameters for the chart endpoint.
    param = {
        'type': '1',
        'interval_id': '100:90',
        'action': '',
        'start': page,
        'limit': '20',
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(
        url=api_url, params=param, headers=headers, timeout=10
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    li_data = json.loads(response.text)

    for movie in li_data:
        # Extract the fields in the same order as the header columns;
        # list-valued fields are flattened into comma-separated strings.
        row = (
            movie['score'],
            str(movie['rank']),
            movie['url'],
            ','.join(movie['types']),
            ','.join(movie['regions']),
            movie['title'],
            movie['release_date'],
            ','.join(movie['actors']),
        )
        for column, value in enumerate(row):
            sheet.write(line, column, value)
        line += 1  # advance to the next sheet row

        print(*row)

    # Persist the workbook after each page so partial progress is kept.
    excel.save('D:/pycharm_pro/爬虫/My_Demo/豆瓣排行.xls')
    return line
-
if __name__ == '__main__':
    # Build the workbook and header row before any data is written.
    Creat_Excel()
    # Five pages at offsets 0, 20, 40, 60, 80; row 0 is the header, so each
    # page's first data row is its offset plus one.
    for start in range(0, 100, 20):
        Crawl(start, start + 1)