• Python爬虫之爬取并下载哔哩哔哩视频


    亲自使用过,太好用了

    1. # 导入requests模块,模拟发送请求
    2. import requests
    3. # 导入json
    4. import json
    5. # 导入re
    6. import re
    # Default HTTP request headers shared by the requests.get() calls in this
    # script (a browser-like User-Agent plus permissive Accept headers).
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    }
    def my_match(text, pattern):
        """Extract the first capture group of ``pattern`` from ``text`` as JSON.

        The captured text is echoed to stdout (followed by a blank line) and
        then decoded with ``json.loads``.

        Args:
            text: The string to search.
            pattern: A regular expression containing at least one capture
                group; group 1 must capture a JSON document.

        Returns:
            The Python object decoded from the captured JSON text.

        Raises:
            ValueError: If ``pattern`` does not match anywhere in ``text``, or
                if the captured text is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        match = re.search(pattern, text)
        if match is None:
            # Without this guard the failure surfaces as an opaque
            # "'NoneType' object has no attribute 'group'" AttributeError.
            raise ValueError("pattern %r not found in text" % (pattern,))
        payload = match.group(1)
        print(payload)
        print()
        return json.loads(payload)
    def _stream_to_file(url, path, request_headers, size_label, chunk_size=1024 * 1024):
        """Stream the body of ``url`` into ``path`` and return the bytes written.

        The response is fetched once with ``stream=True`` and written chunk by
        chunk, so the whole body is never held in memory. The reported
        ``content-length`` header is printed after ``size_label``.
        """
        with requests.get(url, headers=request_headers, stream=True, timeout=30) as response:
            # Fail loudly instead of saving an HTTP error page as media data.
            response.raise_for_status()
            print(size_label, response.headers.get('content-length', 'unknown'))
            written = 0
            # 'wb' (not 'ab'): a re-run must replace the file, not append to it.
            with open(path, 'wb') as output:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        output.write(chunk)
                        written += len(chunk)
        return written


    def download_video(old_video_url, video_url, audio_url, video_name):
        """Download a video stream and its audio stream to the working directory.

        The video is saved as ``<video_name>_video.mp4`` and the audio as
        ``<video_name>_audio.mp4``. Existing files with those names are
        overwritten. Each stream is fetched with a single streaming request.

        Args:
            old_video_url: URL of the page the streams came from; sent as the
                ``Referer`` header (presumably required by the media server --
                confirm against the site's behaviour).
            video_url: Direct URL of the video stream.
            audio_url: Direct URL of the audio stream.
            video_name: Base name used for the output files and log messages.

        Returns:
            ``video_name``, unchanged.

        Raises:
            requests.HTTPError: If either request returns an error status.
        """
        # Work on a copy so the module-level ``headers`` dict is not polluted
        # with Referer/Range entries that would leak into later requests.
        request_headers = dict(headers)
        request_headers['Referer'] = old_video_url

        print("开始下载视频:%s" % video_name)
        _stream_to_file(
            video_url,
            '%s_video.mp4' % video_name,
            request_headers,
            '%s视频大小:' % video_name,
        )
        _stream_to_file(
            audio_url,
            '%s_audio.mp4' % video_name,
            request_headers,
            '%s音频大小:' % video_name,
        )
        return video_name
    47. if __name__ == '__main__':
    48. # 换成你要爬取的视频地址
    49. url ='https://www.bilibili.com/video/BV1zK4y1B7Z8/?share_source=copy_web'
    50. # 发送请求,拿回数据
    51. res = requests.get(url, headers=headers)
    52. # 视频详情json
    53. playinfo = my_match(res.text, '__playinfo__=(.*?)