• python爬取bilibili,下载视频


    一. 内容简介

    python下载bilibili视频合集

    二. 软件环境

    2.1vsCode

    2.2Anaconda

    version: conda 22.9.0

    2.3代码

    链接:https://pan.baidu.com/s/1WuXTso_iltLlnrLffi1kYQ?pwd=1234

    三.主要流程

    3.1 下载单个视频

    感觉现在下载的清晰度不够,可以再找找更高清晰度的视频地址,把代码里取地址的这块替换掉就行
    代码

    import requests
    import os
    from lxml import etree
    import re
    
    def videoDownload1(url_):
        """Download one bilibili video page and merge it into ./video/<title>/<title>.mp4.

        The page's ``window.__playinfo__`` script lists separate video and audio
        streams. The first stream of each kind is saved to a temporary file in
        the current directory, ffmpeg merges the two with ``-c copy``, and the
        temporary files are removed afterwards, even when a step fails.

        Args:
            url_: URL of the video page.

        Raises:
            IndexError: if the page has no matching ``<title>`` text or no
                ``window.__playinfo__`` stream entry.
            requests.HTTPError: if a stream request answers with an error status.
        """
        import subprocess  # local import: the snippet's import list does not include it

        # User agent and cookie sent with the page request.
        page_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            'Cookie': "buvid3=7014DDC0-BF1E-B121-F5A5-F10753C840B423630infoc; i-wanna-go-back=-1; _uuid=49BF2138-1E10F-D5F5-10898-D8311651B53927883infoc; FEED_LIVE_VERSION=V8; DedeUserID=171300042; DedeUserID__ckMd5=c65bec3211413192; CURRENT_FNVAL=4048; rpdid=|(J|)J~m~llk0J'uYm|)~klRl; header_theme_version=CLOSE; hit-new-style-dyn=1; hit-dyn-v2=1; is-2022-channel=1; fingerprint=fe5c7462625770aa2abce449a7c01fd2; buvid_fp_plain=undefined; b_nut=1691207170; b_ut=5; buvid_fp=fe5c7462625770aa2abce449a7c01fd2; LIVE_BUVID=AUTO4016915564967297; buvid4=1AE73807-AEA0-7078-DA57-7F9FE5C3D6F896987-023080912-A0g5nInZwV3VmJJT68FJxw%3D%3D; home_feed_column=5; SESSDATA=fc1266d3%2C1708653865%2C29c08%2A81-i-T9HQrucvpCVcPwSwXl5LmjTyduIzF9veu0KS9i2IwXK_xkcqlt1XQyxJ3sG-9HMSwLwAAKgA; bili_jct=068bc0a79f3fa7aa1a030e478dbf6d4b; sid=5yvjlnfi; browser_resolution=1920-971; bili_ticket=eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjMDIiLCJ0eXAiOiJKV1QifQ.eyJleHAiOjE2OTMzNjY1MTcsImlhdCI6MTY5MzEwNzMxNywicGx0IjotMX0.I1Yfp8S9UIkU4S0G5vtBJfslPtgY7QLCj1dx9WQpyRmxKpZoA1qB5UYXNW4KBSZFGljMm7F1lbGXSGco7F79JZJ2sZNBvH9QiSVlmipzAJKaucIoFh6s3m1jpqjLp10r; bili_ticket_expires=1693366517; bp_video_offset_171300042=834376858445283367; b_lsid=1021245DB_18A3567E5C2; CURRENT_QUALITY=80; PVID=2"
        }

        # Fetch the video page and parse its HTML.
        page = requests.get(url_, headers=page_headers, timeout=30)
        html_obj = etree.HTML(page.text)

        # The video name is the part of <title> before "_哔哩哔哩".
        res_ = html_obj.xpath('//title/text()')[0]
        title_ = re.findall(r'(.*?)_哔哩哔哩', res_)[0]
        # Remove spaces and '&' (as before) plus every character that is not
        # allowed in a Windows file name, so open() and ffmpeg get a valid path.
        title_ = re.sub(r'[\\/:*?"<>|& ]', '', title_)

        # Text of the script that carries the video and audio stream URLs.
        playinfo = html_obj.xpath('//script[contains(text(),"window.__playinfo__")]/text()')[0]
        video_url = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]
        audio_url = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]

        # Headers for the stream requests; the Referer is the video page itself.
        media_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
            'Referer': url_
        }

        # The '!' suffix keeps the temporary stream files distinct from the final name.
        title_1 = title_ + '!'
        video_tmp = f'{title_1}.mp4'
        audio_tmp = f'{title_1}.mp3'

        folder_path = f"./video/{title_}"
        output_path = f"{folder_path}/{title_}.mp4"
        ffmpeg_path = r".\ffmpeg\bin\ffmpeg.exe"

        try:
            # Stream each download to disk in chunks instead of holding it in memory.
            for stream_url, tmp_path in ((video_url, video_tmp), (audio_url, audio_tmp)):
                with requests.get(stream_url, headers=media_headers, stream=True, timeout=30) as resp:
                    resp.raise_for_status()
                    with open(tmp_path, 'wb') as f:
                        for chunk in resp.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)

            # makedirs also creates ./video itself when it does not exist yet.
            os.makedirs(folder_path, exist_ok=True)

            # Argument list, no shell: characters in the title cannot be
            # interpreted as shell syntax. -y overwrites an existing output
            # instead of waiting for an interactive answer.
            command = [
                ffmpeg_path, '-y', '-loglevel', 'quiet',
                '-i', audio_tmp, '-i', video_tmp,
                '-c', 'copy', output_path,
            ]
            try:
                exit_code = subprocess.run(command).returncode
            except OSError as exc:
                # ffmpeg could not be started at all (e.g. executable missing).
                print(f'{title_}  could not run ffmpeg: {exc}')
                return

            if exit_code == 0:
                print(f'{title_}  下载完成')
            else:
                print(f'{title_}  ffmpeg merge failed (exit code {exit_code})')
        finally:
            # Remove the temporary video-only and audio-only files.
            for tmp_path in (video_tmp, audio_tmp):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94

    3.2 下载选集视频

    选集视频的播放链接很好找,就是后面的p=几啥的,拼一下就可以拿到整个的播放链接了
    代码

    import requests
    import os
    from lxml import etree
    import re
    
    # 获取网页源码
    def getUrls2(url):
        """Build the play URL of every part of a multi-part video.

        Fetches the video page, counts the <li> entries under
        <ul class="list-box">, and returns one URL per entry: the input URL
        without its query string, followed by "?p=<1-based part number>".

        Args:
            url: URL of the video page.

        Returns:
            List of part URLs, empty when the page has no list-box entries.
        """
        # User agent and cookie sent with the page request.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            'Cookie': "buvid3=7014DDC0-BF1E-B121-F5A5-F10753C840B423630infoc; i-wanna-go-back=-1; _uuid=49BF2138-1E10F-D5F5-10898-D8311651B53927883infoc; FEED_LIVE_VERSION=V8; DedeUserID=171300042; DedeUserID__ckMd5=c65bec3211413192; CURRENT_FNVAL=4048; rpdid=|(J|)J~m~llk0J'uYm|)~klRl; header_theme_version=CLOSE; hit-new-style-dyn=1; hit-dyn-v2=1; is-2022-channel=1; fingerprint=fe5c7462625770aa2abce449a7c01fd2; buvid_fp_plain=undefined; b_nut=1691207170; b_ut=5; buvid_fp=fe5c7462625770aa2abce449a7c01fd2; LIVE_BUVID=AUTO4016915564967297; buvid4=1AE73807-AEA0-7078-DA57-7F9FE5C3D6F896987-023080912-A0g5nInZwV3VmJJT68FJxw%3D%3D; home_feed_column=5; SESSDATA=fc1266d3%2C1708653865%2C29c08%2A81-i-T9HQrucvpCVcPwSwXl5LmjTyduIzF9veu0KS9i2IwXK_xkcqlt1XQyxJ3sG-9HMSwLwAAKgA; bili_jct=068bc0a79f3fa7aa1a030e478dbf6d4b; sid=5yvjlnfi; browser_resolution=1920-971; bili_ticket=eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjMDIiLCJ0eXAiOiJKV1QifQ.eyJleHAiOjE2OTMzNjY1MTcsImlhdCI6MTY5MzEwNzMxNywicGx0IjotMX0.I1Yfp8S9UIkU4S0G5vtBJfslPtgY7QLCj1dx9WQpyRmxKpZoA1qB5UYXNW4KBSZFGljMm7F1lbGXSGco7F79JZJ2sZNBvH9QiSVlmipzAJKaucIoFh6s3m1jpqjLp10r; bili_ticket_expires=1693366517; bp_video_offset_171300042=834376858445283367; b_lsid=1021245DB_18A3567E5C2; CURRENT_QUALITY=80; PVID=2"
        }
        page = requests.get(url, headers=request_headers)

        # Parse the page HTML and count the entries of the part list.
        tree = etree.HTML(page.text)
        part_count = len(tree.xpath("//ul[@class='list-box']/li"))

        # Drop everything from the first '?' on; the whole URL is kept when
        # there is no query string.
        base_url = url.split('?', 1)[0]

        # Part numbers in the "?p=" parameter start at 1.
        return [base_url + "?p=" + str(part) for part in range(1, part_count + 1)]
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    import requests
    import os
    from lxml import etree
    import re
    
    def videoDownload3(url_,i,name):
        """Download one video of a collection into ./video/<name>/<i>.<title>.mp4.

        The page's ``window.__playinfo__`` script lists separate video and audio
        streams. The first stream of each kind is saved to a temporary file in
        the current directory, ffmpeg merges the two with ``-c copy``, and the
        temporary files are removed afterwards, even when a step fails. When
        the merged file already exists nothing is downloaded.

        Args:
            url_: URL of the video page.
            i: position of the video in the collection; used as file name prefix.
            name: collection name; used as the folder name under ./video.

        Raises:
            IndexError: if the page has no matching ``<title>`` text or no
                ``window.__playinfo__`` stream entry.
            requests.HTTPError: if a stream request answers with an error status.
        """
        import subprocess  # local import: the snippet's import list does not include it

        # User agent and cookie sent with the page request.
        page_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            'Cookie': "buvid3=7014DDC0-BF1E-B121-F5A5-F10753C840B423630infoc; i-wanna-go-back=-1; _uuid=49BF2138-1E10F-D5F5-10898-D8311651B53927883infoc; FEED_LIVE_VERSION=V8; DedeUserID=171300042; DedeUserID__ckMd5=c65bec3211413192; CURRENT_FNVAL=4048; rpdid=|(J|)J~m~llk0J'uYm|)~klRl; header_theme_version=CLOSE; hit-new-style-dyn=1; hit-dyn-v2=1; is-2022-channel=1; fingerprint=fe5c7462625770aa2abce449a7c01fd2; buvid_fp_plain=undefined; b_nut=1691207170; b_ut=5; buvid_fp=fe5c7462625770aa2abce449a7c01fd2; LIVE_BUVID=AUTO4016915564967297; buvid4=1AE73807-AEA0-7078-DA57-7F9FE5C3D6F896987-023080912-A0g5nInZwV3VmJJT68FJxw%3D%3D; home_feed_column=5; SESSDATA=fc1266d3%2C1708653865%2C29c08%2A81-i-T9HQrucvpCVcPwSwXl5LmjTyduIzF9veu0KS9i2IwXK_xkcqlt1XQyxJ3sG-9HMSwLwAAKgA; bili_jct=068bc0a79f3fa7aa1a030e478dbf6d4b; sid=5yvjlnfi; browser_resolution=1920-971; bili_ticket=eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjMDIiLCJ0eXAiOiJKV1QifQ.eyJleHAiOjE2OTMzNjY1MTcsImlhdCI6MTY5MzEwNzMxNywicGx0IjotMX0.I1Yfp8S9UIkU4S0G5vtBJfslPtgY7QLCj1dx9WQpyRmxKpZoA1qB5UYXNW4KBSZFGljMm7F1lbGXSGco7F79JZJ2sZNBvH9QiSVlmipzAJKaucIoFh6s3m1jpqjLp10r; bili_ticket_expires=1693366517; bp_video_offset_171300042=834376858445283367; b_lsid=1021245DB_18A3567E5C2; CURRENT_QUALITY=80; PVID=2"
        }

        # Fetch the video page and parse its HTML.
        page = requests.get(url_, headers=page_headers, timeout=30)
        html_obj = etree.HTML(page.text)

        # The video name is the part of <title> before "_哔哩哔哩".
        res_ = html_obj.xpath('//title/text()')[0]
        title_ = re.findall(r'(.*?)_哔哩哔哩', res_)[0]
        # Remove spaces, '&', '-' and '—' (as before) plus every character that
        # is not allowed in a Windows file name.
        title_ = re.sub(r'[\\/:*?"<>|& \-—]', '', title_)

        # The collection name becomes a folder name: drop only the characters a
        # Windows path component cannot contain.
        fileName = re.sub(r'[\\/:*?"<>|]', '', name)
        folder_path = f"./video/{fileName}"
        # The merged file is written to the same path the existence check looks
        # at (previously the output carried the temporary '!' suffix, so the
        # check never matched).
        file_path = f"{folder_path}/{i}.{title_}.mp4"

        # Already merged earlier: skip the download entirely.
        if os.path.exists(file_path):
            print(f'{i}.{title_}  下载完成')
            return

        # Text of the script that carries the video and audio stream URLs.
        playinfo = html_obj.xpath('//script[contains(text(),"window.__playinfo__")]/text()')[0]
        video_url = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]
        audio_url = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]

        # Headers for the stream requests; the Referer is the video page itself.
        media_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
            'Referer': url_
        }

        # The '!' suffix keeps the temporary stream files distinct from the final name.
        title_1 = title_ + '!'
        video_tmp = f'{title_1}.mp4'
        audio_tmp = f'{title_1}.mp3'
        ffmpeg_path = r".\ffmpeg\bin\ffmpeg.exe"

        try:
            # Stream each download to disk in chunks instead of holding it in memory.
            for stream_url, tmp_path in ((video_url, video_tmp), (audio_url, audio_tmp)):
                with requests.get(stream_url, headers=media_headers, stream=True, timeout=30) as resp:
                    resp.raise_for_status()
                    with open(tmp_path, 'wb') as f:
                        for chunk in resp.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)

            # makedirs also creates ./video itself when it does not exist yet.
            os.makedirs(folder_path, exist_ok=True)

            # Argument list, no shell: characters in the names cannot be
            # interpreted as shell syntax.
            command = [
                ffmpeg_path, '-loglevel', 'quiet',
                '-i', audio_tmp, '-i', video_tmp,
                '-c', 'copy', file_path,
            ]
            try:
                exit_code = subprocess.run(command).returncode
            except OSError as exc:
                # ffmpeg could not be started at all (e.g. executable missing).
                print(f'{i}.{title_}  could not run ffmpeg: {exc}')
                return

            if exit_code == 0:
                print(f'{i}.{title_}  下载完成')
            else:
                print(f'{i}.{title_}  ffmpeg merge failed (exit code {exit_code})')
        finally:
            # Remove the temporary video-only and audio-only files.
            for tmp_path in (video_tmp, audio_tmp):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96

    3.3 下载合集视频

    合集的里面数据的访问api
    在这里插入图片描述
    合集里面的数据:从这个接口返回的json中取出每个视频的bvid,再拼接成视频播放链接
    在这里插入图片描述

    代码

    # 获取网页源码
    def getUrls3(url):
        """Collect the play URL of every video in a bilibili collection.

        The first two digit runs in ``url`` are used as ``mid`` and
        ``season_id`` of the ``seasons_archives_list`` API, which is read page
        by page (30 entries per request).

        Args:
            url: collection page URL, e.g.
                https://space.bilibili.com/<mid>/channel/collectiondetail?sid=<season_id>

        Returns:
            Tuple ``(urls, name)``: the list of
            "https://www.bilibili.com/video/<bvid>/" URLs and the collection
            name taken from ``data.meta.name``.

        Raises:
            ValueError: if ``url`` contains fewer than two numbers.
            requests.HTTPError: if the first API page answers with an error
                status (the page count and name cannot be determined then).
        """
        # NOTE(review): the previous version built a User-Agent/Cookie header
        # dict here but never passed it to requests.get; the API is still
        # queried with the library's default headers.
        numbers = re.findall(r'\d+', url)
        if len(numbers) < 2:
            raise ValueError(f"cannot find mid and season_id in URL: {url!r}")
        mid, season_id = numbers[0], numbers[1]

        def fetch_page(page_num):
            # One page of the collection listing.
            api_url = f"https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid={mid}&season_id={season_id}&sort_reverse=false&page_num={page_num}&page_size=30"
            return requests.get(api_url, timeout=30)

        def video_urls(archives):
            # "archives" may be missing/null on an empty page.
            return [f"https://www.bilibili.com/video/{archive['bvid']}/" for archive in archives or []]

        # Page 1 provides the totals and the collection name, and its entries
        # are reused instead of requesting the page a second time.
        first = fetch_page(1)
        first.raise_for_status()
        data = first.json()["data"]
        total = int(data["page"]["total"])
        page_size = int(data["page"]["page_size"])
        name = data["meta"]["name"]
        # Ceiling division: an exact multiple of page_size needs no extra page.
        page_count = max(1, (total + page_size - 1) // page_size)

        urls = video_urls(data["archives"])
        for page_num in range(2, page_count + 1):
            response = fetch_page(page_num)
            if response.status_code != 200:
                # Keep going with the remaining pages, but say what is missing.
                print(f"getUrls3: page {page_num} returned HTTP {response.status_code}, skipped")
                continue
            urls.extend(video_urls(response.json()["data"]["archives"]))
        return urls,name
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    import requests
    import os
    from lxml import etree
    import re
    
    def videoDownload2(url_,i):
        """Download one part of a multi-part video into ./video/<page heading>/<i>.<title>.mp4.

        The folder name is the text of the page's ``<h1 class="video-title">``.
        The page's ``window.__playinfo__`` script lists separate video and audio
        streams. The first stream of each kind is saved to a temporary file in
        the current directory, ffmpeg merges the two with ``-c copy``, and the
        temporary files are removed afterwards, even when a step fails. When
        the merged file already exists nothing is downloaded.

        Args:
            url_: URL of the video page (including its "?p=N" part selector).
            i: position of the part; used as file name prefix.

        Raises:
            IndexError: if the page has no matching ``<title>`` text, no
                ``video-title`` heading or no ``window.__playinfo__`` stream entry.
            requests.HTTPError: if a stream request answers with an error status.
        """
        import subprocess  # local import: the snippet's import list does not include it

        # User agent and cookie sent with the page request.
        page_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            'Cookie': "buvid3=7014DDC0-BF1E-B121-F5A5-F10753C840B423630infoc; i-wanna-go-back=-1; _uuid=49BF2138-1E10F-D5F5-10898-D8311651B53927883infoc; FEED_LIVE_VERSION=V8; DedeUserID=171300042; DedeUserID__ckMd5=c65bec3211413192; CURRENT_FNVAL=4048; rpdid=|(J|)J~m~llk0J'uYm|)~klRl; header_theme_version=CLOSE; hit-new-style-dyn=1; hit-dyn-v2=1; is-2022-channel=1; fingerprint=fe5c7462625770aa2abce449a7c01fd2; buvid_fp_plain=undefined; b_nut=1691207170; b_ut=5; buvid_fp=fe5c7462625770aa2abce449a7c01fd2; LIVE_BUVID=AUTO4016915564967297; buvid4=1AE73807-AEA0-7078-DA57-7F9FE5C3D6F896987-023080912-A0g5nInZwV3VmJJT68FJxw%3D%3D; home_feed_column=5; SESSDATA=fc1266d3%2C1708653865%2C29c08%2A81-i-T9HQrucvpCVcPwSwXl5LmjTyduIzF9veu0KS9i2IwXK_xkcqlt1XQyxJ3sG-9HMSwLwAAKgA; bili_jct=068bc0a79f3fa7aa1a030e478dbf6d4b; sid=5yvjlnfi; browser_resolution=1920-971; bili_ticket=eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjMDIiLCJ0eXAiOiJKV1QifQ.eyJleHAiOjE2OTMzNjY1MTcsImlhdCI6MTY5MzEwNzMxNywicGx0IjotMX0.I1Yfp8S9UIkU4S0G5vtBJfslPtgY7QLCj1dx9WQpyRmxKpZoA1qB5UYXNW4KBSZFGljMm7F1lbGXSGco7F79JZJ2sZNBvH9QiSVlmipzAJKaucIoFh6s3m1jpqjLp10r; bili_ticket_expires=1693366517; bp_video_offset_171300042=834376858445283367; b_lsid=1021245DB_18A3567E5C2; CURRENT_QUALITY=80; PVID=2"
        }

        # Fetch the video page and parse its HTML.
        page = requests.get(url_, headers=page_headers, timeout=30)
        html_obj = etree.HTML(page.text)

        # The video name is the part of <title> before "_哔哩哔哩".
        res_ = html_obj.xpath('//title/text()')[0]
        title_ = re.findall(r'(.*?)_哔哩哔哩', res_)[0]
        # Remove spaces and '&' (as before) plus every character that is not
        # allowed in a Windows file name.
        title_ = re.sub(r'[\\/:*?"<>|& ]', '', title_)

        # The page heading becomes a folder name: drop only the characters a
        # Windows path component cannot contain.
        fileName = html_obj.xpath('//h1[@class="video-title"]/text()')[0]
        fileName = re.sub(r'[\\/:*?"<>|]', '', fileName)
        folder_path = f"./video/{fileName}"
        file_path = f"{folder_path}/{i}.{title_}.mp4"

        # Already merged earlier: skip the download entirely.
        if os.path.exists(file_path):
            print(f'{i}.{title_}  下载完成')
            return

        # Text of the script that carries the video and audio stream URLs.
        playinfo = html_obj.xpath('//script[contains(text(),"window.__playinfo__")]/text()')[0]
        video_url = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]
        audio_url = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', playinfo)[0]

        # Headers for the stream requests; the Referer is the video page itself.
        media_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
            'Referer': url_
        }

        # The '!' suffix keeps the temporary stream files distinct from the final name.
        title_1 = title_ + '!'
        video_tmp = f'{title_1}.mp4'
        audio_tmp = f'{title_1}.mp3'
        ffmpeg_path = r".\ffmpeg\bin\ffmpeg.exe"

        try:
            # Stream each download to disk in chunks instead of holding it in memory.
            for stream_url, tmp_path in ((video_url, video_tmp), (audio_url, audio_tmp)):
                with requests.get(stream_url, headers=media_headers, stream=True, timeout=30) as resp:
                    resp.raise_for_status()
                    with open(tmp_path, 'wb') as f:
                        for chunk in resp.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)

            # makedirs also creates ./video itself when it does not exist yet.
            os.makedirs(folder_path, exist_ok=True)

            # Argument list, no shell: characters in the names cannot be
            # interpreted as shell syntax.
            command = [
                ffmpeg_path, '-loglevel', 'quiet',
                '-i', audio_tmp, '-i', video_tmp,
                '-c', 'copy', file_path,
            ]
            try:
                exit_code = subprocess.run(command).returncode
            except OSError as exc:
                # ffmpeg could not be started at all (e.g. executable missing).
                print(f'{i}.{title_}  could not run ffmpeg: {exc}')
                return

            if exit_code == 0:
                print(f'{i}.{title_}  下载完成')
            else:
                print(f'{i}.{title_}  ffmpeg merge failed (exit code {exit_code})')
        finally:
            # Remove the temporary video-only and audio-only files.
            for tmp_path in (video_tmp, audio_tmp):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96

    3.4 多线程

    代码

    import concurrent.futures
    import requests
    
    # 定义一个下载函数
    def download_video(URL):
        """Unpack one "<url> <index> <name>" task string and download that video.

        Only the first two spaces separate fields, so the name may itself
        contain spaces. The index is passed on as a string.

        Raises:
            ValueError: if the string has fewer than three space-separated fields.
        """
        fields = URL.split(" ", maxsplit=2)
        page_url, position, collection_name = fields
        videoDownload3(page_url, position, collection_name)
    
    def THREAD(URLS, max_workers=10):
        """Run download_video for every entry of URLS on a thread pool.

        A task that raises does not stop the others; its exception is printed
        when the task's result is collected.

        Args:
            URLS: iterable of task strings accepted by download_video.
            max_workers: number of worker threads. Defaults to 10, the value
                that used to be hard-coded.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Hand every download task to the pool.
            futures = [executor.submit(download_video, URL) for URL in URLS]

            # Wait for all tasks; result() is called only to surface exceptions.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"An error occurred: {e}")
    
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23

    3.5 结果

    # Input format: "<url> <mode>", where mode is
    #   1 - a single video, 2 - a multi-part video, 3 - a collection.
    url_model = "https://space.bilibili.com/471303350/channel/collectiondetail?sid=1278346 3"
    value = url_model.split()
    url = value[0]
    # A missing mode is reported below instead of raising IndexError here.
    model = value[1] if len(value) > 1 else ""
    
    if model == "1":
        videoDownload1(url)
        print("下载完成")
    elif model == "2":
        # URL analysis
        # Opening a video from search gives:
        # https://www.bilibili.com/video/BV1qW4y1a7fU/?spm_id_from=333.337.search-card.all.click
        # Clicking a part of the video gives:
        # https://www.bilibili.com/video/BV1qW4y1a7fU?p=1
        # https://www.bilibili.com/video/BV1qW4y1a7fU?p=2&vd_source=de2dcd0f37ff916ec3f8fb83c6366123
        # So every part has the same URL shape, and p=N selects part N:
        # https://www.bilibili.com/video/BV1qW4y1a7fU?p=1
    
        # The number of parts is read from the part list on the page.
        urls = getUrls2(url)
        # Separate loop name so the input `url` is not overwritten.
        for index, part_url in enumerate(urls):
            videoDownload2(part_url, index)
    
        print("下载完成")
    elif model == "3":
        # URL analysis
        # The videos of a collection have unrelated URLs, and the play page
        # does not contain direct links to them, so the collection page URL is
        # used instead. The links on that page are loaded dynamically: either
        # read the JSON API (done here) or wait for the page in a headless
        # browser (more work).
        # https://space.bilibili.com/107762251/channel/collectiondetail?sid=877119
        # https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid=107762251&season_id=877119&sort_reverse=false&page_num=1&page_size=30
        # https://space.bilibili.com/389199842/channel/collectiondetail?sid=1275285
        # https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid=389199842&season_id=1275285&sort_reverse=false&page_num=1&page_size=30
        # In both pairs the first number is the user and the second one the
        # collection; the rest of the API URL stays the same.
    
        urls, name = getUrls3(url)
        for index, video_url in enumerate(urls):
            videoDownload3(video_url, index, name)
        # Multi-threaded alternative: build the whole task list first, then
        # start the pool once.
        # URLS = [f"{video_url} {index} {name}" for index, video_url in enumerate(urls)]
        # THREAD(URLS)
    else:
        # Previously an unknown mode did nothing without any message.
        print(f"Unknown mode {model!r}: expected 1, 2 or 3 after the URL")
        
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49

    这里做个展示。有些合集下载的时候有点bug,还没找到问题:可以下载,但是保存路径有点问题,应该是合集名称和命令行冲突了,这里先不改了
    在这里插入图片描述
    在这里插入图片描述
    在这里插入图片描述

    3.6 合集视频更新

    原来会出现部分合集显示下载成功,但是文件夹里面没有东西,是因为有些合集名字在命令里面没办法执行,因为一些特殊符号什么的,所以把合集名字手动指定一下下载就可以了,然后多线程加上去,代码如下
    拿视频链接的

    # 获取网页源码
    def getUrls3(url):
        """Collect the play URL of every video in a bilibili collection.

        The first two digit runs in ``url`` are used as ``mid`` and
        ``season_id`` of the ``seasons_archives_list`` API, which is read page
        by page (30 entries per request).

        Args:
            url: collection page URL, e.g.
                https://space.bilibili.com/<mid>/channel/collectiondetail?sid=<season_id>

        Returns:
            Tuple ``(urls, name)``: the list of
            "https://www.bilibili.com/video/<bvid>/" URLs and the collection
            name taken from ``data.meta.name``.

        Raises:
            ValueError: if ``url`` contains fewer than two numbers.
            requests.HTTPError: if the first API page answers with an error
                status (the page count and name cannot be determined then).
        """
        # NOTE(review): the previous version built a User-Agent/Cookie header
        # dict here but never passed it to requests.get; the API is still
        # queried with the library's default headers.
        numbers = re.findall(r'\d+', url)
        if len(numbers) < 2:
            raise ValueError(f"cannot find mid and season_id in URL: {url!r}")
        mid, season_id = numbers[0], numbers[1]

        def fetch_page(page_num):
            # One page of the collection listing.
            api_url = f"https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid={mid}&season_id={season_id}&sort_reverse=false&page_num={page_num}&page_size=30"
            return requests.get(api_url, timeout=30)

        def video_urls(archives):
            # "archives" may be missing/null on an empty page.
            return [f"https://www.bilibili.com/video/{archive['bvid']}/" for archive in archives or []]

        # Page 1 provides the totals and the collection name, and its entries
        # are reused instead of requesting the page a second time.
        first = fetch_page(1)
        first.raise_for_status()
        data = first.json()["data"]
        total = int(data["page"]["total"])
        page_size = int(data["page"]["page_size"])
        name = data["meta"]["name"]
        # Ceiling division: an exact multiple of page_size needs no extra page.
        page_count = max(1, (total + page_size - 1) // page_size)

        urls = video_urls(data["archives"])
        for page_num in range(2, page_count + 1):
            response = fetch_page(page_num)
            if response.status_code != 200:
                # Keep going with the remaining pages, but say what is missing.
                print(f"getUrls3: page {page_num} returned HTTP {response.status_code}, skipped")
                continue
            urls.extend(video_urls(response.json()["data"]["archives"]))
        return urls,name
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43

    下载视频的

    import requests
    import os
    from lxml import etree
    import re
    
    def videoDownload3(url_,index,name):
        """Download one bilibili video and mux it into ``./video/<name>/``.

        The play page is fetched, the title and the first video/audio
        ``baseUrl`` entries are extracted from the embedded
        ``window.__playinfo__`` script, both streams are saved to temporary
        files in the current directory, and ffmpeg merges them (stream copy)
        into ``./video/<name>/<index>.<title>.mp4``. The temporary files are
        removed afterwards, whether or not the merge succeeded.

        Args:
            url_: URL of the video play page.
            index: Position of the video in the collection; used as a prefix
                of the output (and temporary) file names.
            name: Sub-folder of ``./video`` that receives the merged file.

        Raises:
            IndexError: If the title or the play-info URLs are not found in
                the page.
            requests.HTTPError: If the page or a media request answers with
                an error status.
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status.
        """
        # Imported locally so this block does not rely on a file-level import.
        import subprocess

        # User agent and cookie for the page request.
        # NOTE(review): the cookie embeds a personal session (SESSDATA); it is
        # kept verbatim here, but should be loaded from configuration instead.
        headers_ = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            'Cookie': "buvid3=7014DDC0-BF1E-B121-F5A5-F10753C840B423630infoc; i-wanna-go-back=-1; _uuid=49BF2138-1E10F-D5F5-10898-D8311651B53927883infoc; FEED_LIVE_VERSION=V8; DedeUserID=171300042; DedeUserID__ckMd5=c65bec3211413192; CURRENT_FNVAL=4048; rpdid=|(J|)J~m~llk0J'uYm|)~klRl; header_theme_version=CLOSE; hit-new-style-dyn=1; hit-dyn-v2=1; is-2022-channel=1; fingerprint=fe5c7462625770aa2abce449a7c01fd2; buvid_fp_plain=undefined; b_nut=1691207170; b_ut=5; buvid_fp=fe5c7462625770aa2abce449a7c01fd2; LIVE_BUVID=AUTO4016915564967297; buvid4=1AE73807-AEA0-7078-DA57-7F9FE5C3D6F896987-023080912-A0g5nInZwV3VmJJT68FJxw%3D%3D; home_feed_column=5; SESSDATA=fc1266d3%2C1708653865%2C29c08%2A81-i-T9HQrucvpCVcPwSwXl5LmjTyduIzF9veu0KS9i2IwXK_xkcqlt1XQyxJ3sG-9HMSwLwAAKgA; bili_jct=068bc0a79f3fa7aa1a030e478dbf6d4b; sid=5yvjlnfi; browser_resolution=1920-971; bili_ticket=eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjMDIiLCJ0eXAiOiJKV1QifQ.eyJleHAiOjE2OTMzNjY1MTcsImlhdCI6MTY5MzEwNzMxNywicGx0IjotMX0.I1Yfp8S9UIkU4S0G5vtBJfslPtgY7QLCj1dx9WQpyRmxKpZoA1qB5UYXNW4KBSZFGljMm7F1lbGXSGco7F79JZJ2sZNBvH9QiSVlmipzAJKaucIoFh6s3m1jpqjLp10r; bili_ticket_expires=1693366517; bp_video_offset_171300042=834376858445283367; b_lsid=1021245DB_18A3567E5C2; CURRENT_QUALITY=80; PVID=2"
        }

        # Fetch the play page; its HTML carries both the title and the stream URLs.
        response_ = requests.get(url_, headers=headers_, timeout=30)
        response_.raise_for_status()
        html_obj = etree.HTML(response_.text)

        # The <title> looks like "<video title>_哔哩哔哩..."; keep the part before the suffix.
        res_ = html_obj.xpath('//title/text()')[0]
        title_ = re.findall(r'(.*?)_哔哩哔哩', res_)[0]
        # Strip characters that are illegal in Windows file names, plus the
        # space and '&' that the original code already removed.
        title_ = re.sub(r'[\\/:*?"<>|& ]', '', title_)

        # Text of the inline script that assigns window.__playinfo__.
        url_list_str = html_obj.xpath('//script[contains(text(),"window.__playinfo__")]/text()')[0]

        # First video-only and audio-only stream URLs listed in the play info.
        video_url = re.findall(r'"video":\[{"id":\d+,"baseUrl":"(.*?)"', url_list_str)[0]
        audio_url = re.findall(r'"audio":\[{"id":\d+,"baseUrl":"(.*?)"', url_list_str)[0]

        # Headers for the media requests: the play page is sent as Referer.
        headers_ = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36',
            'Referer': url_
        }

        def _save_stream(media_url, path):
            # Write the response to disk chunk by chunk instead of holding the
            # whole media file in memory.
            with requests.get(media_url, headers=headers_, stream=True, timeout=30) as resp:
                resp.raise_for_status()
                with open(path, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)

        # Temporary names carry the index so that two collection entries with
        # the same title cannot overwrite each other when run concurrently.
        temp_base = f'{index}.{title_}!'
        video_path = f'{temp_base}.mp4'
        audio_path = f'{temp_base}.mp3'

        ffmpeg_path = r".\ffmpeg\bin\ffmpeg.exe"
        folder_path = f"./video/{name}"
        output_path = f'./video/{name}/{index}.{title_}.mp4'

        try:
            _save_stream(video_url, video_path)
            _save_stream(audio_url, audio_path)

            # exist_ok avoids the check-then-create race between worker
            # threads; makedirs also creates ./video when it is missing.
            os.makedirs(folder_path, exist_ok=True)

            # Argument list (no shell): names containing spaces or shell
            # metacharacters are passed through to ffmpeg unchanged.
            subprocess.run(
                [
                    ffmpeg_path,
                    '-i', audio_path,
                    '-i', video_path,
                    '-c', 'copy',
                    output_path,
                    '-loglevel', 'quiet',
                ],
                check=True,
            )

            print(f'{title_}  下载完成')
        finally:
            # Remove the video-only / audio-only temporaries even on failure.
            for temp_path in (video_path, audio_path):
                if os.path.exists(temp_path):
                    os.remove(temp_path)
    
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95

    多线程

    import concurrent.futures
    import requests
    
    # 定义一个下载函数
    def download_video(URL):
        """Unpack one "<url> <index> <name>" task string and download it.

        The string is split on the first two spaces only, so the trailing
        ``name`` part may itself contain spaces.
        """
        fields = URL.split(" ", 2)
        page_url, position, folder = fields
        videoDownload3(page_url, position, folder)
    
    def THREAD(URLS, max_workers=10):
        """Run ``download_video`` for every entry of *URLS* on a thread pool.

        Args:
            URLS: Iterable of task strings accepted by ``download_video``.
            max_workers: Number of worker threads (defaults to 10, the value
                that was previously hard-coded).

        Returns:
            None. A task that raises is reported on stdout together with the
            task string it belongs to; the remaining tasks keep running.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Map each future back to its task so a failure can be attributed.
            futures = {executor.submit(download_video, URL): URL for URL in URLS}

            # Drain the futures as they finish; result() re-raises any
            # exception raised inside the worker.
            for future in concurrent.futures.as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"An error occurred while downloading '{futures[future]}': {e}")
    
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22

    执行

    
    # Driver input: "<url> <mode>", where mode selects the download strategy
    #   1 - a single video page
    #   2 - a multi-part video (one BV id, several ?p=N parts)
    #   3 - a collection on a user's space page
    url_model = "https://space.bilibili.com/389199842/channel/collectiondetail?sid=1275285 3"
    # Split on the last space so the mode is always the final token.
    url, model = url_model.rsplit(' ', 1)

    if model == "1":
        videoDownload1(url)
        print("下载完成")
    elif model == "2":
        # URL analysis:
        # Opening a video from search gives
        #   https://www.bilibili.com/video/BV1qW4y1a7fU/?spm_id_from=333.337.search-card.all.click
        # Clicking an individual part gives
        #   https://www.bilibili.com/video/BV1qW4y1a7fU?p=1
        #   https://www.bilibili.com/video/BV1qW4y1a7fU?p=2&vd_source=de2dcd0f37ff916ec3f8fb83c6366123
        # so each part is addressed as ?p=<part number>:
        #   https://www.bilibili.com/video/BV1qW4y1a7fU?p=1
        #
        # The number of parts is shown in the "video parts" list of the page;
        # getUrls2 reads the page source to build the per-part URLs.
        urls = getUrls2(url)
        for index, part_url in enumerate(urls):
            videoDownload2(part_url, index)

        print("下载完成")
    elif model == "3":
        # URL analysis:
        # The videos of a collection have no regular URL pattern, and the play
        # page does not expose direct links, so the collection page is used.
        # Its links are loaded dynamically; they can be read from the JSON API
        # (used here) or with a headless browser once the page has rendered.
        #   https://space.bilibili.com/107762251/channel/collectiondetail?sid=877119
        #   https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid=107762251&season_id=877119&sort_reverse=false&page_num=1&page_size=30
        #   https://space.bilibili.com/389199842/channel/collectiondetail?sid=1275285
        #   https://api.bilibili.com/x/polymer/web-space/seasons_archives_list?mid=389199842&season_id=1275285&sort_reverse=false&page_num=1&page_size=30
        # In both pairs the first number is the user id and the second the
        # collection id; the rest of the API URL appears to be fixed.
        urls, name = getUrls3(url)
        # NOTE(review): hard-coded folder name that overrides the name returned
        # by getUrls3 - confirm this is intended before reusing the script.
        name = "qml项目"
        # One "<url> <1-based index> <folder>" task string per video.
        URLS = [
            f"{video_url} {index} {name}"
            for index, video_url in enumerate(urls, start=1)
        ]
        THREAD(URLS)
        print("全部下载完成!!!")
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52

    在这里插入图片描述
    在这里插入图片描述

    在这里插入图片描述

    四.参考

    http://t.csdn.cn/6Pt7v 想下载B站视频却不知如何下手?一文教你爬B站!

  • 相关阅读:
    java计算机毕业设计网上报名及成绩查询系统源程序+mysql+系统+lw文档+远程调试
    Unity入门05——Unity重要组件和API(2)
    centos7搭建git服务器
    Kafka消息分区&producer拦截器&无消息丢失(八)
    vue学习-10vue整合SpringBoot跨域请求
    动态数据源自定义SqlSessionFactoryBean时mybatis plus配置失效
    vie的刷新机制
    NestJS学习之优秀项目分析与最佳实践
    组合数(1) 用Vector实现获取所有组合数列表的QT实现
    SpringBoot整合Jedis可切换使用单机、哨兵、集群模式
  • 原文地址:https://blog.csdn.net/qq_45179361/article/details/132526436