Python爬取某网站上的歌曲

# -- coding: utf-8 --
"""
代码实现：
    1.发送请求
    2.获取数据
    3.解析数据
    4.保存数据
"""
import requests
import os
import time

# Root folder for downloads; each search keyword gets its own sub-folder.
file_path = 'musics'

# Disguise the script as a browser session for the search API.
# NOTE(review): the Cookie / csrf token pair is hard-coded and presumably
# expires -- refresh it from a browser session if searches start failing.
headers = {
    'Cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1659252846; '
              'Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1659252846; _ga=GA1.2.1319118617.1659252847; '
              '_gid=GA1.2.1887200127.1659252847; kw_token=98ZQ8KTOPZ',
    'csrf': '98ZQ8KTOPZ',
    'Host': 'www.kuwo.cn',
    'Referer': 'http://www.kuwo.cn/search/list?key=%E9%99%88%E5%A5%95%E8%BF%85',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 '
                  'Safari/537.36',
}

SEARCH_URL = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord'
PLAY_URL = 'http://www.kuwo.cn/api/v1/www/music/playUrl'
PAGE_SIZE = 30        # results requested per search page
MAX_PAGES = 3         # only the first three result pages are crawled
REQUEST_TIMEOUT = 10  # seconds; prevents a stalled connection from hanging the run

# Characters that cannot appear in a file name on Windows (a superset of the
# POSIX restrictions); each one is replaced with an underscore.
_INVALID_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def sanitize_filename(text):
    """Return *text* made safe to use as a single file or directory name.

    Path separators and other reserved characters become ``_``; surrounding
    whitespace and trailing dots are removed (so ``..`` cannot escape the
    download folder). An empty result is replaced by ``_``.
    """
    cleaned = str(text).translate(_INVALID_FILENAME_CHARS).strip().rstrip('.')
    return cleaned or '_'


def search_songs(keyword, page_num):
    """Fetch one page of search results and return its list of song dicts.

    Raises whatever ``requests`` raises on network/HTTP errors, and
    ``KeyError``/``TypeError``/``ValueError`` if the reply is not the
    expected JSON structure.
    """
    # Passing the query as ``params`` lets requests URL-encode the keyword,
    # so characters such as '&' or '#' cannot corrupt the query string.
    params = {
        'key': keyword,
        'pn': page_num,
        'rn': PAGE_SIZE,
        'httpsStatus': 1,
        'reqId': '23599db0-10a4-11ed-8a4c-b5ed1a6dd7c8',
    }
    response = requests.get(SEARCH_URL, params=params, headers=headers,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()['data']['list'] or []


def download_song(data, dir_name):
    """Download the song described by *data* into *dir_name* as an .mp3 file.

    *data* is one entry of the search result list; its ``rid``, ``name`` and
    ``artist`` keys are read. Any failure propagates as an exception.
    """
    params = {
        'mid': data['rid'],
        'type': 'music',
        'httpsStatus': 1,
        'reqId': '87b1ccc1-10a8-11ed-90ca-8da23cf7f79d',
    }
    # Step 1: ask the API for the actual audio URL of this song id.
    play_info = requests.get(PLAY_URL, params=params, timeout=REQUEST_TIMEOUT)
    play_info.raise_for_status()
    audio_url = play_info.json()['data']['url']

    # Step 2: fetch the binary audio; check the status so that an HTTP error
    # page is never written to disk as if it were an mp3.
    audio = requests.get(audio_url, timeout=REQUEST_TIMEOUT)
    audio.raise_for_status()

    file_name = f"{sanitize_filename(data['name'])}--{sanitize_filename(data['artist'])}.mp3"
    with open(os.path.join(dir_name, file_name), mode='wb') as f:
        f.write(audio.content)


def main():
    """Prompt for a keyword and download the matching songs page by page."""
    keyword = input('请输入您要搜索的歌手或歌曲：')
    dir_name = os.path.join(file_path, sanitize_filename(keyword))
    os.makedirs(dir_name, exist_ok=True)

    count = success = fail = 0
    for page_num in range(1, MAX_PAGES + 1):
        try:
            data_list = search_songs(keyword, page_num)
        except Exception as e:
            # A failed search page ends paging but still prints the summary.
            print(e)
            break

        for data in data_list:
            count += 1
            # Read the display fields up front so the failure message below
            # never refers to an unbound or stale name.
            name = data.get('name', '')
            artist = data.get('artist', '')
            try:
                download_song(data, dir_name)
            except Exception as e:
                # Best effort: report the failure and move on to the next song.
                print(e)
                print(f'第{count}首：{name}--{artist}--下载失败')
                fail += 1
                continue
            success += 1
            print(f'第{count}首：{name}--{artist}--下载完成')
            time.sleep(0.2)  # small pause between downloads to be polite

        # A short page means there are no further results.
        if len(data_list) < PAGE_SIZE:
            break
        time.sleep(0.5)

    print(f'下载完毕，成功下载{keyword}相关歌曲--{success}首')
    if fail:
        print(f'下载失败{fail}首')


if __name__ == '__main__':
    main()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

爬取效果：

在这里插入图片描述

参考文章：

https://blog.csdn.net/weixin_44011294/article/details/115439034

相关阅读:
新晋国产证书品牌——JoySSL
小程序对接停车场支付流程思考
Postgresql中的变长参数类型VARIADIC实例与限制
Unity --- 脚本组件 --- 生命周期与执行顺序
Win11怎么把桌面文件路径改到D盘
centos7 一键安装部署wvp-gb28181-pro
Java23种设计模式-创建型模式之抽象工厂模式
数据在内存中的存储
1. Spring Boot 3 入门学习教程之开发第一个 Spring Boot 应用程序
JavaScript 关联数组

原文地址：https://blog.csdn.net/username666/article/details/126146450