目录
def get_soup(url, timeout=10):
    """Fetch *url* and return its body parsed by BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely, which would
            stall the calling thread and any ``join()`` waiting on it.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'html.parser'`` backend.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
-
- def parsing_information(ids,soup,count):
- '''传入上一节的解析内容,对具体信息进行解析'''
- # 获取歌单标题,替换英文分割符
- title = ids[count]['title'].replace(',', ',')
- # 获取歌单id
- id = ids[count]['href']
- #获取创建者昵称
- nickname = soup.select('.s-fc7')[0].get_text()
- #获取介绍
- description = soup.select('p')[1].get_text()
- #获取歌曲数量
- song_num = soup.select('span span')[0].get_text()
- #获取播放量
- play_num = soup.select('strong')[0].get_text()
- #获取分享次数
- share_num = soup.select('a i')[2].get_text()
- #获取评论次数
- comment_num = soup.select('a i')[4].get_text() #评论次数
- list1 = [title,id,nickname,description,song_num,play_num,share_num,comment_num]
- return list1
-
def save_picture(soup, save_dir='E:/学习文件/大三上/现代程序设计/第十二次作业/图片'):
    """Download the image referenced by the page and store it as a .jpg.

    The image URL is the ``data-src`` attribute of the first ``.j-img``
    element inside the first ``#m-playlist`` element.

    Args:
        soup: Parsed playlist detail page.
        save_dir: Directory that receives the file. Defaults to the path the
            original code hard-coded; it is created if it does not exist.

    Raises:
        IndexError: If the page has no ``#m-playlist`` / ``.j-img`` element.
    """
    container = soup.select('#m-playlist')[0]
    img_url = container.select('.j-img')[0]['data-src']
    img = requests.get(url=img_url, timeout=10)

    # Make sure the target directory itself exists. The original checked
    # '<dir>/<name>' but created '<dir><name>' (missing slash), which only
    # littered sibling directories.
    os.makedirs(save_dir, exist_ok=True)

    # File name is a random float rendered as text, as in the original.
    name_id = str(random.random())
    # The original wrote `name_id + + ".jpg"`, a unary plus on a string that
    # raised TypeError on every call.
    target = os.path.join(save_dir, name_id + ".jpg")
    with open(target, "wb") as f:
        f.write(img.content)
传入的 q 是多线程之间共享的队列;producer 在函数末尾调用 q.put(),把解析出的歌单链接标签列表放入队列。
def producer(q, url):
    """Parse the listing page at *url* and enqueue its playlist links.

    Every element matching the ``'.dec a'`` selector is collected, and the
    whole result is placed on ``q`` as a single item.
    """
    listing_page = get_soup(url)
    # Tags that carry the playlist detail-page links.
    playlist_links = listing_page.select('.dec a')
    q.put(playlist_links)
def consumer(q):
    """Take one batch of playlist links from *q* and write it to playlist.csv.

    For every link in the batch the detail page is fetched, parsed with
    ``parsing_information``, its image is stored with ``save_picture`` and
    the parsed row is appended to the CSV.

    Args:
        q: Queue whose items are collections of link tags (each read through
            its ``'href'`` key), or ``None`` as a "no more work" sentinel.

    Returns:
        None. A ``None`` sentinel makes the function return immediately
        without touching the output file.
    """
    ids = q.get()
    if ids is None:
        # Sentinel from the main thread: iterating it would raise TypeError,
        # and opening the file in 'w' mode would needlessly truncate it.
        return

    row_topname = ['歌曲标题', 'id', '昵称', '简介', '歌曲数量', '播放量', '分享次数', '评论次数']
    # newline='' is what the csv module expects; without it Windows output
    # gets an empty line after every row. `with` closes the file on errors.
    # NOTE(review): each consumer opens the same file in 'w' mode, so
    # concurrent consumers overwrite each other - confirm intended design.
    with open('playlist.csv', 'w', encoding='utf-8', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(row_topname)
        for count, link in enumerate(ids):
            # Detail-page address built from the link the producer passed on.
            url = 'https://music.163.com/' + link['href']
            soup = get_soup(url)
            # Parse the detail page into a row of fields.
            row = parsing_information(ids, soup, count)
            # Store the image before the row is written, as before.
            save_picture(soup)
            csv_writer.writerow(row)
def get_soup(url, timeout=10):
    """Fetch *url* and return its body parsed by BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely, which would
            stall the calling thread and any ``join()`` waiting on it.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'html.parser'`` backend.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
-
- def parsing_information(ids,soup,count):
- '''传入上一节的解析内容,对具体信息进行解析'''
- # 获取歌单标题,替换英文分割符
- title = ids[count]['title'].replace(',', ',')
- # 获取歌单id
- id = ids[count]['href']
- #获取创建者昵称
- nickname = soup.select('.s-fc7')[0].get_text()
- #获取介绍
- description = soup.select('p')[1].get_text()
- #获取歌曲数量
- song_num = soup.select('span span')[0].get_text()
- #获取播放量
- play_num = soup.select('strong')[0].get_text()
- #获取分享次数
- share_num = soup.select('a i')[2].get_text()
- #获取评论次数
- comment_num = soup.select('a i')[4].get_text() #评论次数
- list1 = [title,id,nickname,description,song_num,play_num,share_num,comment_num]
- return list1
-
def save_picture(soup, save_dir='E:/学习文件/大三上/现代程序设计/第十二次作业/图片'):
    """Download the image referenced by the page and store it as a .jpg.

    The image URL is the ``data-src`` attribute of the first ``.j-img``
    element inside the first ``#m-playlist`` element.

    Args:
        soup: Parsed playlist detail page.
        save_dir: Directory that receives the file. Defaults to the path the
            original code hard-coded; it is created if it does not exist.

    Raises:
        IndexError: If the page has no ``#m-playlist`` / ``.j-img`` element.
    """
    container = soup.select('#m-playlist')[0]
    img_url = container.select('.j-img')[0]['data-src']
    img = requests.get(url=img_url, timeout=10)

    # Make sure the target directory itself exists. The original checked
    # '<dir>/<name>' but created '<dir><name>' (missing slash), which only
    # littered sibling directories.
    os.makedirs(save_dir, exist_ok=True)

    # File name is a random float rendered as text, as in the original.
    name_id = str(random.random())
    # The original wrote `name_id + + ".jpg"`, a unary plus on a string that
    # raised TypeError on every call.
    target = os.path.join(save_dir, name_id + ".jpg")
    with open(target, "wb") as f:
        f.write(img.content)
-
def producer(q, url):
    """Parse the listing page at *url* and enqueue its playlist links.

    Every element matching the ``'.dec a'`` selector is collected, and the
    whole result is placed on ``q`` as a single item.
    """
    listing_page = get_soup(url)
    # Tags that carry the playlist detail-page links.
    playlist_links = listing_page.select('.dec a')
    q.put(playlist_links)
-
def consumer(q):
    """Take one batch of playlist links from *q* and write it to playlist.csv.

    For every link in the batch the detail page is fetched, parsed with
    ``parsing_information``, its image is stored with ``save_picture`` and
    the parsed row is appended to the CSV.

    Args:
        q: Queue whose items are collections of link tags (each read through
            its ``'href'`` key), or ``None`` as a "no more work" sentinel.

    Returns:
        None. A ``None`` sentinel makes the function return immediately
        without touching the output file.
    """
    ids = q.get()
    if ids is None:
        # Sentinel from the main thread: iterating it would raise TypeError,
        # and opening the file in 'w' mode would needlessly truncate it.
        return

    row_topname = ['歌曲标题', 'id', '昵称', '简介', '歌曲数量', '播放量', '分享次数', '评论次数']
    # newline='' is what the csv module expects; without it Windows output
    # gets an empty line after every row. `with` closes the file on errors.
    # NOTE(review): each consumer opens the same file in 'w' mode, so
    # concurrent consumers overwrite each other - confirm intended design.
    with open('playlist.csv', 'w', encoding='utf-8', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(row_topname)
        for count, link in enumerate(ids):
            # Detail-page address built from the link the producer passed on.
            url = 'https://music.163.com/' + link['href']
            soup = get_soup(url)
            # Parse the detail page into a row of fields.
            row = parsing_information(ids, soup, count)
            # Store the image before the row is written, as before.
            save_picture(soup)
            csv_writer.writerow(row)
if __name__ == '__main__':
    # One listing-page URL per offset 0, 35, ..., 1330 (35 entries per page).
    url_list = [
        f'https://music.163.com/discover/playlist/?order=hot&cat=%E8%AF%B4%E5%94%B1&limit=35&offset={n}'
        for n in range(0, 1355, 35)
    ]
    q = Queue()

    # Producers: one thread per listing page, each enqueues one batch.
    plist = [Thread(target=producer, args=(q, url)) for url in url_list]
    for p in plist:
        p.start()
    # Wait for every producer. The original looped over `t` but called
    # `p.join()`, so only the last thread was ever joined.
    for p in plist:
        p.join()

    # Consumers: each handles exactly one batch, so start one per producer.
    # The original started 100; the surplus threads truncated playlist.csv
    # and then failed on the None sentinel.
    clist = [Thread(target=consumer, args=(q,)) for _ in plist]
    for c in clist:  # start the threads
        c.start()
    # One sentinel per consumer releases any thread left without a batch
    # (for example when a producer failed before calling put()).
    for _ in clist:
        q.put(None)
    # Keep the main thread alive until all output has been written.
    for c in clist:
        c.join()