
import json

import openpyxl
import requests

# Endpoint that returns the recommended-book list for one book tag.
url = 'https://www.ptpress.com.cn/recommendBook/getRecommendBookListForPortal?bookTagId=d5cbb56d-09ef-41f5-9110-ced741048f5f'

# Browser-like headers sent with every request; json_detail() reuses them.
# NOTE(review): the Cookie value looks like session ids copied from a browser
# session -- confirm whether the site still accepts (or needs) it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.44',
    'Cookie': 'gr_user_id=796019e3-dc58-40f5-a6df-892a38008bcd; '
              'acw_tc=2760822416373059896443147efcf3dd457a5539d63a07fdafd12f3041cd93; '
              'JSESSIONID=A0FD72E84771D06417CF145392DAA679; '
              'gr_session_id_9311c428042bb76e=1a1d8cc2-0de9-4409-adc4-07de4cdb503f;'
              ' gr_session_id_9311c428042bb76e_1a1d8cc2-0de9-4409-adc4-07de4cdb503f=true',
}

# Without a timeout requests waits indefinitely on an unresponsive server.
text_json = requests.get(url=url, headers=headers, timeout=10)
# Surface HTTP errors here instead of failing later while decoding the body.
text_json.raise_for_status()
# Decoded list response; its 'data' entry is what save_execl() iterates.
res = json.loads(text_json.content)
def save_execl(res, filename='生活类新书基本信息.xlsx'):
    """Write the recommended-book list to an Excel workbook.

    Creates a workbook whose active sheet is titled "人民邮电新书推荐", appends
    a header row and then one row per entry of ``res['data']``.  The author
    and discounted price of each book come from ``json_detail``, which is
    called once per book.

    Args:
        res: Decoded list response.  ``res['data']`` is iterated and every
            entry must provide the keys ``'bookId'`` and ``'bookName'``.
        filename: Path of the workbook to write.  Defaults to the file name
            that was previously hard-coded.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "人民邮电新书推荐"
    sheet.append(['书名', '作者', '价格'])

    # Treat a null ``data`` as an empty list so that a header-only workbook
    # is still written instead of raising TypeError.
    for book in res['data'] or []:
        author, discount_price = json_detail(book['bookId'])
        sheet.append([book['bookName'], author, discount_price])

    workbook.save(filename)


def json_detail(bookid):
    """Fetch the author and discounted price of a single book.

    Sends a POST request to the book-details endpoint with ``bookId`` passed
    as a query parameter and the module-level ``headers``, then prints the
    book name, author and price.

    Args:
        bookid: Identifier of the book, sent as the ``bookId`` parameter.

    Returns:
        A tuple ``(author, discountPrice)`` read from the response's
        ``'data'`` object.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    detail_url = 'https://www.ptpress.com.cn/bookinfo/getBookDetailsById'
    # A timeout prevents one stalled request from blocking the whole export.
    response = requests.post(
        url=detail_url,
        headers=headers,
        params={'bookId': bookid},
        timeout=10,
    )
    response.raise_for_status()

    detail = json.loads(response.content)['data']
    author = detail['author']
    discount_price = detail['discountPrice']
    print(detail['bookName'], author, discount_price)
    return author, discount_price


# Script entry: export the book list fetched above to the Excel workbook.
save_execl(res)
爬取结果:


