import csv
import requests
import re
import json
import pprint
# Scraper for 51job search result pages: fetches pages 1-5 of one search
# URL, pulls the JSON blob embedded in each page, and appends one CSV row
# per job entry to OUTPUT_PATH.

OUTPUT_PATH = '51job.csv'

# 'utf-8-sig' works on every platform and lets Excel auto-detect the
# encoding. The previous codec name 'ANSI' is a Windows-only alias of
# 'mbcs' and raises LookupError elsewhere.
# NOTE(review): an existing 51job.csv written with the old encoding should
# be converted or removed before appending to it.
OUTPUT_ENCODING = 'utf-8-sig'

# Column order of the CSV file; build_row() produces exactly these keys.
FIELDNAMES = [
    '职位信息',
    '基本信息',
    '公司名称',
    '公司类型',
    '公司规模',
    '公司性质',
    '公司福利',
    '职位薪资',
    '发布日期',
    '职位详情',
]

# Search URL template; ``{page}`` is the 1-based result page number.
# The query parameter is ``degreefrom`` (the pasted source had the
# ``&deg`` prefix mangled into a degree sign).
SEARCH_URL = (
    'https://search.51job.com/list/'
    '151100%252c010000,000000,0000,00,9,99,python,2,{page}.html'
    '?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
    '&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
)

# Request headers are identical for every page, so build them once.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.81 Safari/537.36 Edg/104.0.1293.47'
}

# The page embeds its data as ``window.__SEARCH_RESULT__ = {...}</script>``.
# The lazy group needs the closing tag as an anchor; without it ``(.*?)``
# always matches the empty string.
SEARCH_RESULT_RE = re.compile(
    r'window\.__SEARCH_RESULT__\s*=\s*(.*?)</script>', re.S
)

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10


def extract_search_result(html):
    """Return the list of job entries embedded in a search result page.

    Args:
        html: The page source as text.

    Returns:
        The value of the ``engine_jds`` key of the embedded JSON object,
        or an empty list when the page holds no ``__SEARCH_RESULT__``
        assignment or the key is absent.

    Raises:
        json.JSONDecodeError: If the embedded payload is not valid JSON.
    """
    match = SEARCH_RESULT_RE.search(html)
    if match is None:
        return []
    payload = json.loads(match.group(1))
    return payload.get('engine_jds', [])


def build_row(job):
    """Map one job entry to a CSV row dict keyed by FIELDNAMES.

    Missing keys become empty strings so that a single incomplete entry
    does not abort the whole run.

    Args:
        job: A dict taken from the ``engine_jds`` list.

    Returns:
        A dict with exactly the keys in FIELDNAMES, in that order.
    """
    # attribute_text is a list of short strings; store it as one
    # '|'-separated cell rather than the Python repr of the list.
    job_info = '|'.join(job.get('attribute_text') or [])
    return {
        '职位信息': job.get('job_name', ''),
        '基本信息': job_info,
        '公司名称': job.get('company_name', ''),
        '公司类型': job.get('companyind_text', ''),
        '公司规模': job.get('companysize_text', ''),
        '公司性质': job.get('companytype_text', ''),
        '公司福利': job.get('jobwelf', ''),
        '职位薪资': job.get('providesalary_text', ''),
        '发布日期': job.get('updatedate', ''),
        '职位详情': job.get('job_href', ''),
    }


def main():
    """Fetch result pages 1-5 and append every job entry to the CSV file.

    Raises:
        requests.RequestException: On network failure, timeout, or an
            HTTP error status.
    """
    with open(OUTPUT_PATH, mode='a', encoding=OUTPUT_ENCODING, newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        # The file is opened in append mode: only write the header when
        # the file is new/empty, otherwise every run would repeat it.
        if f.tell() == 0:
            csv_writer.writeheader()

        for page in range(1, 6):
            response = requests.get(
                url=SEARCH_URL.format(page=page),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            jobs = extract_search_result(response.text)
            if not jobs:
                # Typically means the page layout changed or the request
                # was served a different (e.g. verification) page.
                print(f'page {page}: no search results found, skipping')
                continue

            for job in jobs:
                dit = build_row(job)
                print(dit)
                csv_writer.writerow(dit)


if __name__ == '__main__':
    main()