1、简单介绍自定义函数
def 函数名(参数1, 参数2):
    '''函数注释'''
    函数体
    return 函数返回值
def jiafa(a, b):
    """Return ``a + b``; ``a`` and ``b`` are the formal parameters."""
    return a + b
if __name__ == '__main__':
    # Call the function with the actual arguments 1 and 2 and show the result.
    print(jiafa(1, 2))
2、采用xpath解析官网数据,完整代码如下:
import requests
from lxml import etree
import csv
def qingqiu(url, timeout=10):
    """Download a page and return its text plus the headers that were sent.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection and freeze the whole crawl.

    Returns:
        A ``(html, headers)`` tuple: the response body decoded as UTF-8 and
        the request-header dict, so the caller can reuse the same headers
        for follow-up requests.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode as UTF-8 explicitly rather than trusting the guessed encoding.
    response.encoding = 'utf-8'
    return response.text, headers
def jiexi(html, headers, timeout=10):
    """Parse a listing page and gather one record per listed article.

    For every article link found on the listing page, the article's own page
    is fetched as well in order to read its click counter.

    Args:
        html: Source text of the listing page.
        headers: Request headers reused for the per-article requests.
        timeout: Seconds to wait for each per-article request; prevents a
            stalled server from hanging the crawl.

    Returns:
        A ``zip`` iterator of ``(title, time, url, clicks)`` tuples. It can be
        consumed only once, and it stops at the shortest of the underlying
        lists.

    Raises:
        requests.exceptions.RequestException: If a per-article request fails
            or times out.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = 'https://www.qnzy.net/'

    select = etree.HTML(html)  # build the parser for the listing page
    title = select.xpath('//*[@class="text"]/h3/a/@title')
    shijian = select.xpath('//*[@class="option"]/text()')
    hrefs = select.xpath('//*[@class="text"]/h3/a/@href')

    # urljoin copes with relative, root-relative ("/x") and absolute hrefs,
    # whereas plain concatenation yields malformed URLs for the last two.
    addrs = [urljoin(base_url, href) for href in hrefs]

    dianji_all = []
    for addr in addrs:
        res_d = requests.get(addr, headers=headers, timeout=timeout)
        res_d.encoding = 'utf-8'
        detail = etree.HTML(res_d.text)  # build the parser for the article
        dianji = detail.xpath('//*[@class="item_views"]/text()')
        try:
            clicks = int(dianji[0])
        except (IndexError, ValueError):
            # A missing counter (empty result) or a non-numeric one is
            # recorded as 0 so a single odd page does not abort the crawl.
            clicks = 0
        dianji_all.append(clicks)

    return zip(title, shijian, addrs, dianji_all)
def save(result):
    """Append every row from ``result`` to ``new_all.csv`` (UTF-8 encoded)."""
    with open('new_all.csv', "a", newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerows(result)
if __name__ == '__main__':  # program entry point
    T_head = ['标题', '时间', '地址', '点击']
    with open('new_all.csv', "a", newline='', encoding='utf-8') as f:
        # Append mode starts at the end of the file, so position 0 means the
        # file is new or empty. Writing the header only then keeps repeated
        # runs from scattering duplicate header rows through the data.
        if f.tell() == 0:
            csv.writer(f).writerow(T_head)

    start_ye = int(input("请输入起始页:"))
    end_ye = int(input("请输入终止页:"))
    # The prompt asks for the last page to crawl, so the end page is
    # included (a bare range(start, end) would silently skip it).
    for y in range(start_ye, end_ye + 1):
        print("正在爬取第{}页".format(y))
        # The first listing page has no "list-N" suffix in its address.
        if y == 1:
            url = 'https://www.qnzy.net/html/1090/'
        else:
            url = 'https://www.qnzy.net/html/1090/list-{}.html'.format(y)
        html, headers = qingqiu(url)  # request the listing page
        result = jiexi(html, headers)  # parse it and the linked articles
        save(result)  # append the rows to the CSV file