安装lxml和requests
pip3 install lxml requests
基本用法
# Basic lxml + XPath walkthrough: fetch a page, parse it, run a few queries.
import requests
from lxml import etree

url = 'xxx'  # placeholder; replace with the address of the page to query
res = requests.get(url).text
# Parse the downloaded markup into an element tree that supports .xpath()
html = etree.HTML(res)
# Select every <div> element in the document
xpath = '//div'
print(html.xpath(xpath))
# Select <span class="yy"> elements located anywhere under <div id="xx">
xpath = '//div[@id="xx"]//span[@class="yy"]'
print(html.xpath(xpath))
# Collect the text nodes of those spans, including text in nested elements
xpath = '//div[@id="xx"]//span[@class="yy"]//text()'
print(html.xpath(xpath))
# Collect href attribute values found on those spans or their descendants
xpath = '//div[@id="xx"]//span[@class="yy"]//@href'
print(html.xpath