import requests
# Data endpoint that returns the hero list as JSON.
response = requests.get(
    'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js?ts=2767546',
    timeout=10,  # without a timeout a stalled connection blocks forever
)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
# Decode the JSON payload returned by the endpoint.
result = response.json()
# Print the name of every hero in the list.
for hero in result['hero']:
    print(hero['name'])
import requests
def download(img_url: str, name: str) -> None:
    """Download one image and save it as ``files/<name>.jpg``.

    Args:
        img_url: Address of the image to fetch.
        name: Base file name without extension. Any '/' is removed so the
            name cannot be interpreted as a sub-directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os  # local import: the file only imports os further down

    res = requests.get(img_url, timeout=10)
    # Do not store an HTTP error page under a .jpg name.
    res.raise_for_status()
    # open() does not create missing directories.
    os.makedirs('files', exist_ok=True)
    safe_name = name.replace('/', '')
    with open(f'files/{safe_name}.jpg', 'wb') as f:
        f.write(res.content)
if __name__ == '__main__':
    # Fetch the data of hero 1; the payload carries the hero's skin list.
    response = requests.get(
        'https://game.gtimg.cn/images/lol/act/img/js/hero/1.js',
        timeout=10,
    )
    response.raise_for_status()
    result = response.json()
    for skin in result['skins']:
        name = skin['name']
        # Fall back to the chroma image when the skin has no main image.
        img_url = skin['mainImg'] or skin['chromaImg']
        if not img_url:
            # Neither image is set; requesting an empty URL would raise.
            continue
        download(img_url, name)
# Create a folder from code:
import os
# makedirs also creates the missing parent folder (os.mkdir would raise
# FileNotFoundError), and exist_ok=True makes the call a no-op when the
# folder is already there.
os.makedirs('所有英雄的皮肤/abc', exist_ok=True)
# 1. Get the ids of all heroes
def get_all_hero_id() -> list:
    """Return the ``heroId`` of every entry in the hero-list endpoint.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    res = requests.get(url, timeout=10)  # bounded wait instead of hanging forever
    res.raise_for_status()
    return [hero['heroId'] for hero in res.json()['hero']]
def get_one_hero_skins(hero_id: str) -> None:
    """Download every skin image of one hero into the hero's own folder.

    Images are written to ``所有英雄的皮肤/<hero name>/<skin name>.jpg``.

    Args:
        hero_id: Id of the hero; it is inserted into the hero data URL.

    Raises:
        requests.HTTPError: If the data endpoint or an image request answers
            with an error status.
    """
    # 1. Request the data of the given hero
    url = f'https://game.gtimg.cn/images/lol/act/img/js/hero/{hero_id}.js'
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    result = res.json()

    # 2. Create the folder for this hero
    # '/' is stripped for the same reason as in the skin names below.
    hero_name = result['hero']['name'].replace('/', '')
    hero_dir = f'所有英雄的皮肤/{hero_name}'
    # makedirs also creates the parent folder and tolerates an existing one.
    os.makedirs(hero_dir, exist_ok=True)

    # 3. Download all skins of this hero
    # 1) Walk the skins to get each name and image address
    for skin in result['skins']:
        skin_name = skin['name'].replace('/', '')  # a '/' in the name would break the path
        # Fall back to the chroma image when the skin has no main image.
        skin_img = skin['mainImg'] or skin['chromaImg']
        if not skin_img:
            # Neither image is set; requesting an empty URL would raise.
            continue
        # 2) Download one picture
        img_res = requests.get(skin_img, timeout=10)
        img_res.raise_for_status()
        with open(f'{hero_dir}/{skin_name}.jpg', 'wb') as f:
            f.write(img_res.content)
        print('下载完成!')
if __name__ == '__main__':
    # Only the first five hero ids are processed.
    for hero_id in get_all_hero_id()[:5]:
        get_one_hero_skins(hero_id)
from selenium.webdriver import Chrome #控制浏览器模块
# Check the browser version: chrome://version/
# 1) Create the browser object (if the browser object is a global variable,
#    the browser does not close automatically)
b = Chrome()
try:
    # 2) Open the page (open whichever page holds the data you want to crawl)
    b.get('https://movie.douban.com/top250')
    # 3) Get the page source (this is always what has been loaded in the page)
    print(b.page_source)
finally:
    # 4) Close the browser; quit() also shuts down the driver session, and
    #    running it in `finally` releases the browser even if a step above fails
    b.quit()
from selenium.webdriver import Chrome
from time import sleep
# find_element_by_* helpers were removed in Selenium 4.3+; By-based locators
# work on both old and new releases.
from selenium.webdriver.common.by import By

b = Chrome()
try:
    b.get('https://www.jd.com')
    # 1) Find the input box
    input_tag = b.find_element(By.ID, 'key')
    # 2) Type into the input box; the trailing '\n' presses Enter
    input_tag.send_keys('电脑\n')
    sleep(2)
    print(b.page_source)
    # 1) Find the tag that needs to be clicked
    btn = b.find_element(By.CSS_SELECTOR, '#navitems-group2 .b')
    # 2) Click the tag
    btn.click()
    input('是否结束:')
finally:
    # Always release the browser and its driver process, even on failure.
    b.quit()
from selenium.webdriver import Chrome
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from time import sleep
from bs4 import BeautifulSoup
import csv
# find_element_by_* helpers were removed in Selenium 4.3+; By-based locators
# work on both old and new releases.
from selenium.webdriver.common.by import By


def _parse_goods(page_source):
    """Return ``[name, price]`` rows for every product card in a result page."""
    soup = BeautifulSoup(page_source, 'lxml')
    rows = []
    for goods in soup.select('#J_goodsList>ul>li>div.gl-i-wrap'):
        name_tag = goods.select_one('.p-name em')
        price_tag = goods.select_one('.p-price i')
        if name_tag is None or price_tag is None:
            # select_one returns None when a card lacks the tag; skip the
            # card instead of crashing on `.text`.
            continue
        rows.append([name_tag.text, price_tag.text])
    return rows


b = Chrome()
try:
    b.get('https://www.jd.com')
    input_tag = b.find_element(By.ID, 'key')
    input_tag.send_keys('毛线\n')
    sleep(1)

    # Parse the first page
    all_data = _parse_goods(b.page_source)

    # Click "next page"
    next_btn = b.find_element(By.CLASS_NAME, 'pn-next')
    next_btn.click()
    sleep(1)

    # Parse the second page
    all_data.extend(_parse_goods(b.page_source))

    # `with` closes the file so every row is flushed to disk.
    with open('files/毛线.csv', 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(all_data)

    input('结束:')
finally:
    # Always release the browser and its driver process, even on failure.
    b.quit()
# 1. Basic operations
# find_element_by_* helpers were removed in Selenium 4.3+; By-based locators
# work on both old and new releases.
from selenium.webdriver.common.by import By

b = Chrome()  # create the browser
try:
    b.get('https://www.cnki.net/')  # open CNKI
    search_tag = b.find_element(By.ID, 'txt_SearchText')  # get the input box
    search_tag.send_keys('数据分析\n')  # type '数据分析', then press Enter
    sleep(1)  # wait after the page switches

    # Get all tags that will be clicked: a tag that must be clicked or typed
    # into has to be obtained through the browser object.
    all_result = b.find_elements(By.CSS_SELECTOR, '.result-table-list .name>a')

    # 2. Switching tabs
    # Note: the browser object (b) keeps pointing at the tab it started on
    # unless it is switched in code.
    # Open the first two search results one after the other.
    for link in all_result[:2]:
        link.click()  # this opens a new tab
        sleep(1)
        # 1) All windows (tabs) of the browser: b.window_handles
        # 2) Switch to the newest tab
        b.switch_to.window(b.window_handles[-1])
        # 3) Parse the content
        soup = BeautifulSoup(b.page_source, 'lxml')
        result = soup.select_one('#ChDivSummary').text
        print(result)
        # Close the current (last) tab; closing does not move the browser
        # object, so switch back to the first tab explicitly. Otherwise the
        # next command would target a window that no longer exists.
        b.close()
        b.switch_to.window(b.window_handles[0])

    input('结束:')
finally:
    # Always release the browser and its driver process, even on failure.
    b.quit()
from selenium.webdriver import Chrome
from time import sleep
from bs4 import BeautifulSoup
import csv
def ziw():
    """Search CNKI for '数据分析' and collect title/abstract text of up to 20 results.

    Each result link is clicked, the newest tab is parsed, then the tab is
    closed and control returns to the first tab.

    Returns:
        A list of ``[title, abstract]`` rows, one per result that was opened.
    """
    # find_element_by_* helpers were removed in Selenium 4.3+; By-based
    # locators work on both old and new releases.
    from selenium.webdriver.common.by import By

    b = Chrome()
    list_all = []
    try:
        b.get('https://www.cnki.net')
        input_list = b.find_element(By.ID, 'txt_SearchText')
        input_list.send_keys('数据分析\n')
        sleep(1)
        for i in range(20):
            all_result = b.find_elements(By.CSS_SELECTOR, '.result-table-list .name>a')
            if i >= len(all_result):
                # Fewer results than requested; stop instead of raising IndexError.
                break
            all_result[i].click()
            sleep(1)
            b.switch_to.window(b.window_handles[-1])
            first = BeautifulSoup(b.page_source, 'lxml')
            # Use the '.wx-tit' block when the page has no '#ChDivSummary'
            # element (explicit check instead of a bare `except:`).
            summary_tag = first.select_one('#ChDivSummary')
            if summary_tag is None:
                summary_tag = first.select_one('.wx-tit')
            result1 = summary_tag.text
            result2 = first.select_one('.wx-tit h1').text
            list_all.append([result2, result1])
            # Close the detail tab and point the driver back at the first tab.
            b.close()
            b.switch_to.window(b.window_handles[0])
    finally:
        # Always release the browser and its driver process, even on failure.
        b.quit()
    return list_all
def file(list_all):
    """Write the given rows to ``files/中国知网摘要前20.csv`` below a header row.

    Args:
        list_all: Iterable of rows; each row is written as one CSV line
            under the header ``['题名', '摘要']``.
    """
    import os  # local import keeps the function self-contained

    # open() does not create missing directories.
    os.makedirs('files', exist_ok=True)
    with open('files/中国知网摘要前20.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['题名', '摘要'])
        writer.writerows(list_all)
if __name__ == '__main__':
    # Scrape the rows, then hand them straight to the CSV writer.
    file(ziw())