免责声明:本文仅做技术交流与学习...
爬取完成后,可以结合暗黑搜索引擎等工具做进一步搜索。
import time

import requests
from bs4 import BeautifulSoup

# Crawl the paginated ranking table and append the link text found in each
# table row to edu_name.txt, one name per line.
#
# Pages 1 through 19 are fetched (range's upper bound is exclusive).
for i in range(1, 20):
    url = f'https://src.sjtu.edu.cn/rank/firm/0/?page={i}'
    print(f"正在获取第{i}页数据")
    # A timeout keeps one stalled connection from hanging the whole crawl;
    # network errors still propagate and stop the script, as before.
    s = requests.get(url, timeout=10).text
    soup = BeautifulSoup(s, 'html.parser')

    # Collect the text of the first <a> inside every <tr>. Rows without a
    # link yield None for `.a` and are skipped explicitly instead of being
    # hidden behind a bare `except`.
    names = []
    for edu in soup.find_all('tr'):
        link = edu.a
        if link is None:
            continue
        names.append(link.text)

    # Open the output file once per page rather than once per row. The file
    # is only touched when there is something to write, so an empty page
    # does not create it.
    if names:
        with open('edu_name.txt', 'a', encoding='utf-8') as f:
            f.writelines(name + '\n' for name in names)
    print(f"{i}页已经写入!!!")