from threading import Thread
from time import sleep
from datetime import datetime
from random import randint
def download(name):
    print(f'{name} started downloading: {datetime.now()}')
    sleep(randint(2, 7))
    print(f'{name} finished downloading: {datetime.now()}')
if __name__ == '__main__':
    t1 = Thread(target=download, args=('明日战纪',))
    t2 = Thread(target=download, args=('斗罗大陆',))
    t3 = Thread(target=download, args=('独行月球',))
    # Example 1: start all three downloads, then wait for each one
    t1.start()
    t2.start()
    t3.start()
    # 1. child_thread.join() - block the current thread until that child thread's task finishes
    t1.join()
    t2.join()
    t3.join()
    print('============== All downloads finished! ============')
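    # Aside (a sketch, not from the original notes): join() also accepts an
    # optional timeout in seconds; is_alive() then tells you whether the
    # thread actually finished or the wait simply timed out:
    # t1.join(timeout=1)
    # if t1.is_alive():
    #     print('t1 is still running')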
    # Example 2: start the third download only after the first two finish.
    # A Thread object can only be started once, so fresh Thread objects
    # are created here instead of reusing the ones from Example 1.
    t1 = Thread(target=download, args=('明日战纪',))
    t2 = Thread(target=download, args=('斗罗大陆',))
    t3 = Thread(target=download, args=('独行月球',))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    t3.start()
    t3.join()
    print('============== All downloads finished! ============')
# Import the required modules
from threading import Thread, current_thread
from time import sleep
from datetime import datetime
from random import randint
from concurrent.futures import ThreadPoolExecutor
# A function that simulates downloading a movie
def download(name):
    print(f'{name} started downloading: {datetime.now()}', current_thread())
    sleep(randint(2, 7))
    print(f'{name} finished downloading: {datetime.now()}')
# Start downloading the movies
# Plan 1: download 1000 movies by creating threads directly, 100 per batch
if __name__ == '__main__':
    num = 0
    for _ in range(10):
        # Save this batch's child threads in a list
        ts = []
        for x in range(100):
            num += 1
            t = Thread(target=download, args=(f'Movie {num}',))
            ts.append(t)
            t.start()
        # Join every saved child thread so the whole batch finishes
        # before the next batch starts
        for x in ts:
            x.join()
    print('All downloads finished')
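# Aside (a minimal sketch, not part of the original notes): instead of joining
# in batches of 100, a Semaphore can cap how many downloads run at once while
# every thread is started up front. Uses the download() defined above.
from threading import Semaphore

sem = Semaphore(10)  # at most 10 downloads run concurrently

def limited_download(name):
    with sem:  # blocks while 10 downloads are already in flight
        download(name)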
# Plan 2: use a thread pool instead of one thread per movie
if __name__ == '__main__':
    pool = ThreadPoolExecutor(3)
    # Add one task at a time: submit
    pool.submit(download, '肖生克的救赎')
    pool.submit(download, '霸王别姬')
    # Add several tasks at once: map
    pool.map(download, ['V字仇杀队', '恐怖游轮', '沉默的羔羊'])
    # Shut down the pool; shutdown() waits for all submitted tasks to
    # finish before returning, so the final print runs last
    pool.shutdown()
    print('============== Done! =============')
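# Aside (a minimal sketch, not part of the original notes): submit() returns a
# Future, so results can be collected as tasks finish; the with-statement calls
# shutdown() automatically. square() is a made-up example task.
from concurrent.futures import ThreadPoolExecutor, as_completed

def square(n):
    return n * n

if __name__ == '__main__':
    with ThreadPoolExecutor(3) as pool:
        futures = [pool.submit(square, n) for n in range(5)]
        for fut in as_completed(futures):  # yields futures as they complete
            print(fut.result())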
# Import the required modules
import requests
from bs4 import BeautifulSoup
import csv
from concurrent.futures import ThreadPoolExecutor

# Fetch one page of data
def get_net_data(url: str):
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    result = response.text
    # Hand the HTML straight to the parsing function
    analysis_data(result)
# Parse the data
def analysis_data(html: str):
    # Collect this page's rows in a list
    all_data = []
    # Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(html, 'lxml')
    all_div = soup.select('.grid_view>li>div.item')
    for div in all_div:
        rank = div.select_one('div.pic>em').text
        name = div.select_one('div.hd>a>span.title').text
        link = div.select_one('div.pic>a').attrs['href']
        info = div.select_one('.bd>p').text.strip().split('\n')[-1].strip()
        info_list = info.split('/')
        time = info_list[0]
        country = info_list[-2]
        category = info_list[-1]
        score = div.select_one('.rating_num').text
        comment_count = div.select('.star>span')[-1].text[:-3]
        intro_span = div.select_one('.inq')
        if intro_span:
            intro = intro_span.text
        else:
            intro = ''
        all_data.append([int(rank), name, link, score, time.strip(), country.strip(), category.strip(), comment_count, intro])
    # Append this page's rows to the master list
    films.append(all_data)
if __name__ == '__main__':
    # Master list that collects every page's rows
    films = []
    # Open a csv file for persistence; newline='' avoids blank rows on Windows
    f = open('file/豆瓣数据.csv', 'w', encoding='utf-8', newline='')
    writer = csv.writer(f)
    writer.writerow(['Rank', 'Title', 'Link', 'Score', 'Year', 'Country', 'Genre', 'Reviews', 'Quote'])
    # Use a thread pool to fetch the 10 pages concurrently
    pool = ThreadPoolExecutor(10)
    for i in range(0, 250, 25):
        url = f'https://movie.douban.com/top250?start={i}&filter='
        pool.submit(get_net_data, url)
    pool.shutdown()
    # Sort the pages: lists compare element by element, so each page's
    # first rank decides its position
    films.sort()
    # Write the data to the file
    for x in films:
        writer.writerows(x)
    # Save the file
    f.close()
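# Aside (a defensive variant, not in the original notes): appending to the
# shared films list from several threads is safe in CPython because
# list.append is atomic under the GIL, but an explicit Lock makes the
# intent clear and stays correct on other interpreters:
from threading import Lock

films_lock = Lock()

def append_page(page_rows):
    with films_lock:  # only one thread mutates the shared list at a time
        films.append(page_rows)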
Step 1: pick a folder to hold the virtual environment
Step 2: cd into that folder
Step 3: create the virtual environment
Step 4: activate the virtual environment
Step 5: exit the virtual environment - deactivate
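A minimal sketch of the commands behind steps 3-5, assuming the standard-library venv module and an environment named venv (the name is arbitrary):
python -m venv venv             # step 3: create the environment
venv\Scripts\activate           # step 4: activate it (Windows)
source venv/bin/activate        # step 4: activate it (macOS/Linux)
deactivate                      # step 5: leave the environment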