1) Threads and processes
Process: a running application. Each process runs in its own dedicated, protected memory space.
Thread: the basic unit in which a process executes its tasks (every task in a process is executed on some thread).
Analogy: a process is a workshop, and threads are the workers inside it.
Every process has one thread by default, called the main thread.
2) Characteristics of threads
If multiple tasks run on a single thread, they execute serially (so a program with many tasks but only one thread runs inefficiently).
3) Multithreading
Multithreading means one process contains multiple threads. When several threads each run a task, the tasks run at the same time (in parallel, from the program's point of view).
4) How multithreading works:
A single CPU core can only schedule one thread at a time; multithreading is really the CPU switching rapidly between threads, which creates the illusion that they run simultaneously (what actually improves is CPU utilization).
A process has only one thread by default, the main thread; any additional threads are called child threads.
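As a minimal sketch of the main thread / child thread distinction (not part of the original notes), threading.current_thread() reports which thread a piece of code is running on:

from threading import Thread, current_thread

def task():
    # this runs on the child thread created below
    print('task is running on:', current_thread().name)

if __name__ == '__main__':
    print('top-level code runs on:', current_thread().name)  # MainThread
    t = Thread(target=task, name='child-1')
    t.start()
    t.join()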
from threading import Thread
from datetime import datetime
from time import sleep

# Simulated movie download, without creating extra threads
def download(name):
    print(f'{name} download started: {datetime.now()}')
    sleep(2)          # pretend the download takes 2 seconds
    print(f'{name} download finished: {datetime.now()}')

if __name__ == '__main__':
    download("天下第一")
    download("斗罗大陆")
    download("猪猪侠")
# 天下第一 download started: 2022-08-18 16:13:26.708714
# 天下第一 download finished: 2022-08-18 16:13:28.722060
# 斗罗大陆 download started: 2022-08-18 16:13:28.722060
# 斗罗大陆 download finished: 2022-08-18 16:13:30.722841
# 猪猪侠 download started: 2022-08-18 16:13:30.722841
# 猪猪侠 download finished: 2022-08-18 16:13:32.730465
from threading import Thread
from datetime import datetime
from time import sleep

# Simulated movie download, with one thread per movie
def download(name):
    print(f'{name} download started: {datetime.now()}')
    sleep(2)          # pretend the download takes 2 seconds
    print(f'{name} download finished: {datetime.now()}')

if __name__ == '__main__':
    t1 = Thread(target=download, args=("天下第一",))
    t2 = Thread(target=download, args=('斗罗大陆',))
    t3 = Thread(target=download, args=("猪猪侠",))
    t1.start()
    t2.start()
    t3.start()
# 天下第一 download started: 2022-08-18 16:15:08.847120
# 斗罗大陆 download started: 2022-08-18 16:15:08.847120
# 猪猪侠 download started: 2022-08-18 16:15:08.847120
# 天下第一 download finished: 2022-08-18 16:15:10.859699猪猪侠 download finished: 2022-08-18 16:15:10.859699
# 斗罗大陆 download finished: 2022-08-18 16:15:10.859699
All three downloads start at the same instant and the whole job finishes in about 2 seconds instead of roughly 6; the jumbled "finished" lines are simply two threads printing at almost the same moment.
Creating a thread object:
thread_object = Thread(target=function, args=tuple_of_arguments)
After creating a thread you still have to start it (start()).
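A minimal sketch of the full pattern (the join() call is standard threading API, not something the notes above use): create the Thread with a target function and an args tuple, start it, and optionally wait for it to finish.

from threading import Thread
from time import sleep

def work(task_name, seconds):
    sleep(seconds)
    print(f'{task_name} done')

if __name__ == '__main__':
    # args must be a tuple, even for a single argument: ("a",) not ("a")
    t = Thread(target=work, args=('task-1', 1))
    t.start()     # the thread only begins running after start()
    t.join()      # optional: block here until the thread finishes
    print('all work finished')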
"""
Author: 余婷
Time: 2022/8/18 16:30
Good Good Study, Day Day Up!
"""
# Example: fetch the Douban Top 250 list pages, one thread per page
import requests
from datetime import datetime
from threading import Thread

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

def get_one_page_data(page):
    print(f'Page offset {page} fetch started: {datetime.now()}')
    url = fr'https://movie.douban.com/top250?start={page}&filter='
    response = requests.get(url, headers=headers)
    print('Download complete!')
    print(f'Page offset {page} fetch finished: {datetime.now()}')

if __name__ == '__main__':
    # the list has 250 entries, 25 per page, so start = 0, 25, ..., 225
    for page in range(0, 250, 25):
        t = Thread(target=get_one_page_data, args=(page,))
        t.start()
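The loop above only starts the threads; the main thread does not wait for them. If you also want to know when every page has been fetched (to time the whole run, for instance), one common pattern is to keep the Thread objects and join() them. A minimal sketch, reusing get_one_page_data and the imports from the example above:

if __name__ == '__main__':
    threads = []
    for page in range(0, 250, 25):
        t = Thread(target=get_one_page_data, args=(page,))
        t.start()
        threads.append(t)
    # wait for every page thread to finish before reporting completion
    for t in threads:
        t.join()
    print(f'All pages fetched: {datetime.now()}')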
# Example: scrape the first page of the Douban Top 250 and write the results to CSV
import requests
from lxml import etree
import csv

def get_net_data():
    """Fetch the HTML of the first Douban Top 250 page."""
    url = 'https://movie.douban.com/top250'
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    return response.text

def analysis_data1(html: str):
    """Parse the page by extracting four parallel lists and zipping them into rows."""
    root = etree.HTML(html)
    names = root.xpath('//ol[@class="grid_view"]/li/div/div[@class="info"]/div[1]/a/span[1]/text()')
    scores = root.xpath('//ol[@class="grid_view"]/li/div/div[@class="info"]/div[2]/div/span[@class="rating_num"]/text()')
    comment_count = root.xpath('//ol[@class="grid_view"]/li/div/div[@class="info"]/div[2]/div/span[last()]/text()')
    info = root.xpath('//ol[@class="grid_view"]/li/div/div[@class="info"]/div[2]/p[@class="quote"]/span/text()')
    # map pairs up the i-th element of each list: [name, score, comment count, quote]
    all_data = list(map(lambda i1, i2, i3, i4: [i1, i2, i3, i4], names, scores, comment_count, info))
    with open('files/电影.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '评分', '评论数', '简介'])
        writer.writerows(all_data)

def analysis_data2(html: str):
    """Parse the page one movie <div> at a time, which keeps each row's fields together."""
    root = etree.HTML(html)
    all_film_div = root.xpath('//ol[@class="grid_view"]/li/div')
    all_data = []
    for div in all_film_div:
        name = div.xpath('./div[@class="info"]/div[1]/a/span[1]/text()')[0]
        score = div.xpath('./div[@class="info"]/div[2]/div/span[2]/text()')[0]
        comment_count = div.xpath('./div[@class="info"]/div[2]/div/span[last()]/text()')[0]
        info = div.xpath('./div[@class="info"]/div[2]/p/span/text()')[0]
        all_data.append([name, score, comment_count, info])
    with open('files/电影2.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '评分', '评论数', '简介'])
        writer.writerows(all_data)

if __name__ == '__main__':
    data = get_net_data()
    analysis_data2(data)
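As a closing sketch (not from the original notes), the multithreaded page fetching and the per-movie parsing of analysis_data2 could be combined: each thread fetches one offset, parses its own page, and appends rows to a shared list protected by a Lock, and the CSV is written once after all threads have been joined. The fetch_and_parse helper and the output path are made-up names, and only the title and score columns are collected to keep the sketch short.

import csv
import requests
from lxml import etree
from threading import Thread, Lock

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}
all_rows = []
lock = Lock()

def fetch_and_parse(page):
    # fetch one page of the list and parse it in the style of analysis_data2
    url = f'https://movie.douban.com/top250?start={page}&filter='
    root = etree.HTML(requests.get(url, headers=headers).text)
    rows = []
    for div in root.xpath('//ol[@class="grid_view"]/li/div'):
        name = div.xpath('./div[@class="info"]/div[1]/a/span[1]/text()')[0]
        score = div.xpath('./div[@class="info"]/div[2]/div/span[2]/text()')[0]
        rows.append([name, score])
    with lock:                    # guard the shared list while several threads append
        all_rows.extend(rows)

if __name__ == '__main__':
    threads = [Thread(target=fetch_and_parse, args=(page,)) for page in range(0, 250, 25)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()                  # wait until every page has been fetched and parsed
    with open('files/电影_全部.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '评分'])
        writer.writerows(all_rows)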