# Running this script opens the Douyu website and prints each room's
# information one by one: title, type, owner, viewer count and cover image.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By


class Douyu(object):
    """Scrape the Douyu "all rooms" directory with a Selenium-driven Edge browser.

    Each listing page is turned into a list of dicts with the keys
    ``title``, ``type``, ``owner``, ``num`` and ``picture``; every dict is
    printed, then the pager's "next" control is clicked and the process
    repeats until that control can no longer be located.

    Elements are located with ``find_element(By.XPATH, ...)`` /
    ``find_elements(By.XPATH, ...)``; the older ``find_element_by_xpath``
    helpers are no longer available in current Selenium releases.
    """

    # Seconds to sleep before reading a page (presumably to let the
    # JavaScript-rendered room list appear). Override on the class or on an
    # instance to tune it.
    page_load_wait = 3

    # One match per room card on the listing page.
    ROOM_XPATH = '//*[@id="listAll"]/section[2]/div[2]/ul/li/div'
    # Exact class match for the "next page" control.
    # NOTE(review): this presumably stops matching on the last page (e.g. when
    # the site adds a "disabled" class) -- confirm against the live markup.
    NEXT_PAGE_XPATH = '//*[@class= "dy-Pagination-next"]'
    # Result key -> XPath (relative to a room card) whose text is stored.
    TEXT_FIELD_XPATHS = {
        'title': './a/div[2]/div[1]/h3',
        'type': './a/div[2]/div[1]/span',
        'owner': './a/div[2]/div[2]/h2',
        'num': './a/div[2]/div[2]/span',
    }
    # XPath (relative to a room card) of the cover <img>; its ``src`` is stored.
    PICTURE_XPATH = './a/div[1]/div[1]/picture/img'

    def __init__(self):
        """Store the directory URL and start an Edge WebDriver session."""
        self.url = 'https://www.douyu.com/directory/all'
        self.driver = webdriver.Edge()

    def parse_data(self):
        """Extract every room card from the page currently loaded in the driver.

        Sleeps ``page_load_wait`` seconds first, then reads each card.

        Returns:
            A list with one dict per room card, holding the text of the
            title/type/owner/num elements and the cover image ``src`` under
            ``picture``. The list is empty when no card matches.
        """
        time.sleep(self.page_load_wait)
        data_list = []
        for room in self.driver.find_elements(By.XPATH, self.ROOM_XPATH):
            record = {
                key: room.find_element(By.XPATH, xpath).text
                for key, xpath in self.TEXT_FIELD_XPATHS.items()
            }
            cover = room.find_element(By.XPATH, self.PICTURE_XPATH)
            record['picture'] = cover.get_attribute('src')
            data_list.append(record)
        return data_list

    def save_data(self, data_list):
        """Print each room dict in ``data_list`` on its own line."""
        for data in data_list:
            print(data)

    def run(self):
        """Open the directory URL and process pages until no "next" control is found.

        The browser session is always closed with ``driver.quit()`` when the
        crawl ends, whether it finishes normally or an exception propagates.
        """
        try:
            self.driver.get(self.url)
            while True:
                self.save_data(self.parse_data())
                # find_elements returns an empty list instead of raising when
                # nothing matches, which gives the loop a clean exit.
                next_buttons = self.driver.find_elements(By.XPATH, self.NEXT_PAGE_XPATH)
                if not next_buttons:
                    break
                # Scroll to the bottom of the page before clicking the pager.
                self.driver.execute_script('scrollTo(0,10000000)')
                next_buttons[0].click()
        finally:
            self.driver.quit()


if __name__ == '__main__':
    # Script entry point: build the scraper and start crawling.
    Douyu().run()