BeautifulSoup下载小说
链接:
link
import requests
from bs4 import BeautifulSoup
def chapters(base_url="", timeout=10):
    """Fetch the chapter index page and return the chapter links it lists.

    Args:
        base_url: URL of the page containing the ``<ul id="htmlChapterList">``
            chapter list. The default is the empty placeholder found in the
            original script and must be replaced with a real URL.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A list of ``(href, title)`` tuples, one per ``<li>`` that contains an
        anchor with an ``href``. Empty when the page has no chapter list.
    """
    resp = requests.get(base_url, timeout=timeout)
    resp.encoding = "utf-8"
    bf = BeautifulSoup(resp.text, "html.parser")

    data = []
    chapter_list = bf.find("ul", id="htmlChapterList")
    if chapter_list is None:
        # The page does not have the expected list; report "no chapters"
        # instead of failing with an AttributeError on None.
        print(data)
        return data

    for li in chapter_list.find_all("li"):
        link_a = li.find("a")
        # Skip list items without a usable link.
        if link_a is None or not link_a.get("href"):
            continue
        data.append((link_a["href"], link_a.get_text()))
    print(data)
    return data
def content_novel(url, timeout=10):
    """Download one chapter page and return the text of its content block.

    Args:
        url: URL of the chapter page.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The text of the ``<div id="htmlContent">`` element, or ``None`` when
        the page has no such element.
    """
    res = requests.get(url, timeout=timeout)
    res.encoding = "UTF-8"
    bf = BeautifulSoup(res.text, "html.parser")
    # Look the element up once and reuse it for both the check and the result.
    content_div = bf.find("div", id="htmlContent")
    if content_div is None:
        return None
    return content_div.get_text()
# Fetch the chapter index once; the same list supplies the total and the loop,
# so the progress counter always matches what is being downloaded.
novels = chapters()
numbers = len(novels)
ids = 0
for ids, (url, title) in enumerate(novels, start=1):
    print("download: " + str(ids) + "," + "total:" + str(numbers))
    # Download each chapter exactly once, and before opening the output file,
    # so a chapter without content does not leave an empty .txt file behind.
    text = content_novel(url)
    if not text:
        continue
    # Path separators in a title would make open() fail or write elsewhere.
    safe_title = title.replace("/", "_").replace("\\", "_")
    with open("%s.txt" % safe_title, "w", encoding="UTF-8") as fwrite:
        fwrite.write(text)
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46