本文会用到几个体积较大的第三方包,直接从官方源(PyPI)下载速度较慢,因此建议先配置国内镜像源,这里以清华大学的镜像源为例。
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip config set global.trusted_host pypi.tuna.tsinghua.edu.cn
安装第三方包
pip install requests pandas openpyxl bs4 pyecharts
从网页爬取数据
import requests
from bs4 import BeautifulSoup
# Fetch the page. The timeout keeps the script from hanging forever when the
# server does not answer.
response = requests.get('url地址', timeout=10)
# Fail fast on an HTTP error status instead of parsing an error page.
response.raise_for_status()
# Parse the HTML content.
soup = BeautifulSoup(response.text, 'html.parser')
# Extract the plain text of the page.
soup.get_text()
1、从网页爬取每日最新数据
2、将数据追加存储到excel
# !/usr/bin/python
# -*-coding:utf-8 -*-
"""
File : spider.py
Time : 2024/1/17 10:00
Author : 天选之子
Email :
version : python 3.10.11
Description :
"""
import datetime
import os
import numpy
import requests
from bs4 import BeautifulSoup
import pandas
# Base URL of the quote service; main() appends "/?q=<fund code>" to it.
url = "https://qt.gtimg.cn"
# Fund codes that main() queries on every run.
fund_code_list = ['sh510300', 'sz159995']
# Path of the Excel workbook that stores the collected rows
# (placeholder value - replace it with a real path).
target_path = r'excel存储位置'
# Worksheet name that main() passes to df_combiner()
# (placeholder value - replace it with a real sheet name).
sheet_name = '存储的excel的sheet页'
def get_fund_close_price(url_str):
    """
    Fetch a quote from ``url_str`` and return its price field.

    The response text is split on ``~`` and the element at index 4 is
    returned unchanged, as a string.
    NOTE(review): presumably index 4 is the closing price in the quote
    format served by the configured service - confirm against its field layout.

    :param url_str: full URL of the quote to fetch
    :return: the field at index 4 as a string, or '没找到' when the response
        does not contain that many ``~``-separated fields
    :raises requests.RequestException: on connection errors, timeouts or
        HTTP error statuses
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url_str, timeout=10)
    # Do not try to parse an error page as if it were a quote.
    response.raise_for_status()

    market = BeautifulSoup(response.text, 'html.parser').get_text()
    fields = market.split('~')
    # A response without enough fields has no price to report; return the
    # "not found" sentinel instead of raising IndexError.
    if len(fields) <= 4:
        return '没找到'
    return fields[4]
def df_combiner(targetpath, dataframe, sheetname):
    """
    Merge newly collected rows into the rows already stored in a workbook.

    The '日期' column decides whether anything is appended: when every date in
    ``dataframe`` is already present in the stored sheet, the stored data is
    returned unchanged; otherwise all rows of ``dataframe`` are appended.

    :param targetpath: path of the Excel workbook holding the stored rows
    :param dataframe: DataFrame with the new rows; must have a '日期' column
    :param sheetname: name of the worksheet to read from ``targetpath``
    :return: the combined DataFrame; ``dataframe`` itself when the workbook
        does not exist yet
    """
    if not os.path.exists(targetpath):
        # First run: nothing is stored yet, so the new rows are the result.
        # (Returning None here made the caller crash on its first run.)
        return dataframe

    stored = pandas.read_excel(targetpath, sheet_name=sheetname, keep_default_na=False)
    if dataframe is None or dataframe.empty:
        # Nothing new to merge.
        return stored

    # Dates read back from Excel may come back as numbers while the new rows
    # carry strings; compare and store them as strings so that equal dates
    # match and the column keeps a single type.
    stored['日期'] = stored['日期'].astype(str)
    stored_dates = set(stored['日期'])
    new_dates = set(dataframe['日期'].astype(str))

    if new_dates.issubset(stored_dates):
        return stored
    return pandas.concat([stored, dataframe], ignore_index=True)
def main():
    """
    Fetch the price of every fund in ``fund_code_list`` and store it in Excel.

    One row (date, fund code, price) is built per fund, the rows are merged
    with the existing workbook through ``df_combiner`` and the result is
    written back to ``target_path``.
    """
    # Take the timestamp once so every row of this run carries the same date.
    today = datetime.datetime.now()

    rows = []
    for fund_code in fund_code_list:
        fund_url = f'{url}/?q={fund_code}'
        result = get_fund_close_price(fund_url)
        print("该只基金的收盘价为:", result)
        print(today.strftime('%Y-%m-%d'))
        rows.append({'日期': today.strftime('%Y%m%d'), '基金代码': fund_code, '收盘价': result})

    if not rows:
        # No fund codes configured: nothing to store.
        return

    df_all = pandas.DataFrame(rows, columns=['日期', '基金代码', '收盘价'])
    target_df = df_combiner(target_path, df_all, sheet_name)
    if target_df is None:
        # No stored data was returned; write just the new rows.
        target_df = df_all

    # Write to the same sheet that is read on the next run, and let the
    # context manager close the writer even if writing fails.
    with pandas.ExcelWriter(target_path) as excel_writer:
        target_df.to_excel(excel_writer, index=False, sheet_name=sheet_name)


if __name__ == '__main__':
    main()
将获取的excel数据展示成折线图,这里使用的是pyecharts,不多废话直接上代码
# https://pyecharts.org/#/zh-cn/quickstart
import numpy
import pandas
from pyecharts import options as opts
from pyecharts.charts import Line, Page
# Workbook and worksheet to read (placeholder path - replace with a real one).
target_path = r'excel位置'
sheet_name = '基金收盘价'
# Funds to plot, one line series per code.
fund_code_list = ['sh510300', 'sz159995']
# Line colour per fund; a fund without an entry gets the pyecharts default.
series_colors = {'sh510300': 'red', 'sz159995': 'yellow'}

tar_data_frame = pandas.read_excel(target_path, sheet_name=sheet_name, keep_default_na=False)
# Category-axis labels have to be strings, and a single column type keeps
# the sort below well defined.
tar_data_frame['日期'] = tar_data_frame['日期'].astype(str)
tar_data_frame = tar_data_frame.sort_values(by=['日期', '基金代码'], ascending=[True, True])

# Distinct dates in ascending order form the x axis.
data_date = sorted(set(tar_data_frame['日期']))

# Create the line chart.
line_chart = Line()
line_chart.add_xaxis(data_date)

# NOTE(review): each series is plotted positionally against data_date, so
# this assumes every fund has exactly one row per date - confirm for your data.
for fund_code in fund_code_list:
    close_prices = tar_data_frame.loc[tar_data_frame['基金代码'] == fund_code, '收盘价'].tolist()
    color = series_colors.get(fund_code)
    line_chart.add_yaxis(
        series_name=fund_code,
        y_axis=close_prices,
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(color=color, width=2, is_show=True),
        itemstyle_opts=opts.ItemStyleOpts(color=color, border_width=1),
    )

line_chart.set_global_opts(
    title_opts=opts.TitleOpts(title="基金收盘价格走势", is_show=True, pos_left='center'),
    xaxis_opts=opts.AxisOpts(name="日期"),
    yaxis_opts=opts.AxisOpts(name="收盘价(元)", min_=0, max_=10),
    legend_opts=opts.LegendOpts(pos_left='right'),
)

page = Page(page_title='基金收盘价')
page.add(line_chart)
# Render the chart to an HTML file.
page.render("基金收盘价.html")
展示结果如下: