首先,编写一个脚本,读取所需的 Excel 表格,统计xx大学在各个类别中获得一、二、三等奖的数量,并把结果写入 result.txt:
# Tally the first/second/third prizes won by xx大学 in each category.
# Input:  first sheet of xxxx.xls (row 0 is the header).
# Output: result.txt, one section per category with the three counts.
import xlrd

data = xlrd.open_workbook('xxxx.xls')  # open the .xls workbook
table = data.sheet_by_index(0)  # take the first sheet by index

# Zeroed counter template; each category gets its own copy.
awards_dict = {
    "一等奖": 0,
    "二等奖": 0,
    "三等奖": 0
}

# Maps lower-cased category name -> {award label: count}.
subjects_dict = {}

# Walk every data row, skipping the header row.
for i in range(1, table.nrows):
    row = table.row_values(i)
    # Only rows whose column 2 equals 'xx大学' are counted.
    if row[2] != 'xx大学':
        continue
    # Lower-case the category so differences in capitalisation do not
    # split one category into several.
    subject = row[4].lower()
    # Award label of this row.
    award = row[5]
    # Rows carrying any other award label are ignored; indexing the counter
    # with them would raise KeyError and abort the whole run.
    if award not in awards_dict:
        continue
    if subject not in subjects_dict:
        subjects_dict[subject] = awards_dict.copy()
    subjects_dict[subject][award] += 1

# Write the result. The labels are Chinese, so the encoding is pinned to
# UTF-8 instead of relying on the platform-dependent default.
with open("result.txt", "w", encoding="utf-8") as f:
    for subject, awards in subjects_dict.items():
        f.write(f"{subject}\n")
        for award, count in awards.items():
            f.write(f"{award}: {count}\n")
        # Blank line between categories.
        f.write("\n")
接着,我们让 ChatGPT 根据上面的代码,用直方图展示xx大学在各个类别中的获奖情况,也就是对上面的统计结果进行数据可视化。于是 ChatGPT 生成了以下代码(注意:它是直接重新读取 Excel 表格进行统计,而不是读取 result.txt 这个文本文件):
# Draw a grouped bar chart of the prizes won by xx大学 in each category.
# The counts are recomputed from the first sheet of xxxx.xls.
import xlrd
import matplotlib.pyplot as plt
import numpy as np

# Use a font with Chinese glyphs; otherwise the Chinese labels do not render.
plt.rcParams['font.sans-serif'] = ['SimHei']
data = xlrd.open_workbook('xxxx.xls')  # open the .xls workbook
table = data.sheet_by_index(0)  # take the first sheet by index

# Zeroed counter template; each category gets its own copy.
awards_dict = {
    "一等奖": 0,
    "二等奖": 0,
    "三等奖": 0
}

# Maps lower-cased category name -> {award label: count}.
subjects_dict = {}

# Walk every data row, skipping the header row.
for i in range(1, table.nrows):
    row = table.row_values(i)
    # Only rows whose column 2 equals 'xx大学' are counted.
    if row[2] != 'xx大学':
        continue
    # Lower-case the category so differences in capitalisation do not
    # split one category into several.
    subject = row[4].lower()
    award = row[5]
    # Rows carrying any other award label are ignored; indexing the counter
    # with them would raise KeyError and abort the whole run.
    if award not in awards_dict:
        continue
    if subject not in subjects_dict:
        subjects_dict[subject] = awards_dict.copy()
    subjects_dict[subject][award] += 1

# One group of bars per category. The keys are materialised as a list so the
# same fixed order is used for the heights and for the tick labels.
x_labels = list(subjects_dict)
x = np.arange(len(x_labels))  # consecutive x positions, one per category

# Per-category counts for each award level.
y_first = [subjects_dict[subject]['一等奖'] for subject in x_labels]
y_second = [subjects_dict[subject]['二等奖'] for subject in x_labels]
y_third = [subjects_dict[subject]['三等奖'] for subject in x_labels]

# Bar colours.
color_first = 'skyblue'
color_second = 'lightgreen'
color_third = 'lightcoral'

# Draw the three bars of each group left to right as first, second, third
# prize, so the on-screen order and the legend order both follow the ranking.
bar_width = 0.2
plt.bar(x - bar_width, y_first, width=bar_width, align='center', label='一等奖', color=color_first)
plt.bar(x, y_second, width=bar_width, align='center', label='二等奖', color=color_second)
plt.bar(x + bar_width, y_third, width=bar_width, align='center', label='三等奖', color=color_third)

plt.xlabel('类别')
plt.ylabel('获奖数量')
plt.title('xx大学在各个类别中的获奖情况')
plt.xticks(x, x_labels)

# Legend in the upper-right corner, without a frame.
plt.legend(loc='upper right', frameon=False)

# Background colour of the plotting area.
plt.gca().set_facecolor('whitesmoke')

# Let matplotlib adjust the layout so labels are not clipped.
plt.tight_layout()

plt.show()
pyecharts 是一个用于生成 ECharts 图表的类库。ECharts 是百度开源的一个数据可视化 JS 库。下面改用 pyecharts 绘制同样的图表,并把结果保存为 HTML 文件:
# Render a bar chart of the prizes won by xx大学 in each category with
# pyecharts and save it as bar_chart.html.
# The counts are recomputed from the first sheet of xxxx.xls.
import xlrd
from pyecharts.charts import Bar
from pyecharts import options as opts

data = xlrd.open_workbook('xxxx.xls')  # open the .xls workbook
table = data.sheet_by_index(0)  # take the first sheet by index

# Zeroed counter template; each category gets its own copy.
awards_dict = {
    "一等奖": 0,
    "二等奖": 0,
    "三等奖": 0
}

# Maps lower-cased category name -> {award label: count}.
subjects_dict = {}

# Walk every data row, skipping the header row.
for i in range(1, table.nrows):
    row = table.row_values(i)
    # Only rows whose column 2 equals 'xx大学' are counted.
    if row[2] != 'xx大学':
        continue
    # Lower-case the category so differences in capitalisation do not
    # split one category into several.
    subject = row[4].lower()
    award = row[5]
    # Rows carrying any other award label are ignored; indexing the counter
    # with them would raise KeyError and abort the whole run.
    if award not in awards_dict:
        continue
    if subject not in subjects_dict:
        subjects_dict[subject] = awards_dict.copy()
    subjects_dict[subject][award] += 1

# Category names for the x axis, materialised once so the axis and the
# three series share the same order.
x_labels = list(subjects_dict)

# Per-category counts for each award level.
y_first = [subjects_dict[subject]['一等奖'] for subject in x_labels]
y_second = [subjects_dict[subject]['二等奖'] for subject in x_labels]
y_third = [subjects_dict[subject]['三等奖'] for subject in x_labels]

# Build the chart: one series per award level.
bar = (
    Bar()
    .add_xaxis(x_labels)
    .add_yaxis('一等奖', y_first)
    .add_yaxis('二等奖', y_second)
    .add_yaxis('三等奖', y_third)
    .set_global_opts(
        # Rotate the category labels by 45 degrees.
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
        yaxis_opts=opts.AxisOpts(name='获奖数量'),
        title_opts=opts.TitleOpts(title='xx大学在各个类别中的获奖情况'),
        # Legend offset from the right and top edges of the chart.
        legend_opts=opts.LegendOpts(pos_right='5%', pos_top='20%')
    )
)

# Render the chart to an HTML file.
bar.render('bar_chart.html')