import pandas as pd
读取表格数据
df = pd.read_excel("超市营业额.xlsx")
根据日期分类汇总,按交易额求和汇总并取最小的前3位和交易额的列名
df1 = df.groupby(by="日期", as_index=False).agg({"交易额": "sum"}).nsmallest(3,['交易额'])
print(pd.to_datetime(df1.日期).dt.day_name())
24 Monday
20 Thursday
9 Sunday
Name: 日期, dtype: object
import pandas as pd
df = pd.read_excel("超市营业额.xlsx")
df1 = df.groupby(by="日期", as_index=False).agg({"交易额": "sum"}).nsmallest(3,['交易额'])
print(pd.to_datetime(df1.日期).dt.day_name())
import pandas as pd
读取表格数据
df = pd.read_excel("超市营业额.xlsx")
把所有员工的工号前面增加一位数字
df['工号']=df['工号'].map(lambda s: str(s)[-1] + str(s))
把修改后的数据写入新的文件“超市营业额2_修改工号.xlsx”
df1 = pd.read_excel("超市营业额2_修改工号.xlsx")
查看是否写入
df1 = pd.read_excel("超市营业额2_修改工号.xlsx")
print(df1)
工号 姓名 日期 时段 交易额 柜台
0 11001 张三 2019-03-01 9:00-14:00 1664.0 化妆品
1 21002 李四 2019-03-01 14:00-21:00 954.0 化妆品
2 31003 王五 2019-03-01 9:00-14:00 1407.0 食品
3 41004 赵六 2019-03-01 14:00-21:00 1320.0 食品
4 51005 周七 2019-03-01 9:00-14:00 994.0 日用品
.. ... .. ... ... ... ...
244 21002 李四 2019-03-31 14:00-21:00 859.0 蔬菜水果
245 41004 赵六 2019-03-31 9:00-14:00 1668.0 日用品
246 41004 赵六 2019-03-31 14:00-21:00 1722.0 日用品
247 31003 王五 2019-03-31 9:00-14:00 1274.0 食品
248 61006 钱八 2019-03-31 14:00-21:00 812.0 食品
import pandas as pd
df = pd.read_excel("超市营业额.xlsx")
df['工号']=df['工号'].map(lambda s: str(s)[-1] + str(s))
df.to_excel('超市营业额2_修改工号.xlsx',index = False)
df1 = pd.read_excel("超市营业额2_修改工号.xlsx")
print(df1)
import pandas as pd
读取表格数据
df = pd.read_excel("超市营业额.xlsx")
设置输出格式
writer = pd.ExcelWriter('各员工数据.xlsx')
读取姓名并去重
names = set(df['姓名'])
每个员工的数据占一个 worksheet,并以员工姓名作为worksheet 的标题
for name in names:
df1 = df[df.姓名 == name]
df1.to_excel(writer, sheet_name = name, index = False)
writer.save()
import pandas as pd
df = pd.read_excel("超市营业额.xlsx")
writer = pd.ExcelWriter('各员工数据.xlsx')
names = set(df['姓名'])
for name in names:
df1 = df[df.姓名 == name]
df1.to_excel(writer, sheet_name = name, index = False)
writer.save()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
读取表格数据并筛选列名为’日期’, ‘柜台’, ‘交易额’,的列
df = pd.read_excel('超市营业额.xlsx',usecols=['日期', '柜台', '交易额'])
根据日期和柜台分类汇总,对交易额进行求和
df = df.groupby(by = ['日期','柜台'], as_index = False).agg({"交易额": "sum"})
以日期横轴,柜台为纵轴,交易额为变化曲线画图
df = df.pivot(index = '日期', columns = '柜台', values = '交易额')
df.plot()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
df = pd.read_excel('超市营业额.xlsx',usecols=['日期', '柜台', '交易额'])
df = df.groupby(by = ['日期','柜台'], as_index = False).agg({"交易额": "sum"})
df = df.pivot(index = '日期', columns = '柜台', values = '交易额')
df.plot()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
设置使用中文字体(font)为黑体(SimHei)
pt.rcParams['font.sans-serif'] = ['simhei']
读取表格数据并筛选列名为 ‘柜台’, ‘交易额’,的列
df = pd.read_excel('超市营业额.xlsx',usecols=['柜台', '交易额'])
根据柜台分类汇总,对交易额进行求和
df = df.groupby(by = ['柜台'], as_index = False).agg({"交易额": "sum"})
展示该月各柜台营业额在交易总额中的占比
df = df.plot(x = '柜台', y = '交易额', kind = 'pie', labels = df.柜台.values)
pt.legend()
pt.show()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
pt.rcParams['font.sans-serif'] = ['simhei']
df = pd.read_excel('超市营业额.xlsx',usecols=['柜台', '交易额'])
df = df.groupby(by = ['柜台'], as_index = False).agg({"交易额": "sum"})
df = df.plot(x = '柜台', y = '交易额', kind = 'pie', labels = df.柜台.values)
pt.legend()
pt.show()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
设置使用中文字体(font)为黑体(SimHei)
pt.rcParams['font.sans-serif'] = ['simhei']
读取表格数据并筛选列名为’姓名’, ‘柜台’, ‘交易额’,的列
df = pd.read_excel('超市营业额.xlsx',usecols=['姓名','柜台', '交易额'])
筛选姓名为张三的信息
df = df[df.姓名 == '张三']
根据柜台分类汇总,对交易额进行求和
df = df.groupby(by = ['柜台'], as_index = False).agg({"交易额": "sum"})
绘制柱状图展示张三在不同柜台的交易总额
df = df.plot(x = '柜台', y = '交易额', kind = 'bar')
pt.legend()
pt.show()
import pandas as pd
import matplotlib.pyplot as pt
import matplotlib.font_manager as fm
pt.rcParams['font.sans-serif'] = ['simhei']
df = pd.read_excel('超市营业额.xlsx',usecols=['姓名','柜台', '交易额'])
df = df[df.姓名 == '张三']
df = df.groupby(by = ['柜台'], as_index = False).agg({"交易额": "sum"})
df = df.plot(x = '柜台', y = '交易额', kind = 'bar')
pt.legend()
pt.show()