类似 SQL:
select city,max(temperature) from city_weather group by city;
groupby:先对数据分组,然后在每个分组上应用聚合函数或转换函数。
本次演示
import pandas as pd
import numpy as np
# Demo frame: two string key columns (A, B) and two columns of random
# floats (C, D) to aggregate.
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8),
})
print(df)

# 1. After groupby('A'), A becomes the index of the result.
# 2. B holds strings, so it is excluded from the sum explicitly with
#    numeric_only=True. Without it, pandas 2.x no longer drops non-numeric
#    columns automatically and would concatenate the strings instead.
sum_by_a = df.groupby('A').sum(numeric_only=True)
print(sum_by_a)

# Grouping by the (A, B) pair produces a two-level index.
mean_by_ab = df.groupby(['A', 'B']).mean()
print(mean_by_ab)
print("*" * 32)

# as_index=False keeps A and B as ordinary columns instead of index levels.
mean_by_ab_flat = df.groupby(['A', 'B'], as_index=False).mean()
print(mean_by_ab_flat)
import pandas as pd
fpath = "../data/tianqi.csv"
df = pd.read_csv(fpath)

# Strip the "°C" suffix from both temperature columns and store them as int32.
# The columns are replaced by plain assignment rather than
# `df.loc[:, col] = ...`: on pandas 2.x the .loc form writes into the existing
# column in place, so the column would keep its original (string) dtype.
df["bWenDu"] = df["bWenDu"].str.replace("°C", "", regex=False).astype('int32')
df["yWenDu"] = df["yWenDu"].str.replace("°C", "", regex=False).astype('int32')
print(df.head())

# Add a year-month column from the first 7 characters of 'ymd'
# (presumably 'YYYY-MM-DD' strings -- confirm against the CSV).
# NOTE: the column name 'mouth' looks like a typo for 'month'; it is kept
# unchanged in case other code reads this column.
df['mouth'] = df['ymd'].str[:7]
print(df.head())

# Selecting a single column from the groupby and aggregating it yields a
# Series indexed by 'mouth'.
data = df.groupby('mouth')['bWenDu'].max()
print(data)
# Printed explicitly: a bare `type(data)` expression shows nothing when this
# file runs as a script.
print(type(data))
print(type(df.groupby('mouth')))

# Per-month summary: max of bWenDu, min of yWenDu, mean of aqi.
# The aggregations are named by string; passing np.max / np.min / np.mean
# raises a FutureWarning on recent pandas versions.
group_data = df.groupby('mouth').agg({"bWenDu": "max", "yWenDu": "min", "aqi": "mean"})
print(group_data)