# create a function
def gender_to_numeric(x):
    """Map a gender code to a number.

    Args:
        x: Gender code; 'M' and 'F' are the recognised values.

    Returns:
        1 for 'M', 0 for 'F', and None for any other value.
    """
    if x == 'M':
        return 1
    if x == 'F':
        return 0
    # Any other value falls through and yields None.
    return None


# apply() is typically given a lambda expression or a function as the
# operation to run on each element.
# apply the function to the gender column and create a new column
# Store the result of gender_to_numeric for every row in a new 'gender_n' column.
users['gender_n'] = users['gender'].apply(gender_to_numeric)

# Per occupation: sum of gender_n divided by the number of rows, times 100.
gender_n_by_occupation = users.groupby('occupation')['gender_n'].sum()
rows_by_occupation = users['occupation'].value_counts()
a = gender_n_by_occupation / rows_by_occupation * 100

# sort to the most male
a.sort_values(ascending=False)
# create a data frame and apply count to gender
# Group by both 'occupation' and 'gender', then count the 'gender' entries
# in each group.
gender_ocup = users.groupby(['occupation', 'gender']).agg({'gender': 'count'})
gender_ocup.head()
# create a DataFrame and apply count for each occupation
occup_count = users.groupby(['occupation']).agg('count')
occup_count.head()
# divide the gender_ocup per the occup_count and multiply per 100
# The div() method divides each value in the DataFrame by the specified
# value; level="occupation" matches the divisor on that index level.
occup_gender = gender_ocup.div(occup_count, level="occupation") * 100
occup_gender.head()
# Show every row of the 'gender' column.
occup_gender['gender']
# Group the dataframe by regiment, and for each regiment,
for name, group in regiment.groupby('regiment'):
    # print the name of the regiment
    print(name)
    # print the data of that regiment
    print(group)