• pyecharts练习三:黑色星期五用户画像


    一、概述

    数据集: 537577行 × 12列。

    二、可视化

    1、年龄/性别的各自用户占比

    # Section 1: age share and gender share as ring pies, plus a stacked bar
    # of distinct users per (gender, age) pair.
    import pandas as pd
    from pyecharts.charts import Pie, Bar, Grid, Page, Boxplot
    import pyecharts.options as opts

    df = pd.read_csv('./BlackFridaySales.csv')

    # Row counts per category, kept as (label, count) dict item views.
    age_data = df['Age'].value_counts().to_dict().items()
    gender_data = df['Gender'].value_counts().to_dict().items()

    # Number of distinct User_ID values for every (Gender, Age) combination.
    gender_age_data = df.groupby(['Gender', 'Age']).agg({'User_ID': pd.Series.nunique})
    female_by_age = gender_age_data.loc['F', :]
    male_by_age = gender_age_data.loc['M', :]
    print(female_by_age['User_ID'])

    # Ring pie: age share, labelled with the percentage only.
    pie = (
        Pie()
        .add(
            '',
            list(age_data),
            radius=['20%', '30%'],
            center=['10%', '50%'],
            label_opts=opts.LabelOpts(formatter='{d}%'),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title='用户的年龄占比', pos_left='3%'),
            legend_opts=opts.LegendOpts(is_show=False),
        )
    )
    pie.render('aaa.html')

    # Ring pie: gender share, labelled "name:percent" inside the ring.
    pie2 = (
        Pie()
        .add(
            '',
            list(gender_data),
            radius=['20%', '30%'],
            center=['30%', '50%'],
            label_opts=opts.LabelOpts(formatter='{b}:{d}%', position='inside'),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title='用户的性别占比', pos_left='23%'),
            legend_opts=opts.LegendOpts(is_show=False),
        )
    )
    pie2.render('bbb.html')

    # Stacked bar: both series share stack 's1', so F and M pile up per age.
    # NOTE(review): ECharts documents legend `right` as a pixel/percent
    # offset; confirm that pos_right='right' places the legend as intended.
    bar = (
        Bar()
        .add_xaxis(female_by_age.index.tolist())
        .add_yaxis(
            'F',
            female_by_age.User_ID.tolist(),
            stack='s1',
            label_opts=opts.LabelOpts(position='inside'),
        )
        .add_yaxis(
            'M',
            male_by_age.User_ID.tolist(),
            stack='s1',
            label_opts=opts.LabelOpts(position='inside'),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title='用户性别&年龄占比', pos_left='60%'),
            legend_opts=opts.LegendOpts(pos_right='right'),
        )
    )
    bar.render('ccc.html')

    水平展现多图

    # Lay the three charts out side by side in one Grid and write one page.
    # The bar is added first, then the two pies, exactly as before.
    grid = (
        Grid()
        .add(bar, grid_opts=opts.GridOpts(pos_left='45%'))
        .add(pie, grid_opts=opts.GridOpts(pos_left='10'))
        .add(pie2, grid_opts=opts.GridOpts(pos_left='30%'))
    )
    grid.render('ddd.html')

     

    垂直展现多图

    # Stack the three charts vertically on a single Page and write it out.
    page = Page().add(pie, pie2, bar)
    page.render('eee.html')

    2、婚姻状况/城市类别/消费情况的占比

    # Section 2: three ring pies -- marital status share, city category share,
    # and total purchase amount per city category -- then one horizontal Grid.

    # Row counts per marital status; the 0/1 codes are relabelled for display.
    marriage = df['Marital_Status'].value_counts()
    print(marriage.index)
    marriage = marriage.rename(index={0: '未婚', 1: '已婚'})
    marriage_data = list(marriage.to_dict().items())
    print(marriage)

    p1 = Pie()
    p1.add(
        '',
        marriage_data,
        radius=['20%', '30%'],
        center=['20%', '50%'],
        label_opts=opts.LabelOpts(formatter='{b}:{d}%'),
    )
    p1.set_global_opts(
        title_opts=opts.TitleOpts(title='用户婚姻状况占比'),
        legend_opts=opts.LegendOpts(type_='plain', pos_top='10%', pos_left='left'),
    )
    p1.render('fff.html')

    # Row counts per city category.
    city_data = df['City_Category'].value_counts().to_dict().items()
    print(city_data)

    # NOTE(review): ECharts documents `right` as a pixel/percent offset;
    # confirm that pos_right='right' below places the legend/title as intended.
    p2 = Pie()
    p2.add(
        '',
        list(city_data),
        radius=['20%', '30%'],
        center=['50%', '50%'],
        label_opts=opts.LabelOpts(formatter='{b}:{d}%'),
    )
    p2.set_global_opts(
        title_opts=opts.TitleOpts(title='城市类别占比', pos_right='45%'),
        legend_opts=opts.LegendOpts(pos_top='10%', pos_right='right'),
    )
    # Fix: render the city chart built above (the listing rendered `pie`,
    # the age chart from section 1, into ggg.html).
    p2.render('ggg.html')

    # Total purchase amount per city category. The string alias 'sum' is used
    # because newer pandas deprecates passing the builtin `sum` to agg.
    purchase_data = (
        df.groupby(['City_Category'])
        .agg({'Purchase': 'sum'})
        .to_dict()['Purchase']
        .items()
    )
    print(purchase_data)

    p3 = Pie()
    p3.add(
        '',
        list(purchase_data),
        radius=['20%', '30%'],
        center=['80%', '50%'],
        label_opts=opts.LabelOpts(formatter='{b}:{c}$'),
    )
    # Fix: pass the title by keyword so it cannot bind to a different first
    # positional parameter of TitleOpts in other pyecharts releases.
    p3.set_global_opts(
        title_opts=opts.TitleOpts(title='消费占比-城市类别', pos_right='right'),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    p3.render('hhh.html')

    # Horizontal multi-chart layout.
    grid = Grid()
    grid.add(p1, grid_opts=opts.GridOpts(pos_left='10%'))
    grid.add(p2, grid_opts=opts.GridOpts(pos_left='30%'))
    grid.add(p3, grid_opts=opts.GridOpts(pos_left='70%'))
    grid.render('iii.html')

     

    3、当前城市停留时间&性别的用户对应的平均消费

    # Section 3: mean purchase (rounded to whole units) per gender and per
    # Stay_In_Current_City_Years value, drawn as a grouped bar chart.
    data = df.groupby(['Gender', 'Stay_In_Current_City_Years']).Purchase.mean().round(0)
    female_mean = data.loc['F', :]
    male_mean = data.loc['M', :]
    print(female_mean.tolist())

    b = (
        Bar()
        .add_xaxis(female_mean.index.tolist())
        .add_yaxis('F', female_mean.tolist(), color='#FFCF43')
        .add_yaxis('M', male_mean.tolist(), color='#6CBEFD')
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title='当前城市停留时间&性别的用户对应的平均消费',
                pos_left='center',
            ),
            xaxis_opts=opts.AxisOpts(name='当前城市\n停留时间'),
            # The y axis starts at 8600 rather than 0 -- presumably to make
            # the small differences between bars visible; verify with author.
            yaxis_opts=opts.AxisOpts(name='平均消费', min_=8600),
            legend_opts=opts.LegendOpts(pos_top='7%'),
        )
    )
    b.render('jjj.html')

     

    4、箱型图-购买力(性别/年龄)

    # Section 4: box plot of per-user mean purchase, split by gender and age.

    # One row per (Gender, Age, User_ID) holding that user's rounded mean purchase.
    f = df.groupby(['Gender', 'Age', 'User_ID']).Purchase.mean().round(0).reset_index()
    data_x = f.Age.unique().tolist()


    def _purchases_per_age(gender):
        """Return, for each age in data_x, the list of purchases for `gender`."""
        return [
            list(f[(f.Gender == gender) & (f.Age == age)].Purchase)
            for age in data_x
        ]


    data_f = _purchases_per_age('F')
    data_m = _purchases_per_age('M')

    # Original author's notes: bp.prepare_data computes, in order, each box's
    # minimum, Q1, median, Q3 and maximum; the ItemStyleOpts border_color is
    # the same colour that the legend shows for the series.
    bp = Boxplot()
    bp.add_xaxis(data_x)
    bp.add_yaxis(
        'F',
        bp.prepare_data(data_f),
        itemstyle_opts=opts.ItemStyleOpts(color='#EEFDD3', border_color='#56C71C'),
    )
    bp.add_yaxis(
        'M',
        bp.prepare_data(data_m),
        itemstyle_opts=opts.ItemStyleOpts(color='#FEF8D8', border_color='#D6A12D'),
    )
    bp.set_global_opts(legend_opts=opts.LegendOpts(legend_icon='circle'))
    bp.render('kkk.html')

     

  • 相关阅读:
    浏览器安全级别怎么设置,设置浏览器安全级别的方法
    【Redis学习笔记】第十章 Redis高级数据类型
    【LeetCode-112】路径总和
    ZCC5429 异步升压芯片
    createSocketTask:fail wcwss url not in domain list 小程序网络异常
    聊一聊被 .NET程序员 遗忘的 COM 组件
    总结:常见的服务发布策略
    消息队列,推拉模式的区别在哪?
    The Sandbox阐释对元宇宙平台的愿景
    2023年金九银十网络安全考试试题
  • 原文地址:https://blog.csdn.net/bb8886/article/details/132839369