本期我们利用Python分析 30000+条电影影评数据,分析一下由沈腾、马丽主演的《独行月球》这部电影观影情况,看看大家对这部国产灾难电影都有哪些看法,希望对小伙伴们有所帮助,如有疑问或者需要改进的地方可以私信小编。
涉及到的库:
可视化部分:
import jieba
import stylecloud
import pandas as pd
from PIL import Image
from collections import Counter
from pyecharts.charts import Geo
from pyecharts.charts import Scatter
from pyecharts.charts import Line
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ChartType
import math
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Wedge, Polygon
from matplotlib.offsetbox import TextArea, AnnotationBbox
# Use Microsoft YaHei as the sans-serif font so the Chinese labels drawn by the
# matplotlib chart below can be rendered, and fall back to the ASCII hyphen for
# minus signs (the Unicode minus glyph is often missing from CJK fonts).
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# Load the review data set and preview the first ten rows.
df = pd.read_excel('独行月球.xlsx')
df.head(10)

# ``shape`` is an attribute holding a (rows, columns) tuple, not a method;
# calling it raises "TypeError: 'tuple' object is not callable".
df.shape
(34436, 6)
一共有34436条影评数据,数据时间分布为2022-08-01至2022-08-07。
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

def pic():
    """Draw a half-donut chart of the rating distribution.

    Reads the module-level ``df``, counts the '评论' entries per '评分' value,
    keeps the first (lowest) rating group on its own and merges the remaining
    groups in consecutive pairs, then plots the resulting shares as a donut
    that only spans the upper semicircle, together with percentage labels,
    a title and a legend.

    The function draws on the current matplotlib figure and returns ``None``.
    """
    df_star = df.groupby('评分')['评论'].count()
    count_sco = list(df_star.values)
    sum_count = sum(count_sco)

    # Every share is halved so that the wedges sum to 0.5; combined with
    # ``normalize=False`` below, matplotlib then draws a partial pie that
    # covers only half of the circle.
    data = [count_sco[0] / sum_count / 2]
    for i in range(1, len(count_sco), 2):
        # Slicing instead of indexing ``i + 1`` keeps an unpaired trailing
        # group from raising IndexError when the number of groups is even.
        rate = sum(count_sco[i:i + 2]) / sum_count
        data.append(rate / 2)
    series = pd.Series(data)

    # NOTE(review): the six labels and the hard-coded label indices below
    # presumably assume eleven rating groups (0, 0.5, ..., 5) - confirm
    # against the data set.
    label = ['5分力荐', '4分推荐', '3分还行', '2分较差', '1分很差', '0分极差']
    colors = ['#009688', '#26A69A', '#4DB6AC', '#80CBC4', '#B2DFDB', '#E0F2F1'][::-1]
    # Multiply by 2 to undo the halving so the labels show the real shares.
    rate_lable = [f'{i*2:.2%}' for i in data]

    plt.figure(figsize=(10, 7), dpi=100, facecolor='#FFEFD5')
    ax = plt.subplot(111, facecolor='#FFEFD5', alpha=.01)
    patches, _ = ax.pie(
        series,
        colors=colors,
        wedgeprops=dict(width=0.4),
        startangle=0,
        textprops=dict(fontsize=15),
        normalize=False,
    )

    # Data labels, positioned and rotated by hand for each annotated wedge.
    ax.text(-0.45, 0.6, rate_lable[5], fontsize=18, rotation=298)
    ax.text(0.53, 0.36, rate_lable[4], fontsize=17, rotation=35)
    ax.text(0.64, 0.20, rate_lable[3], fontsize=16, rotation=21)
    ax.text(0.7, 0.048, rate_lable[1], fontsize=14, rotation=5)

    # Dashed outer and solid inner semicircular outlines (0 to 180 degrees).
    outer_outline = Wedge((0, 0), 1.05, 0, 180, edgecolor='grey',
                          facecolor='None', linestyle='--')
    ax.add_artist(outer_outline)
    inner_outline = Wedge((0, 0), 0.55, 0, 180, edgecolor='grey',
                          facecolor='None', linestyle='-')
    ax.add_artist(inner_outline)
    plt.axis('equal')

    # A line in the background colour along y = 0; presumably it hides the
    # flat bottom edge of the two outlines.
    x = [-1.05, 1.05]
    y = [0, 0]
    plt.plot(x, y, color="#FFEFD5", alpha=1)

    plt.text(-0.43, 0.25, '《独行月球》评分分布', fontsize=20, color="r")
    plt.text(-0.32, 0.4, '(制作@公众号:Python当打之年)', color="#283593",
             fontsize=10, alpha=0.8)
    # The wedges are drawn from the lowest rating upwards, so the legend
    # labels are reversed to match the patch order.
    plt.legend(patches, label[::-1], loc=0, ncol=6, fontsize=11,
               bbox_to_anchor=(0.92, 0.15), handlelength=1, handleheight=1)

复古版海报:

# Line chart of the number of comments per day. The chart is built step by
# step; each pyecharts call configures the same ``line1`` object.
line1 = Line(init_opts=opts.InitOpts(bg_color=JsCode(color_js)))
line1.add_xaxis(xaxis_data=day_x_data)

# Single unnamed series: smooth white line with red, white-bordered markers
# and the value printed above every point; tooltips are disabled.
line1.add_yaxis(
    series_name="",
    y_axis=day_y_data,
    is_smooth=True,
    is_symbol_show=True,
    symbol="circle",
    symbol_size=6,
    linestyle_opts=opts.LineStyleOpts(color="#fff"),
    label_opts=opts.LabelOpts(is_show=True, position="top", color="white"),
    itemstyle_opts=opts.ItemStyleOpts(
        color="red",
        border_color="#fff",
        border_width=3,
    ),
    tooltip_opts=opts.TooltipOpts(is_show=False),
)

# Chart-wide options: centred title, category x axis without an axis line,
# value y axis on the left, and no legend.
line1.set_global_opts(
    title_opts=opts.TitleOpts(
        title="每日评论量",
        pos_top="5%",
        pos_left="center",
        title_textstyle_opts=opts.TextStyleOpts(
            color="#fff",
            font_family='FZYaoTi',
            font_size=16,
        ),
    ),
    xaxis_opts=opts.AxisOpts(
        type_="category",
        boundary_gap=True,
        axislabel_opts=opts.LabelOpts(margin=30, color="#ffffff63"),
        axisline_opts=opts.AxisLineOpts(is_show=False),
    ),
    yaxis_opts=opts.AxisOpts(
        type_="value",
        position="left",
        axislabel_opts=opts.LabelOpts(margin=20, color="#ffffff63"),
        axisline_opts=opts.AxisLineOpts(
            linestyle_opts=opts.LineStyleOpts(width=2, color="#fff"),
        ),
    ),
    legend_opts=opts.LegendOpts(is_show=False),
)


# Scatter chart with one row per weekday: for every entry of ``weeks`` (walked
# in reverse order) an axis definition, a row title and a data series are added.
# NOTE(review): ``single_axis`` is filled here but not consumed in this
# snippet; presumably it is assigned to the chart options elsewhere - confirm.
single_axis, titles = [], []
scatter = Scatter(init_opts=opts.InitOpts(width='1000px', height='800px',
                                          theme='light', bg_color='#0d0735'))
for idx, day in enumerate(weeks[::-1]):
    scatter.add_xaxis(xaxis_data=hours)
    # Axis definition for this row: the rows are stacked vertically, each
    # offset by idx * 100 / 7 percent from the top.
    single_axis.append({'left': 100,
                        'nameGap': 20,
                        'nameLocation': 'start',
                        'type': 'category',
                        'boundaryGap': False,
                        'data': hours,
                        'top': '{}%'.format(idx * 100 / 7 + 5),
                        'height': '{}%'.format(100 / 7 - 10),
                        'gridIndex': idx,
                        'axisLabel': {'interval': 2, 'color': '#9FC131'},
                        })
    # Row title showing the weekday name at the left edge of the row.
    titles.append(dict(text=day, top='{}%'.format(idx * 100 / 7 + 6), left='2%',
                       textStyle=dict(color='#fff200', font_family='FZYaoTi')))
    # Series for this row: the third field of every ``allinfo`` record whose
    # first field equals 7 - idx; the marker size scales with the value.
    scatter.add_yaxis('',
                      y_axis=[int(item[2]) for item in allinfo if item[0] == 7 - idx],
                      symbol_size=JsCode('function(p) { return p[1] * 0.15;}'),
                      label_opts=opts.LabelOpts(is_show=False),
                      )
# Applied once after the loop, when ``titles`` holds all row titles.
scatter.set_global_opts(
    xaxis_opts=opts.AxisOpts(is_show=False),
    yaxis_opts=opts.AxisOpts(is_show=False),
    title_opts=titles,
)

# Heat map of comment counts on a map of China. The chart is configured with
# sequential calls on the same ``geo`` object.
geo = Geo(init_opts=opts.InitOpts(width="1000px", height="600px", bg_color="#0d0735"))
geo.add_schema(maptype="china")
geo.add("评论数量", data, type_=ChartType.HEATMAP)
geo.set_series_opts(label_opts=opts.LabelOpts(is_show=False))

# Centred title, hidden legend, and a piecewise visual map that splits the
# 0-50 range into five segments, placed in the lower-left corner.
geo.set_global_opts(
    title_opts=opts.TitleOpts(
        title="地理位置分布",
        pos_top="2%",
        pos_left="center",
        title_textstyle_opts=opts.TextStyleOpts(
            color="#fff",
            font_family='FZYaoTi',
            font_size=16,
        ),
    ),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=opts.VisualMapOpts(
        is_show=True,
        is_piecewise=True,
        min_=0,
        max_=50,
        split_number=5,
        series_index=0,
        pos_bottom='5%',
        pos_left='5%',
        textstyle_opts=opts.TextStyleOpts(color="#fff"),
    ),
)

# Render the word cloud of the review text to '独行月球.png'.
# The string delimiters must be plain ASCII quotes; the typographic quotes
# that were here before are not valid Python string delimiters.
stylecloud.gen_stylecloud(
    text=' '.join(content_list),
    font_path=r'STXINWEI.TTF',
    palette='cartocolors.diverging.TealRose_2',
    icon_name='fas fa-moon',  # icon used as the mask shape of the cloud
    output_name='独行月球.png',
    background_color='#FFEFD5',
)

网盘: https://pan.baidu.com/doc/share/Olj4d~aKuXT7AF0cq01MrQ-437060019167360
提取码: pyra
以上就是本期为大家整理的全部内容了,赶快练习起来吧,原创不易,喜欢的朋友可以点赞、收藏也可以分享(注明出处)让更多人知道。