168、分组统计(1)
该题目使用的表和数据如下:
/*
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
(101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, null),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2021-01-01 7:00:00');
*/
用户-视频互动表:tb_user_video_log(uid:用户ID, video_id:视频ID, start_time:开始观看时间,end_time:结束观看时间,if_follow:是否关注,if_like:是否点赞,if_retweet:是否转发,comment_id:评论ID),表中数据如下:
mysql> select * from tb_user_video_log;
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| 1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:30 | 0 | 1 | 1 | NULL |
| 2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:24 | 0 | 0 | 1 | NULL |
| 3 | 103 | 2001 | 2021-10-01 11:00:00 | 2021-10-01 11:00:34 | 0 | 1 | 0 | 1732526 |
| 4 | 101 | 2002 | 2021-09-01 10:00:00 | 2021-09-01 10:00:42 | 1 | 0 | 1 | NULL |
| 5 | 102 | 2002 | 2021-10-01 11:00:00 | 2021-10-01 11:00:30 | 1 | 0 | 1 | NULL |
+----+-----+----------+---------------------+----------+-----------+---------+-----------------+
5 rows in set (0.03 sec)
短视频信息表:tb_video_info(video_id:视频ID,author:创作者ID,tag:类别标签,duration:视频时长(秒), release_time:发布时间),表中数据如下:
mysql> select * from tb_video_info;
+----+----------+--------+--------+----------+---------------------+
| id | video_id | author | tag | duration | release_time |
+----+----------+--------+--------+----------+---------------------+
| 1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
| 2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
| 3 | 2003 | 902 | 旅游 | 90 | 2021-01-01 07:00:00 |
+----+----------+--------+--------+----------+---------------------+
3 rows in set (0.00 sec)
【问题】计算 2021 年有播放记录的每个视频的完播率(结果保留三位小数),按完播率降序排序。
说明:视频完播率是指完成播放次数占总播放次数的比例。即:结束观看时间与开始播放时间的差 >= 视频时长,视为完成播放。
查询结果如下:
| video_id | avg_comp_play_rate |
|---|---|
| 2001 | 0.667 |
| 2002 | 0.000 |
解答:
/*
select b.video_id, round(ifnull(a.finish_cnt, 0)/b.cnt, 3) avg_comp_play_rate
from (select tu.video_id, count(*) finish_cnt
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
where year(start_time) = 2021 and
right(timediff(end_time, start_time),2) + substr(timediff(end_time, start_time),4,2)*60
>= tv.duration
group by tu.video_id) a right join
(select video_id, count(*) cnt
from tb_user_video_log
group by video_id) b
on a.video_id = b.video_id
order by avg_comp_play_rate desc;
*/
mysql> select b.video_id, round(ifnull(a.finish_cnt, 0)/b.cnt, 3) avg_comp_play_rate
-> from (select tu.video_id, count(*) finish_cnt
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> where year(start_time) = 2021 and
-> right(timediff(end_time, start_time),2) + substr(timediff(end_time, start_time),4,2)*60
-> >= tv.duration
-> group by tu.video_id) a right join
-> (select video_id, count(*) cnt
-> from tb_user_video_log
-> group by video_id) b
-> on a.video_id = b.video_id
-> order by avg_comp_play_rate desc;
+----------+--------------------+
| video_id | avg_comp_play_rate |
+----------+--------------------+
| 2001 | 0.667 |
| 2002 | 0.000 |
+----------+--------------------+
2 rows in set (0.00 sec)
169、分组统计(2)
该题目使用的表和数据如下:
/*
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, null),
(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:21', 0, 0, 1, null),
(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:20', 0, 1, 0, 1732526),
(102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, null),
(103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 1, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00'),
(2002, 901, '美食', 60, '2021-01-01 7:00:00'),
(2003, 902, '旅游', 90, '2020-01-01 7:00:00');
*/
用户-视频互动表:tb_user_video_log(uid:用户ID, video_id:视频ID, start_time:开始观看时间,end_time:结束观看时间,if_follow:是否关注,if_like:是否点赞,if_retweet:是否转发,comment_id:评论ID),表中数据如下:
mysql> select * from tb_user_video_log;
+----+-----+----------+---------------------+---------------------+-----------+---------+-------+
| id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
+----+-----+----------+---------------------+---------------------+-----------+---------+-------+
| 1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:30 | 0 | 1 | 1 | NULL |
| 2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:21 | 0 | 0 | 1 | NULL |
| 3 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:20 | 0 | 1 | 0 | 1732526 |
| 4 | 102 | 2002 | 2021-10-01 11:00:00 | 2021-10-01 11:00:30 | 1 | 0 | 1 | NULL |
| 5 | 103 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 1 | 0 | 1 | NULL |
+----+-----+----------+---------------------+---------------------+-----------+---------+-------+
5 rows in set (0.00 sec)
短视频信息表:tb_video_info(video_id:视频ID,author:创作者ID,tag:类别标签,duration:视频时长(秒), release_time:发布时间),表中数据如下:
mysql> select * from tb_video_info;
+----+----------+--------+--------+----------+---------------------+
| id | video_id | author | tag | duration | release_time |
+----+----------+--------+--------+----------+---------------------+
| 1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
| 2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
| 3 | 2003 | 902 | 旅游 | 90 | 2020-01-01 07:00:00 |
+----+----------+--------+--------+----------+---------------------+
3 rows in set (0.00 sec)
【问题】计算各类视频的平均播放进度,将进度大于 60% 的类别输出。
说明:
(1)播放进度 = 播放时长 ÷ 视频时长 * 100%,当播放时长大于视频时长时,播放进度均记为 100%。
(2)结果保留两位小数,并按播放进度倒序排序。
查询结果如下:
| tag | avg_play_progress |
|---|---|
| 影视 | 90.00% |
| 美食 | 75.00% |
解答:
/*
select tv.tag, concat(round( avg(
if(
(right(timediff(tu.end_time, tu.start_time),2) +
substr(timediff(tu.end_time, tu.start_time),4,2)*60
) / tv.duration >= 1, 1,
(right(timediff(tu.end_time, tu.start_time),2) +
substr(timediff(tu.end_time, tu.start_time),4,2)*60) / tv. duration
) * 100
), 2),'%') avg_play_progress
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
group by tv.tag having avg_play_progress > '60.00%'
order by avg_play_progress desc;
*/
mysql> select tv.tag, concat(round( avg(
-> if(
-> (right(timediff(tu.end_time, tu.start_time),2) +
-> substr(timediff(tu.end_time, tu.start_time),4,2)*60
-> ) / tv.duration >= 1, 1,
-> (right(timediff(tu.end_time, tu.start_time),2) +
-> substr(timediff(tu.end_time, tu.start_time),4,2)*60) / tv. duration
-> ) * 100
-> ), 2),'%') avg_play_progress
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> group by tv.tag having avg_play_progress > '60.00%'
-> order by avg_play_progress desc;
+--------+-------------------+
| tag | avg_play_progress |
+--------+-------------------+
| 影视 | 90.00% |
| 美食 | 75.00% |
+--------+-------------------+
2 rows in set (0.00 sec)
170、分组统计(3)
该题目使用的表和数据如下:
/*
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 0, 1, 1, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 0, 1, 0, 1732526)
,(102, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, null)
,(103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00')
,(2002, 901, '美食', 60, '2021-01-01 7:00:00')
,(2003, 902, '旅游', 90, '2020-01-01 7:00:00');
*/
用户-视频互动表:tb_user_video_log(uid:用户ID, video_id:视频ID, start_time:开始观看时间,end_time:结束观看时间,if_follow:是否关注,if_like:是否点赞,if_retweet:是否转发,comment_id:评论ID),表中数据如下:
mysql> select * from tb_user_video_log;
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| 1 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 0 | 1 | 1 | NULL |
| 2 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
| 3 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 0 | 1 | 0 | 1732526 |
| 4 | 102 | 2002 | 2021-09-10 11:00:00 | 2021-09-10 11:00:30 | 1 | 0 | 1 | NULL |
| 5 | 103 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 1 | 0 | 0 | NULL |
+----+-----+----------+---------------------+---------------------+-+------------+------------+
5 rows in set (0.00 sec)
短视频信息表:tb_video_info(video_id:视频ID,author:创作者ID,tag:类别标签,duration:视频时长(秒), release_time:发布时间),表中数据如下:
mysql> select * from tb_video_info;
+----+----------+--------+--------+----------+---------------------+
| id | video_id | author | tag | duration | release_time |
+----+----------+--------+--------+----------+---------------------+
| 1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
| 2 | 2002 | 901 | 美食 | 60 | 2021-01-01 07:00:00 |
| 3 | 2003 | 902 | 旅游 | 90 | 2020-01-01 07:00:00 |
+----+----------+--------+--------+----------+---------------------+
3 rows in set (0.00 sec)
【问题】统计在有用户互动的最近一个月(按包含当天在内的近 30 天算,比如 10 月 31 日的近 30 天为 10.2~10.31 之间的数据)中,每类视频的转发量和转发率(保留 3 位小数)。
说明:转发率=转发量 ÷ 播放量。结果按转发率降序排序。查询结果如下:
| tag | retweet_cut | retweet_rate |
|---|---|---|
| 影视 | 2 | 0.667 |
| 美食 | 1 | 0.500 |
解答:
/*
select a.tag, a.retweet_cnt, round(a.retweet_cnt / b.cnt, 3) retweet_rate
from (select tv.tag, count(*) retweet_cnt
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
where tu.start_time >= (select adddate(max(start_time), -30) tb_user_video_log)
and if_retweet = 1
group by tv.tag) a join
(select tv.tag, count(*) cnt
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
where tu.start_time >= (select adddate(max(start_time), -30) tb_user_video_log)
group by tv.tag) b
on a.tag = b.tag
order by retweet_rate desc;
*/
mysql> select a.tag, a.retweet_cnt, round(a.retweet_cnt / b.cnt, 3) retweet_rate
-> from (select tv.tag, count(*) retweet_cnt
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> where tu.start_time >= (select adddate(max(start_time), -30) tb_user_video_log)
-> and if_retweet = 1
-> group by tv.tag) a join
-> (select tv.tag, count(*) cnt
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> where tu.start_time >= (select adddate(max(start_time), -30) tb_user_video_log)
-> group by tv.tag) b
-> on a.tag = b.tag
-> order by retweet_rate desc;
+--------+-------------+--------------+
| tag | retweet_cnt | retweet_rate |
+--------+-------------+--------------+
| 影视 | 2 | 0.667 |
| 美食 | 1 | 0.500 |
+--------+-------------+--------------+
2 rows in set (0.00 sec)
171、分组统计(4)
该题目使用的表和数据如下:
/*
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-09-01 10:00:00', '2021-09-01 10:00:20', 0, 1, 1, null)
,(105, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 1, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 2, 0, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '影视', 30, '2021-01-01 7:00:00')
,(2002, 901, '影视', 60, '2021-01-01 7:00:00')
,(2003, 902, '旅游', 90, '2020-01-01 7:00:00')
,(2004, 902, '美女', 90, '2020-01-01 8:00:00');
*/
用户-视频互动表:tb_user_video_log(uid:用户ID, video_id:视频ID, start_time:开始观看时间,end_time:结束观看时间,if_follow:是否关注,if_like:是否点赞,if_retweet:是否转发,comment_id:评论ID),表中数据如下:
mysql> select * from tb_user_video_log;
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| 1 | 101 | 2001 | 2021-09-01 10:00:00 | 2021-09-01 10:00:20 | 0 | 1 | 1 | NULL |
| 2 | 105 | 2002 | 2021-09-10 11:00:00 | 2021-09-10 11:00:30 | 1 | 0 | 1 | NULL |
| 3 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 1 | 1 | 1 | NULL |
| 4 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
| 5 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 1 | 1 | 0 | 1732526 |
| 6 | 106 | 2002 | 2021-10-01 10:59:05 | 2021-10-01 11:00:05 | 2 | 0 | 0 | NULL |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
6 rows in set (0.00 sec)
短视频信息表:tb_video_info(video_id:视频ID,author:创作者ID,tag:类别标签,duration:视频时长(秒), release_time:发布时间),表中数据如下:
mysql> select * from tb_video_info;
+----+----------+--------+--------+----------+---------------------+
| id | video_id | author | tag | duration | release_time |
+----+----------+--------+--------+----------+---------------------+
| 1 | 2001 | 901 | 影视 | 30 | 2021-01-01 07:00:00 |
| 2 | 2002 | 901 | 影视 | 60 | 2021-01-01 07:00:00 |
| 3 | 2003 | 902 | 旅游 | 90 | 2020-01-01 07:00:00 |
| 4 | 2004 | 902 | 美女 | 90 | 2020-01-01 08:00:00 |
+----+----------+--------+--------+----------+---------------------+
4 rows in set (0.00 sec)
【问题】计算 2021 年里每个创作者每月的涨粉率及截止当月的总粉丝量。
说明:
(1)涨粉率=(加粉量 - 掉粉量) / 播放量。结果按创作者ID、总粉丝量升序排序。
(2)if_follow(是否关注)为 1 表示用户观看视频中关注了视频创作者,为 0 表示此次互动前后关注状态未发生变化,为 2 表示本次观看过程中取消了关注。
查询结果如下:
| author | month | fans_growth_rate | total_fans |
|---|---|---|---|
| 901 | 2021-09 | 0.500 | 1 |
| 901 | 2021-10 | 0.250 | 2 |
结果:
/*
select a.author, a.month,
round(a.fans_growth / b.cnt, 3) fans_growth_rate,
(select @a := @a + a.fans_growth) total_fans
from (select tv.author,
concat(left(start_time,4), '-', substr(start_time, 6,2)) month,
sum(case if_follow when 1 then 1 when 0 then 0 when 2 then -1 end) fans_growth
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
group by tv.author, month) a join
(select tv.author,
concat(left(start_time,4), '-', substr(start_time, 6,2)) month,
count(*) cnt
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
group by tv.author, month) b
on a.author = b.author and a.month = b.month
join (select @a := 0) c
order by a.author, total_fans;
*/
mysql> select a.author, a.month,
-> round(a.fans_growth / b.cnt, 3) fans_growth_rate,
-> (select @a := @a + a.fans_growth) total_fans
-> from (select tv.author,
-> concat(left(start_time,4), '-', substr(start_time, 6,2)) month,
-> sum(case if_follow when 1 then 1 when 0 then 0 when 2 then -1 end) fans_growth
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> group by tv.author, month) a join
-> (select tv.author,
-> concat(left(start_time,4), '-', substr(start_time, 6,2)) month,
-> count(*) cnt
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> group by tv.author, month) b
-> on a.author = b.author and a.month = b.month
-> join (select @a := 0) c
-> order by a.author, total_fans;
+--------+---------+------------------+------------+
| author | month | fans_growth_rate | total_fans |
+--------+---------+------------------+------------+
| 901 | 2021-09 | 0.500 | 1 |
| 901 | 2021-10 | 0.250 | 2 |
+--------+---------+------------------+------------+
2 rows in set (0.03 sec)
172、分组统计(5)
该题目使用的表和数据如下:
/*
DROP TABLE IF EXISTS tb_user_video_log, tb_video_info;
CREATE TABLE tb_user_video_log (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid INT NOT NULL COMMENT '用户ID',
video_id INT NOT NULL COMMENT '视频ID',
start_time datetime COMMENT '开始观看时间',
end_time datetime COMMENT '结束观看时间',
if_follow TINYINT COMMENT '是否关注',
if_like TINYINT COMMENT '是否点赞',
if_retweet TINYINT COMMENT '是否转发',
comment_id INT COMMENT '评论ID'
) CHARACTER SET utf8 COLLATE utf8_bin;
CREATE TABLE tb_video_info (
id INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
video_id INT UNIQUE NOT NULL COMMENT '视频ID',
author INT NOT NULL COMMENT '创作者ID',
tag VARCHAR(16) NOT NULL COMMENT '类别标签',
duration INT NOT NULL COMMENT '视频时长(秒数)',
release_time datetime NOT NULL COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_bin;
INSERT INTO tb_user_video_log(uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id) VALUES
(101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:20', 1, 1, 0, null)
,(105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 0, 0, 1, null)
,(102, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 1, 1, null)
,(101, 2002, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 1, null)
,(101, 2002, '2021-09-27 11:00:00', '2021-09-27 11:00:30', 1, 1, 0, null)
,(102, 2002, '2021-09-28 11:00:00', '2021-09-28 11:00:30', 1, 0, 1, null)
,(103, 2002, '2021-09-29 11:00:00', '2021-09-29 11:00:30', 1, 0, 1, null)
,(102, 2002, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 1, null)
,(101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 0, null)
,(102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, null)
,(103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526)
,(106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 2, 0, 1, null)
,(107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 0, 1, null)
,(108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, null)
,(109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:05', 0, 1, 0, null);
INSERT INTO tb_video_info(video_id, author, tag, duration, release_time) VALUES
(2001, 901, '旅游', 30, '2020-01-01 7:00:00')
,(2002, 901, '旅游', 60, '2021-01-01 7:00:00')
,(2003, 902, '影视', 90, '2020-01-01 7:00:00')
,(2004, 902, '美女', 90, '2020-01-01 8:00:00');
*/
用户-视频互动表:tb_user_video_log(uid:用户ID, video_id:视频ID, start_time:开始观看时间,end_time:结束观看时间,if_follow:是否关注,if_like:是否点赞,if_retweet:是否转发,comment_id:评论ID),表中数据如下:
mysql> select * from tb_user_video_log;
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| id | uid | video_id | start_time | end_time | if_follow | if_like | if_retweet | comment_id |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
| 1 | 101 | 2001 | 2021-09-24 10:00:00 | 2021-09-24 10:00:20 | 1 | 1 | 0 | NULL |
| 2 | 105 | 2002 | 2021-09-25 11:00:00 | 2021-09-25 11:00:30 | 0 | 0 | 1 | NULL |
| 3 | 102 | 2002 | 2021-09-25 11:00:00 | 2021-09-25 11:00:30 | 1 | 1 | 1 | NULL |
| 4 | 101 | 2002 | 2021-09-26 11:00:00 | 2021-09-26 11:00:30 | 1 | 0 | 1 | NULL |
| 5 | 101 | 2002 | 2021-09-27 11:00:00 | 2021-09-27 11:00:30 | 1 | 1 | 0 | NULL |
| 6 | 102 | 2002 | 2021-09-28 11:00:00 | 2021-09-28 11:00:30 | 1 | 0 | 1 | NULL |
| 7 | 103 | 2002 | 2021-09-29 11:00:00 | 2021-09-29 11:00:30 | 1 | 0 | 1 | NULL |
| 8 | 102 | 2002 | 2021-09-30 11:00:00 | 2021-09-30 11:00:30 | 1 | 1 | 1 | NULL |
| 9 | 101 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:20 | 1 | 1 | 0 | NULL |
| 10 | 102 | 2001 | 2021-10-01 10:00:00 | 2021-10-01 10:00:15 | 0 | 0 | 1 | NULL |
| 11 | 103 | 2001 | 2021-10-01 11:00:50 | 2021-10-01 11:01:15 | 1 | 1 | 0 | 1732526 |
| 12 | 106 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 2 | 0 | 1 | NULL |
| 13 | 107 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 1 | 0 | 1 | NULL |
| 14 | 108 | 2002 | 2021-10-02 10:59:05 | 2021-10-02 11:00:05 | 1 | 1 | 1 | NULL |
| 15 | 109 | 2002 | 2021-10-03 10:59:05 | 2021-10-03 11:00:05 | 0 | 1 | 0 | NULL |
+----+-----+----------+---------------------+---------------------+-----------+---------+--------+
15 rows in set (0.00 sec)
短视频信息表:tb_video_info(video_id:视频ID,author:创作者ID,tag:类别标签,duration:视频时长(秒), release_time:发布时间),表中数据如下:
mysql> select * from tb_video_info;
+----+----------+--------+--------+----------+---------------------+
| id | video_id | author | tag | duration | release_time |
+----+----------+--------+--------+----------+---------------------+
| 1 | 2001 | 901 | 旅游 | 30 | 2020-01-01 07:00:00 |
| 2 | 2002 | 901 | 旅游 | 60 | 2021-01-01 07:00:00 |
| 3 | 2003 | 902 | 影视 | 90 | 2020-01-01 07:00:00 |
| 4 | 2004 | 902 | 美女 | 90 | 2020-01-01 08:00:00 |
+----+----------+--------+--------+----------+---------------------+
4 rows in set (0.00 sec)
【问题】统计 2021 年国庆头 3 天每类视频每天的近一周总点赞量和一周内最大单天转发量,结果按视频类别降序、日期升序排序。假设数据库中数据足够多,至少每个类别下国庆头 3 天及之前一周的每天都有播放记录。查询结果如下:
| tag | dt | sum_like_cnt_7d | max_retweet_cnt_7d |
|---|---|---|---|
| 旅游 | 2021-10-01 | 5 | 2 |
| 旅游 | 2021-10-02 | 5 | 3 |
| 旅游 | 2021-10-03 | 6 | 3 |
解答:
/*
select tv.tag, date(tu.start_time) dt,
(select sum(if_like) from tb_user_video_log
where date(start_time) between adddate(dt, -6) and dt
and video_id in (select video_id from tb_video_info where tag = tv.tag)
) sum_like_cnt_7d,
(select sum(if_retweet) aa
from tb_user_video_log
where date(start_time) between adddate(dt, -6) and dt
and video_id in (select video_id from tb_video_info where tag = tv.tag)
group by date(start_time)
order by aa desc limit 1
) max_retweet_cnt_7d
from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
where tu.start_time between '2021-10-01 00:00:00' and '2021-10-03 23:59:59'
group by tv.tag, dt
order by tv.tag desc, dt;
*/
mysql> select tv.tag, date(tu.start_time) dt,
-> (select sum(if_like) from tb_user_video_log
-> where date(start_time) between adddate(dt, -6) and dt
-> and video_id in (select video_id from tb_video_info where tag = tv.tag)
-> ) sum_like_cnt_7d,
-> (select sum(if_retweet) aa
-> from tb_user_video_log
-> where date(start_time) between adddate(dt, -6) and dt
-> and video_id in (select video_id from tb_video_info where tag = tv.tag)
-> group by date(start_time)
-> order by aa desc limit 1
-> ) max_retweet_cnt_7d
-> from tb_user_video_log tu join tb_video_info tv on tu.video_id = tv.video_id
-> where tu.start_time between '2021-10-01 00:00:00' and '2021-10-03 23:59:59'
-> group by tv.tag, dt
-> order by tv.tag desc, dt;
+--------+------------+-----------------+--------------------+
| tag | dt | sum_like_cnt_7d | max_retweet_cnt_7d |
+--------+------------+-----------------+--------------------+
| 旅游 | 2021-10-01 | 5 | 2 |
| 旅游 | 2021-10-02 | 5 | 3 |
| 旅游 | 2021-10-03 | 6 | 3 |
+--------+------------+-----------------+--------------------+
3 rows in set (0.00 sec)