常用日期函数:
-- 返回当前日期
select curdate(); # 2024-05-06
-- 返回当前时间
select curtime(); # 21:41:58
-- 返回当前日期加时间
select now(); # 2024-05-06 21:40:53
-- 获取指定date的年份
select year('2024-05-06 21:40:53'); # 2024
-- 获取指定date的月份
select month('2024-05-06 21:40:53'); # 5
-- 获取指定date的年份
select day('2024-05-06 21:40:53'); # 6
-- 返回一个日期/时间值加上一个时间间隔expr后的时间值
select date_add('2024-05-06',interval 1 year ); # 2025-05-06
select date_add('2024-05-06',interval 1 month ); # 2024-06-06
select date_add('2024-05-06',interval 1 day ); # 2024-05-07
-- timestampdiff(单位 ,date1,date2) 返回俩个日期之间的差值
# 单位有:
# 1、YEAR 年
# 2、QUARTER 季度
# 3、MONTH 月
# 4、WEEk 星期
# 5、DAY 天
# 6、HOUR 小时
# 7、MINUTE 分钟
# 8、SECOND 秒
# 9、FRAC_SECOND 毫秒
select timestampdiff(day ,'2024-05-06','2024-06-06'); # 31
-- DATE_FORMAT() 函数用于以不同的格式显示日期/时间数据。
select DATE_FORMAT(NOW(),'%Y-%m-%d'); # 2024-05-06
select DATE_FORMAT(NOW(),'%d %b %Y %T'); # 06 May 2024 22:02:33
-- 返回一个日期/时间值加上一个时间间隔expr后的时间值
select date_sub('2024-05-06',interval 1 day ); # 2024-05-05
-- 截取字符串
sub_str('2024-08-22',1,4) ----> '2024'
【注】索引是从1开始,截取4个字符
---------------------------------------------------
-- 字符串拼接
concat_ws(',',delivery_time,customer_id) -- 将两列的数据按‘,’拼接成一列
concat(delivery_time,customer_id) -- 字符串拼接
行转列的常规做法是,group by+sum(if())【或count(if())】
问题描述:

代码实现:
-- 行转列
-- 建表
create table table2(year int,month int ,amount double) CHARACTER SET utf8 COLLATE utf8_bin;
insert into table2 values
(1991,1,1.1),
(1991,2,1.2),
(1991,3,1.3),
(1991,4,1.4),
(1992,1,2.1),
(1992,2,2.2),
(1992,3,2.3),
(1992,4,2.4);
-- 实现
select year,
sum(if(month = 1,amount,0)) as m1,
sum(if(month = 2,amount,0)) as m2,
sum(if(month = 3,amount,0)) as m3,
sum(if(month = 4,amount,0)) as m4
from table2
group by year;
问题描述:

代码实现:

sql实现:
# 建表:
CREATE TABLE game (
name VARCHAR(10) ,
login_date date
) CHARACTER SET utf8 COLLATE utf8_bin;
insert into game values
('张三','2021-01-01'),
('张三','2021-01-02'),
('张三','2021-01-03'),
('张三','2021-01-02'),
('李四','2021-01-01'),
('李四','2021-01-02'),
('王五','2021-01-03'),
('王五','2021-01-02'),
('王五','2021-01-02');
-- sql计算
with t1 as (
select
distinct *
from game
),# 先对数据进行去重
t2 as (
select *,
row_number() over (partition by name order by login_date) as tmp
from t1
),# 采用窗口函数增加一列,对name组内,按日期由大到小进行顺序标号
t3 as (select
*,
date_sub(login_date,interval tmp day ) as tmp_date
from t2)
select
name
from t3
group by name,tmp_date
having count(1) >= 3;

with t1 as (
select
name,
login_date
from game
group by name,login_date
),
t2 as (
select
*,
row_number() over (partition by name order by login_date) as rk
from t1
),
t3 as (
select
*,
date_sub(login_date,interval rk day) as tmp_date
from t2
),
t4 as (
select
name,count(tmp_date) as login_nums
from t3
group by name,tmp_date
# having max(count(tmp_date))
)
select
name,
max(login_nums)
from t4
group by name

分析:

sql实现:
# 建表
create table if not exists tb_cuid_1d
(
cuid int comment '用户的唯一标识',
os varchar(10) comment '平台',
soft_version int comment '版本',
event_day date comment '日期'
) CHARACTER SET utf8 COLLATE utf8_bin;
insert into tb_cuid_1d values
(1,'android',1,'2020-04-01'),
(1,'android',1,'2020-04-02'),
(1,'android',1,'2020-04-08'),
(2,'android',1,'2020-04-01'),
(3,'android',1,'2020-04-02');
-- sql计算
with t1 as (
select
*
from tb_cuid_1d
where event_day in ('2020-04-01','2020-04-02','2020-04-08')
),
t2 as ( # 计算用户的留存情况,非零表示用户当日的访问次数,null表示用户当日没有访问
select
cuid,
count(if(event_day='2020-04-01',1,null)) as cnt1,
count(if(event_day='2020-04-02',1,null)) as cnt2,
count(if(event_day='2020-04-08',1,null)) as cnt8
from t1
group by cuid
having cnt1 > 0
),
t3 as ( # 计算留存用户数
select
count(cuid) as uv1,
count(if(cnt2 > 0,1,null)) as uv2,
count(if(cnt8 > 0,1,null)) as uv7
from t2
)
select
uv2 / uv1, # 留存率计算
uv7 / uv1
from t3;
select
t1.first_time as dt,
round(count(t2.in_time)/count(t1.first_time),2) as uv_left_rate
from
(select uid
,min(date(in_time)) as first_time
from tb_user_log
group by uid) as t1 -- 每天新用户表
left join (select uid , date(in_time) as in_time
from tb_user_log
union
select uid , date(out_time)
from tb_user_log) as t2 -- 用户活跃表
on t1.uid=t2.uid and t1.first_time=date_sub(t2.in_time,INTERVAL 1 day)
where date_format(t1.first_time,'%Y-%m') = '2021-11'
group by t1.first_time
order by t1.first_time;
需求常见词:【每组xxx的第一个yyy的zzz】【每组xxx的最后一个】
【每组xxx的前n个】【每组最xx的前n个】
公式:row_number() over(partition by 组名) as rn,再筛选rn<=N名


sql代码:
with t1 as (
select
*,
row_number() over (partition by deptno order by sal desc ) as '部门薪资排名',
sum(sal) over(partition by deptno) as '部门总工资'
from emp
)
select
empno as '员工工号',
sal as '员工工资',
deptno as '部门编号',
部门薪资排名,
部门总工资,
round(sal/部门总工资,2)
from t1
where 部门薪资排名 <= 3;
有一个log_date为日期的分区表av_play_info,记录的是一个up主的稿件,每天的播放量情况。
log_date video_id play_cnt
string bigint bigint
从20230101-20240415,截止到每一天的,年度总播放和月度总播放。
例如20230612这一天,年度总播放 看的就是 20230101-20230612 的累计播放
月度总播放就是20230601-20230612的累计播放.
输出3个字段
log_date, year_play_cnt, month_play_cnt
with t1 as (
select
log_date,
sum(play_cnt) as total_play_day
from av_play_info
group by log_date
),
t2 as (
select
log_date,
sum(total_play_day) over (partition by substring(log_date,1,4) order by log_date) as year_play,
sum(total_play_day) over (partition by substring(log_date,1,6) order by log_date) as month_play
from t1
)
select
*
from t2
order by log_date
select
tag,
sum(if_retweet) as retweet_cut,
round(sum(if_retweet)/count(*),3) as retweet_rate
from
(select
tb1.video_id,
tb1.if_retweet,
tb2.tag
from tb_user_video_log tb1 left join tb_video_info tb2
on tb1.video_id = tb2.video_id
where timestampdiff(day,DATE((select max(start_time) FROM tb_user_video_log)), DATE(start_time)) <= 29) as t1
group by tag
order by retweet_rate desc;
## 考察的知识点:
1、timestampdiff(day,date1,date2) 表示date1和date2俩个日期之间相差的天数
2、date(date1) 将日期转化为'xxxx-xx-xx'的格式
3、如何在where语句后面使用聚合函数,以及使用聚合函数进行查询时,必须使用原表名
-- 举例如下:
-- 输入
user_id bal dt
001 100 20240815
001 NULL 20240816
001 NULL 20240817
001 300 20240818
001 500 20240819
001 NULL 20240820
-- 输出
user_id bal_fix dt
001 100 20240815
001 100 20240816
001 100 20240817
001 300 20240818
001 500 20240819
001 500 20240820

create table dwd_fnd_bal_usr_dd (
user_id varchar(20),
bal bigint,
dt varchar(20)
);
INSERT INTO dwd_fnd_bal_usr_dd VALUES
('001',100,'20240815'),
('001',NULL,'20240816'),
('001',NULL,'20240817'),
('001',300,'20240818'),
('001',500,'20240819'),
('001',NULL,'20240820')
;
--具体代码------------------------------------------------------------------------------
select
user_id,
sum(bal) over(partition by user_id, groupid order by dt) as bal_fix
,dt
from (
select
user_id,
bal,
dt,
sum(if(bal > 0, 1, 0)) over(partition by user_id order by dt) as groupid
from dwd_fnd_bal_usr_dd
) t
;
使用 LEFT JOIN 和 WHERE 子句
使用 LEFT JOIN 将表A和表B连接起来,然后使用 WHERE 子句筛选出那些在表B中没有对应记录的行。
SELECT A.*
FROM A
LEFT JOIN B ON A.id = B.id
WHERE B.id IS NULL;
在使用group by的语句中,select 后面只能跟group by字段和聚合函数。
可以用在select后面去重
也可以用在聚合函数里,用于某个字段中的数据去重计算
SELECT *
FROM a
JOIN b USING(course_id);
解释
这条SQL语句的作用是从两个表 a 和 b 中根据 course_id 字段进行内连接(JOIN),并选择所有列的数据。USING(course_id) 表示 course_id 是连接条件,且在 a 和 b 表中都存在。
详细说明
JOIN 是 INNER JOIN,即只返回在 a 和 b 表中都有匹配 course_id 的行。USING(course_id) 指定了连接的字段 course_id,这意味着:
course_id 一次。course_id 在结果集中不会重复,因为它在 a 和 b 表中都存在,并且是连接条件。注意事项
USING,而需要使用 ON 子句LEFT JOIN 或 RIGHT JOIN 等。在 MySQL 中,WHERE 子句后确实不能直接使用聚合函数(如 SUM(), COUNT(), AVG(), MAX(), MIN() 等)。聚合函数用于计算一组值并返回一个单一的值,而 WHERE 子句用于过滤单行记录。因此,聚合函数不能在 WHERE 子句中直接使用,因为在执行 WHERE 子句时,行记录还没有被聚合。