• hive sql多表练习


    hive sql多表练习

    准备原始数据集

    学生表 student.csv
    讲师表 teacher.csv
    课程表 course.csv
    分数表 score.csv

    学生表 student.csv

    001,彭于晏,1995-05-16,男
    002,胡歌,1994-03-20,男
    003,周杰伦,1995-04-30,男
    004,刘德华,1998-08-28,男
    005,唐国强,1993-09-10,男
    006,陈道明,1992-11-12,男
    007,陈坤,1999-04-09,男
    008,吴京,1994-02-06,男
    009,郭德纲,1992-12-05,男
    010,于谦,1998-08-23,男
    011,潘长江,1995-05-27,男
    012,杨紫,1996-12-21,女
    013,蒋欣,1997-11-08,女
    014,赵丽颖,1990-01-09,女
    015,刘亦菲,1993-01-14,女
    016,周冬雨,1990-06-18,女
    017,范冰冰,1992-07-04,女
    018,李冰冰,1993-09-24,女
    019,邓紫棋,1994-08-31,女
    020,宋丹丹,1991-03-01,女
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20

    讲师表 teacher.csv

    1001,张高数
    1002,李体音
    1003,王子文
    1004,刘丽英
    
    • 1
    • 2
    • 3
    • 4

    课程表 course.csv

    01,语文,1003
    02,数学,1001
    03,英语,1004
    04,体育,1002
    05,音乐,1002
    
    • 1
    • 2
    • 3
    • 4
    • 5

    分数表 score.csv

    001,01,94
    002,01,74
    004,01,85
    005,01,64
    006,01,71
    007,01,48
    008,01,56
    009,01,75
    010,01,84
    011,01,61
    012,01,44
    013,01,47
    014,01,81
    015,01,90
    016,01,71
    017,01,58
    018,01,38
    019,01,46
    020,01,89
    001,02,63
    002,02,84
    004,02,93
    005,02,44
    006,02,90
    007,02,55
    008,02,34
    009,02,78
    010,02,68
    011,02,49
    012,02,74
    013,02,35
    014,02,39
    015,02,48
    016,02,89
    017,02,34
    018,02,58
    019,02,39
    020,02,59
    001,03,79
    002,03,87
    004,03,89
    005,03,99
    006,03,59
    007,03,70
    008,03,39
    009,03,60
    010,03,47
    011,03,70
    012,03,62
    013,03,93
    014,03,32
    015,03,84
    016,03,71
    017,03,55
    018,03,49
    019,03,93
    020,03,81
    001,04,54
    002,04,100
    004,04,59
    005,04,85
    007,04,63
    009,04,79
    010,04,34
    013,04,69
    014,04,40
    016,04,94
    017,04,34
    020,04,50
    005,05,85
    007,05,63
    009,05,79
    015,05,59
    018,05,87
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74

    创建数据库和数据表

    create database chap05;
    use chap05;
    
    • 1
    • 2
    -- 学生表 student.csv
    create external table student (
    	stu_id string comment '学生ID',
    	stu_name string comment '学生姓名',
        birthday string comment '出生日期',
        gender string comment '学生性别'
    )
     row format delimited fields terminated by ','
     lines terminated by '\n'
     stored as textfile
     location '/quiz03/student';
    
    load data local inpath '/root/data/data02/student.csv' overwrite into table student;
    
    select * from student;
    
    -- 讲师表 teacher.csv
    create external table teacher (
    	tea_id string comment '课程ID',
    	tea_name string comment '课程名称'
    )
     row format delimited fields terminated by ','
     lines terminated by '\n'
     stored as textfile
     location '/quiz03/teacher';
    
    load data local inpath '/root/data/data02/teacher.csv' overwrite into table teacher;
    
    select * from teacher;
    
    -- 课程表 course.csv
    create external table course (
    	course_id string comment '课程ID',
    	course_name string comment '课程名称',
        tea_id string comment '讲师ID'
    )
     row format delimited fields terminated by ','
     lines terminated by '\n'
     stored as textfile
     location '/quiz03/course';
    
    load data local inpath '/root/data/data02/course.csv' overwrite into table course;
    
    select * from course;
    
    -- 分数表 score.csv
    create external table score (
    	stu_id string comment '学生ID',
    	course_id string comment '课程ID',
        score int comment '成绩'
    )
     row format delimited fields terminated by ','
     lines terminated by '\n'
     stored as textfile
     location '/quiz03/score';
    
    load data local inpath '/root/data/data02/score.csv' overwrite into table score;
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57

    SQL练习

    -- 查询所有学生信息
    select * from student;
    
    -- 查询周姓学生信息
    select * from student where stu_name like '周%';
    
    -- 查询周姓学生数量
    select count(*) from student where stu_name like '周%';
    
    -- 查询 学生ID 004 的分数 超过 85 的成绩
    select stu.stu_id, stu_name, birthday, gender, course_id, score from student stu inner join score s
       on stu.stu_id = 004 and stu.stu_id = s.stu_id and score > 85;
    
    -- 查询 学生程ID 004 的成绩降序
    select stu.stu_id, stu_name, birthday, gender, course_id, score from student stu inner join score s
       on stu.stu_id = 004  and  stu.stu_id = s.stu_id order by score desc;
    
    -- 查询 数学成绩不及格学生及其对应的成绩 学生成绩
    select stu.stu_id, stu_name, birthday, gender, course_name, score from student stu
        inner join score s inner join course c on s.course_id = c.course_id and  stu.stu_id = s.stu_id
        and c.course_name = '数学' and score < 60 order by score;
    
    -- 查询男女生人数
    select gender,count(*) from student group by gender;
    
    -- 查询编号为 02 的课程平均成绩
    select round(avg(score),2) from score where course_id = 02;
    
    -- 查询每科课程平均成绩
    select course_id,round(avg(score),2) from score group by course_id;
    
    -- 查询参加考试学生人数
    select count(distinct stu_id) from score where score is not null and score >= 0;
    select count(stu_id) from (select stu_id from score where score is not null and score >= 0 group by stu_id) t;
    
    -- 查询每科有多少学生参加考试
    select course_id,count(stu_id) from score where score is not null and score >= 0 group by course_id;
    
    -- 查询未参加考试的学生信息
    select * from student where stu_id in (
        select stu.stu_id from student stu
            left join score s on stu.stu_id = s.stu_id
            group by stu.stu_id having count(*) != (select count(*) from course)
    );
    
    -- 查询选修至少 4 门 以上课程学生的学号
    select stu_id,count(course_id) course_count from score
     group by stu_id
     having course_count >= 4;
    
    -- 查询姓氏相同学生名单 并且同姓人数大于 2 的姓氏
    select first_name ,count(*) first_name_count from (
     select stu_id,stu_name,substr(stu_name,1,1) as first_name
     from student
    ) ts
     group by ts.first_name
     having first_name_count > 1;
    
    -- 查询每门功课的学生的平均成绩 按照平均成绩升序 平均成绩相同按照课程编号降序
    select course_id, avg(score) avg_score
     from score
     group by course_id
     order by avg_score,course_id desc;
    
    -- 统计参加考试人数大于等于 15 的学科
    select course_id,count(*) as stu_count from score group by course_id having stu_count > 15;
    
    -- 查询学生总成绩并按照总成绩降序排序
    select stu_id, sum(score) sum_score
     from score
     group by stu_id
     order by sum_score desc;
    
    -- 按照指定格式显示 stu_id 语文 数学 英语 选课数 平均成绩
    select
     s.stu_id,
     sum(`if`(c.course_name='语文',score,0)) as `语文`,
     sum(`if`(c.course_name='数学',score,0)) as `数学`,
     sum(`if`(c.course_name='英语',score,0)) as `英语`,
     count(s.course_id) as `选课数`,
     avg(s.score) as `平均成绩`
     from course c left join score s
     on c.course_id = s.course_id
     group by s.stu_id
     order by `平均成绩` desc;
    
    -- 查询一共参加了三门功课且其中一门为语文的学生id 和 姓名
    select s.stu_id,stu_name from
    (select t1.stu_id ,count(t1.course_id) course_count  from
    	(select stu_id,course_id from score
    		where stu_id in ( select stu_id from score where course_id = "01")
    	) t1 group by  t1.stu_id having course_count >=3
    ) t2 join student s on t2.stu_id = s.stu_id;
    -- 分解
    -- 查询该学生的姓名
    select s.stu_id,stu_name from
    -- 成绩表中学习科目数量 >=3 科的学生
    (select t1.stu_id ,count(t1.course_id) course_count  from
     	--  报名了语文的学生还报名了那些学科
    	(select stu_id,course_id from score
    		where stu_id in (
                -- 查询报名了语文的学生ID
                select stu_id from score where course_id = "01"
            )
    	) t1 group by  t1.stu_id having course_count >=3
    ) t2 join student s on t2.stu_id = s.stu_id;
    
    -- 查询两门以上的课程不及格学生的学号及其平均成绩
    -- 1、先按照学生分组 过滤出成绩低于60的数量 大于1
    -- 2、计算所有学生的平均成绩
    -- 3、两个子查询相互join
    select  t1.stu_id,t2.avg_score from
    (select stu_id, sum(if(score < 60, 1, 0)) as result from score group by stu_id having result > 1) t1
     left join
    (select stu_id,avg(score) as avg_score from score group by stu_id) t2 on t1.stu_id =t2.stu_id;
    
    -- 查询所有学生的学号、姓名、选课数、总成绩
    select
        stu.stu_id,stu.stu_name,count(s.course_id) count_course ,nvl(sum(s.score),0) total_score
    from student stu left join score s on stu.stu_id = s.stu_id
    group by stu.stu_id, stu.stu_name order by stu.stu_id;
    
    -- 平均成绩大于 85 的所有学生的学号、姓名、平均成绩
    select
        stu.stu_id,stu.stu_name ,nvl(avg(s.score),0) as `avg_score`
    from student stu left join score s on stu.stu_id = s.stu_id
    group by stu.stu_id, stu.stu_name having nvl(avg(s.score),0) > 85 order by stu.stu_id
    
    -- 查询学生的选课情况:学号,姓名,课程号,课程名称
    select student.stu_id,student.stu_name,c.course_id,c.course_name from student
    right join score s on student.stu_id = s.stu_id
    left join course c on s.course_id = c.course_id
    
    -- 查询学生的没有选课情况:学号,姓名
    select stu_id,stu_name from
    (
    select student.stu_id,student.stu_name, s.course_id from student
    left join score s on student.stu_id = s.stu_id
    left join course c on s.course_id = c.course_id
    ) t where course_id is null
    
    -- 查询出每门课程的及格人数和不及格人数
    select c.course_id,course_name,pass,fail
    from course c join
    (
    select
     course_id,sum(if(score >= 60,1,0)) as `pass`, sum(if(score < 60,1,0)) as `fail`
     from score group by course_id
    ) t on c.course_id = t.course_id
    
    -- 查询课程编号为03且课程成绩在80分以上的学生的学号和姓名及课程信息
    select t1.stu_id,s.stu_name,t1.course_id,c.course_name,t1.score from
    (select * from score where course_id = '03' and score > 80) t1
    left join student s on s.stu_id = t1.stu_id
    left join course c on t1.course_id = c.course_id
    
    -- 查询语文成绩低于平均分数的学生是谁,教师是谁
    select t3.stu_id,t3.stu_name,t3.`avg_score`,t.tea_name from
    (select t2.stu_id,t2.`avg_score`,s.stu_name,t2.course_id,c.tea_id from
        (select t1.stu_id,t1.course_id,t1.`avg_score` from
            (select stu_id,s.course_id, avg(score) as `avg_score` from score s right join
             (select course_id from course where course_name = '语文') t1 on t1.course_id = s.course_id
             group by stu_id,s.course_id) t1
            where t1.`avg_score` < (select avg(score) as `avg_score` from score s right join (select course_id from course where course_name = '语文') t1 on t1.course_id = s.course_id)
        ) t2 left join student s on t2.stu_id = s.stu_id
        left join course c on t2.course_id = c.course_id
    )t3 left join teacher t on t3.tea_id = t.tea_id;
    
    -- 查询所有学生总成绩和平均成绩,
    -- 且他们的总成绩低于平均成绩的有多少个人,
    -- 高于平均成绩的有多少人,
    -- 低于平均成绩的男生和女生分别有多少人,
    -- 且他们的任课老师是谁。
    
    -- 统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-75],[70-60],[0-60]及所占百分比
    select c.course_id, course_name, a, b, c, d from course c left join (
        select course_id,
           concat(round((sum(`if`(score >= 85,1,0)) / count(*)) * 100,2), '%') as a,
           concat(round((sum(`if`(score between 75 and 84,1,0)) / count(*)) * 100,2), '%') as b,
           concat(round((sum(`if`(score between 60 and 74,1,0)) / count(*)) * 100,2), '%') as c,
           concat(round((sum(`if`(score < 60,1,0)) / count(*)) * 100,2), '%') as d
        from score group by course_id
    ) t on t.course_id = c.course_id;
    
    -- 查询各科成绩最高分、最低分和平均分,以如下形式显示:
    -- 课程ID,课程name,最高分,最低分,平均分,中下率,中等率,优良率,优秀率
    -- 及格为>=60,中等为:70-80,优良为:80-90,优秀为:>=90
    select c.course_id, course_name, `最高分`,`最低分`,`平均分`,`优秀率`, `优良率`, `中等率`, `中下率`,`芸芸众生` from course c left join (
        select course_id,
               max(score) `最高分`,
               min(score) `最低分`,
               round(avg(score),2) `平均分`,
           concat(round((sum(`if`(score >= 90,1,0)) / count(*)) * 100,2), '%') as `优秀率`,
           concat(round((sum(`if`(score between 80 and 89,1,0)) / count(*)) * 100,2), '%') as `优良率`,
           concat(round((sum(`if`(score between 70 and 79,1,0)) / count(*)) * 100,2), '%') as `中等率`,
           concat(round((sum(`if`(score between 60 and 69,1,0)) / count(*)) * 100,2), '%') as `中下率`,
           concat(round((sum(`if`(score < 60,1,0)) / count(*)) * 100,2), '%') as `芸芸众生`
        from score group by course_id
    ) t on t.course_id = c.course_id;
    
    -- 查询每门课程的男生和女生的比例是多少
    select t1.course_id,t1.gender, concat(round((count_gender / count_course_student) * 100,2), '%') as proportion from
        (select course_id,gender,count(*) count_gender from score s1 inner join student s2 on s1.stu_id = s2.stu_id group by course_id,gender) t1
        inner join
        (select course_id,count(*) count_course_student from score s1 inner join student s2 on s1.stu_id = s2.stu_id group by course_id) t2
        on t2.course_id = t1.course_id;
    
    -- 每门学科的成绩是男生比较优一些还是女生比较优一些,并且每门课程的最高分是谁。
    select battle_t.course_id,male_avg_score, female_avg_score, battle, max_stu_id,min_stu_id, max_score, min_score from
        (select male_t.course_id,round(male_avg_score,2) male_avg_score,round(female_avg_score,2) female_avg_score,
               case
                   when male_avg_score > female_avg_score then '男性优秀'
                   when male_avg_score < female_avg_score then '女性优秀'
                   else '势均力敌'
               end battle
            from
            (select course_id,avg(score) male_avg_score from score s1 inner join student s2 on s1.stu_id = s2.stu_id and gender = '男' group by course_id) male_t
            inner join
            (select course_id,avg(score) female_avg_score from score s1 inner join student s2 on s1.stu_id = s2.stu_id and gender = '女' group by course_id) female_t
            on male_t.course_id = female_t.course_id) battle_t
        inner join
        (select max_t.course_id,max_t.stu_id max_stu_id,max_score,min_t.stu_id min_stu_id,min_score from
            (select stu_id, s.course_id, max_score from score s
                    inner join
                    (select course_id, max(score) max_score from score group by course_id) t
                    on s.course_id = t.course_id and max_score = score) max_t
            full join
            (select stu_id, s.course_id, min_score from score s
                    inner join
                    (select course_id,min(score) min_score from score group by course_id) t
                    on s.course_id = t.course_id and min_score = score) min_t
            on max_t.course_id = min_t.course_id) info
        on battle_t.course_id = info.course_id;
    
    -- 课程编号为"01"且课程分数小于60,按分数降序排列的学生信息
    
    select s.stu_id, stu.stu_name, stu.birthday, stu.gender,s.score
        from score s join student stu on s.stu_id = stu.stu_id
        where s.score < 60  order by s.score desc;
    
    -- 查询所有课程成绩在70分以上的学生的姓名、课程名称和分数,按分数升序
    select stu.stu_name, c.course_name, s2.score
        from student stu join
        (select s.stu_id, sum(`if`(s.score >= 70, 0, 1)) as `is_ok` from score s group by s.stu_id having is_ok = 0) t1
        on stu.stu_id = t1.stu_id left join score s2 on stu.stu_id = s2.stu_id left join course c on s2.course_id = c.course_id
        order by s2.score;
    
    -- 查询某学生不同课程的成绩相同的学生编号、课程编号、学生成绩
    select s1.stu_id,collect_list(s1.course_id) as course_id,collect_set(s1.score) as score
        from score s1 join score s2 on s1.stu_id = s2.stu_id
        and s1.course_id != s2.course_id
        and s1.score == s2.score
    	group by s1.stu_id;
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86
    • 87
    • 88
    • 89
    • 90
    • 91
    • 92
    • 93
    • 94
    • 95
    • 96
    • 97
    • 98
    • 99
    • 100
    • 101
    • 102
    • 103
    • 104
    • 105
    • 106
    • 107
    • 108
    • 109
    • 110
    • 111
    • 112
    • 113
    • 114
    • 115
    • 116
    • 117
    • 118
    • 119
    • 120
    • 121
    • 122
    • 123
    • 124
    • 125
    • 126
    • 127
    • 128
    • 129
    • 130
    • 131
    • 132
    • 133
    • 134
    • 135
    • 136
    • 137
    • 138
    • 139
    • 140
    • 141
    • 142
    • 143
    • 144
    • 145
    • 146
    • 147
    • 148
    • 149
    • 150
    • 151
    • 152
    • 153
    • 154
    • 155
    • 156
    • 157
    • 158
    • 159
    • 160
    • 161
    • 162
    • 163
    • 164
    • 165
    • 166
    • 167
    • 168
    • 169
    • 170
    • 171
    • 172
    • 173
    • 174
    • 175
    • 176
    • 177
    • 178
    • 179
    • 180
    • 181
    • 182
    • 183
    • 184
    • 185
    • 186
    • 187
    • 188
    • 189
    • 190
    • 191
    • 192
    • 193
    • 194
    • 195
    • 196
    • 197
    • 198
    • 199
    • 200
    • 201
    • 202
    • 203
    • 204
    • 205
    • 206
    • 207
    • 208
    • 209
    • 210
    • 211
    • 212
    • 213
    • 214
    • 215
    • 216
    • 217
    • 218
    • 219
    • 220
    • 221
    • 222
    • 223
    • 224
    • 225
    • 226
    • 227
    • 228
    • 229
    • 230
    • 231
    • 232
    • 233
    • 234
    • 235
    • 236
    • 237
    • 238
    • 239
    • 240
    • 241
    • 242
    • 243
    • 244
    • 245
    • 246
    • 247
    • 248
    • 249
    • 250
    • 251
    • 252
    • 253

    分组排序取TopN

    查询各科成绩前五名的学生

    select a.course_id,a.stu_id,a.score from score a
        left join score b
        on a.course_id = b.course_id and a.score <= b.score
        group by a.stu_id,a.course_id,a.score
        having count(a.stu_id) <=5
        order by a.course_id,a.score desc;
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    select S1.course_id,s1.stu_id,s1.score from score s1 where
        (select count(*) from score s2
            where s2.course_id=s1.course_id AND s2.score > s1.score
            ) <= 5 order by s1.course_id,s1.score desc;
    
    • 1
    • 2
    • 3
    • 4

    row_number

    row_number() over () 连续序号
    over()里头的分组以及排序的执行晚于 where 、group by、order by 的执行。

    select * from
        (select course_id, stu_id,  score,
            row_number() over (partition by course_id order by score desc ) as mum
            from score) t where mum <= 5;
    
    • 1
    • 2
    • 3
    • 4

    rank

    rank() over () 排名 跳跃排序 序号不是连续的

    select * from
        (select course_id, stu_id,  score,
            rank() over (partition by course_id order by score desc ) as mum
            from score) t where mum <= 5;
    
    • 1
    • 2
    • 3
    • 4

    dense_rank

    dense_rank() over () 排名 连续排序

    select * from
        (select course_id, stu_id,  score,
            dense_rank() over (partition by course_id order by score desc ) as mum
            from score) t where mum <= 5;
    
    • 1
    • 2
    • 3
    • 4
  • 相关阅读:
    Java关于RedisTemplate的使用分析 附代码
    一、首页第一个首页栏制作【仿淘票票系统前后端完全制作(除支付外)】
    VS中cmake多配置构建设置
    Vue基础_Vue2
    算法通关村第13关【青铜】| 数字与数学基础问题
    你安全吗?丨沉默的“复仇”到底是什么东西?
    基于Label studio实现UIE信息抽取智能标注方案,提升标注效率!
    Vue | Vue.js 组件化基础 - 脚手架
    Java版人脸跟踪三部曲之一:极速体验
    PaddleOCR ‘could not create a primitive descriptor for a reorder primitive‘异常处理
  • 原文地址:https://blog.csdn.net/qq_24330181/article/details/134486953