我们的需求是这样,有一份学生的考试分数信息,语文、数学、英语这三门,需要计算出班级中单科排名 前三名学生的姓名
- create external table student_score(
- id int,
- name string,
- sub string,
- score int
- )row format delimited
- fields terminated by '\t'
- location '/data/student_score';
-
- hdfs dfs -put /data/soft/hivedata/student_score.data /data/student_score

- select *,row_number() over(partition by sub order by score desc) num
- from student_score

- select * from(
- select *,row_number() over(partition by sub order by score desc) num from student_score
- ) s where s.num<=3

- create external table student_favors(
- name string,
- favor string
- )row format delimited
- fields terminated by '\t'
- location '/data/student_favors';
-
- hdfs dfs -put /data/soft/hivedata/student_favors.data /data/student_favors
- select name,concat_ws(',',collect_list(favor)) as favor_list
- from student_favors group by name;

- select name,concat_ws(',',collect_list(favor)) as favor_list
- from student_favors group by name;

- create external table student_favors_2(
- name string,
- favorlist string
- )row format delimited
- fields terminated by '\t'
- location '/data/student_favors_2';
-
- hdfs dfs -put /data/soft/hivedata/student_favors_2.data /data/student_favors_2
select split(favorlist,',') from student_favors_2;

select explode(split(favorlist,',')) from student_favors_2;

select name,favor_new from student_favors_2 lateral view explode(split(favorlist,','))table1 as favor_new;

order by:会对查询的结果进行一次全局排序,使用这个语句的时候生成的reduce任务只有一个
sort by :能保证每个reducer出来的数据是有序的,但是不能保证所有的数据是全局有序的