资料文件地址:链接:https://pan.baidu.com/s/1rPlbKgHlxDoc2KwviC6QOw?pwd=z7wo
针对销售数据,完成统计:
日期表(dimdate) | ||
dt | date | 日期 |
yearmonth | int | 年月 |
year | smallint | 年 |
month | tinyint | 月 |
day | tinyint | 日 |
week | tinyint | 周几 |
weeks | tinyint | 第几周 |
quat | tinyint | 季度 |
tendays | tinyint | 旬 |
halfmonth | tinyint | 半月 |
订单表(sale) | ||
orderid | string | 订单号 |
locationid | string | 交易位置 |
dt | date | 交易日期 |
订单销售明细表(saledetail) | ||
orderid | string | 订单号 |
rownum | int | 行号 |
itemid | string | 货品 |
num | int | 数量 |
price | double | 单价 |
amount | double | 金额 |
将数据存放在ORC文件中
- -- createtable.hql
- drop database sale cascade;
- create database if not exists sale;
-
- create table sale.dimdate_ori(
- dt date,
- yearmonth int,
- year smallint,
- month tinyint,
- day tinyint,
- week tinyint,
- weeks tinyint,
- quat tinyint,
- tendays tinyint,
- halfmonth tinyint
- )
- row format delimited
- fields terminated by ",";
-
- create table sale.sale_ori(
- orderid string,
- locationid string,
- dt date
- )
- row format delimited
- fields terminated by ",";
-
- create table sale.saledetail_ori(
- orderid string,
- rownum int,
- goods string,
- num int,
- price double,
- amount double
- )
- row format delimited
- fields terminated by ",";
-
-
- create table sale.dimdate(
- dt date,
- yearmonth int,
- year smallint,
- month tinyint,
- day tinyint,
- week tinyint,
- weeks tinyint,
- quat tinyint,
- tendays tinyint,
- halfmonth tinyint
- ) stored as orc;
-
- create table sale.sale(
- orderid string,
- locationid string,
- dt date
- ) stored as orc;
-
- create table sale.saledetail(
- orderid string,
- rownum int,
- goods string,
- num int,
- price double,
- amount double
- )stored as orc;
-
- hive -f createtable.hql
- -- 加载数据
- use sale;
- load data local inpath "/root/data/tbDate.dat" overwrite into table dimdate_ori;
- load data local inpath "/root/data/tbSale.dat" overwrite into table sale_ori;
- load data local inpath "/root/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
-
- -- 导入数据
- insert into table dimdate select * from dimdate_ori;
- insert into table sale select * from sale_ori;
- insert into table saledetail select * from saledetail_ori;
-
- hive -f loaddata.hql
(1)按年统计销售额
- SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
- FROM saledetail A join sale B on A.orderid=B.orderid
- group by year(B.dt);
(2)销售金额在 10W 以上的订单
- SELECT orderid, round(sum(amount), 2) amount
- FROM saledetail
- group by orderid
- having sum(amount) > 100000
(3)每年销售额的差值
- SELECT year, round(amount, 2) amount, round(lag(amount) over (ORDER BY year), 2) prioramount
- ,round(amount - lag(amount) over (ORDER BY year), 2) diff
- from (SELECT year(B.dt) year, sum(A.amount) amount
- from saledetail A join sale B on A.orderid=B.orderid
- group by year(B.dt)
- ) tmp;
(4)年度订单金额前10位(年度、订单号、订单金额、排名)
- -- 方法一
- SELECT dt, orderid, amount, rank
- from (SELECT dt, orderid, amount,
- dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
- from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
- from saledetail A join sale B on A.orderid=B.orderid
- GROUP BY year(B.dt), A.orderid
- ) tmp1
- ) tmp2
- where rank <= 10;
-
-
- -- 方法二
- with tmp as (
- SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
- from saledetail A join sale B on A.orderid=B.orderid
- GROUP BY year(B.dt), A.orderid
- )
- SELECT dt, orderid, amount, rank
- from (SELECT dt, orderid, amount,
- dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
- from tmp
- ) tmp2
- where rank <= 10;
(5)季度订单金额前10位(年度、季度、订单id、订单金额、排名)
- -- 方法一
- with tmp as (
- select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid=B.orderid
- join dimdate C on A.dt=C.dt
- group by C.year, C.quat, A.orderid
- )
- select year, quat, orderid, amount, rank
- from (
- select year, quat, orderid, amount,
- dense_rank() over (partition by year, quat order by amount desc) rank
- from tmp
- ) tmp1
- where rank <= 10;
-
-
- -- 方法二
- with tmp as(
- select year(A.dt) year,
- case when month(A.dt) <= 3 then 1
- when month(A.dt) <= 6 then 2
- when month(A.dt) <= 9 then 3
- else 4 end quat,
- A.orderid,
- round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid = B.orderid
- group by year(A.dt),
- case when month(A.dt) <= 3 then 1
- when month(A.dt) <= 6 then 2
- when month(A.dt) <= 9 then 3
- else 4 end,
- A.orderid
- )
- select year, quat, orderid, amount, rank
- from (
- select year, quat, orderid, amount,
- dense_rank() over (partition by year, quat order by amount desc) rank
- from tmp
- ) tmp1
- where rank <= 10;
-
-
- -- 方法三。求季度
- select floor(month(dt/3.1)) + 1;
- with tmp as (
- select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
- A.orderid,
- round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid=B.orderid
- group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
- )
- select year, quat, orderid, amount, rank
- from (
- select year, quat, orderid, amount,
- dense_rank() over (partition by year, quat order by amount desc) rank
- from tmp
- ) tmp1
- where rank <= 10;
(6)求所有交易日中订单金额最高的前10位
- topN问题:
- 1、基础数据
- 2、上排名函数
- 3、解决N的问题
-
- with tmp as (
- select A.dt, A.orderid, round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid=B.orderid
- group by A.dt, A.orderid
- )
- select dt, orderid, amount, rank
- from (
- select dt, orderid, amount, dense_rank() over(order by amount desc) rank
- from tmp
- ) tmp1
- where rank <= 10;
(7)每年度销售额最大的交易日
- with tmp as (
- select A.dt, round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid=B.orderid
- group by A.dt
- )
- select year(dt) year, max(amount) dayamount
- from tmp
- group by year(dt);
备注:以上求解忽略了交易日,以下SQL更符合题意
- with tmp as (
- select dt, amount, dense_rank() over (partition by year(dt) order by amount desc) as rank
- from (select A.dt, round(sum(B.amount), 2) amount
- from sale A join saledetail B on A.orderid=B.orderid
- group by A.dt) tab1
- )
- select year(dt) as year, dt, amount
- from tmp
- where rank=1;
(8)年度最畅销的商品(即每年销售金额最大的商品)
- with tmp as (
- select year(B.dt) year, goods, round(sum(amount),2) amount
- from saledetail A join sale B on A.orderid=B.orderid
- group by year(B.dt), goods
- )
- select year, goods, amount
- from (select year, goods, amount, dense_rank() over
- (partition by year order by amount desc) rank
- from tmp) tmp1
- where rank = 1;