• Hive案例


    资料文件地址:链接:https://pan.baidu.com/s/1rPlbKgHlxDoc2KwviC6QOw?pwd=z7wo

    1、需求描述

    针对销售数据,完成统计:

    1. 按年统计销售额
    2. 销售金额在 10W 以上的订单
    3. 每年销售额的差值
    4. 年度订单金额前10位(年度、订单号、订单金额、排名)
    5. 季度订单金额前10位(年度、季度、订单id、订单金额、排名)
    6. 求所有交易日中订单金额最高的前10位
    7. 每年度销售额最大的交易日
    8. 年度最畅销的商品(即每年销售金额最大的商品)

    2、数据说明

    日期表(dimdate)
    dtdate日期
    yearmonthint年月
    yearsmallint
    monthtinyint
    daytinyint
    weektinyint周几
    weekstinyint第几周
    quattinyint季度
    tendaystinyint
    halfmonthtinyint半月
    订单表(sale)
    orderidstring订单号
    locationidstring交易位置
    dtdate交易日期
    订单销售明细表(saledetail)
    orderidstring订单号
    rownumint行号
    itemidstring货品
    numint数量
    pricedouble单价
    amountdouble金额

    3、实现

    3.1、创建表

    将数据存放在ORC文件中

    1. -- createtable.hql
    2. drop database sale cascade;
    3. create database if not exists sale;
    4. create table sale.dimdate_ori(
    5. dt date,
    6. yearmonth int,
    7. year smallint,
    8. month tinyint,
    9. day tinyint,
    10. week tinyint,
    11. weeks tinyint,
    12. quat tinyint,
    13. tendays tinyint,
    14. halfmonth tinyint
    15. )
    16. row format delimited
    17. fields terminated by ",";
    18. create table sale.sale_ori(
    19. orderid string,
    20. locationid string,
    21. dt date
    22. )
    23. row format delimited
    24. fields terminated by ",";
    25. create table sale.saledetail_ori(
    26. orderid string,
    27. rownum int,
    28. goods string,
    29. num int,
    30. price double,
    31. amount double
    32. )
    33. row format delimited
    34. fields terminated by ",";
    35. create table sale.dimdate(
    36. dt date,
    37. yearmonth int,
    38. year smallint,
    39. month tinyint,
    40. day tinyint,
    41. week tinyint,
    42. weeks tinyint,
    43. quat tinyint,
    44. tendays tinyint,
    45. halfmonth tinyint
    46. ) stored as orc;
    47. create table sale.sale(
    48. orderid string,
    49. locationid string,
    50. dt date
    51. ) stored as orc;
    52. create table sale.saledetail(
    53. orderid string,
    54. rownum int,
    55. goods string,
    56. num int,
    57. price double,
    58. amount double
    59. )stored as orc;
    60. hive -f createtable.hql

    3.2、导入数据

    1. -- 加载数据
    2. use sale;
    3. load data local inpath "/root/data/tbDate.dat" overwrite into table dimdate_ori;
    4. load data local inpath "/root/data/tbSale.dat" overwrite into table sale_ori;
    5. load data local inpath "/root/data/tbSaleDetail.dat" overwrite into table saledetail_ori;
    6. -- 导入数据
    7. insert into table dimdate select * from dimdate_ori;
    8. insert into table sale select * from sale_ori;
    9. insert into table saledetail select * from saledetail_ori;
    10. hive -f loaddata.hql

    3.3、SQL实现

    (1)按年统计销售额

    1. SELECT year(B.dt) year, round(sum(A.amount)/10000, 2) amount
    2. FROM saledetail A join sale B on A.orderid=B.orderid
    3. group by year(B.dt);

    (2)销售金额在 10W 以上的订单

    1. SELECT orderid, round(sum(amount), 2) amount
    2. FROM saledetail
    3. group by orderid
    4. having sum(amount) > 100000

    (3)每年销售额的差值

    1. SELECT year, round(amount, 2) amount, round(lag(amount) over (ORDER BY year), 2) prioramount
    2. ,round(amount - lag(amount) over (ORDER BY year), 2) diff
    3. from (SELECT year(B.dt) year, sum(A.amount) amount
    4. from saledetail A join sale B on A.orderid=B.orderid
    5. group by year(B.dt)
    6. ) tmp;

    (4)年度订单金额前10位(年度、订单号、订单金额、排名)

    1. -- 方法一
    2. SELECT dt, orderid, amount, rank
    3. from (SELECT dt, orderid, amount,
    4. dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
    5. from (SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
    6. from saledetail A join sale B on A.orderid=B.orderid
    7. GROUP BY year(B.dt), A.orderid
    8. ) tmp1
    9. ) tmp2
    10. where rank <= 10;
    11. -- 方法二
    12. with tmp as (
    13. SELECT year(B.dt) dt, A.orderid, sum(A.amount) amount
    14. from saledetail A join sale B on A.orderid=B.orderid
    15. GROUP BY year(B.dt), A.orderid
    16. )
    17. SELECT dt, orderid, amount, rank
    18. from (SELECT dt, orderid, amount,
    19. dense_rank() over(PARTITION BY dt ORDER BY amount desc) rank
    20. from tmp
    21. ) tmp2
    22. where rank <= 10;

    (5)季度订单金额前10位(年度、季度、订单id、订单金额、排名)

    1. -- 方法一
    2. with tmp as (
    3. select C.year, C.quat, A.orderid, round(sum(B.amount), 2) amount
    4. from sale A join saledetail B on A.orderid=B.orderid
    5. join dimdate C on A.dt=C.dt
    6. group by C.year, C.quat, A.orderid
    7. )
    8. select year, quat, orderid, amount, rank
    9. from (
    10. select year, quat, orderid, amount,
    11. dense_rank() over (partition by year, quat order by amount desc) rank
    12. from tmp
    13. ) tmp1
    14. where rank <= 10;
    15. -- 方法二
    16. with tmp as(
    17. select year(A.dt) year,
    18. case when month(A.dt) <= 3 then 1
    19. when month(A.dt) <= 6 then 2
    20. when month(A.dt) <= 9 then 3
    21. else 4 end quat,
    22. A.orderid,
    23. round(sum(B.amount), 2) amount
    24. from sale A join saledetail B on A.orderid = B.orderid
    25. group by year(A.dt),
    26. case when month(A.dt) <= 3 then 1
    27. when month(A.dt) <= 6 then 2
    28. when month(A.dt) <= 9 then 3
    29. else 4 end,
    30. A.orderid
    31. )
    32. select year, quat, orderid, amount, rank
    33. from (
    34. select year, quat, orderid, amount,
    35. dense_rank() over (partition by year, quat order by amount desc) rank
    36. from tmp
    37. ) tmp1
    38. where rank <= 10;
    39. -- 方法三。求季度
    40. select floor(month(dt/3.1)) + 1;
    41. with tmp as (
    42. select year(A.dt) year, floor(month(A.dt)/3.1) + 1 quat,
    43. A.orderid,
    44. round(sum(B.amount), 2) amount
    45. from sale A join saledetail B on A.orderid=B.orderid
    46. group by year(A.dt), floor(month(A.dt)/3.1) + 1, A.orderid
    47. )
    48. select year, quat, orderid, amount, rank
    49. from (
    50. select year, quat, orderid, amount,
    51. dense_rank() over (partition by year, quat order by amount desc) rank
    52. from tmp
    53. ) tmp1
    54. where rank <= 10;

    (6)求所有交易日中订单金额最高的前10位

    1. topN问题:
    2. 1、基础数据
    3. 2、上排名函数
    4. 3、解决N的问题
    5. with tmp as (
    6. select A.dt, A.orderid, round(sum(B.amount), 2) amount
    7. from sale A join saledetail B on A.orderid=B.orderid
    8. group by A.dt, A.orderid
    9. )
    10. select dt, orderid, amount, rank
    11. from (
    12. select dt, orderid, amount, dense_rank() over(order by amount desc) rank
    13. from tmp
    14. ) tmp1
    15. where rank <= 10;

    (7)每年度销售额最大的交易日

    1. with tmp as (
    2. select A.dt, round(sum(B.amount), 2) amount
    3. from sale A join saledetail B on A.orderid=B.orderid
    4. group by A.dt
    5. )
    6. select year(dt) year, max(amount) dayamount
    7. from tmp
    8. group by year(dt);

    备注:以上求解忽略了交易日,以下SQL更符合题意

    1. with tmp as (
    2. select dt, amount, dense_rank() over (partition by year(dt) order by amount desc) as rank
    3. from (select A.dt, round(sum(B.amount), 2) amount
    4. from sale A join saledetail B on A.orderid=B.orderid
    5. group by A.dt) tab1
    6. )
    7. select year(dt) as year, dt, amount
    8. from tmp
    9. where rank=1;

    (8)年度最畅销的商品(即每年销售金额最大的商品)

    1. with tmp as (
    2. select year(B.dt) year, goods, round(sum(amount),2) amount
    3. from saledetail A join sale B on A.orderid=B.orderid
    4. group by year(B.dt), goods
    5. )
    6. select year, goods, amount
    7. from (select year, goods, amount, dense_rank() over
    8. (partition by year order by amount desc) rank
    9. from tmp) tmp1
    10. where rank = 1;
  • 相关阅读:
    剑指offer(C++)-JZ67:把字符串转换成整数atoi(算法-模拟)
    存储资源盘活系统,“盘活”物联网架构难题(上)
    什么是工作流引擎
    C++日期和时间编程总结
    [Codeforces] number theory (R1600) Part.2
    【MAPBOX基础功能】08、mapbox绘制点图层并进行添加、删除、更新、显隐等操作
    多任务环境中如何喂看门狗?
    vue3中使用element-plus
    Fritzing软件绘制Arduino面包板接线图传感器模块库文件216
    C++的文件操作
  • 原文地址:https://blog.csdn.net/weixin_52851967/article/details/127424595