- 数据量比对;
- 数据内容比对;
- 不同维度上比对, 查看维度因素;
- -- 总数据量比对
- select
- aa.cnt as pre_cnt
- ,bb.cnt as pro_cnt
- ,( aa.cnt - bb.cnt ) as err
- ,( aa.cnt - bb.cn t)*1.000/bb.cnt as err_ratio
- from ( -- 测试数据
- select
- '1' as id
- ,count(1) as cnt
- from
- ${tbl_name_1}
- where
- dt = '20230730'
- ) aa
- join ( -- 生产数据
- select
- '1' as id
- ,count(1) as cnt
- from
- ${tbl_name_2}
- where
- dt = '20230730'
- ) bb
- on aa.id = bb.id
- ;
-
-
- -- 单维度比对数据量
- select
- aa.cnt pro_cnt
- ,bb.cnt pre_cnt
- ,(aa.cnt - bb.cnt) as err_cnt
- ,(aa.cnt - bb.cnt)*1.00/aa.cnt as err_ratio
- from (
- select
- app_id
- ,count(1) as cnt
- from
- tbl_pro
- where
- dt = '20231010'
- group by
- app_id
- ) aa
- full join (
- select
- app_id
- ,count(1) as cnt
- from
- tbl_pro
- where
- dt = '20231010'
- group by
- app_id
- ) bb
- on aa.app_id = bb.app_id
- ;
- -- 内容比对
- with tmp as (
- select
- aa
- ,bb
- from
- ${tbl_name_1}
- where
- dt = '20231017'
-
- except
-
- select
- aa
- ,bb
- from
- ${tbl_name_2}
- where
- dt = '20231017'
- )
- select
- *
- from
- tmp
- ;
- -- 比对指标总和
- select
- aa.cnt pro_cnt
- ,bb.cnt pre_cnt
- ,(aa.cnt - bb.cnt) as err_cnt
- ,(aa.cnt - bb.cnt)*1.00/aa.cnt as err_ratio
- from (
- select
- 'id' as id
- ,sum(active_num) as cnt
- from
- pro_tbl
- where
- dt = '20231010'
- ) aa
- join (
- select
- 'id' as id
- ,sum(active_num) as cnt
- from
- pre_tbl
- where
- dt = '20231010'
- ) bb
- on aa.id = bb.id
- ;
-
- -- 某些维度上指标和比对
- select
- aa.app_id
- ,aa.cnt as cnt_pro
- ,bb.cnt as cnt_pre
- ,(aa.cnt - bb.cnt) as err_cnt
- ,(aa.cnt - bb.cnt)*1.000/aa.cnt as error
- from (
- select
- app_id
- ,sum(active_num) as cnt
- from
- pro_tbl
- where
- dt = '20231010'
- group by
- app_id
- ) aa
- full join (
- select
- app_id
- ,sum(active_num) as cnt
- from
- pre_tbl
- where
- dt = '20231010'
- group by
- app_id
- ) bb
- on aa.app_id = bb.app_id
- order by aa.cnt desc
- ;
1. 随机函数
first、last函数会随机取维度值, 造成这些维度指标不完全一样, 可验证其它维度上指标是否一致.
2. join异常
3. 增加过滤后数据变多
过滤掉了无效数据, 保留下了更过的有效数据;
比如pkgs数组字段有大量空值,row_number方式排序去重(rnk=1)前没有过滤pkgs为空的数据,这样获取了许多pkgs为空的数据,导致展开数组时会丢失有效数据.