• AV1时域滤波相关代码


    链接:AV1编码器优化技术_mob604756f6460e的技术博客_51CTO博客

    总结:AV1时域滤波算法的特点:1. 使用一种称为非局部均值(non-local means)的方法来计算块差异,并据此确定分配给该块的权重;2. 帧内有一个噪声水平估计算法,当噪声较高时使用更强的滤波器;3. 改用八边形运动搜索模式。

    结果:对于480p的中等分辨率视频,PSNR的增益约为4-5%,SSIM的增益约为9%;对于720p和1080p这类较高分辨率的视频内容,PSNR增益约为6.5%,而SSIM的增益约为11%至12%。

    滤波函数:

    多线程:

    prepare_tf_workers->tf_worker_hook->av1_tf_do_filtering_row->

    相关代码:

    1. Multi-threads:
    2. // Perform temporal filtering process.
    3. if (mt_info->num_workers > 1)
    4. av1_tf_do_filtering_mt(cpi);
    5. else
    6. tf_do_filtering(cpi);

    1pass:

    av1_encode_strategy->denoise_and_encode->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->

    2pass:

    encoder_encode->av1_get_compressed_data->av1_encode_strategy->av1_get_second_pass_params->av1_tf_info_filtering->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->

    滤波前对key_frame 设置相关参数:

    // Resets per-frame state of `cm` to defaults: clears the segmentation
    // features, zeroes the current frame's segment map (when one is allocated),
    // restores the default ref/mode/loop-filter deltas, calls the default
    // probability initializers (av1_default_coef_probs, av1_init_mode_probs,
    // av1_init_mv_probs), marks cm->fc as initialized and finally calls
    // av1_setup_frame_contexts().
    1. void av1_setup_past_independence(AV1_COMMON *cm) {
    2. // Reset the segment feature data to the default stats:
    3. // Features disabled, 0, with delta coding (Default state).
    4. av1_clearall_segfeatures(&cm->seg);
    5. // NOTE(review): the blog annotation here reads "base boost1 boost2" -- presumably the segment classes stored in seg_map; confirm. The map is zeroed over mi_rows * mi_cols bytes.
    6. if (cm->cur_frame->seg_map) {
    7. memset(cm->cur_frame->seg_map, 0,
    8. (cm->cur_frame->mi_rows * cm->cur_frame->mi_cols));
    9. }
    10. // Reset the reference, mode and loop-filter deltas to their defaults.
    11. av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
    12. av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
    13. set_default_lf_deltas(&cm->lf);
    14. av1_default_coef_probs(cm);
    15. av1_init_mode_probs(cm->fc);
    16. av1_init_mv_probs(cm);
    17. cm->fc->initialized = 1;
    18. av1_setup_frame_contexts(cm);
    19. }

    运动搜索代码:

    // Motion search for one temporal-filter block (Y plane only).
    // Runs a full-pixel search of the block in `frame_to_filter` against
    // `ref_frame`, starting from *ref_mv. If integer MVs are forced, only the
    // whole-block result is evaluated; otherwise a sub-pixel refinement is run on
    // the whole block (its result is written back to *ref_mv) and then a
    // full-pixel + sub-pixel search is run on each sub-block, filling
    // subblock_mvs[] / subblock_mses[]. The saved src/pre buffers of `mb` are
    // restored, tf_determine_block_partition() is called with both results, and
    // *ref_mv is reset to zero when the whole-block MSE exceeds a
    // resolution-dependent threshold.
    1. static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
    2. const YV12_BUFFER_CONFIG *frame_to_filter,
    3. const YV12_BUFFER_CONFIG *ref_frame,
    4. const BLOCK_SIZE block_size, const int mb_row,
    5. const int mb_col, MV *ref_mv, MV *subblock_mvs,
    6. int *subblock_mses) {
    7. // Frame information (the smaller dimension selects the MV cost type and the ref_mv threshold below).
    8. const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);
    9. // Block information (ONLY Y-plane is used for motion search).
    10. const int mb_height = block_size_high[block_size];
    11. const int mb_width = block_size_wide[block_size];
    12. const int mb_pels = mb_height * mb_width;
    13. const int y_stride = frame_to_filter->y_stride;
    14. assert(y_stride == ref_frame->y_stride);
    15. const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;
    16. // Save input state.
    17. MACROBLOCKD *const mbd = &mb->e_mbd;
    18. const struct buf_2d ori_src_buf = mb->plane[0].src;
    19. const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];
    20. // Parameters used for motion search.
    21. FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
    22. SUBPEL_MOTION_SEARCH_PARAMS ms_params;
    23. const SEARCH_METHODS search_method = NSTEP;
    24. const search_site_config *search_site_cfg =
    25. cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
    26. const int step_param = av1_init_search_range(
    27. AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
    28. const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
    29. const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
    30. const MV_COST_TYPE mv_cost_type =
    31. min_frame_size >= 720
    32. ? MV_COST_L1_HDRES
    33. : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);
    34. // Starting position for motion search.
    35. FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
    36. // Baseline position for motion search (used for rate distortion comparison).
    37. const MV baseline_mv = kZeroMv;
    38. // Setup.
    39. mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
    40. mb->plane[0].src.stride = y_stride;
    41. mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
    42. mbd->plane[0].pre[0].stride = y_stride;
    43. // Intermediate results for motion search (`error` is the one used to compute the MSE values below).
    44. unsigned int sse, error;
    45. int distortion;
    46. int cost_list[5];
    47. // Do motion search.
    48. int_mv best_mv; // Searched motion vector.
    49. int block_mse = INT_MAX;
    50. MV block_mv = kZeroMv;
    51. av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
    52. &baseline_mv, search_site_cfg,
    53. /*fine_search_interval=*/0);
    54. av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
    55. full_ms_params.run_mesh_search = 1;
    56. full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
    57. av1_full_pixel_search(start_mv, &full_ms_params, step_param,
    58. cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
    59. NULL);
    60. if (force_integer_mv == 1) { // Only do full search on the entire block; *ref_mv, subblock_mvs and subblock_mses are not written in this branch.
    61. const int mv_row = best_mv.as_mv.row;
    62. const int mv_col = best_mv.as_mv.col;
    63. best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
    64. best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
    65. const int mv_offset = mv_row * y_stride + mv_col;
    66. error = cpi->ppi->fn_ptr[block_size].vf(
    67. ref_frame->y_buffer + y_offset + mv_offset, y_stride,
    68. frame_to_filter->y_buffer + y_offset, y_stride, &sse);
    69. block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    70. block_mv = best_mv.as_mv;
    71. } else { // Do fractional search on the entire block and all sub-blocks.
    72. av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
    73. &baseline_mv, cost_list);
    74. ms_params.forced_stop = EIGHTH_PEL;
    75. ms_params.var_params.subpel_search_type = subpel_search_type;
    76. // Since we are merely refining the result from full pixel search, we don't
    77. // need regularization for subpel search
    78. ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
    79. MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    80. error = cpi->mv_search_params.find_fractional_mv_step(
    81. &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv,
    82. &distortion, &sse, NULL);
    83. block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    84. block_mv = best_mv.as_mv;
    85. *ref_mv = best_mv.as_mv;
    86. // On 4 sub-blocks.
    87. const BLOCK_SIZE subblock_size = ss_size_lookup[block_size][1][1];
    88. const int subblock_height = block_size_high[subblock_size];
    89. const int subblock_width = block_size_wide[subblock_size];
    90. const int subblock_pels = subblock_height * subblock_width;
    91. start_mv = get_fullmv_from_mv(ref_mv);
    92. int subblock_idx = 0;
    93. for (int i = 0; i < mb_height; i += subblock_height) {
    94. for (int j = 0; j < mb_width; j += subblock_width) {
    95. const int offset = i * y_stride + j;
    96. mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
    97. mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
    98. av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
    99. subblock_size, &baseline_mv,
    100. search_site_cfg,
    101. /*fine_search_interval=*/0);
    102. av1_set_mv_search_method(&full_ms_params, search_site_cfg,
    103. search_method);
    104. full_ms_params.run_mesh_search = 1;
    105. full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
    106. av1_full_pixel_search(start_mv, &full_ms_params, step_param,
    107. cond_cost_list(cpi, cost_list),
    108. &best_mv.as_fullmv, NULL);
    109. av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
    110. &baseline_mv, cost_list);
    111. ms_params.forced_stop = EIGHTH_PEL;
    112. ms_params.var_params.subpel_search_type = subpel_search_type;
    113. // Since we are merely refining the result from full pixel search, we
    114. // don't need regularization for subpel search
    115. ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
    116. subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    117. error = cpi->mv_search_params.find_fractional_mv_step(
    118. &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
    119. &best_mv.as_mv, &distortion, &sse, NULL);
    120. subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
    121. subblock_mvs[subblock_idx] = best_mv.as_mv;
    122. ++subblock_idx;
    123. }
    124. }
    125. }
    126. // Restore input state.
    127. mb->plane[0].src = ori_src_buf;
    128. mbd->plane[0].pre[0] = ori_pre_buf;
    129. // Make partition decision.
    130. tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
    131. subblock_mses);
    132. // Do not pass down the reference motion vector if error is too large.
    133. const int thresh = (min_frame_size >= 720) ? 12 : 3;
    134. if (block_mse > (thresh << (mbd->bd - 8))) {
    135. *ref_mv = kZeroMv;
    136. }
    137. }

    滤波函数代码:

    // Temporally filters one row of blocks (row index `mb_row`).
    // For each block column: zeroes the accum/count buffers, walks every non-NULL
    // frame in tf_ctx->frames (motion search, predictor build and weighted
    // accumulation for reference frames; self-accumulation for the frame being
    // filtered), writes the normalized result to tf_ctx->output_frame and, when
    // tf_ctx->compute_frame_diff is set, accumulates the block's source-vs-output
    // SSE into td->tf_data.diff.
    1. void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
    2. TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
    3. YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
    4. const int num_frames = tf_ctx->num_frames;
    5. const int filter_frame_idx = tf_ctx->filter_frame_idx;
    6. const int compute_frame_diff = tf_ctx->compute_frame_diff;
    7. const struct scale_factors *scale = &tf_ctx->sf;
    8. const double *noise_levels = tf_ctx->noise_levels;
    9. const int num_pels = tf_ctx->num_pels;
    10. const int q_factor = tf_ctx->q_factor;
    11. const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
    12. const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
    13. MACROBLOCK *const mb = &td->mb;
    14. MACROBLOCKD *const mbd = &mb->e_mbd;
    15. TemporalFilterData *const tf_data = &td->tf_data;
    16. const int mb_height = block_size_high[block_size];
    17. const int mb_width = block_size_wide[block_size];
    18. const int mi_h = mi_size_high_log2[block_size];
    19. const int mi_w = mi_size_wide_log2[block_size];
    20. const int num_planes = av1_num_planes(&cpi->common);
    21. uint32_t *accum = tf_data->accum;
    22. uint16_t *count = tf_data->count;
    23. uint8_t *pred = tf_data->pred;
    24. // Factor to control the filtering strength.
    25. const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;
    26. // Do filtering.
    27. FRAME_DIFF *diff = &td->tf_data.diff;
    28. av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
    29. (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
    30. cpi->oxcf.border_in_pixels);
    31. for (int mb_col = 0; mb_col < tf_ctx->mb_cols; mb_col++) {
    32. av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
    33. (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
    34. cpi->oxcf.border_in_pixels);
    35. memset(accum, 0, num_pels * sizeof(accum[0]));
    36. memset(count, 0, num_pels * sizeof(count[0]));
    37. MV ref_mv = kZeroMv; // Reference motion vector passed down along frames.
    38. // Perform temporal filtering frame by frame.
    39. for (int frame = 0; frame < num_frames; frame++) {
    40. if (frames[frame] == NULL) continue;
    41. // Motion search.
    42. MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
    43. int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
    44. if (frame ==
    45. filter_frame_idx) { // Frame to be filtered.
    46. // Change ref_mv sign for following frames.
    47. ref_mv.row *= -1;
    48. ref_mv.col *= -1;
    49. } else { // Other reference frames.
    50. tf_motion_search(cpi, mb, frame_to_filter, frames[frame], block_size,
    51. mb_row, mb_col, &ref_mv, subblock_mvs, subblock_mses);
    52. }
    53. // Perform weighted averaging.
    54. if (frame == filter_frame_idx) { // Frame to be filtered.
    55. tf_apply_temporal_filter_self(frames[frame], mbd, block_size, mb_row,
    56. mb_col, num_planes, accum, count);
    57. } else { // Other reference frames.
    58. tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
    59. num_planes, scale, subblock_mvs, pred);
    60. // All variants of av1_apply_temporal_filter() contain floating point
    61. // operations. Hence, clear the system state. NOTE(review): no clear-state call appears in this listing.
    62. // TODO(any): avx2/sse2 version should be changed to align with C
    63. // function before using. In particular, current avx2/sse2 function
    64. // only supports 32x32 block size and 5x5 filtering window.
    65. if (is_frame_high_bitdepth(frame_to_filter)) { // for high bit-depth
    66. #if CONFIG_AV1_HIGHBITDEPTH
    67. if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
    68. av1_highbd_apply_temporal_filter(
    69. frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
    70. noise_levels, subblock_mvs, subblock_mses, q_factor,
    71. filter_strength, pred, accum, count);
    72. } else {
    73. #endif // CONFIG_AV1_HIGHBITDEPTH
    74. av1_apply_temporal_filter_c(
    75. frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
    76. noise_levels, subblock_mvs, subblock_mses, q_factor,
    77. filter_strength, pred, accum, count);
    78. #if CONFIG_AV1_HIGHBITDEPTH
    79. }
    80. #endif // CONFIG_AV1_HIGHBITDEPTH
    81. } else { // for 8-bit
    82. if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
    83. av1_apply_temporal_filter(frame_to_filter, mbd, block_size, mb_row,
    84. mb_col, num_planes, noise_levels,
    85. subblock_mvs, subblock_mses, q_factor,
    86. filter_strength, pred, accum, count);
    87. } else {
    88. av1_apply_temporal_filter_c(
    89. frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
    90. noise_levels, subblock_mvs, subblock_mses, q_factor,
    91. filter_strength, pred, accum, count);
    92. }
    93. }
    94. }
    95. }
    96. tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
    97. accum, count, tf_ctx->output_frame);
    98. if (compute_frame_diff) {
    99. const int y_height = mb_height >> mbd->plane[0].subsampling_y;
    100. const int y_width = mb_width >> mbd->plane[0].subsampling_x;
    101. const int source_y_stride = frame_to_filter->y_stride;
    102. const int filter_y_stride = tf_ctx->output_frame->y_stride;
    103. const int source_offset =
    104. mb_row * y_height * source_y_stride + mb_col * y_width;
    105. const int filter_offset =
    106. mb_row * y_height * filter_y_stride + mb_col * y_width;
    107. unsigned int sse = 0;
    108. cpi->ppi->fn_ptr[block_size].vf(
    109. frame_to_filter->y_buffer + source_offset, source_y_stride,
    110. tf_ctx->output_frame->y_buffer + filter_offset, filter_y_stride,
    111. &sse);
    112. diff->sum += sse; // vf()'s return value is discarded; only the sse output is accumulated.
    113. diff->sse += sse * (int64_t)sse;
    114. }
    115. }
    116. }

  • 相关阅读:
    什么是“SQL注入攻击”?如何预防和应对?
    js验证字符串是否是时间日期格式
    机器人xacro设计+gazebo/rviz启动
    从原理到代码实践 | pytorch损失函数
    Github 2024-02-13 开源项目日报 Top9
    自用bat脚本,命令
    POM文件详解
    Redis过期key的删除及淘汰机制
    thinkphp6 获取url路径中的应用名、控制器名、操作名
    冥想第五百一十六天
  • 原文地址:https://blog.csdn.net/maryhaocoolcool/article/details/126749282