链接:AV1编码器优化技术_mob604756f6460e的技术博客_51CTO博客
总结:AV1 时域滤波算法的特点:1. 使用一种称为非局部均值(non-local means)的方法来计算块差异,并据此确定分配给该块的权重;2. 在帧内有一个噪声水平估计算法,当噪声较高时,便使用更强的滤波器;3. 改为使用八边形运动搜索模式。
结果:对于 480p 的中分辨率视频,PSNR 的增益约为 4%-5%,SSIM 的增益约为 9%;对于 720p 和 1080p 这类较高分辨率的视频内容,PSNR 的增益约为 6.5%,而 SSIM 的增益约为 11% 至 12%。
滤波函数:
多线程:
prepare_tf_workers->tf_worker_hook->av1_tf_do_filtering_row->
相关代码:
- Multi-threads:
- // Perform temporal filtering process: use av1_tf_do_filtering_mt() when more than one worker is configured, otherwise call tf_do_filtering() directly.
- if (mt_info->num_workers > 1)
- av1_tf_do_filtering_mt(cpi);
- else
- tf_do_filtering(cpi);
1pass:
av1_encode_strategy->denoise_and_encode->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->
2pass:
encoder_encode->av1_get_compressed_data->av1_encode_strategy->av1_get_second_pass_params->av1_tf_info_filtering->av1_temporal_filter->tf_do_filtering->av1_tf_do_filtering_row->
滤波前对key_frame 设置相关参数:
- void av1_setup_past_independence(AV1_COMMON *cm) {  // Resets segmentation, delta and probability state of cm via the default/init helpers below.
- // Reset the segment feature data to the default stats:
- // Features disabled, 0, with delta coding (Default state).
- av1_clearall_segfeatures(&cm->seg);
-
- // Zero the current frame's segment map when one is present (mi_rows * mi_cols bytes). Original note: "base boost1 boost2" - meaning unclear, TODO confirm.
- if (cm->cur_frame->seg_map) {
- memset(cm->cur_frame->seg_map, 0,
- (cm->cur_frame->mi_rows * cm->cur_frame->mi_cols));
- }
-
- // Reset the reference deltas and mode deltas of the current frame, then the loop-filter deltas in cm->lf.
- av1_set_default_ref_deltas(cm->cur_frame->ref_deltas);
- av1_set_default_mode_deltas(cm->cur_frame->mode_deltas);
- set_default_lf_deltas(&cm->lf);
-
- av1_default_coef_probs(cm);  // Presumably installs default coefficient probabilities - confirm against the helper.
- av1_init_mode_probs(cm->fc);
- av1_init_mv_probs(cm);
- cm->fc->initialized = 1;  // Flag the frame context as initialized before av1_setup_frame_contexts() runs.
- av1_setup_frame_contexts(cm);
- }
运动搜索代码:
- static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,  // Motion search for one block of frame_to_filter against ref_frame (Y plane only).
- const YV12_BUFFER_CONFIG *frame_to_filter,  // Frame providing the source block.
- const YV12_BUFFER_CONFIG *ref_frame,  // Frame in which the matching block is searched.
- const BLOCK_SIZE block_size, const int mb_row,  // Block size and block row index (in units of blocks).
- const int mb_col, MV *ref_mv, MV *subblock_mvs,  // ref_mv: in = search start; out = updated at the assignments below.
- int *subblock_mses) {  // subblock_mvs / subblock_mses: filled per sub-block on the fractional path, then passed to tf_determine_block_partition().
- // Frame information
- const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);
-
- // Block information (ONLY Y-plane is used for motion search).
- const int mb_height = block_size_high[block_size];
- const int mb_width = block_size_wide[block_size];
- const int mb_pels = mb_height * mb_width;
- const int y_stride = frame_to_filter->y_stride;
- assert(y_stride == ref_frame->y_stride);  // y_stride and y_offset are applied to both frames' buffers below.
- const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;
-
- // Save input state (source and prediction buffers of plane 0); restored before the partition decision.
- MACROBLOCKD *const mbd = &mb->e_mbd;
- const struct buf_2d ori_src_buf = mb->plane[0].src;
- const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];
-
- // Parameters used for motion search.
- FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
- SUBPEL_MOTION_SEARCH_PARAMS ms_params;
- const SEARCH_METHODS search_method = NSTEP;
- const search_site_config *search_site_cfg =
- cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
- const int step_param = av1_init_search_range(
- AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
- const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
- const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
- const MV_COST_TYPE mv_cost_type =  // Cost type is chosen from the smaller frame dimension: >= 720, >= 480, or below.
- min_frame_size >= 720
- ? MV_COST_L1_HDRES
- : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);
-
- // Starting position for motion search.
- FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
- // Baseline position for motion search (used for rate distortion comparison).
- const MV baseline_mv = kZeroMv;
-
- // Setup: point the source and prediction buffers at this block in both frames.
- mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
- mb->plane[0].src.stride = y_stride;
- mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
- mbd->plane[0].pre[0].stride = y_stride;
- // Unused intermediate results for motion search.
- unsigned int sse, error;
- int distortion;
- int cost_list[5];
-
- // Do motion search.
- int_mv best_mv; // Searched motion vector.
- int block_mse = INT_MAX;
- MV block_mv = kZeroMv;
-
- av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
- &baseline_mv, search_site_cfg,
- /*fine_search_interval=*/0);
- av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
- full_ms_params.run_mesh_search = 1;
- full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
-
- av1_full_pixel_search(start_mv, &full_ms_params, step_param,  // Whole-block full-pel search; result lands in best_mv.as_fullmv.
- cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
- NULL);
-
- if (force_integer_mv == 1) { // Only do full search on the entire block. NOTE(review): *ref_mv is not updated on this path.
- const int mv_row = best_mv.as_mv.row;  // Keep the full-pel components for the buffer offset below.
- const int mv_col = best_mv.as_mv.col;
- best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);  // Presumably converts full-pel units to sub-pel units - confirm macro definition.
- best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
- const int mv_offset = mv_row * y_stride + mv_col;
- error = cpi->ppi->fn_ptr[block_size].vf(  // Error between the displaced reference block and the source block.
- ref_frame->y_buffer + y_offset + mv_offset, y_stride,
- frame_to_filter->y_buffer + y_offset, y_stride, &sse);
- block_mse = DIVIDE_AND_ROUND(error, mb_pels);  // Per-pixel error over the whole block.
- block_mv = best_mv.as_mv;
- } else { // Do fractional search on the entire block and all sub-blocks.
- av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
- &baseline_mv, cost_list);
- ms_params.forced_stop = EIGHTH_PEL;
- ms_params.var_params.subpel_search_type = subpel_search_type;
- // Since we are merely refining the result from full pixel search, we don't
- // need regularization for subpel search
- ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
- MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
- error = cpi->mv_search_params.find_fractional_mv_step(
- &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv,
- &distortion, &sse, NULL);
- block_mse = DIVIDE_AND_ROUND(error, mb_pels);
- block_mv = best_mv.as_mv;
- *ref_mv = best_mv.as_mv;  // Hand the refined whole-block vector back through ref_mv.
- // On 4 sub-blocks.
- const BLOCK_SIZE subblock_size = ss_size_lookup[block_size][1][1];
- const int subblock_height = block_size_high[subblock_size];
- const int subblock_width = block_size_wide[subblock_size];
- const int subblock_pels = subblock_height * subblock_width;
- start_mv = get_fullmv_from_mv(ref_mv);  // Every sub-block search starts from the whole-block result.
- int subblock_idx = 0;
- for (int i = 0; i < mb_height; i += subblock_height) {
- for (int j = 0; j < mb_width; j += subblock_width) {
- const int offset = i * y_stride + j;  // Offset of this sub-block inside the block.
- mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
- mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
- av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
- subblock_size, &baseline_mv,
- search_site_cfg,
- /*fine_search_interval=*/0);
- av1_set_mv_search_method(&full_ms_params, search_site_cfg,
- search_method);
- full_ms_params.run_mesh_search = 1;
- full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
- av1_full_pixel_search(start_mv, &full_ms_params, step_param,
- cond_cost_list(cpi, cost_list),
- &best_mv.as_fullmv, NULL);
- av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
- &baseline_mv, cost_list);
- ms_params.forced_stop = EIGHTH_PEL;
- ms_params.var_params.subpel_search_type = subpel_search_type;
- // Since we are merely refining the result from full pixel search, we
- // don't need regularization for subpel search
- ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
-
- subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
- error = cpi->mv_search_params.find_fractional_mv_step(
- &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
- &best_mv.as_mv, &distortion, &sse, NULL);
- subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);  // Per-pixel error of this sub-block.
- subblock_mvs[subblock_idx] = best_mv.as_mv;
- ++subblock_idx;
- }
- }
- }
-
- // Restore input state.
- mb->plane[0].src = ori_src_buf;
- mbd->plane[0].pre[0] = ori_pre_buf;
-
- // Make partition decision from the whole-block result and the per-sub-block results.
- tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
- subblock_mses);
-
- // Do not pass down the reference motion vector if error is too large.
- const int thresh = (min_frame_size >= 720) ? 12 : 3;
- if (block_mse > (thresh << (mbd->bd - 8))) {  // Threshold is shifted left by (bit depth - 8).
- *ref_mv = kZeroMv;
- }
- }
滤波函数代码:
- void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {  // Temporally filters every block of row mb_row using cpi->tf_ctx and the per-thread buffers in td.
- TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
- YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
- const int num_frames = tf_ctx->num_frames;
- const int filter_frame_idx = tf_ctx->filter_frame_idx;  // Index in frames[] of the frame being filtered.
- const int compute_frame_diff = tf_ctx->compute_frame_diff;
- const struct scale_factors *scale = &tf_ctx->sf;
- const double *noise_levels = tf_ctx->noise_levels;
- const int num_pels = tf_ctx->num_pels;
- const int q_factor = tf_ctx->q_factor;
- const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
- const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
- MACROBLOCK *const mb = &td->mb;
- MACROBLOCKD *const mbd = &mb->e_mbd;
- TemporalFilterData *const tf_data = &td->tf_data;
- const int mb_height = block_size_high[block_size];
- const int mb_width = block_size_wide[block_size];
- const int mi_h = mi_size_high_log2[block_size];
- const int mi_w = mi_size_wide_log2[block_size];
- const int num_planes = av1_num_planes(&cpi->common);
- uint32_t *accum = tf_data->accum;  // Per-block accumulator buffer, zeroed for every block below.
- uint16_t *count = tf_data->count;  // Per-block count buffer, zeroed for every block below.
- uint8_t *pred = tf_data->pred;  // Buffer filled by tf_build_predictor() for each reference frame.
-
- // Factor to control the filtering strength.
- const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;
-
- // Do filtering.
- FRAME_DIFF *diff = &td->tf_data.diff;
- av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,  // Row limits are set once per row.
- (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
- cpi->oxcf.border_in_pixels);
- for (int mb_col = 0; mb_col < tf_ctx->mb_cols; mb_col++) {
- av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,  // Column limits are set again for every block.
- (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
- cpi->oxcf.border_in_pixels);
- memset(accum, 0, num_pels * sizeof(accum[0]));
- memset(count, 0, num_pels * sizeof(count[0]));
- MV ref_mv = kZeroMv; // Reference motion vector passed down along frames.
- // Perform temporal filtering frame by frame.
- for (int frame = 0; frame < num_frames; frame++) {
- if (frames[frame] == NULL) continue;  // Skip empty frame slots.
-
- // Motion search.
- MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
- int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
- if (frame ==
- filter_frame_idx) { // Frame to be filtered.
- // Change ref_mv sign for following frames.
- ref_mv.row *= -1;
- ref_mv.col *= -1;
- } else { // Other reference frames.
- tf_motion_search(cpi, mb, frame_to_filter, frames[frame], block_size,
- mb_row, mb_col, &ref_mv, subblock_mvs, subblock_mses);
- }
-
- // Perform weighted averaging.
- if (frame == filter_frame_idx) { // Frame to be filtered.
- tf_apply_temporal_filter_self(frames[frame], mbd, block_size, mb_row,
- mb_col, num_planes, accum, count);
- } else { // Other reference frames.
- tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
- num_planes, scale, subblock_mvs, pred);
-
- // All variants of av1_apply_temporal_filter() contain floating point
- // operations. Hence, clear the system state. NOTE(review): no explicit state-clearing call follows in this excerpt.
-
- // TODO(any): avx2/sse2 version should be changed to align with C
- // function before using. In particular, current avx2/sse2 function
- // only supports 32x32 block size and 5x5 filtering window.
- if (is_frame_high_bitdepth(frame_to_filter)) { // for high bit-depth
- #if CONFIG_AV1_HIGHBITDEPTH
- if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {  // Configuration named in the TODO above as supported by the optimized variant.
- av1_highbd_apply_temporal_filter(
- frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
- noise_levels, subblock_mvs, subblock_mses, q_factor,
- filter_strength, pred, accum, count);
- } else {
- #endif // CONFIG_AV1_HIGHBITDEPTH
- av1_apply_temporal_filter_c(  // Reached unconditionally when CONFIG_AV1_HIGHBITDEPTH is off, otherwise only via the else branch.
- frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
- noise_levels, subblock_mvs, subblock_mses, q_factor,
- filter_strength, pred, accum, count);
- #if CONFIG_AV1_HIGHBITDEPTH
- }
- #endif // CONFIG_AV1_HIGHBITDEPTH
- } else { // for 8-bit
- if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
- av1_apply_temporal_filter(frame_to_filter, mbd, block_size, mb_row,
- mb_col, num_planes, noise_levels,
- subblock_mvs, subblock_mses, q_factor,
- filter_strength, pred, accum, count);
- } else {
- av1_apply_temporal_filter_c(
- frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
- noise_levels, subblock_mvs, subblock_mses, q_factor,
- filter_strength, pred, accum, count);
- }
- }
- }
- }
- tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,  // Runs once per block after all frames were accumulated; receives tf_ctx->output_frame.
- accum, count, tf_ctx->output_frame);
-
- if (compute_frame_diff) {  // Optionally accumulate the difference between the source block and the filtered block (Y plane).
- const int y_height = mb_height >> mbd->plane[0].subsampling_y;
- const int y_width = mb_width >> mbd->plane[0].subsampling_x;
- const int source_y_stride = frame_to_filter->y_stride;
- const int filter_y_stride = tf_ctx->output_frame->y_stride;
- const int source_offset =
- mb_row * y_height * source_y_stride + mb_col * y_width;
- const int filter_offset =
- mb_row * y_height * filter_y_stride + mb_col * y_width;
- unsigned int sse = 0;
- cpi->ppi->fn_ptr[block_size].vf(  // Only the sse output is used; the function's return value is discarded.
- frame_to_filter->y_buffer + source_offset, source_y_stride,
- tf_ctx->output_frame->y_buffer + filter_offset, filter_y_stride,
- &sse);
- diff->sum += sse;  // Running sum of the per-block sse values.
- diff->sse += sse * (int64_t)sse;  // Running sum of squared per-block sse values, computed in 64 bits.
- }
- }
- }