• x265 传递残差计算


    一 传递残差的作用

         传递残差(propagateCost)会沿着参考关系逐帧累加到被参考帧对应块的 Cost 上,并最终影响码控;本文分析传递残差的计算过程。

    二 代码详细分析

         传递残差迭代过程

    1. void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced)
    2. {
    3. /* Propagates the per-block amounts computed for frame b into the propagateCost arrays of frames p0 and p1.
    4. Parameters: 1 frames - the frame array, indexed by p0 / p1 / b
    5. 2 averageDuration - average frame duration, used to build fpsFactor
    6. 3 p0 - index of the list-0 (forward) reference frame
    7. 4 p1 - index of the list-1 (backward) reference frame
    8. 5 b - index of the current frame
    9. 6 referenced - non-zero when the current frame is itself used as a reference
    10. */
    11. uint16_t *refCosts[2] = { frames[p0]->propagateCost, frames[p1]->propagateCost };
    12. int32_t distScaleFactor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
    13. int32_t bipredWeight = m_param->bEnableWeightedBiPred ? 64 - (distScaleFactor >> 2) : 32;
    14. int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; //bi-prediction weights for list 0 and list 1; they always sum to 64
    15. int listDist[2] = { b - p0, p1 - b };
    16. memset(m_scratch, 0, m_8x8Width * sizeof(int));
    17. uint16_t *propagateCost = frames[b]->propagateCost; //pointer into the current frame's propagate cost
    18. s265_emms();
    19. double fpsFactor = CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) / CLIP_DURATION(averageDuration);
    20. //duration factor: clipped nominal frame duration (fpsDenom / fpsNum) divided by the clipped average duration; presumably about 1 at a constant frame rate
    21. /* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */
    22. if (!referenced) //the current frame is not used as a reference
    23. memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t)); //zero a single row only
    24. int32_t strideInCU = m_8x8Width; //row stride measured in CUs
    25. for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++) //one iteration per CU row
    26. {
    27. int cuIndex = blocky * strideInCU; //index of the first CU of this row
    28. if (m_param->rc.qgSize == 8) //qgSize == 8: use the 8x8 inverse qscale factors
    29. primitives.propagateCost(m_scratch, propagateCost,
    30. frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
    31. frames[b]->invQscaleFactor8x8 + cuIndex, &fpsFactor, m_8x8Width);
    32. //writes the propagate amounts of this row into m_scratch
    33. else
    34. primitives.propagateCost(m_scratch, propagateCost,
    35. frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
    36. frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);
    37. //same call, but with invQscaleFactor instead of invQscaleFactor8x8
    38. if (referenced) //the current frame is used as a reference
    39. propagateCost += m_8x8Width; //advance one row; otherwise the single zeroed row is re-used
    40. for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++) //every block of the current row
    41. {
    42. int32_t propagate_amount = m_scratch[blockx];
    43. /* Don't propagate for an intra block. */
    44. if (propagate_amount > 0) //propagate_amount is the amount to hand on for this block, not a count
    45. {
    46. /* Access width-2 bitfield. */
    47. int32_t lists_used = frames[b]->lowresCosts[b - p0][p1 - b][cuIndex] >> LOWRES_COST_SHIFT; //
    48. //per the article author: each 16-bit lowresCosts entry keeps the list-0 / list-1 usage flags in its top 2 bits and the cost in its low 14 bits
    49. //per the article author: lowresCosts is the cost measured on the 1/4-resolution picture, and the right shift above is 14 bits
    50. /* Follow the MVs to the previous frame(s). */
    51. for (uint16_t list = 0; list < 2; list++) //list 0 then list 1 (forward / backward reference)
    52. {
    53. if ((lists_used >> list) & 1) //bit set: this list was used for the block
    54. {
    55. #define CLIP_ADD(s, x) (s) = (uint16_t)S265_MIN((s) + (x), (1 << 16) - 1)
    56. int32_t listamount = propagate_amount;
    57. /* Apply bipred weighting. */
    58. if (lists_used == 3) //both lists used: bi-predicted block
    59. listamount = (listamount * bipredWeights[list] + 32) >> 6; //scale by this list's weight out of 64, with rounding
    60. MV *mvs = frames[b]->lowresMvs[list][listDist[list]]; //motion vectors of the current frame towards this list's reference
    61. /* Early termination for simple case of mv 0. */
    62. if (!mvs[cuIndex].word) //the MV's word field is zero
    63. {
    64. CLIP_ADD(refCosts[list][cuIndex], listamount);//add the whole amount to the same block index of the reference frame, saturating at (1 << 16) - 1
    65. //zero MV: no displacement, so nothing has to be split between neighbouring blocks
    66. continue;
    67. }
    68. int32_t x = mvs[cuIndex].x;
    69. int32_t y = mvs[cuIndex].y;
    70. int32_t cux = (x >> 5) + blockx;
    71. int32_t cuy = (y >> 5) + blocky;
    72. int32_t idx0 = cux + cuy * strideInCU; //top-left block reached by the MV in the reference frame
    73. int32_t idx1 = idx0 + 1;//block to the right of idx0
    74. int32_t idx2 = idx0 + strideInCU;// block below idx0
    75. int32_t idx3 = idx0 + strideInCU + 1; //block below and to the right of idx0
    76. /*
    77. idx0 idx1
    78. idx2 idx3
    79. */
    80. x &= 31; //remainder of the MV inside a block, 0..31
    81. y &= 31;
    82. int32_t idx0weight = (32 - y) * (32 - x); //the four weights sum to 32 * 32 = 1024, hence the >> 10 below
    83. int32_t idx1weight = (32 - y) * x;
    84. int32_t idx2weight = y * (32 - x);
    85. int32_t idx3weight = y * x;
    86. /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
    87. * be counted. */
    88. if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0)
    89. { //all four blocks lie inside the frame: add each weighted share
    90. CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
    91. CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
    92. CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
    93. CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
    94. }
    95. else /* Check offsets individually: some of the four blocks fall outside the frame, so each one is tested to avoid out-of-range indexing */
    96. {
    97. if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0)
    98. CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
    99. if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0)
    100. CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
    101. if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0)
    102. CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
    103. if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0)
    104. CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
    105. }
    106. }
    107. }
    108. }
    109. }
    110. }
    111. if (m_param->rc.vbvBufferSize && m_param->lookaheadDepth && referenced)
    112. cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
    113. }

    三 传递残差具体计算

    1. /* Estimate the total amount of influence on future quality that could be had if we
    2. * were to improve the reference samples used to inter predict any given CU. */
    3. static void estimateCUPropagateCost(
    4. int* dst, //output: one propagate amount per CU, len entries
    5. const uint16_t* propagateIn,//propagate cost already stored for each CU; per the article author it keeps accumulating as frames are processed along the reference chain
    6. const int32_t* intraCosts,
    7. const uint16_t* interCosts,
    8. const int32_t* invQscales,
    9. const double* fpsFactor,
    10. int len)
    11. {
    12. double fps = *fpsFactor / 256; // range[0.01, 1.00]
    13. for (int i = 0; i < len; i++) //each of the len CUs (the article author: one row of CUs)
    14. {
    15. int intraCost = intraCosts[i]; //intra cost of this CU
    16. int interCost = S265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);//the mask keeps only the cost bits; per the article author these are the low 14 bits (so presumably 0x3FFF, not 00ffffff - confirm)
    17. //the inter cost is capped at the intra cost, so intraCost - interCost below is never negative
    18. double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8 (intra cost scaled by the inverse qscale factor)
    19. double propagateAmount = (double)propagateIn[i]/*previously stored propagate cost*/ + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0 (scaled intra cost weighted by the fps factor)
    20. double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0 (intra cost minus inter cost)
    21. double propagateDenom = (double)intraCost; // Q32
    22. dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);//final propagate amount, rounded to nearest
    23. }
    24. //}
    25. }

    以上就是 cuTree 通过传递残差影响帧 Cost 的全过程。

  • 相关阅读:
    (个人杂记)第六章 跑马灯实验
    Jmeter提取协议报文、请求头、请求体、响应体
    一个小脚本,挑选自己想要的图片并存入固定文件夹
    SpringBoot集成Nacos动态读取配置文件及服务发现
    Camunda 7.x 系列【49】存储服务 RepositoryService
    计算机系统(19)----- 进程互斥的硬件实现方法
    【敏捷那些事儿 06期】选敏捷还是瀑布?
    html实现简单的目标树
    【学习笔记】线性基
    YOLO v4详解
  • 原文地址:https://blog.csdn.net/fantasy_ARM9/article/details/126506607