一 传递残差的作用
传递残差最终会直接累加到当前帧的 Cost 上,从而影响最终的码率控制。本文分析传递残差的计算过程。
二 代码详细分析
传递残差迭代过程
- void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced)
- {
-     /*
-      * Walks frame b one block row at a time, asks primitives.propagateCost how much of each
-      * block's cost should be propagated, and adds that amount to the propagateCost arrays of
-      * frames[p0] (list 0) and frames[p1] (list 1) at the position each lowres motion vector
-      * points to.
-      *
-      * Parameters (per the original author's notes):
-      *   frames          - the frame sequence
-      *   averageDuration - average frame duration
-      *   p0              - index of the forward reference frame
-      *   p1              - index of the backward reference frame
-      *   b               - index of the current frame
-      *   referenced      - non-zero when the current frame is itself used as a reference
-      */
-     uint16_t *refCosts[2] = { frames[p0]->propagateCost, frames[p1]->propagateCost };
-     /* Rounded (b - p0) / (p1 - p0), scaled by 256. */
-     int32_t distScaleFactor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
-     int32_t bipredWeight = m_param->bEnableWeightedBiPred ? 64 - (distScaleFactor >> 2) : 32;
-     int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; // list 0 / list 1 weights, summing to 64
-     int listDist[2] = { b - p0, p1 - b };
-
-     memset(m_scratch, 0, m_8x8Width * sizeof(int));
-
-     uint16_t *propagateCost = frames[b]->propagateCost; // propagate cost accumulated so far on the current frame
-
-     s265_emms();
-     double fpsFactor = CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) / CLIP_DURATION(averageDuration);
-     /* Duration factor: nominal frame duration over the average duration.
-      * Per the original note it is 1 unless the frame rate is non-uniform. */
-
-     /* For non-referred frames the source costs are always zero, so just memset one row and re-use it. */
-     if (!referenced)
-         memset(frames[b]->propagateCost, 0, m_8x8Width * sizeof(uint16_t));
-
-     int32_t strideInCU = m_8x8Width; // row stride, counted in blocks
-     for (uint16_t blocky = 0; blocky < m_8x8Height; blocky++)
-     {
-         int cuIndex = blocky * strideInCU; // index of the first block of this row
-         /* Fill m_scratch with the propagate amount of every block in this row; the only
-          * difference between the two branches is which inverse-qscale table is passed. */
-         if (m_param->rc.qgSize == 8)
-             primitives.propagateCost(m_scratch, propagateCost,
-                                      frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
-                                      frames[b]->invQscaleFactor8x8 + cuIndex, &fpsFactor, m_8x8Width);
-         else
-             primitives.propagateCost(m_scratch, propagateCost,
-                                      frames[b]->intraCost + cuIndex, frames[b]->lowresCosts[b - p0][p1 - b] + cuIndex,
-                                      frames[b]->invQscaleFactor + cuIndex, &fpsFactor, m_8x8Width);
-
-         /* Referenced frames advance to their next row of input costs; non-referenced
-          * frames keep re-using the single zeroed row set up above. */
-         if (referenced)
-             propagateCost += m_8x8Width;
-
-         for (uint16_t blockx = 0; blockx < m_8x8Width; blockx++, cuIndex++) // every block of the row
-         {
-             int32_t propagate_amount = m_scratch[blockx]; // amount to propagate (not a count)
-             /* Don't propagate for an intra block. */
-             if (propagate_amount > 0)
-             {
-                 /* Access width-2 bitfield. */
-                 int32_t lists_used = frames[b]->lowresCosts[b - p0][p1 - b][cuIndex] >> LOWRES_COST_SHIFT;
-                 /* Each 16-bit lowresCosts entry packs two things: the bits above
-                  * LOWRES_COST_SHIFT say which prediction lists (forward / backward) the block
-                  * uses, the bits below hold the cost itself.
-                  * Original author's note: lowresCosts is the cost measured on the 1/4-resolution
-                  * picture and the shift is 14 (2 direction bits + 14 cost bits) -- TODO confirm
-                  * against the LOWRES_COST_SHIFT definition. */
-                 /* Follow the MVs to the previous frame(s). */
-                 for (uint16_t list = 0; list < 2; list++) // 0: forward reference, 1: backward reference
-                 {
-                     if ((lists_used >> list) & 1) // bit 'list' set: this direction is used by the block
-                     {
- #define CLIP_ADD(s, x) (s) = (uint16_t)S265_MIN((s) + (x), (1 << 16) - 1)
-                         int32_t listamount = propagate_amount;
-                         /* Apply bipred weighting. */
-                         if (lists_used == 3) // both lists used: split the amount by the bi-pred weights
-                             listamount = (listamount * bipredWeights[list] + 32) >> 6;
-                         MV *mvs = frames[b]->lowresMvs[list][listDist[list]]; // MV field for this direction
-                         /* Early termination for simple case of mv 0. */
-                         if (!mvs[cuIndex].word)
-                         {
-                             /* Zero MV: the whole amount lands on the co-located block of the
-                              * reference; CLIP_ADD saturates the sum at 65535. */
-                             CLIP_ADD(refCosts[list][cuIndex], listamount);
-                             continue;
-                         }
-                         int32_t x = mvs[cuIndex].x;
-                         int32_t y = mvs[cuIndex].y;
-                         /* The MV is split into a whole-block offset (>> 5) and a fractional
-                          * part in 1/32-block units (& 31, below). */
-                         int32_t cux = (x >> 5) + blockx;
-                         int32_t cuy = (y >> 5) + blocky;
-                         int32_t idx0 = cux + cuy * strideInCU;  // target block D
-                         int32_t idx1 = idx0 + 1;                // block to the right of D
-                         int32_t idx2 = idx0 + strideInCU;       // block below D
-                         int32_t idx3 = idx0 + strideInCU + 1;   // block below and to the right of D
-                         /*
-                          *   D (idx0)  idx1
-                          *   idx2      idx3
-                          */
-                         x &= 31;
-                         y &= 31;
-                         /* Bilinear weights over the four covered blocks; they sum to 32 * 32 = 1024,
-                          * which is why each share below is rounded with (+ 512) >> 10. */
-                         int32_t idx0weight = (32 - y) * (32 - x);
-                         int32_t idx1weight = (32 - y) * x;
-                         int32_t idx2weight = y * (32 - x);
-                         int32_t idx3weight = y * x;
-                         /* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
-                          * be counted. */
-                         if (cux < m_8x8Width - 1 && cuy < m_8x8Height - 1 && cux >= 0 && cuy >= 0)
-                         {
-                             /* All four target blocks are inside the frame. */
-                             CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
-                             CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
-                             CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
-                             CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
-                         }
-                         else /* Check offsets individually so that no out-of-frame index is written. */
-                         {
-                             if (cux < m_8x8Width && cuy < m_8x8Height && cux >= 0 && cuy >= 0)
-                                 CLIP_ADD(refCosts[list][idx0], (listamount * idx0weight + 512) >> 10);
-                             if (cux + 1 < m_8x8Width && cuy < m_8x8Height && cux + 1 >= 0 && cuy >= 0)
-                                 CLIP_ADD(refCosts[list][idx1], (listamount * idx1weight + 512) >> 10);
-                             if (cux < m_8x8Width && cuy + 1 < m_8x8Height && cux >= 0 && cuy + 1 >= 0)
-                                 CLIP_ADD(refCosts[list][idx2], (listamount * idx2weight + 512) >> 10);
-                             if (cux + 1 < m_8x8Width && cuy + 1 < m_8x8Height && cux + 1 >= 0 && cuy + 1 >= 0)
-                                 CLIP_ADD(refCosts[list][idx3], (listamount * idx3weight + 512) >> 10);
-                         }
-                     }
-                 }
-             }
-         }
-     }
-
-     /* Only referenced frames are finished here, and only when both a VBV buffer size and a
-      * lookahead depth are configured. */
-     if (m_param->rc.vbvBufferSize && m_param->lookaheadDepth && referenced)
-         cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
- }
三 传递残差具体计算
- /* Estimate the total amount of influence on future quality that could be had if we
-  * were to improve the reference samples used to inter predict any given CU.
-  *
-  * For each of the len entries of one block row:
-  *     inter  = min(intraCosts[i], interCosts[i] & LOWRES_COST_MASK)
-  *     dst[i] = round((propagateIn[i] + intraCosts[i] * invQscales[i] * fps)
-  *                    * (intraCosts[i] - inter) / intraCosts[i])
-  * with fps = *fpsFactor / 256.
-  *
-  * dst         - receives the propagate amount of every block
-  * propagateIn - propagate cost already accumulated on the current frame; it keeps growing
-  *               as the frames that reference it are processed one after another
-  * intraCosts  - intra cost of every block
-  * interCosts  - packed inter cost of every block; only the bits selected by
-  *               LOWRES_COST_MASK are the cost
-  * invQscales  - per-block inverse qscale factor (Q8.8 per the fixed-point notes below)
-  * fpsFactor   - pointer to the frame-duration factor
-  * len         - number of blocks in the row
-  *
-  * A block whose intra cost is not positive produces 0: the division by the intra cost
-  * would otherwise be 0/0, and converting the resulting NaN to int is undefined. */
- static void estimateCUPropagateCost(
-     int* dst,
-     const uint16_t* propagateIn,
-     const int32_t* intraCosts,
-     const uint16_t* interCosts,
-     const int32_t* invQscales,
-     const double* fpsFactor,
-     int len)
- {
-     double fps = *fpsFactor / 256; // range[0.01, 1.00] (per the original note)
-     for (int i = 0; i < len; i++) // every block of the row
-     {
-         int intraCost = intraCosts[i];
-         if (intraCost <= 0)
-         {
-             /* Nothing can be propagated; also avoids dividing by zero below. */
-             dst[i] = 0;
-             continue;
-         }
-         /* The inter cost is capped at the intra cost, so the numerator below is never negative. */
-         int interCost = S265_MIN(intraCost, interCosts[i] & LOWRES_COST_MASK);
-         /* Multiply in double so the product cannot overflow a 32-bit int. */
-         double propagateIntra = (double)intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
-         double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
-         double propagateNum = (double)(intraCost - interCost); // intra cost minus inter cost; Q32 - Q32 = Q33.0
-         double propagateDenom = (double)intraCost; // Q32
-         dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5); // rounded final propagate amount
-     }
- }
以上就是 cuTree 通过传递残差影响帧 Cost 的全过程。