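// When encoding a frame, the x265 encoder performs WPP (wavefront parallel processing) row by row: one thread per CTU row, and each thread runs compressCTU analysis on every CTU of its own row.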
/*
 * Analysis::compressCTU -- run mode analysis for one CTU.
 *
 * Parameters:
 *   ctu            CTU to analyse; updated in place (QP, and, when analysis data
 *                  is reused, depth / mode / partition arrays).
 *   frame          Frame the CTU belongs to; cached in m_frame.
 *   cuGeom         Geometry of the CTU root, forwarded to the compress/refine calls.
 *   initialContext Entropy state loaded into m_rqt[0].cur; also supplies m_meanQP.
 *
 * Returns: reference to m_modeDepth[0].bestMode.
 *
 * Outline:
 * 1. Load the QP and the entropy context for the CTU.
 * 2. Optionally pick up previously produced analysis to speed up mode decision:
 *    - bCTUInfo: load depth / content / prevCtuInfoChange
 *    - analysisMultiPassRefine: point at mv / mvpIdx / ref / modes / depth of an earlier pass
 *    - analysisSave or analysisLoad on a non-I slice: point at ref / depth / modes
 *      (and partSize / mergeFlag when analysisReuseLevel > 4)
 * 3. Compress the CTU.
 *    - I slice
 *      1. with analysisLoad, copy cuDepth / lumaIntraDir / partSize / chromaIntraDir
 *      2. compressIntraCU
 *    - P/B slice
 *      1. decide whether reusable analysis data is available (bCopyAnalysis)
 *      2. if so copy cuDepth / predMode / partSize / skipFlag / lumaIntraDir / chromaIntraDir
 *      3. do the actual P/B compression:
 *         - bIntraRefresh and the CTU lies inside the PIR column range: compressIntraCU
 *         - rdLevel == 0: copy source YUV to recon, compressInterCU_rd0_4, encodeResidue
 *         - analysis-load path: copy cuDepth / predMode / partSize / intra dirs,
 *           qprdRefine, then return bestMode directly (steps 4 and 5 are skipped)
 *         - bDistributeModeAnalysis and rdLevel >= 2: compressInterCU_dist
 *         - rdLevel <= 4: compressInterCU_rd0_4
 *         - otherwise: compressInterCU_rd5_6
 * 4. If bEnableRdRefine or bOptCUDeltaQP is set, run qprdRefine.
 * 5. If csvLogLevel >= 2, run collectPUStatistics.
 * 6. Return the depth-0 bestMode.
 */
Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
    // Cache the slice, the frame and the encoder parameters for this CTU.
    m_slice = ctu.m_slice;
    m_frame = &frame;
    m_param = m_frame->m_param;
    // The chroma sa8d flag is switched on from rdLevel 3 upwards.
    m_bChromaSa8d = m_param->rdLevel >= 3;

#if _DEBUG || CHECKED_BUILD
    invalidateContexts(0);
#endif

    // Base QP: per-CU-size QP when the PPS has bUseDQP set, otherwise the slice QP.
    // setLambdaFromQP returns the QP that is then stored on the whole CTU.
    int qp = setLambdaFromQP(ctu, m_slice->m_pps->bUseDQP ? calculateQpforCuSize(ctu, cuGeom) : m_slice->m_sliceQp);
    ctu.setQPSubParts((int8_t)qp, 0, 0);

    // Load the initial entropy context into the depth-0 RQT state.
    m_rqt[0].cur.load(initialContext);
    // Take the CTU mean QP from the initial context.
    ctu.m_meanQP = initialContext.m_meanQP;
    // Copy this CTU's source pixels from the frame's fencPic into the depth-0 mode data.
    m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);

    // SSIM-based RD needs the normalisation factor for this CTU.
    if (m_param->bSsimRd)
        calculateNormFactor(ctu, qp);

    // Partition count of the CTU (original note: number of 4x4 blocks).
    uint32_t numPartition = ctu.m_numPartitions;

    // bCTUInfo: expand the per-CTU records into per-partition add-on arrays.
    if (m_param->bCTUInfo && (*m_frame->m_ctuInfo + ctu.m_cuAddr))
    {
        // Record set for this CTU.
        x265_ctu_info_t* ctuTemp = *m_frame->m_ctuInfo + ctu.m_cuAddr;
        // Index of the record being expanded.
        uint32_t depthIdx = 0;
        // Per-CTU record stride (original note: at most 64 8x8 blocks).
        uint32_t maxNum8x8Partitions = 64;
        // Destination cursors: depth, content info and previous-change info.
        uint8_t* depthInfoPtr = m_frame->m_addOnDepth[ctu.m_cuAddr];
        uint8_t* contentInfoPtr = m_frame->m_addOnCtuInfo[ctu.m_cuAddr];
        int* prevCtuInfoChangePtr = m_frame->m_addOnPrevChange[ctu.m_cuAddr];

        do
        {
            // Fetch depth / content / prevCtuInfoChange of the current record.
            uint8_t depth = (uint8_t)ctuTemp->ctuPartitions[depthIdx];
            uint8_t content = (uint8_t)(*((int32_t *)ctuTemp->ctuInfo + depthIdx));
            int prevCtuInfoChange = m_frame->m_prevCtuInfoChange[ctu.m_cuAddr * maxNum8x8Partitions + depthIdx];

            // Number of partitions covered by a CU at this depth. Parenthesised
            // explicitly: the shift amount is 2 * depth.
            const uint32_t span = numPartition >> (2 * depth);

            // Replicate the record over every partition it covers. The int array
            // is filled element by element; a preceding zeroing memset would be
            // overwritten immediately, so none is issued.
            memset(depthInfoPtr, depth, sizeof(uint8_t) * span);
            memset(contentInfoPtr, content, sizeof(uint8_t) * span);
            for (uint32_t l = 0; l < span; l++)
                prevCtuInfoChangePtr[l] = prevCtuInfoChange;

            // Advance the destination cursors past the partitions just written.
            depthInfoPtr += span;
            contentInfoPtr += span;
            prevCtuInfoChangePtr += span;
            depthIdx++;

            // A zero depth entry terminates the list. The index is also bounded by
            // the per-CTU record stride so that a CTU whose records are all
            // non-zero cannot make the loop read beyond its own records.
            // NOTE(review): assumes ctuPartitions holds at most
            // maxNum8x8Partitions entries per CTU -- confirm against x265_ctu_info_t.
        } while (depthIdx < maxNum8x8Partitions && ctuTemp->ctuPartitions[depthIdx] != 0);

        m_additionalCtuInfo = m_frame->m_addOnCtuInfo[ctu.m_cuAddr];
        m_prevCtuInfoChange = m_frame->m_addOnPrevChange[ctu.m_cuAddr];
        memcpy(ctu.m_cuDepth, m_frame->m_addOnDepth[ctu.m_cuAddr], sizeof(uint8_t) * numPartition);

        // Calculate log2CUSize from depth
        for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
            ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];
    }

    // analysisMultiPassRefine with stats read, non-I slice: point the reuse
    // cursors at this CTU's mv / mvpIdx / ref / modes / depth from the earlier pass.
    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && (m_slice->m_sliceType != I_SLICE))
    {
        // One prediction direction for P slices, two otherwise.
        int numPredDir = m_slice->isInterP() ? 1 : 2;
        m_reuseInterDataCTU = m_frame->m_analysisData.interData;
        for (int dir = 0; dir < numPredDir; dir++)
        {
            m_reuseMv[dir] = &m_reuseInterDataCTU->mv[dir][ctu.m_cuAddr * ctu.m_numPartitions];
            m_reuseMvpIdx[dir] = &m_reuseInterDataCTU->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions];
        }
        m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions];
    }

    // Analysis save or load, non-I slice, analysisReuseLevel in [2, 9]:
    // point the reuse cursors at this CTU's slice of the inter analysis data.
    if ((m_param->analysisSave || m_param->analysisLoad) && m_slice->m_sliceType != I_SLICE && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel < 10)
    {
        // One prediction direction for P slices, two otherwise.
        int numPredDir = m_slice->isInterP() ? 1 : 2;
        m_reuseInterDataCTU = m_frame->m_analysisData.interData;
        m_reuseRef = &m_reuseInterDataCTU->ref [ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
        m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions];

        // Above reuse level 4 the partSize and mergeFlag arrays are used as well.
        if (m_param->analysisReuseLevel > 4)
        {
            m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr * ctu.m_numPartitions];
            m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr * ctu.m_numPartitions];
        }

        // Saving without loading: reset every ref entry of this CTU to -1.
        if (m_param->analysisSave && !m_param->analysisLoad)
            for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)
                m_reuseRef[i] = -1;
    }

    ProfileCUScope(ctu, totalCTUTime, totalCTUs);

    /* Compression proper */
    if (m_slice->m_sliceType == I_SLICE)
    {
        // Intra analysis data of the frame.
        x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;

        // analysisLoad with analysisReuseLevel > 1: seed the CTU with the stored
        // cuDepth / lumaIntraDir / partSize / chromaIntraDir.
        if (m_param->analysisLoad && m_param->analysisReuseLevel > 1)
        {
            memcpy(ctu.m_cuDepth, &intraDataCTU->depth[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition);
            memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
        }

        // Intra compression.
        compressIntraCU(ctu, cuGeom, qp);
    }
    else // P or B slice
    {
        /* Decide whether stored analysis has to be copied into the CTU. */
        bool bCopyAnalysis = ((m_param->analysisLoad && m_param->analysisReuseLevel == 10) || (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && ctu.m_numPartitions <= 16));
        // AVC_INFO, analysisReuseLevel >= 7, rdLevel 0..4
        bool BCompressInterCUrd0_4 = (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && m_param->rdLevel <= 4);
        // AVC_INFO, analysisReuseLevel >= 7, rdLevel 5..6
        bool BCompressInterCUrd5_6 = (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && m_param->rdLevel >= 5 && m_param->rdLevel <= 6);
        bCopyAnalysis = bCopyAnalysis || BCompressInterCUrd0_4 || BCompressInterCUrd5_6;

        if (bCopyAnalysis)
        {
            // Inter analysis data of the frame.
            x265_analysis_inter_data* interDataCTU = m_frame->m_analysisData.interData;
            // Offset of this CTU inside the per-partition analysis arrays.
            int posCTU = ctu.m_cuAddr * numPartition;

            // Copy cuDepth / predMode / partSize / skipFlag into the CTU.
            memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);
            // One list, plus a second one when isInterB() is true.
            for (int list = 0; list < m_slice->isInterB() + 1; list++)
                memcpy(ctu.m_skipFlag[list], &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) * numPartition);

            // P slice, or intra allowed in B frames, and the data is not AVC_INFO:
            // the intra directions are copied too.
            if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !(m_param->bAnalysisType == AVC_INFO))
            {
                x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;
                memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
                memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[posCTU], sizeof(uint8_t) * numPartition);
            }

            // Calculate log2CUSize from depth
            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
                ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];
        }

        // bIntraRefresh on a P slice, with the CTU column inside
        // [pirStartCol, pirEndCol): code the CTU as intra.
        if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
            ctu.m_cuPelX / m_param->maxCUSize >= frame.m_encData->m_pir.pirStartCol
            && ctu.m_cuPelX / m_param->maxCUSize < frame.m_encData->m_pir.pirEndCol)
            compressIntraCU(ctu, cuGeom, qp);
        else if (!m_param->rdLevel)
        {
            /* rdLevel == 0: copy source pixels into the reconstructed block so
             * they are available for intra predictions */
            m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);

            // Inter compression.
            compressInterCU_rd0_4(ctu, cuGeom, qp);

            /* generate residual for entire CTU at once and copy to reconPic */
            encodeResidue(ctu, cuGeom);
        }
        /* Either: analysisLoad, analysisReuseLevel == 10, and (analysis type is not
         * HEVC_INFO or the slice is not a P slice);
         * or: AVC_INFO, analysisReuseLevel >= 7 and at most 16 partitions in the CTU. */
        else if ((m_param->analysisLoad &&
            m_param->analysisReuseLevel == 10 &&
            (!(m_param->bAnalysisType == HEVC_INFO) || m_slice->m_sliceType != P_SLICE)) ||
            ((m_param->bAnalysisType == AVC_INFO) && m_param->analysisReuseLevel >= 7 && ctu.m_numPartitions <= 16))
        {
            // NOTE(review): this condition implies bCopyAnalysis, so the copies
            // below look like a repeat of the ones done in that block -- confirm
            // before removing them.
            x265_analysis_inter_data* interDataCTU = m_frame->m_analysisData.interData;
            // Offset of this CTU inside the per-partition analysis arrays.
            int posCTU = ctu.m_cuAddr * numPartition;

            // Copy cuDepth / predMode / partSize from the loaded inter data.
            memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);

            // P slice, or intra allowed in B frames, and the data is not AVC_INFO:
            // copy lumaIntraDir / chromaIntraDir from the loaded intra data.
            if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !(m_param->bAnalysisType == AVC_INFO))
            {
                x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;
                memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
                memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[posCTU], sizeof(uint8_t) * numPartition);
            }

            // Calculate log2CUSize from depth
            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
                ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];

            // QP RD refinement, then return right away: the refine and
            // statistics steps at the end of the function are not run on this path.
            qprdRefine (ctu, cuGeom, qp, qp);
            return *m_modeDepth[0].bestMode;
        }
        // Distributed mode analysis, used only when rdLevel >= 2.
        else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
            compressInterCU_dist(ctu, cuGeom, qp);
        else if (m_param->rdLevel <= 4) // rdLevel 0..4 inter compression
            compressInterCU_rd0_4(ctu, cuGeom, qp);
        else // remaining rdLevels (5..6) inter compression
            compressInterCU_rd5_6(ctu, cuGeom, qp);
    }

    // QP RD refinement when bEnableRdRefine or bOptCUDeltaQP is set.
    if (m_param->bEnableRdRefine || m_param->bOptCUDeltaQP)
        qprdRefine(ctu, cuGeom, qp, qp);

    // Collect PU statistics when csvLogLevel >= 2.
    if (m_param->csvLogLevel >= 2)
        collectPUStatistics(ctu, cuGeom);

    // Best mode found at depth 0.
    return *m_modeDepth[0].bestMode;
}