当前位置: 首页 > 工具软件 > compress > 使用案例 >

Analysis::compressCTU()

华心思
2023-12-01

x265 编码器在编码一帧时,以 CTU 行为单位进行 WPP(Wavefront Parallel Processing,波前并行处理)编码:每一行 CTU 由一个线程负责,该线程依次对本行中的每个 CTU 调用 compressCTU 进行压缩分析。

/*
	Analyse and compress one CTU, returning the best mode found at depth 0.

	Parameters:
		ctu            - CTU data being encoded; QP, depth, mode and size fields are written here
		frame          - frame that owns the CTU (source/recon pictures, analysis data, CTU info)
		cuGeom         - geometry of the CTU root, passed through to the compress/refine routines
		initialContext - entropy context loaded into m_rqt[0].cur; also supplies m_meanQP

	Returns:
		reference to m_modeDepth[0].bestMode

	Flow:
	1. Set up the QP / lambda and the entropy context for this CTU, and copy the
	   source pixels into m_modeDepth[0].fencYuv.
	2. Hook up any externally supplied analysis information:
		- bCTUInfo: expand depth / content / prevCtuInfoChange into per-partition arrays
		- analysisMultiPassRefine && rc.bStatRead && non-I slice: point the reuse
		  pointers at mv / mvpIdx / ref / modes / depth from the previous pass
		- (analysisSave || analysisLoad) && non-I slice && 1 < analysisReuseLevel < 10:
		  point the reuse pointers at ref / depth / modes (and partSize / mergeFlag
		  when analysisReuseLevel > 4)
	3. Compress the CTU:
		- I slice
			1. if analysisLoad && analysisReuseLevel > 1, copy cuDepth / lumaIntraDir /
			   partSize / chromaIntraDir from the loaded intra data
			2. compressIntraCU
		- P/B slice
			1. decide whether loaded analysis data should be copied (bCopyAnalysis)
			2. if so, copy cuDepth / predMode / partSize / skipFlag and, for non-AVC_INFO
			   input, lumaIntraDir / chromaIntraDir
			3. pick exactly one of the following, tested in this order:
				- bIntraRefresh, P slice, CTU column inside [pirStartCol, pirEndCol):
				  compressIntraCU
				- rdLevel == 0:
					1. copy the source YUV into the recon picture
					2. compressInterCU_rd0_4
					3. encodeResidue
				- (analysisLoad && analysisReuseLevel == 10 &&
				   (bAnalysisType != HEVC_INFO || slice is not P))
				  || (bAnalysisType == AVC_INFO && analysisReuseLevel >= 7 &&
				      m_numPartitions <= 16):
					1. copy cuDepth / predMode / partSize (and intra dirs when applicable)
					2. qprdRefine
					3. return bestMode immediately (steps 4 and 5 are skipped)
				- bDistributeModeAnalysis && rdLevel >= 2: compressInterCU_dist
				- rdLevel <= 4: compressInterCU_rd0_4
				- otherwise: compressInterCU_rd5_6
	4. If bEnableRdRefine or bOptCUDeltaQP is set, run qprdRefine.
	5. If csvLogLevel >= 2, run collectPUStatistics.
	6. Return the depth-0 bestMode.
*/
Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
{
	// Slice that contains this CTU
    m_slice = ctu.m_slice;
	// Frame that contains this CTU
    m_frame = &frame;
	// Encoder parameters, taken from the frame
	m_param = m_frame->m_param;
	// Set when rdLevel >= 3 (going by the flag name, this enables chroma SA8D costing)
    m_bChromaSa8d = m_param->rdLevel >= 3;

#if _DEBUG || CHECKED_BUILD
    invalidateContexts(0);
#endif

	// Base QP: per-CU QP from calculateQpforCuSize when the PPS enables delta QP,
	// otherwise the slice QP. setLambdaFromQP returns the QP actually used below.
    int qp = setLambdaFromQP(ctu, m_slice->m_pps->bUseDQP ? calculateQpforCuSize(ctu, cuGeom) : m_slice->m_sliceQp);
    ctu.setQPSubParts((int8_t)qp, 0, 0);

	// Load the caller-supplied entropy context into the depth-0 working context
    m_rqt[0].cur.load(initialContext);
	// The CTU's mean QP is inherited from the initial context
    ctu.m_meanQP = initialContext.m_meanQP;
	// Copy this CTU's source pixels from the frame's source picture into the depth-0 mode data
    m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);

    if (m_param->bSsimRd)	// SSIM-based RD is enabled
        calculateNormFactor(ctu, qp);

	// Number of partitions in the CTU (presumably 4x4 units - TODO confirm)
    uint32_t numPartition = ctu.m_numPartitions;

	// Externally supplied per-CTU information (bCTUInfo)
	// NOTE(review): the second operand is pointer arithmetic on *m_ctuInfo, so it is
	// non-null whenever *m_ctuInfo is non-null or m_cuAddr != 0 - confirm this test is intended.
    if (m_param->bCTUInfo && (*m_frame->m_ctuInfo + ctu.m_cuAddr))
    {
		// CTU info record for this CTU
        x265_ctu_info_t* ctuTemp = *m_frame->m_ctuInfo + ctu.m_cuAddr;
		// Index of the current entry in ctuPartitions / ctuInfo
        int32_t depthIdx = 0;
		// Stride used to index m_prevCtuInfoChange per CTU (64 entries per CTU)
        uint32_t maxNum8x8Partitions = 64;
		
		// Destination cursors into the frame's per-CTU add-on arrays
        uint8_t* depthInfoPtr = m_frame->m_addOnDepth[ctu.m_cuAddr];
        uint8_t* contentInfoPtr = m_frame->m_addOnCtuInfo[ctu.m_cuAddr];
        int* prevCtuInfoChangePtr = m_frame->m_addOnPrevChange[ctu.m_cuAddr];
        
		// Expand each input entry into numPartition >> (2 * depth) per-partition slots.
		// The first entry is always processed; the loop stops when the next
		// ctuPartitions entry is 0.
		do
        {
			// Read depth / content / prevCtuInfoChange for this entry.
			// ctuInfo is read as an int32_t array and truncated to 8 bits.
            uint8_t depth = (uint8_t)ctuTemp->ctuPartitions[depthIdx];
            uint8_t content = (uint8_t)(*((int32_t *)ctuTemp->ctuInfo + depthIdx));
            int prevCtuInfoChange = m_frame->m_prevCtuInfoChange[ctu.m_cuAddr * maxNum8x8Partitions + depthIdx];
            
			// Fill the slots covered by this entry. Note that '*' binds tighter than '>>',
			// so the byte count is (sizeof(T) * numPartition) >> (2 * depth).
			memset(depthInfoPtr, depth, sizeof(uint8_t) * numPartition >> 2 * depth);
            memset(contentInfoPtr, content, sizeof(uint8_t) * numPartition >> 2 * depth);
			// NOTE(review): this zero-fill is overwritten by the loop right below
            memset(prevCtuInfoChangePtr, 0, sizeof(int) * numPartition >> 2 * depth);
            for (uint32_t l = 0; l < numPartition >> 2 * depth; l++)
                prevCtuInfoChangePtr[l] = prevCtuInfoChange;
            
			// Advance the destination cursors past the slots just written
			depthInfoPtr += ctu.m_numPartitions >> 2 * depth;
            contentInfoPtr += ctu.m_numPartitions >> 2 * depth;
            prevCtuInfoChangePtr += ctu.m_numPartitions >> 2 * depth;
            depthIdx++;

        } while (ctuTemp->ctuPartitions[depthIdx] != 0);

		// Expose the expanded arrays to the rest of the analysis and seed the CTU depth map
        m_additionalCtuInfo = m_frame->m_addOnCtuInfo[ctu.m_cuAddr];
        m_prevCtuInfoChange = m_frame->m_addOnPrevChange[ctu.m_cuAddr];
        memcpy(ctu.m_cuDepth, m_frame->m_addOnDepth[ctu.m_cuAddr], sizeof(uint8_t) * numPartition);
        // Calculate log2CUSize from depth
        for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
            ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];
    }

	// Multi-pass refinement: analysisMultiPassRefine && rc.bStatRead && non-I slice
    if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && (m_slice->m_sliceType != I_SLICE))
    {
		// One prediction direction for P slices, two otherwise
        int numPredDir = m_slice->isInterP() ? 1 : 2;
		// Inter analysis data stored for this frame
        m_reuseInterDataCTU = m_frame->m_analysisData.interData;
		// Point the reuse pointers at this CTU's mv / mvpIdx / ref / modes / depth,
		// all indexed by cuAddr * numPartitions
        for (int dir = 0; dir < numPredDir; dir++)
        {
            m_reuseMv[dir] = &m_reuseInterDataCTU->mv[dir][ctu.m_cuAddr * ctu.m_numPartitions];
            m_reuseMvpIdx[dir] = &m_reuseInterDataCTU->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions];
        }
        m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions];
    }
    
	/*
		Analysis save/load: hook up the per-CTU reuse pointers for later stages.
	*/
	// (analysisSave || analysisLoad) && non-I slice && analysisReuseLevel in [2, 9]
    if ((m_param->analysisSave || m_param->analysisLoad) && m_slice->m_sliceType != I_SLICE && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel < 10)
    {
		// One prediction direction for P slices, two otherwise
        int numPredDir = m_slice->isInterP() ? 1 : 2;
		// ref is indexed by cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir;
		// depth and modes are indexed by cuAddr * numPartitions
        m_reuseInterDataCTU = m_frame->m_analysisData.interData;
        m_reuseRef = &m_reuseInterDataCTU->ref [ctu.m_cuAddr * X265_MAX_PRED_MODE_PER_CTU * numPredDir];
        m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr * ctu.m_numPartitions];
        m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr * ctu.m_numPartitions];
		// Above reuse level 4, partSize and mergeFlag are hooked up as well
        if (m_param->analysisReuseLevel > 4)
        {
            m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr * ctu.m_numPartitions];
            m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr * ctu.m_numPartitions];
        }
		// Saving without loading: reset every ref slot of this CTU to -1
        if (m_param->analysisSave && !m_param->analysisLoad)
            for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir; i++)
                m_reuseRef[i] = -1;
    }
    ProfileCUScope(ctu, totalCTUTime, totalCTUs);

	/*
		Compress the CTU
	*/
    if (m_slice->m_sliceType == I_SLICE)	// I slice
    {
		// Intra analysis data stored for this frame
        x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;
		// analysisLoad && analysisReuseLevel > 1
        if (m_param->analysisLoad && m_param->analysisReuseLevel > 1)
        {
			// Seed the CTU with the loaded cuDepth / lumaIntraDir / partSize / chromaIntraDir
            memcpy(ctu.m_cuDepth, &intraDataCTU->depth[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr * numPartition], sizeof(char) * numPartition);
            memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) * numPartition);
        }
		// Intra compression
        compressIntraCU(ctu, cuGeom, qp);
    } // end of I slice
    else // P or B slice
    {
		/*
			Decide whether loaded analysis data must be copied into the CTU (bCopyAnalysis)
		*/
		bool bCopyAnalysis = ((m_param->analysisLoad && m_param->analysisReuseLevel == 10) || (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && ctu.m_numPartitions <= 16));
		// bAnalysisType == AVC_INFO && analysisReuseLevel >= 7 && rdLevel <= 4
		bool BCompressInterCUrd0_4 = (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && m_param->rdLevel <= 4);
        // bAnalysisType == AVC_INFO && analysisReuseLevel >= 7 && rdLevel in [5, 6]
		bool BCompressInterCUrd5_6 = (m_param->bAnalysisType == AVC_INFO && m_param->analysisReuseLevel >= 7 && m_param->rdLevel >= 5 && m_param->rdLevel <= 6);
        bCopyAnalysis = bCopyAnalysis || BCompressInterCUrd0_4 || BCompressInterCUrd5_6;

		// Copy the loaded analysis data into the CTU when required
        if (bCopyAnalysis)
        {
			// Inter analysis data stored for this frame
            x265_analysis_inter_data* interDataCTU = m_frame->m_analysisData.interData;
			// Offset of this CTU's first partition within the frame-wide arrays
            int posCTU = ctu.m_cuAddr * numPartition;
			// Copy cuDepth / predMode / partSize, then skipFlag per list
            memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);
			// One list for non-B slices, two for B slices
            for (int list = 0; list < m_slice->isInterB() + 1; list++)
                memcpy(ctu.m_skipFlag[list], &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) * numPartition);
			// (P slice || intra allowed in B frames) && bAnalysisType != AVC_INFO: intra directions are copied too
            if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !(m_param->bAnalysisType == AVC_INFO))
            {
				// Intra analysis data stored for this frame
                x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;
				// Copy lumaIntraDir / chromaIntraDir
                memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
                memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[posCTU], sizeof(uint8_t) * numPartition);
            }
            // Calculate log2CUSize from depth
            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
                ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];
        }

		// Intra refresh: in a P slice, a CTU whose column (m_cuPelX / maxCUSize) lies in
		// [pirStartCol, pirEndCol) is coded as intra
		if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
			ctu.m_cuPelX / m_param->maxCUSize >= frame.m_encData->m_pir.pirStartCol
			&& ctu.m_cuPelX / m_param->maxCUSize < frame.m_encData->m_pir.pirEndCol)
			compressIntraCU(ctu, cuGeom, qp);
		// rdLevel == 0
        else if (!m_param->rdLevel)
        {
            /* Copy source pixels into the reconstructed block so they are
             * available for intra predictions. (The upstream comment says
             * "RD Level 0/1", but this branch is only taken when rdLevel == 0.) */
			
			 // Copy the source YUV into the recon picture
            m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
			// Inter compression
            compressInterCU_rd0_4(ctu, cuGeom, qp);
            /* generate residual for entire CTU at once and copy to reconPic
			    (residual coding) */
            encodeResidue(ctu, cuGeom);
        }
		/* (analysisLoad && analysisReuseLevel == 10 && (bAnalysisType != HEVC_INFO || slice is not P))
		   or
		   (bAnalysisType == AVC_INFO && analysisReuseLevel >= 7 && m_numPartitions <= 16)
		   NOTE(review): every case that reaches this branch also has bCopyAnalysis == true,
		   so the copies below appear to repeat those done above - confirm before removing. */
        else if ((m_param->analysisLoad && 
			m_param->analysisReuseLevel == 10 && 
			(!(m_param->bAnalysisType == HEVC_INFO) || m_slice->m_sliceType != P_SLICE)) ||
                 ((m_param->bAnalysisType == AVC_INFO) && m_param->analysisReuseLevel >= 7 && ctu.m_numPartitions <= 16))
        {
			// Inter analysis data stored for this frame
            x265_analysis_inter_data* interDataCTU = m_frame->m_analysisData.interData;
			
			// Offset of this CTU's first partition within the frame-wide arrays
            int posCTU = ctu.m_cuAddr * numPartition;
			
			// Copy the loaded cuDepth / predMode / partSize into the CTU
            memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
            memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU], sizeof(uint8_t) * numPartition);
            
			// (P slice || intra allowed in B frames) && bAnalysisType != AVC_INFO
			if ((m_slice->m_sliceType == P_SLICE || m_param->bIntraInBFrames) && !(m_param->bAnalysisType == AVC_INFO))
            {
				// Copy the loaded lumaIntraDir / chromaIntraDir from the intra data into the CTU
                x265_analysis_intra_data* intraDataCTU = m_frame->m_analysisData.intraData;
                memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[posCTU], sizeof(uint8_t) * numPartition);
                memcpy(ctu.m_chromaIntraDir, &intraDataCTU->chromaModes[posCTU], sizeof(uint8_t) * numPartition);
            }

            // Calculate log2CUSize from depth for every partition of the CTU
            for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
                ctu.m_log2CUSize[i] = (uint8_t)m_param->maxLog2CUSize - ctu.m_cuDepth[i];

			// QP/RD refinement on the loaded decisions
            qprdRefine (ctu, cuGeom, qp, qp);

			// Early return: the trailing qprdRefine and collectPUStatistics below are skipped
            return *m_modeDepth[0].bestMode;
        }
		// Distributed mode analysis, only used when rdLevel >= 2
        else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
            compressInterCU_dist(ctu, cuGeom, qp);
        else if (m_param->rdLevel <= 4)	// rdLevel 1..4 here (rdLevel 0 was handled above)
            compressInterCU_rd0_4(ctu, cuGeom, qp);
        else	// rdLevel above 4
            compressInterCU_rd5_6(ctu, cuGeom, qp);
    }	// end of P/B slice

	// Run QP/RD refinement when bEnableRdRefine or bOptCUDeltaQP is set
    if (m_param->bEnableRdRefine || m_param->bOptCUDeltaQP)
        qprdRefine(ctu, cuGeom, qp, qp);

	// Collect PU statistics when csvLogLevel >= 2
    if (m_param->csvLogLevel >= 2)
        collectPUStatistics(ctu, cuGeom);

	// Return the best mode found at depth 0
    return *m_modeDepth[0].bestMode;
}
 类似资料: