/* Analyse the B_16x16 inter modes for the current macroblock: motion-search
 * every reference of list0 and list1 (interleaved to enable a fast B_SKIP
 * early-out), then evaluate bi-prediction -- including an explicit zero-MV
 * bi-prediction candidate -- and add the per-mode header costs. */
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
/* Scratch prediction buffers, one 16x16 macroblock each. */
ALIGNED_ARRAY_N( pixel, pix0,[16*16] );
ALIGNED_ARRAY_N( pixel, pix1,[16*16] );
pixel *src0, *src1;
intptr_t stride0 = 16, stride1 = 16;
int i_ref, i_mvc;
ALIGNED_4( int16_t mvc[9][2] );
int try_skip = a->b_try_skip;
int list1_skipped = 0;
/* Per-list half-pel cost thresholds for early termination of the search;
 * only engaged when a list has more than one reference frame. */
int i_halfpel_thresh[2] = {INT_MAX, INT_MAX};
int *p_halfpel_thresh[2] = {(a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh[0] : NULL,
(a->b_early_terminate && h->mb.pic.i_fref[1]>1) ? &i_halfpel_thresh[1] : NULL};
x264_me_t m;
/* Partition size of the ME context: 16x16 (PIXEL_16x16 == 0). */
m.i_pixel = PIXEL_16x16;
/* LOAD_FENC initializes the ME context from the frame being encoded:
 * p_cost_mv, i_stride[0..2] and p_fenc[0..2].  For reference:
 *
 * #define LOAD_FENC(m, src, xoff, yoff) \
 * { \
 * (m)->p_cost_mv = a->p_cost_mv; \
 * (m)->i_stride[0] = h->mb.pic.i_stride[0]; \
 * (m)->i_stride[1] = h->mb.pic.i_stride[1]; \
 * (m)->i_stride[2] = h->mb.pic.i_stride[2]; \
 * (m)->p_fenc[0] = &(src)[0][(xoff)+(yoff)*FENC_STRIDE]; \
 * (m)->p_fenc[1] = &(src)[1][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \
 * (m)->p_fenc[2] = &(src)[2][((xoff)>>CHROMA_H_SHIFT)+((yoff)>>CHROMA_V_SHIFT)*FENC_STRIDE]; \
 * }
 */
LOAD_FENC( &m, h->mb.pic.p_fenc, 0, 0 );
/* 16x16 Search on list 0 and list 1 */
/* Start both per-list best costs at the maximum so the first search wins. */
a->l0.me16x16.cost = INT_MAX;
a->l1.me16x16.cost = INT_MAX;
for( int l = 1; l >= 0; )
{
/* lX selects the per-list analysis state; list1 is visited first
 * (see the ordering comment below). */
x264_mb_analysis_list_t *lX = l ? &a->l1 : &a->l0;
/* This loop is extremely munged in order to facilitate the following order of operations,
 * necessary for an efficient fast skip.
 * 1. Search list1 ref0.
 * 2. Search list0 ref0.
 * 3. Try skip.
 * 4. Search the rest of list0.
 * 5. Go back and finish list1.
 */
for( i_ref = (list1_skipped && l == 1) ? 1 : 0; i_ref < h->mb.pic.i_fref[l]; i_ref++ )
{
/* Step 1 done: suspend list1 after ref0 until the skip test has run. */
if( try_skip && l == 1 && i_ref > 0 )
{
list1_skipped = 1;
break;
}
/* Bit cost of coding this reference index (a->p_cost_ref[l][i_ref]). */
m.i_ref_cost = REF_COST( l, i_ref );
/* search with ref */
/* LOAD_HPELS points the ME context at the full-pel and half-pel
 * planes of this reference frame. */
LOAD_HPELS( &m, h->mb.pic.p_fref[l][i_ref], l, i_ref, 0, 0 );
/* Derive the MV predictor from the cached left/top/top-right neighbor
 * refs and MVs (mb.cache.ref[l] / mb.cache.mv[l]): either one matching
 * neighbor's MV or the median of the three, depending on which
 * neighbors use this same reference index. */
x264_mb_predict_mv_16x16( h, l, i_ref, m.mvp );
/* Collect extra MV candidates to seed the motion search. */
x264_mb_predict_mv_ref16x16( h, l, i_ref, mvc, &i_mvc );
x264_me_search_ref( h, &m, mvc, i_mvc, p_halfpel_thresh[l] );
/* add ref cost */
m.cost += m.i_ref_cost;
/* Keep the best reference found so far for this list. */
if( m.cost < lX->me16x16.cost )
h->mc.memcpy_aligned( &lX->me16x16, &m, sizeof(x264_me_t) );
/* save mv for predicting neighbors */
CP32( lX->mvc[i_ref][0], m.mv );
CP32( h->mb.mvr[l][i_ref][h->mb.i_mb_xy], m.mv );
/* Fast skip detection. */
if( i_ref == 0 && try_skip )
{
/* If the best MV strays more than one unit (sum of components)
 * from the direct-mode MV, skip cannot win -- stop trying. */
if( abs(lX->me16x16.mv[0]-h->mb.cache.direct_mv[l][0][0]) +
abs(lX->me16x16.mv[1]-h->mb.cache.direct_mv[l][0][1]) > 1 )
{
try_skip = 0;
}
else if( !l )
{
/* We already tested skip */
h->mb.i_type = B_SKIP;
x264_analyse_update_cache( h, a );
return;
}
}
}
/* Second pass over list1 ran to completion: all searches are done. */
if( list1_skipped && l == 1 && i_ref == h->mb.pic.i_fref[1] )
break;
/* After finishing list0, jump back to complete the suspended list1;
 * otherwise step from list1 down to list0 (then terminate). */
if( list1_skipped && l == 0 )
l = 1;
else
l--;
}
/* get cost of BI mode */
h->mc.memcpy_aligned( &a->l0.bi16x16, &a->l0.me16x16, sizeof(x264_me_t) );
h->mc.memcpy_aligned( &a->l1.bi16x16, &a->l1.me16x16, sizeof(x264_me_t) );
int ref_costs = REF_COST( 0, a->l0.bi16x16.i_ref ) + REF_COST( 1, a->l1.bi16x16.i_ref );
/* Fetch the motion-compensated prediction from each list... */
src0 = h->mc.get_ref( pix0, &stride0,
h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref], h->mb.pic.i_stride[0],
a->l0.bi16x16.mv[0], a->l0.bi16x16.mv[1], 16, 16, x264_weight_none );
src1 = h->mc.get_ref( pix1, &stride1,
h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref], h->mb.pic.i_stride[0],
a->l1.bi16x16.mv[0], a->l1.bi16x16.mv[1], 16, 16, x264_weight_none );
/* ...and blend them with the bipred weights into pix0. */
h->mc.avg[PIXEL_16x16]( pix0, 16, src0, stride0, src1, stride1, h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
a->i_cost16x16bi = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
+ ref_costs
+ a->l0.bi16x16.cost_mv
+ a->l1.bi16x16.cost_mv;
if( h->mb.b_chroma_me )
a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
/* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
{
/* Bit cost of coding mv (0,0): the cost table is indexed by the MV
 * residual (mv - mvp), which for a zero MV is -mvp. */
int l0_mv_cost = a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[0]]
+ a->l0.bi16x16.p_cost_mv[-a->l0.bi16x16.mvp[1]];
int l1_mv_cost = a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[0]]
+ a->l1.bi16x16.p_cost_mv[-a->l1.bi16x16.mvp[1]];
/* Zero MV needs no interpolation: average the raw full-pel planes. */
h->mc.avg[PIXEL_16x16]( pix0, 16, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][0], h->mb.pic.i_stride[0],
h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][0], h->mb.pic.i_stride[0],
h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
+ ref_costs + l0_mv_cost + l1_mv_cost;
if( h->mb.b_chroma_me && cost00 < a->i_cost16x16bi )
{
/* Add the chroma cost of the zero-MV candidate before accepting it. */
ALIGNED_ARRAY_16( pixel, bi, [16*FENC_STRIDE] );
if( CHROMA444 )
{
h->mc.avg[PIXEL_16x16]( bi, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1],
h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1],
h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
cost00 += h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE );
h->mc.avg[PIXEL_16x16]( bi, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][8], h->mb.pic.i_stride[2],
h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][8], h->mb.pic.i_stride[2],
h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
cost00 += h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi, FENC_STRIDE );
}
else
{
ALIGNED_ARRAY_16( pixel, pixuv, [2],[16*FENC_STRIDE] );
int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
int v_shift = CHROMA_V_SHIFT;
/* Only taken for vertically subsampled chroma, an interlaced MB and an
 * odd reference index (bitwise AND of the three).  NOTE(review): the
 * mvy offset presumably compensates opposite-parity field chroma --
 * confirm against mc_chroma. */
if( v_shift & MB_INTERLACED & a->l0.bi16x16.i_ref )
{
int l0_mvy_offset = (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
}
else
h->mc.load_deinterleave_chroma_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 16>>v_shift );
if( v_shift & MB_INTERLACED & a->l1.bi16x16.i_ref )
{
int l1_mvy_offset = (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
}
else
h->mc.load_deinterleave_chroma_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
h->mb.pic.i_stride[1], 16>>v_shift );
/* Average U (at bi) and V (at bi+8) separately after deinterleave. */
h->mc.avg[chromapix]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
h->mc.avg[chromapix]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
cost00 += h->pixf.mbcmp[chromapix]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
+ h->pixf.mbcmp[chromapix]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
}
}
/* The zero-MV candidate wins: overwrite the bi-prediction MVs/costs. */
if( cost00 < a->i_cost16x16bi )
{
M32( a->l0.bi16x16.mv ) = 0;
M32( a->l1.bi16x16.mv ) = 0;
a->l0.bi16x16.cost_mv = l0_mv_cost;
a->l1.bi16x16.cost_mv = l1_mv_cost;
a->i_cost16x16bi = cost00;
}
}
/* mb type cost */
a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI];
a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0];
a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1];
}
/* Load the reference-plane pointers of one reference frame into the ME
 * context *m at offset (xoff,yoff).  src[] holds, per color plane, the
 * full-pel plane followed by the H / V / HV half-pel planes: 4:4:4 carries
 * all four planes for each of Y, U and V, while subsampled chroma keeps a
 * single interleaved full-pel UV plane.  Also wires up the integral image,
 * the null weight table and the reference index.
 *
 * NOTE(review): the annotated copy placed standalone `//` comments inside
 * this macro.  A comment line without a trailing backslash terminates the
 * macro definition early, and `// ... \` splices the next code line into
 * the comment (line splicing precedes comment removal, C11 5.1.1.2) -- both
 * break the build.  The annotations are now block comments on the
 * continued lines; the expansion is unchanged. */
#define LOAD_HPELS(m, src, list, ref, xoff, yoff) \
{ \
    /* luma: full-pel Yn plus half-pel Yh / Yv / Yhv */ \
    (m)->p_fref_w = (m)->p_fref[0] = &(src)[0][(xoff)+(yoff)*(m)->i_stride[0]]; \
    (m)->p_fref[1] = &(src)[1][(xoff)+(yoff)*(m)->i_stride[0]]; \
    (m)->p_fref[2] = &(src)[2][(xoff)+(yoff)*(m)->i_stride[0]]; \
    (m)->p_fref[3] = &(src)[3][(xoff)+(yoff)*(m)->i_stride[0]]; \
    if( CHROMA444 ) \
    { \
        /* U: full-pel Un plus half-pel Uh / Uv / Uhv */ \
        (m)->p_fref[ 4] = &(src)[ 4][(xoff)+(yoff)*(m)->i_stride[1]]; \
        (m)->p_fref[ 5] = &(src)[ 5][(xoff)+(yoff)*(m)->i_stride[1]]; \
        (m)->p_fref[ 6] = &(src)[ 6][(xoff)+(yoff)*(m)->i_stride[1]]; \
        (m)->p_fref[ 7] = &(src)[ 7][(xoff)+(yoff)*(m)->i_stride[1]]; \
        /* V: full-pel Vn plus half-pel Vh / Vv / Vhv */ \
        (m)->p_fref[ 8] = &(src)[ 8][(xoff)+(yoff)*(m)->i_stride[2]]; \
        (m)->p_fref[ 9] = &(src)[ 9][(xoff)+(yoff)*(m)->i_stride[2]]; \
        (m)->p_fref[10] = &(src)[10][(xoff)+(yoff)*(m)->i_stride[2]]; \
        (m)->p_fref[11] = &(src)[11][(xoff)+(yoff)*(m)->i_stride[2]]; \
    } \
    else \
        /* subsampled chroma: one interleaved full-pel UV plane */ \
        (m)->p_fref[4] = &(src)[4][(xoff)+((yoff)>>CHROMA_V_SHIFT)*(m)->i_stride[1]]; \
    (m)->integral = &h->mb.pic.p_integral[list][ref][(xoff)+(yoff)*(m)->i_stride[0]]; \
    (m)->weight = x264_weight_none; \
    (m)->i_ref = ref; \
}
/* Compute the 16x16 MV predictor for list i_list / reference i_ref and
 * store it in mvp, following the H.264 median prediction rule.  The left
 * (A), top (B) and top-right (C) neighbors are read from the macroblock
 * cache; when C is unavailable (ref == -2) the top-left block stands in.
 * Selection:
 *   - exactly one neighbor uses i_ref        -> that neighbor's MV;
 *   - only A exists (B and C unavailable)    -> A's MV;
 *   - otherwise                              -> component-wise median of
 *                                               the three neighbor MVs. */
void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int16_t mvp[2] )
{
    /* Neighbor A: left block. */
    int ref_left = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
    int16_t *mv_left = h->mb.cache.mv[i_list][X264_SCAN8_0 - 1];
    /* Neighbor B: top block. */
    int ref_top = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
    int16_t *mv_top = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8];
    /* Neighbor C: top-right block, replaced by the top-left block when
     * the top-right one is unavailable. */
    int ref_tr = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
    int16_t *mv_tr = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 + 4];
    if( ref_tr == -2 )
    {
        ref_tr = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];
        mv_tr = h->mb.cache.mv[i_list][X264_SCAN8_0 - 8 - 1];
    }

    int matches = (ref_left == i_ref) + (ref_top == i_ref) + (ref_tr == i_ref);

    if( matches == 1 )
    {
        /* A unique neighbor shares this reference: take its MV directly. */
        if( ref_left == i_ref )
            CP32( mvp, mv_left );
        else if( ref_top == i_ref )
            CP32( mvp, mv_top );
        else
            CP32( mvp, mv_tr );
    }
    else if( matches == 0 && ref_top == -2 && ref_tr == -2 && ref_left != -2 )
    {
        /* Only the left neighbor is available: take its MV. */
        CP32( mvp, mv_left );
    }
    else
    {
        /* Several (or no usable) matching neighbors: median of all three. */
        x264_median_mv( mvp, mv_left, mv_top, mv_tr );
    }
}