blob: ca4774b3e001658bce1b9c79dab883a7a7c74959 [file] [log] [blame]
/*!
*************************************************************************************
*
* \file me_umhexsmp.c
*
* \brief
* Fast integer pixel and sub pixel motion estimation
* Improved and simplified from the original UMHexagonS algorithms
* See JVT-P021 for details
*
* \author
* Main contributors: (see contributors.h for copyright, address and affiliation details)
* - Zhibo Chen <chenzhibo@tsinghua.org.cn>
* - JianFeng Xu <fenax@video.mdc.tsinghua.edu.cn>
* - Wenfang Fu <fwf@video.mdc.tsinghua.edu.cn>
*
* - Xiaoquan Yi <xyi@engr.scu.edu>
* - Jun Zhang <jzhang2@engr.scu.edu>
*
* \date
* 6. Nov. 2006
*************************************************************************************
*/
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "global.h"
#include "memalloc.h"
#include "me_umhexsmp.h"
#include "refbuf.h"
#include "me_distortion.h"
// Tables defined elsewhere in the encoder. Neither is referenced by name in the
// visible part of this file (mvbits is presumably consumed by the MV_COST
// macros -- TODO confirm in the ME headers).
extern int *byte_abs;
extern int *mvbits;
// Search pattern offsets (x/y pairs, in the unit of the search that uses them).
// Small diamond: the four direct neighbours of the current best position.
static const short Diamond_X[4] = {-1, 1, 0, 0};
static const short Diamond_Y[4] = { 0, 0,-1, 1};
// Six-point hexagon used by the hexagon search steps.
static const short Hexagon_X[6] = {-2, 2,-1, 1,-1, 1};
static const short Hexagon_Y[6] = { 0, 0,-2, 2, 2,-2};
// Sixteen-point big hexagon; the integer searches scale it by 1..search_range/4.
static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};
static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};
// Right-shift applied to the cost thresholds, indexed by blocktype (0..7), so that
// smaller partitions are compared against smaller thresholds.
const short block_type_shift_factor[8] = {0, 0, 1, 1, 2, 3, 3, 1}; // last entry relaxed to 1 instead of 4
// Reference picture selected by the most recent integer-pel search
// (assigned in smpUMHEXIntegerPelBlockMotionSearch).
static StorablePicture *ref_pic_ptr;
// Index into computeUniPred[]; each uni-prediction search sets it to F_PEL or
// Q_PEL, plus 3 when weighted reference ME applies.
static int dist_method;
// Spiral offset tables (defined elsewhere) used by the full sub-pel search:
// the hpel pair for the half-pel stage, the other pair for the quarter-pel stage.
extern short* spiral_hpel_search_x;
extern short* spiral_hpel_search_y;
extern short* spiral_search_x;
extern short* spiral_search_y;
// Macro for motion estimation cost computation per match (uni-prediction).
//
// Evaluates the integer-pel candidate (cand_x, cand_y) and records it as the new
// best position when its total cost is lower than min_mcost. Candidates whose
// distance from the search center exceeds search_range in either axis are skipped.
//
// The macro has no parameters; it expects these names in the expanding scope:
//   cand_x, cand_y, center_x, center_y, search_range, lambda_factor, mvshift,
//   pred_x, pred_y, orig_pic, blocksize_x, blocksize_y, mcost, min_mcost,
//   best_x, best_y, plus the file-scope dist_method.
//
// The fourth argument of the distortion call is (min_mcost - mcost), i.e. what is
// left of the current best cost after the motion-vector cost (presumably used as
// an early-termination bound -- confirm in me_distortion.c).
//
// NOTE: this expands to a bare if-statement (not a do { } while (0) wrapper), so
// it must not be followed by an "else" and works with or without a trailing ';'.
#define SEARCH_ONE_PIXEL_HELPER \
if(iabs(cand_x - center_x) <= search_range && iabs(cand_y - center_y) <= search_range) \
{ \
mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); \
mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, \
min_mcost - mcost, (cand_x + IMG_PAD_SIZE) << 2, (cand_y + IMG_PAD_SIZE) << 2); \
if (mcost < min_mcost) \
{ \
best_x = cand_x; \
best_y = cand_y; \
min_mcost = mcost; \
} \
}
// Macro for motion estimation cost computation per match (bi-prediction).
//
// Evaluates the integer-pel candidate (cand_x, cand_y) for the vector that is
// being searched, while the other vector stays fixed at (center1_x, center1_y).
// Candidates farther than search_range from (center2_x, center2_y) in either
// axis are skipped. The motion-vector cost of both vectors is summed first; the
// distortion (computeBiPred) is only computed when that sum is already below
// min_mcost, and the candidate becomes the new best when the total is lower.
//
// The macro has no parameters; it expects these names in the expanding scope:
//   cand_x, cand_y, center1_x, center1_y, center2_x, center2_y, search_range,
//   lambda_factor, mvshift, pred_x1, pred_y1, pred_x2, pred_y2, cur_pic,
//   blocksize_x, blocksize_y, mcost, min_mcost, best_x, best_y.
//
// NOTE: this expands to a bare if-statement (not a do { } while (0) wrapper), so
// it must not be followed by an "else" and works with or without a trailing ';'.
#define SEARCH_ONE_PIXEL_BIPRED_HELPER \
if (iabs(cand_x - center2_x) <= search_range && iabs(cand_y - center2_y) <= search_range) \
{ \
mcost = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1); \
mcost += MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x2, pred_y2); \
if (mcost < min_mcost) \
{ \
mcost += computeBiPred(cur_pic, blocksize_y, blocksize_x, \
min_mcost - mcost, \
(center1_x << 2) + IMG_PAD_SIZE_TIMES4, \
(center1_y << 2) + IMG_PAD_SIZE_TIMES4, \
(cand_x << 2) + IMG_PAD_SIZE_TIMES4, \
(cand_y << 2) + IMG_PAD_SIZE_TIMES4); \
if (mcost < min_mcost) \
{ \
best_x = cand_x; \
best_y = cand_y; \
min_mcost = mcost; \
} \
} \
}
/*!
 ************************************************************************
 * \brief
 *    Load the default thresholds used by the simplified UMHEX searches.
 *
 *    The integer-pel searches compare the running minimum cost against
 *    ConvergeThreshold and the two SymmetricalCrossSearch thresholds; the
 *    sub-pel searches use SubPelThreshold1 and SubPelThreshold3. All of
 *    them are right-shifted by block_type_shift_factor[blocktype] at the
 *    point of use. Changing the values trades rate-distortion performance
 *    against search speed.
 ************************************************************************
 */
void smpUMHEX_init()
{
  // thresholds read by the sub-pel searches
  SubPelThreshold3 = 400;
  SubPelThreshold1 = 1000;

  // thresholds read by the integer-pel searches
  ConvergeThreshold                = 1000;
  SymmetricalCrossSearchThreshold2 = 7000;
  SymmetricalCrossSearchThreshold1 = 800;
}
/*!
 ************************************************************************
 * \brief
 *    Allocate the working buffers of the simplified UMHEX search:
 *    the per-macroblock-column intra flags, the two 9-plane cost arrays
 *    (one entry per 4x4 block of the picture) and the 7x7 sub-pel
 *    search-state map.
 *
 * \return
 *    Sum of the values returned by the get_mem3Dint / get_mem2D calls
 *    (presumably bytes). The intra-flag buffer is not included.
 *    Allocation failure of the intra-flag buffer is reported through
 *    no_mem_exit().
 ************************************************************************
 */
int smpUMHEX_get_mem()
{
  int total;

  smpUMHEX_flag_intra = calloc((img->width>>4)+1, sizeof(byte));
  if (smpUMHEX_flag_intra == NULL)
  {
    no_mem_exit("smpUMHEX_get_mem: smpUMHEX_flag_intra");
  }

  total  = get_mem3Dint(&smpUMHEX_l0_cost, 9, img->height/4, img->width/4);
  total += get_mem3Dint(&smpUMHEX_l1_cost, 9, img->height/4, img->width/4);
  total += get_mem2D(&smpUMHEX_SearchState, 7, 7);

  return total;
}
/*!
 ************************************************************************
 * \brief
 *    Release every buffer obtained by smpUMHEX_get_mem():
 *    the intra flags, the sub-pel search-state map and both
 *    9-plane cost arrays.
 ************************************************************************
 */
void smpUMHEX_free_mem()
{
  // the four buffers are independent, so the release order does not matter
  free (smpUMHEX_flag_intra);
  free_mem2D(smpUMHEX_SearchState);
  free_mem3Dint(smpUMHEX_l1_cost, 9);
  free_mem3Dint(smpUMHEX_l0_cost, 9);
}
/*!
 ************************************************************************
 * \brief
 *    Fast integer pixel block motion estimation
 *
 *    Search order: center, zero vector, small diamond, then (only for
 *    high-cost blocks) symmetrical cross + hexagon + multi big hexagon,
 *    the up-layer predictor, and finally iterated hexagon and diamond
 *    refinement. Two early exits are taken when the minimum cost drops
 *    below ConvergeThreshold (scaled by block type).
 *
 *    Side effects: configures the file/global distortion state
 *    (dist_method, ref_pic_ptr, ref_pic_sub, img_width/img_height,
 *    width_pad/height_pad, the weighted-prediction parameters and
 *    ref_access_method) for the selected reference picture.
 *
 * \return
 *    minimum motion cost after search; *mv_x / *mv_y receive the best
 *    integer-pel vector found.
 ************************************************************************
 */
int                                     //  ==> minimum motion cost after search
smpUMHEXIntegerPelBlockMotionSearch (
  imgpel   *orig_pic,      // <--  original pixel values for the AxB block
  short     ref,           // <--  reference frame (0... or -1 (backward))
  int       list,          // <--  reference picture list
  int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
  int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
  int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
  short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
  short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
  short*    mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units
  short*    mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units
  int       search_range,  // <--  1-d search range in pel units
  int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
  int       lambda_factor) // <--  lagrangian parameter for determining motion cost
{
  int   list_offset = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
                      img->current_mb_nr%2 ? 4 : 2 : 0;
  int   mvshift     = 2;                                   // motion vector shift for getting sub-pel units
  int   blocksize_y = input->blc_size[blocktype][1];       // vertical block size
  int   blocksize_x = input->blc_size[blocktype][0];       // horizontal block size
  int   pred_x      = (pic_pix_x << mvshift) + pred_mv_x;  // predicted position x (in sub-pel units)
  int   pred_y      = (pic_pix_y << mvshift) + pred_mv_y;  // predicted position y (in sub-pel units)
  int   center_x    = pic_pix_x + *mv_x;                   // center position x (in pel units)
  int   center_y    = pic_pix_y + *mv_y;                   // center position y (in pel units)
  // Start from the search center. Starting from (0,0) would make the search
  // continue around the picture origin (and possibly return mv = -pic_pix)
  // whenever the center cost does not beat the incoming min_mcost.
  int   best_x = center_x, best_y = center_y;
  int   search_step, iYMinNow, iXMinNow;
  int   cand_x, cand_y, mcost;
  unsigned short i, m;

  //===== Use weighted Reference for ME ====
  int apply_weights = ( (active_pps->weighted_pred_flag && (img->type == P_SLICE || img->type == SP_SLICE)) ||
                        (active_pps->weighted_bipred_idc && (img->type == B_SLICE))) && input->UseWeightedReferenceME;

  dist_method = F_PEL + 3 * apply_weights;
  ref_pic_ptr = listX[list+list_offset][ref];

  // Note that following seem to be universal for all functions and could be moved to a separate, clean public function in me_distortion.c
  ref_pic_sub.luma = ref_pic_ptr->imgY_sub;
  img_width  = ref_pic_ptr->size_x;
  img_height = ref_pic_ptr->size_y;
  width_pad  = ref_pic_ptr->size_x_pad;
  height_pad = ref_pic_ptr->size_y_pad;

  if (apply_weights)
  {
    weight_luma = wp_weight[list + list_offset][ref][0];
    offset_luma = wp_offset[list + list_offset][ref][0];
  }

  if (ChromaMEEnable)
  {
    ref_pic_sub.crcb[0] = ref_pic_ptr->imgUV_sub[0];
    ref_pic_sub.crcb[1] = ref_pic_ptr->imgUV_sub[1];
    width_pad_cr  = ref_pic_ptr->size_x_cr_pad;
    height_pad_cr = ref_pic_ptr->size_y_cr_pad;
    if (apply_weights)
    {
      weight_cr[0] = wp_weight[list + list_offset][ref][1];
      weight_cr[1] = wp_weight[list + list_offset][ref][2];
      offset_cr[0] = wp_offset[list + list_offset][ref][1];
      offset_cr[1] = wp_offset[list + list_offset][ref][2];
    }
  }

  //===== set function for getting reference picture lines =====
  // FAST_ACCESS only when the whole search window lies inside the picture
  if ((center_x > search_range) && (center_x < img_width  - 1 - search_range - blocksize_x) &&
      (center_y > search_range) && (center_y < img_height - 1 - search_range - blocksize_y))
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }

  //check the center median predictor
  cand_x = center_x ;
  cand_y = center_y ;
  mcost  = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
  mcost += computeUniPred[dist_method](orig_pic, blocksize_y,blocksize_x, min_mcost - mcost,
                                       (cand_x << 2) + IMG_PAD_SIZE_TIMES4, (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x    = cand_x;
    best_y    = cand_y;
  }

  // The first diamond searches are anchored here, i.e. before the
  // zero-vector candidate below is evaluated.
  iXMinNow = best_x;
  iYMinNow = best_y;
  if ((0 != pred_mv_x) || (0 != pred_mv_y))
  {
    // also try the zero motion vector
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_HELPER
  }

  // If the min_mcost is small enough, do a local search then terminate
  // This is good for stationary or quasi-stationary areas
  if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    *mv_x = (short) (best_x - pic_pix_x);
    *mv_y = (short) (best_y - pic_pix_y);
    return min_mcost;
  }

  // Small local search
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_X[m];
    cand_y = iYMinNow + Diamond_Y[m];
    SEARCH_ONE_PIXEL_HELPER
  }

  // First_step: Symmetrical-cross search
  // If distortion is large, use large shapes. Otherwise, compact shapes are faster
  if ( (blocktype == 1 &&
        min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
       (min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range/2; i++)
    {
      search_step = (i<<1) - 1;   // odd distances 1, 3, 5, ...
      cand_x = iXMinNow + search_step;
      cand_y = iYMinNow;
      SEARCH_ONE_PIXEL_HELPER
      cand_x = iXMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
      cand_x = iXMinNow;
      cand_y = iYMinNow + search_step;
      SEARCH_ONE_PIXEL_HELPER
      cand_y = iYMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
    }

    // Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }

    // Multi Big Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range/4; i++)
    {
      for (m = 0; m < 16; m++)
      {
        cand_x = iXMinNow + Big_Hexagon_X[m]*i;
        cand_y = iYMinNow + Big_Hexagon_Y[m]*i;
        SEARCH_ONE_PIXEL_HELPER
      }
    }
  }

  // Search up_layer predictor for non 16x16 blocks
  if (blocktype > 1)
  {
    cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X/4);
    cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y/4);
    SEARCH_ONE_PIXEL_HELPER
  }

  if(center_x != pic_pix_x || center_y != pic_pix_y)
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_HELPER

    iXMinNow = best_x;
    iYMinNow = best_y;
    // Local diamond search
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
  }

  // If the minimum cost is small enough, do a local search
  // and finish the search here
  if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    *mv_x = (short) (best_x - pic_pix_x);
    *mv_y = (short) (best_y - pic_pix_y);
    return min_mcost;
  }

  //second_step:  Extended Hexagon-based Search
  for(i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }

  //third_step: Small diamond search
  for(i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }

  *mv_x = (short) (best_x - pic_pix_x);
  *mv_y = (short) (best_y - pic_pix_y);
  return min_mcost;
}
/*!
***********************************************************************
* \brief
* Sub pixel block motion search enhanced
*
* Two-stage refinement around the incoming vector: a half-pel stage
* walking spiral_hpel_search_x/y, followed by a quarter-pel stage
* walking spiral_search_x/y. Each stage stops early as soon as the
* minimum cost falls below SubPelThreshold3 (scaled by block type).
*
* Side effects: configures the global distortion state (dist_method,
* ref_pic_sub, img_width/img_height, width_pad/height_pad, the
* weighted-prediction parameters and ref_access_method).
*
* \return
* minimum motion cost after search; *mv_x / *mv_y are updated in place.
***********************************************************************
*/
int // ==> minimum motion cost after search
smpUMHEXFullSubPelBlockMotionSearch (imgpel* orig_pic, // <-- original pixel values for the AxB block
short ref, // <-- reference frame (0... or -1 (backward))
int list, // <-- reference picture list
int pic_pix_x, // <-- absolute x-coordinate of regarded AxB block
int pic_pix_y, // <-- absolute y-coordinate of regarded AxB block
int blocktype, // <-- block type (1-16x16 ... 7-4x4)
short pred_mv_x, // <-- motion vector predictor (x) in sub-pel units
short pred_mv_y, // <-- motion vector predictor (y) in sub-pel units
short* mv_x, // <--> in: search center (x) / out: motion vector (x) - in quarter-pel units (see cand_mv_x below)
short* mv_y, // <--> in: search center (y) / out: motion vector (y) - in quarter-pel units (see cand_mv_y below)
int search_pos2, // <-- search positions for half-pel search (default: 9)
int search_pos4, // <-- search positions for quarter-pel search (default: 9)
int min_mcost, // <-- minimum motion cost (cost for center or huge value)
int lambda_factor // <-- lagrangian parameter for determining motion cost
)
{
int pos, best_pos, mcost;
int cand_mv_x, cand_mv_y;
// When set, the cost of half-pel position 0 is reduced by WEIGHTED_COST(lambda_factor, 16) below.
int check_position0 = (!input->rdopt && img->type!=B_SLICE && ref==0 && blocktype==1 && *mv_x==0 && *mv_y==0);
int blocksize_x = input->blc_size[blocktype][0];
int blocksize_y = input->blc_size[blocktype][1];
// Block position in the padded reference, scaled by 4 to match the sub-pel vectors.
int pic4_pix_x = ((pic_pix_x + IMG_PAD_SIZE)<< 2);
int pic4_pix_y = ((pic_pix_y + IMG_PAD_SIZE)<< 2);
// If the half-pel stage starts at position 0, make sure at least that position is evaluated.
int max_pos2 = ( !start_me_refinement_hp ? imax(1,search_pos2) : search_pos2);
int list_offset = img->mb_data[img->current_mb_nr].list_offset;
int apply_weights = ( (active_pps->weighted_pred_flag &&
(img->type == P_SLICE || img->type == SP_SLICE)) ||
(active_pps->weighted_bipred_idc && (img->type == B_SLICE)) )
&& input->UseWeightedReferenceME;
int cmv_x, cmv_y;
StorablePicture *ref_picture = listX[list+list_offset][ref];
// Largest scaled block position that keeps the block inside the padded reference.
int max_pos_x4 = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
int max_pos_y4 = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
dist_method = Q_PEL + 3 * apply_weights;
ref_pic_sub.luma = ref_picture->imgY_sub;
img_width = ref_picture->size_x;
img_height = ref_picture->size_y;
width_pad = ref_picture->size_x_pad;
height_pad = ref_picture->size_y_pad;
if (apply_weights)
{
weight_luma = wp_weight[list + list_offset][ref][0];
offset_luma = wp_offset[list + list_offset][ref][0];
}
if (ChromaMEEnable)
{
ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
width_pad_cr = ref_picture->size_x_cr_pad;
height_pad_cr = ref_picture->size_y_cr_pad;
if (apply_weights)
{
weight_cr[0] = wp_weight[list + list_offset][ref][1];
weight_cr[1] = wp_weight[list + list_offset][ref][2];
offset_cr[0] = wp_offset[list + list_offset][ref][1];
offset_cr[1] = wp_offset[list + list_offset][ref][2];
}
}
/*********************************
***** *****
***** HALF-PEL REFINEMENT *****
***** *****
*********************************/
//===== set function for getting pixel values =====
// FAST_ACCESS only when the start position is strictly inside the padded area.
if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
(pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1) )
{
ref_access_method = FAST_ACCESS;
}
else
{
ref_access_method = UMV_ACCESS;
}
//===== loop over search positions =====
for (best_pos = 0, pos = start_me_refinement_hp; pos < max_pos2; pos++)
{
cand_mv_x = *mv_x + (spiral_hpel_search_x[pos]); // quarter-pel units
cand_mv_y = *mv_y + (spiral_hpel_search_y[pos]); // quarter-pel units
//----- set motion vector cost -----
mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
// the vector cost alone already exceeds the best total: skip the distortion
if (mcost >= min_mcost) continue;
cmv_x = cand_mv_x + pic4_pix_x;
cmv_y = cand_mv_y + pic4_pix_y;
mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
if (pos==0 && check_position0)
{
mcost -= WEIGHTED_COST (lambda_factor, 16);
}
if (mcost < min_mcost)
{
min_mcost = mcost;
best_pos = pos;
}
// good enough: stop the half-pel stage early
if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
{
break;
}
}
// best_pos == 0 means the incoming vector is kept
if (best_pos)
{
*mv_x += (spiral_hpel_search_x [best_pos]);
*mv_y += (spiral_hpel_search_y [best_pos]);
}
// Zero vector, zero predictor and a low cost: skip the quarter-pel stage.
if ((*mv_x == 0) && (*mv_y == 0) && (pred_mv_x == 0 && pred_mv_y == 0) &&
(min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
{
// NOTE(review): dead store -- best_pos is not read again before returning.
best_pos = 0;
return min_mcost;
}
// When the quarter-pel stage starts at position 0 the center is re-evaluated
// there, so the running minimum is reset first.
if ( !start_me_refinement_qp )
min_mcost = INT_MAX;
/************************************
***** *****
***** QUARTER-PEL REFINEMENT *****
***** *****
************************************/
//===== set function for getting pixel values =====
if ((pic4_pix_x + *mv_x > 0) && (pic4_pix_x + *mv_x < max_pos_x4) &&
(pic4_pix_y + *mv_y > 0) && (pic4_pix_y + *mv_y < max_pos_y4) )
{
ref_access_method = FAST_ACCESS;
}
else
{
ref_access_method = UMV_ACCESS;
}
//===== loop over search positions =====
for (best_pos = 0, pos = start_me_refinement_qp; pos < search_pos4; pos++)
{
cand_mv_x = *mv_x + spiral_search_x[pos]; // quarter-pel units
cand_mv_y = *mv_y + spiral_search_y[pos]; // quarter-pel units
//----- set motion vector cost -----
mcost = MV_COST_SMP (lambda_factor, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
// the vector cost alone already exceeds the best total: skip the distortion
if (mcost >= min_mcost) continue;
cmv_x = cand_mv_x + pic4_pix_x;
cmv_y = cand_mv_y + pic4_pix_y;
mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x, min_mcost - mcost, cmv_x, cmv_y);
if (mcost < min_mcost)
{
min_mcost = mcost;
best_pos = pos;
}
// good enough: stop the quarter-pel stage early
if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
{
break;
}
}
// best_pos == 0 means the half-pel result is kept
if (best_pos)
{
*mv_x += spiral_search_x [best_pos];
*mv_y += spiral_search_y [best_pos];
}
//===== return minimum motion cost =====
return min_mcost;
}
/*!
 ************************************************************************
 * \brief
 *    Fast sub pixel block motion estimation
 *
 *    Evaluates the incoming vector (when start_me_refinement_hp is 0),
 *    then the fractional part of the predictor, then up to three rounds
 *    of a small diamond around the running best. Every candidate stays
 *    within +/-3 units of the incoming vector and is visited at most once
 *    (tracked in smpUMHEX_SearchState). The search returns early when the
 *    minimum cost falls below SubPelThreshold1 / SubPelThreshold3 (scaled
 *    by block type).
 *
 *    Side effects: configures the global distortion state (dist_method,
 *    ref_pic_sub, img_width/img_height, width_pad/height_pad, the
 *    weighted-prediction parameters and ref_access_method) and clears
 *    smpUMHEX_SearchState.
 *
 * \return
 *    minimum motion cost after search; *mv_x / *mv_y receive the best
 *    vector found.
 ************************************************************************
 */
int                                     //  ==> minimum motion cost after search
smpUMHEXSubPelBlockMotionSearch (
  imgpel*   orig_pic,      // <--  original pixel values for the AxB block
  short     ref,           // <--  reference frame (0... or -1 (backward))
  int       list,          // <--  reference picture list
  int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
  int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
  int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
  short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
  short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
  short*    mv_x,          // <--> in: search center (x) / out: MV (x) - same sub-pel units as pred_mv_x (added directly to the x4-scaled block position)
  short*    mv_y,          // <--> in: search center (y) / out: MV (y) - same sub-pel units as pred_mv_y
  int       search_pos2,   // <--  search positions for half-pel search (not used by this search)
  int       search_pos4,   // <--  search positions for quarter-pel search (not used by this search)
  int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
  int       lambda_factor) // <--  lagrangian parameter for determining motion cost
{
  int   mcost;
  int   cand_mv_x, cand_mv_y;
  int   list_offset = ((img->MbaffFrameFlag) &&
                       (img->mb_data[img->current_mb_nr].mb_field)) ?
                      img->current_mb_nr%2 ? 4 : 2 : 0;
  StorablePicture *ref_picture = listX[list+list_offset][ref];
  short mv_shift    = 0;
  short blocksize_x = (short) input->blc_size[blocktype][0];
  short blocksize_y = (short) input->blc_size[blocktype][1];
  int   pic4_pix_x  = ((pic_pix_x + IMG_PAD_SIZE)<<2);
  int   pic4_pix_y  = ((pic_pix_y + IMG_PAD_SIZE)<<2);
  // Kept as int: the x4-scaled picture size does not fit a short for large pictures.
  int   max_pos_x4  = ((ref_picture->size_x - blocksize_x + 2*IMG_PAD_SIZE)<<2);
  int   max_pos_y4  = ((ref_picture->size_y - blocksize_y + 2*IMG_PAD_SIZE)<<2);
  int   iXMinNow, iYMinNow;
  short dynamic_search_range, i, m;
  // Start from the incoming vector so that it is preserved when no
  // candidate improves on min_mcost.
  int   currmv_x = *mv_x, currmv_y = *mv_y;
  int   pred_frac_mv_x,pred_frac_mv_y,abort_search;
  int   pred_frac_up_mv_x, pred_frac_up_mv_y;
  int   apply_weights = ( (active_pps->weighted_pred_flag &&
                           (img->type == P_SLICE || img->type == SP_SLICE)) ||
                          (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) )
                        && input->UseWeightedReferenceME;

  dist_method = Q_PEL + 3 * apply_weights;

  // Use the reference picture selected by this call's (list, ref) rather than
  // whatever the last integer-pel search left in the file-static pointer.
  ref_pic_sub.luma = ref_picture->imgY_sub;
  img_width  = ref_picture->size_x;
  img_height = ref_picture->size_y;
  width_pad  = ref_picture->size_x_pad;
  height_pad = ref_picture->size_y_pad;

  if (apply_weights)
  {
    weight_luma = wp_weight[list + list_offset][ref][0];
    offset_luma = wp_offset[list + list_offset][ref][0];
  }

  if (ChromaMEEnable)
  {
    ref_pic_sub.crcb[0] = ref_picture->imgUV_sub[0];
    ref_pic_sub.crcb[1] = ref_picture->imgUV_sub[1];
    width_pad_cr  = ref_picture->size_x_cr_pad;
    height_pad_cr = ref_picture->size_y_cr_pad;
    if (apply_weights)
    {
      weight_cr[0] = wp_weight[list + list_offset][ref][1];
      weight_cr[1] = wp_weight[list + list_offset][ref][2];
      offset_cr[0] = wp_offset[list + list_offset][ref][1];
      offset_cr[1] = wp_offset[list + list_offset][ref][2];
    }
  }

  //===== set function for getting pixel values =====
  if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 1) &&
      (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 1))
  {
    ref_access_method = FAST_ACCESS;
  }
  else
  {
    ref_access_method = UMV_ACCESS;
  }

  dynamic_search_range = 3;
  // Fractional offsets (-3..3) of the two predictors relative to the incoming vector
  pred_frac_mv_x = (pred_mv_x - *mv_x) % 4;
  pred_frac_mv_y = (pred_mv_y - *mv_y) % 4;
  pred_frac_up_mv_x = (smpUMHEX_pred_MV_uplayer_X - *mv_x) % 4;
  pred_frac_up_mv_y = (smpUMHEX_pred_MV_uplayer_Y - *mv_y) % 4;

  // Clear the 7x7 visited map (relies on the rows being stored contiguously)
  // and mark the center as visited.
  memset(smpUMHEX_SearchState[0], 0,
         (2*dynamic_search_range+1)*(2*dynamic_search_range+1));
  smpUMHEX_SearchState[dynamic_search_range][dynamic_search_range] = 1;

  if( !start_me_refinement_hp )
  {
    // the center has not been costed by the caller: do it now
    cand_mv_x = *mv_x;
    cand_mv_y = *mv_y;
    mcost  = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
    mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
                                          min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      currmv_x  = cand_mv_x;
      currmv_y  = cand_mv_y;
    }
  }

  // If the min_mcost is small enough and other statistics are positive,
  // better to stop the search now
  if ( ((*mv_x) == 0) && ((*mv_y) == 0) &&
       (pred_frac_mv_x == 0 && pred_frac_up_mv_x == 0) &&
       (pred_frac_mv_y == 0 && pred_frac_up_mv_y == 0) &&
       (min_mcost < (SubPelThreshold1>>block_type_shift_factor[blocktype])) )
  {
    *mv_x = (short) currmv_x;
    *mv_y = (short) currmv_y;
    return min_mcost;
  }

  // Try the position indicated by the fractional part of the predictor
  if(pred_frac_mv_x || pred_frac_mv_y)
  {
    cand_mv_x = *mv_x + pred_frac_mv_x;
    cand_mv_y = *mv_y + pred_frac_mv_y;
    mcost  = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
    mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
                                          min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
    smpUMHEX_SearchState[cand_mv_y -*mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
    if (mcost < min_mcost)
    {
      min_mcost = mcost;
      currmv_x  = cand_mv_x;
      currmv_y  = cand_mv_y;
    }
  }

  // Multiple small diamond search
  for(i = 0; i < dynamic_search_range; i++)
  {
    abort_search = 1;
    iXMinNow = currmv_x;
    iYMinNow = currmv_y;
    for (m = 0; m < 4; m++)
    {
      cand_mv_x = iXMinNow + Diamond_X[m];
      cand_mv_y = iYMinNow + Diamond_Y[m];

      // stay inside the 7x7 window tracked by smpUMHEX_SearchState
      if(iabs(cand_mv_x - *mv_x) <= dynamic_search_range && iabs(cand_mv_y - *mv_y) <= dynamic_search_range)
      {
        if(!smpUMHEX_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range])
        {
          mcost  = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);
          mcost += computeUniPred[dist_method]( orig_pic, blocksize_y, blocksize_x,
                                                min_mcost - mcost, cand_mv_x + pic4_pix_x, cand_mv_y + pic4_pix_y);
          smpUMHEX_SearchState[cand_mv_y - *mv_y + dynamic_search_range][cand_mv_x - *mv_x + dynamic_search_range] = 1;
          if (mcost < min_mcost)
          {
            min_mcost = mcost;
            currmv_x  = cand_mv_x;
            currmv_y  = cand_mv_y;
            abort_search = 0;
          }
          if (min_mcost < (SubPelThreshold3>>block_type_shift_factor[blocktype]))
          {
            *mv_x = (short) currmv_x;
            *mv_y = (short) currmv_y;
            return min_mcost;
          }
        }
      }
    }
    // If the minimum cost point is in the center, break out the loop
    if (abort_search)
    {
      break;
    }
  }

  *mv_x = (short) currmv_x;
  *mv_y = (short) currmv_y;
  return min_mcost;
}
/*!
 ************************************************************************
 * \brief
 *    smpUMHEXBipredIntegerPelBlockMotionSearch: fast pixel block motion search for bipred mode
 *
 *    One vector (*s_mv_x, *s_mv_y) is held fixed while the other
 *    (*mv_x, *mv_y) is searched with the same pattern sequence as the
 *    uni-prediction integer search: center, zero vector, small diamond,
 *    optional cross / hexagon / big hexagon, up-layer predictor, then
 *    iterated hexagon and diamond refinement.
 *
 *    Side effects: configures the global bi-prediction distortion state
 *    (ref_pic1_sub, ref_pic2_sub, img_width/img_height, width_pad/height_pad,
 *    weights/offsets, computeBiPred and the two bipred access methods).
 *
 * \return
 *    minimum motion cost after search; *mv_x / *mv_y receive the best
 *    vector found. *s_mv_x / *s_mv_y are only read.
 ************************************************************************
 */
int                                                   //  ==> minimum motion cost after search
smpUMHEXBipredIntegerPelBlockMotionSearch (imgpel* cur_pic,      // <--  original pixel values for the AxB block
                                           short     ref,          // <--  reference frame (0... or -1 (backward))
                                           int       list,         // <--  reference picture list
                                           int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
                                           int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
                                           int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
                                           short     pred_mv_x1,   // <--  motion vector predictor (x) of the fixed vector, in sub-pel units
                                           short     pred_mv_y1,   // <--  motion vector predictor (y) of the fixed vector, in sub-pel units
                                           short     pred_mv_x2,   // <--  motion vector predictor (x) of the searched vector, in sub-pel units
                                           short     pred_mv_y2,   // <--  motion vector predictor (y) of the searched vector, in sub-pel units
                                           short*    mv_x,         // <--> in: search center (x) / out: motion vector (x) - in pel units
                                           short*    mv_y,         // <--> in: search center (y) / out: motion vector (y) - in pel units
                                           short*    s_mv_x,       // <--  fixed motion vector (x) - in pel units
                                           short*    s_mv_y,       // <--  fixed motion vector (y) - in pel units
                                           int       search_range, // <--  1-d search range in pel units
                                           int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
                                           int       lambda_factor) // <--  lagrangian parameter for determining motion cost
{
  int   mvshift = 2;                                   // motion vector shift for getting sub-pel units
  int   search_step, iYMinNow, iXMinNow;
  int   i, m;
  int   cand_x, cand_y, mcost;
  int   list_offset = img->mb_data[img->current_mb_nr].list_offset;
  int   blocksize_y = input->blc_size[blocktype][1];   // vertical block size
  int   blocksize_x = input->blc_size[blocktype][0];   // horizontal block size
  int   pred_x1 = (pic_pix_x << 2) + pred_mv_x1;       // predicted position x (in sub-pel units)
  int   pred_y1 = (pic_pix_y << 2) + pred_mv_y1;       // predicted position y (in sub-pel units)
  int   pred_x2 = (pic_pix_x << 2) + pred_mv_x2;       // predicted position x (in sub-pel units)
  int   pred_y2 = (pic_pix_y << 2) + pred_mv_y2;       // predicted position y (in sub-pel units)
  short center2_x = pic_pix_x + *mv_x;                 // center position x (in pel units)
  short center2_y = pic_pix_y + *mv_y;                 // center position y (in pel units)
  short center1_x = pic_pix_x + *s_mv_x;               // mvx of second pred (in pel units)
  short center1_y = pic_pix_y + *s_mv_y;               // mvy of second pred (in pel units)
  int   best_x = center2_x;
  int   best_y = center2_y;

  short apply_weights = (active_pps->weighted_bipred_idc>0);
  // NOTE(review): for list != 0 the last two indices are [0][ref] whereas the
  // list == 0 branch uses [ref][0]; confirm that this index order is intended.
  short offset1 = (apply_weights ? (list == 0?  wp_offset[list_offset    ][ref][0]:  wp_offset[list_offset + 1][0  ][ref]) : 0);
  short offset2 = (apply_weights ? (list == 0?  wp_offset[list_offset + 1][ref][0]:  wp_offset[list_offset    ][0  ][ref]) : 0);

  // Picture 1 is the searched list's reference; picture 2 is entry 0 of the other list.
  ref_pic1_sub.luma = listX[list + list_offset][ref]->imgY_sub;
  ref_pic2_sub.luma = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgY_sub;
  img_width  = listX[list + list_offset][ref]->size_x;
  img_height = listX[list + list_offset][ref]->size_y;
  width_pad  = listX[list + list_offset][ref]->size_x_pad;
  height_pad = listX[list + list_offset][ref]->size_y_pad;

  if (apply_weights)
  {
    weight1 = list == 0 ? wbp_weight[list_offset         ][ref][0][0] : wbp_weight[list_offset + LIST_1][0  ][ref][0];
    weight2 = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][0] : wbp_weight[list_offset         ][0  ][ref][0];
    offsetBi=(offset1 + offset2 + 1)>>1;
    computeBiPred = computeBiPredSAD2; //ME only supports SAD computations
  }
  else
  {
    weight1 = 1<<luma_log_weight_denom;
    weight2 = 1<<luma_log_weight_denom;
    offsetBi = 0;
    computeBiPred = computeBiPredSAD1; //ME only supports SAD computations
  }

  if (ChromaMEEnable )
  {
    ref_pic1_sub.crcb[0] = listX[list + list_offset][ref]->imgUV_sub[0];
    ref_pic1_sub.crcb[1] = listX[list + list_offset][ref]->imgUV_sub[1];
    ref_pic2_sub.crcb[0] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[0];
    ref_pic2_sub.crcb[1] = listX[list == 0 ? 1 + list_offset: list_offset][ 0 ]->imgUV_sub[1];
    width_pad_cr  = listX[list + list_offset][ref]->size_x_cr_pad;
    height_pad_cr = listX[list + list_offset][ref]->size_y_cr_pad;
    if (apply_weights)
    {
      weight1_cr[0] = list == 0 ? wbp_weight[list_offset         ][ref][0][1] : wbp_weight[list_offset + LIST_1][0  ][ref][1];
      weight1_cr[1] = list == 0 ? wbp_weight[list_offset         ][ref][0][2] : wbp_weight[list_offset + LIST_1][0  ][ref][2];
      weight2_cr[0] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][1] : wbp_weight[list_offset         ][0  ][ref][1];
      weight2_cr[1] = list == 0 ? wbp_weight[list_offset + LIST_1][ref][0][2] : wbp_weight[list_offset         ][0  ][ref][2];
      // NOTE(review): the list != 0 branches index wp_offset with reference 0 for
      // both lists and do not use ref; confirm against the luma offsets above.
      offsetBi_cr[0] = (list == 0)
        ? (wp_offset[list_offset         ][ref][1] + wp_offset[list_offset + LIST_1][ref][1] + 1) >> 1
        : (wp_offset[list_offset + LIST_1][0  ][1] + wp_offset[list_offset         ][0  ][1] + 1) >> 1;
      offsetBi_cr[1] = (list == 0)
        ? (wp_offset[list_offset         ][ref][2] + wp_offset[list_offset + LIST_1][ref][2] + 1) >> 1
        : (wp_offset[list_offset + LIST_1][0  ][2] + wp_offset[list_offset         ][0  ][2] + 1) >> 1;
    }
    else
    {
      weight1_cr[0] = 1<<chroma_log_weight_denom;
      weight1_cr[1] = 1<<chroma_log_weight_denom;
      weight2_cr[0] = 1<<chroma_log_weight_denom;
      weight2_cr[1] = 1<<chroma_log_weight_denom;
      offsetBi_cr[0] = 0;
      offsetBi_cr[1] = 0;
    }
  }

  // Set function for getting reference picture lines (searched vector)
  if ((center2_x > search_range) && (center2_x < img_width -1-search_range-blocksize_x) &&
      (center2_y > search_range) && (center2_y < img_height-1-search_range-blocksize_y))
  {
    bipred2_access_method = FAST_ACCESS;
  }
  else
  {
    bipred2_access_method = UMV_ACCESS;
  }

  // Set function for getting reference picture lines (fixed vector).
  // Both axes are checked, as for the searched vector above: a block that is
  // horizontally outside the picture must not take the FAST_ACCESS path.
  if ((center1_x > search_range) && (center1_x < img_width -1-search_range-blocksize_x) &&
      (center1_y > search_range) && (center1_y < img_height-1-search_range-blocksize_y))
  {
    bipred1_access_method = FAST_ACCESS;
  }
  else
  {
    bipred1_access_method = UMV_ACCESS;
  }

  // Check the center median predictor
  cand_x = center2_x ;
  cand_y = center2_y ;
  mcost  = MV_COST (lambda_factor, mvshift, center1_x, center1_y, pred_x1, pred_y1);
  mcost += MV_COST (lambda_factor, mvshift, cand_x,    cand_y,    pred_x2, pred_y2);
  mcost += computeBiPred(cur_pic,
                         blocksize_y, blocksize_x, INT_MAX,
                         (center1_x << 2) + IMG_PAD_SIZE_TIMES4,
                         (center1_y << 2) + IMG_PAD_SIZE_TIMES4,
                         (cand_x << 2) + IMG_PAD_SIZE_TIMES4,
                         (cand_y << 2) + IMG_PAD_SIZE_TIMES4);
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x = cand_x;
    best_y = cand_y;
  }

  // The first diamond searches are anchored here, i.e. before the
  // zero-vector candidate below is evaluated.
  iXMinNow = best_x;
  iYMinNow = best_y;
  if (0 != pred_mv_x1 || 0 != pred_mv_y1 || 0 != pred_mv_x2 || 0 != pred_mv_y2)
  {
    // also try the zero motion vector
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_BIPRED_HELPER;
  }

  // If the min_mcost is small enough, do a local search then terminate
  // This is good for stationary or quasi-stationary areas
  if ((min_mcost<<3) < (ConvergeThreshold>>(block_type_shift_factor[blocktype])))
  {
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER;
    }
    *mv_x = (short) (best_x - pic_pix_x);
    *mv_y = (short) (best_y - pic_pix_y);
    return min_mcost;
  }

  // Small local search
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_X[m];
    cand_y = iYMinNow + Diamond_Y[m];
    SEARCH_ONE_PIXEL_BIPRED_HELPER;
  }

  // First_step: Symmetrical-cross search
  // If distortion is large, use large shapes. Otherwise, compact shapes are faster
  if ((blocktype == 1 &&
       (min_mcost<<2) > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
      ((min_mcost<<2) > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (i = 1; i <= search_range / 2; i++)
    {
      search_step = (i<<1) - 1;   // odd distances 1, 3, 5, ...
      cand_x = iXMinNow + search_step;
      cand_y = iYMinNow;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_x = iXMinNow - search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_x = iXMinNow;
      cand_y = iYMinNow + search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
      cand_y = iYMinNow - search_step;
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }

    // Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }

    // Multi Big Hexagon Search
    iXMinNow = best_x;
    iYMinNow = best_y;
    for(i = 1; i <= search_range / 4; i++)
    {
      for (m = 0; m < 16; m++)
      {
        cand_x = iXMinNow + Big_Hexagon_X[m] * i;
        cand_y = iYMinNow + Big_Hexagon_Y[m] * i;
        SEARCH_ONE_PIXEL_BIPRED_HELPER
      }
    }
  }

  // Search up_layer predictor for non 16x16 blocks
  if (blocktype > 1)
  {
    cand_x = pic_pix_x + (smpUMHEX_pred_MV_uplayer_X / 4);
    cand_y = pic_pix_y + (smpUMHEX_pred_MV_uplayer_Y / 4);
    SEARCH_ONE_PIXEL_BIPRED_HELPER
  }

  if(center2_x != pic_pix_x || center2_y != pic_pix_y)
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_BIPRED_HELPER

    iXMinNow = best_x;
    iYMinNow = best_y;
    // Local diamond search
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
  }

  // If the minimum cost is small enough, do a local search
  // and finish the search here
  if ((min_mcost<<2) < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    *mv_x = (short) (best_x - pic_pix_x);
    *mv_y = (short) (best_y - pic_pix_y);
    return min_mcost;
  }

  // Second_step:  Extended Hexagon-based Search
  for (i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_X[m];
      cand_y = iYMinNow + Hexagon_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }

  // Third_step: Small diamond search
  for (i = 0; i < search_range; i++)
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_BIPRED_HELPER
    }
    // The minimum cost point happens in the center
    if (best_x == iXMinNow && best_y == iYMinNow)
    {
      break;
    }
  }

  *mv_x = (short) (best_x - pic_pix_x);
  *mv_y = (short) (best_y - pic_pix_y);
  return min_mcost;
}
/*!
 ************************************************************************
 * \brief
 *    Derive smpUMHEX_flag_intra_SAD for the current macroblock from the
 *    intra flags stored in smpUMHEX_flag_intra (indexed by pix_x >> 4).
 *    Used for fast motion estimation.
 *
 * \note
 *    Nothing is written for I slices. At the picture origin the flag is
 *    cleared; in the first column only the entry of the current column
 *    is read; in the first row only the entry to the left is read;
 *    everywhere else the flag is the logical OR of the entries at the
 *    current, left and right columns.
 ************************************************************************
 */
void smpUMHEX_decide_intrabk_SAD()
{
  int mb_col;

  // I slices do not use the neighbour intra flag
  if (img->type == I_SLICE)
    return;

  mb_col = img->pix_x >> 4;

  if (img->pix_x == 0)
  {
    // First column: origin has no neighbours, otherwise use own column entry
    if (img->pix_y == 0)
      smpUMHEX_flag_intra_SAD = 0;
    else
      smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[mb_col];
  }
  else if (img->pix_y == 0)
  {
    // First row (not first column): only the left entry is consulted
    smpUMHEX_flag_intra_SAD = smpUMHEX_flag_intra[mb_col - 1];
  }
  else
  {
    // General case: current, left and right column entries
    smpUMHEX_flag_intra_SAD = (smpUMHEX_flag_intra[mb_col]     ||
                               smpUMHEX_flag_intra[mb_col - 1] ||
                               smpUMHEX_flag_intra[mb_col + 1]);
  }
}
/*!
 ************************************************************************
 * \brief
 *    Record whether the current macroblock was coded with best_mode 9
 *    or 10 and, if so (outside I slices), reset the stored list 0 and
 *    list 1 costs to zero. Used for fast motion estimation.
 *
 * \param best_mode
 *    Mode chosen for the current macroblock; values 9 and 10 are the
 *    ones treated as intra here
 * \param ref_max
 *    Not used by this function
 *
 * \note
 *    NOTE(review): the cost tables are cleared at indices [0..8][0..3][0..3]
 *    without any macroblock position offset, whereas smpUMHEX_setup reads
 *    them at ((pix_y>>2)+block_y, (pix_x>>2)+block_x) -- confirm that
 *    this is intended.
 ************************************************************************
 */
void smpUMHEX_skip_intrabk_SAD(int best_mode, int ref_max)
{
  const int chose_intra = (best_mode == 9 || best_mode == 10);
  int type_idx, row, col;

  // The per-column flag is only updated when img->number > 0
  if (img->number > 0)
  {
    smpUMHEX_flag_intra[img->pix_x >> 4] = chose_intra ? 1 : 0;
  }

  if (img->type == I_SLICE || !chose_intra)
    return;

  // Zero both cost tables for all 9 block-type slots over a 4x4 area
  for (type_idx = 0; type_idx < 9; type_idx++)
  {
    for (row = 0; row < 4; row++)
    {
      for (col = 0; col < 4; col++)
      {
        smpUMHEX_l0_cost[type_idx][row][col] = 0;
        smpUMHEX_l1_cost[type_idx][row][col] = 0;
      }
    }
  }
}
/*!
 ************************************************************************
 * \brief
 *    Set up the up-layer motion vector predictor and the up-layer cost
 *    predictor for the current block. Used for fast motion estimation.
 *
 * \param ref
 *    Reference index used to address all_mv
 * \param list
 *    Reference list; 1 selects smpUMHEX_l1_cost, any other value
 *    selects smpUMHEX_l0_cost
 * \param block_y
 *    Vertical block index, used for all_mv and added to img->pix_y>>2
 * \param block_x
 *    Horizontal block index, used for all_mv and added to img->pix_x>>2
 * \param blocktype
 *    Block type being searched; nothing is written when it is <= 1
 * \param all_mv
 *    Motion vector storage indexed [block_y][block_x][list][ref][type][x/y]
 *
 * \note
 *    The upper-layer block type is chosen as follows:
 *    blocktype > 6 -> 5, blocktype 5..6 -> 4, blocktype 4 -> 2,
 *    blocktype 2..3 -> 1. The cost predictor is half of the stored cost
 *    of that type, or 0 when smpUMHEX_flag_intra_SAD is set.
 ************************************************************************
 */
void smpUMHEX_setup(short ref,
                    int list,
                    int block_y,
                    int block_x,
                    int blocktype,
                    short ******all_mv)
{
  int upper_type;
  int cost_row, cost_col;
  const short *upper_mv;

  // No upper layer exists for blocktype <= 1: leave predictors untouched
  if (blocktype <= 1)
    return;

  // Map the current block type to the type of its upper layer
  if (blocktype > 6)
    upper_type = 5;
  else if (blocktype > 4)
    upper_type = 4;
  else if (blocktype == 4)
    upper_type = 2;
  else
    upper_type = 1;

  // Up-layer motion vector predictor
  upper_mv = all_mv[block_y][block_x][list][ref][upper_type];
  smpUMHEX_pred_MV_uplayer_X = upper_mv[0];
  smpUMHEX_pred_MV_uplayer_Y = upper_mv[1];

  // Up-layer cost predictor: half of the stored cost of the upper type
  cost_row = (img->pix_y >> 2) + block_y;
  cost_col = (img->pix_x >> 2) + block_x;

  smpUMHEX_pred_SAD_uplayer = (list == 1)
                            ? (smpUMHEX_l1_cost[upper_type][cost_row][cost_col])
                            : (smpUMHEX_l0_cost[upper_type][cost_row][cost_col]);
  smpUMHEX_pred_SAD_uplayer /= 2;

  // The predictor is forced to zero when the neighbour intra flag is set
  if (smpUMHEX_flag_intra_SAD)
  {
    smpUMHEX_pred_SAD_uplayer = 0;
  }
}