blob: 8d7d9f127c16235221772d0e62893c2bc73c00f6 [file] [log] [blame]
/* ------------------------------------------------------------------
* Copyright (C) 1998-2009 PacketVideo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
* -------------------------------------------------------------------
*/
#include "mp4enc_lib.h"
#include "mp4lib_int.h"
#include "dct_inline.h"
#define FDCT_SHIFT 10
#ifdef __cplusplus
extern "C"
{
#endif
/**************************************************************************/
/* Function: BlockDCT_AANwSub
Date: 7/31/01
Input:
Output: out[64] ==> next block
Purpose: Do subtraction for zero MV first
Modified:
**************************************************************************/
Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x0188053A;
Int abs_sum;
Int mask;
Int tmp, tmp2;
Int ColTh;
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
/* assuming the block is word-aligned */
mask = 0x1FE;
tmp = *((Int*) cur); /* contains 4 pixels */
tmp2 = *((Int*) pred); /* prediction 4 pixels */
k0 = tmp2 & 0xFF;
k1 = mask & (tmp << 1);
k0 = k1 - (k0 << 1);
k1 = (tmp2 >> 8) & 0xFF;
k2 = mask & (tmp >> 7);
k1 = k2 - (k1 << 1);
k2 = (tmp2 >> 16) & 0xFF;
k3 = mask & (tmp >> 15);
k2 = k3 - (k2 << 1);
k3 = (tmp2 >> 24) & 0xFF;
k4 = mask & (tmp >> 23);
k3 = k4 - (k3 << 1);
tmp = *((Int*)(cur + 4)); /* another 4 pixels */
tmp2 = *((Int*)(pred + 4));
k4 = tmp2 & 0xFF;
k5 = mask & (tmp << 1);
k4 = k5 - (k4 << 1);
k5 = (tmp2 >> 8) & 0xFF;
k6 = mask & (tmp >> 7);
k5 = k6 - (k5 << 1);
k6 = (tmp2 >> 16) & 0xFF;
k7 = mask & (tmp >> 15);
k6 = k7 - (k6 << 1);
k7 = (tmp2 >> 24) & 0xFF;
tmp = mask & (tmp >> 23);
k7 = tmp - (k7 << 1);
cur += width;
pred += 16;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
k1 = k0 - (k1 << 1);
/**********/
dst[0] = k0;
dst[4] = k1; /* col. 4 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
k3 = (k3 << 1) - k2;
/********/
dst[2] = k2; /* col. 2 */
k3 <<= 1; /* scale up col. 6 */
dst[6] = k3; /* col. 6 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k4 = k4 + k7;
k7 = (k7 << 1) - k4;
k5 = k5 + k6;
k4 <<= 1; /* scale up col.5 */
k6 = k5 - (k6 << 1);
/********/
dst[5] = k4; /* col. 5 */
k6 <<= 2; /* scale up col. 7 */
dst[1] = k5; /* col. 1 */
dst[7] = k6; /* col. 7 */
dst[3] = k7; /* col. 3 */
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 8;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
/* deadzone thresholding for column */
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
k1 = k0 - (k1 << 1);
/**********/
out[32] = k1; /* row 4 */
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
k3 = (k3 << 1) - k2;
k3 <<= 1; /* scale up col. 6 */
/********/
out[48] = k3; /* row 6 */
out[16] = k2; /* row 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k4 = k4 + k7;
k7 = (k7 << 1) - k4;
k5 = k5 + k6;
k4 <<= 1; /* scale up col. 5 */
k6 = k5 - (k6 << 1);
/********/
out[24] = k7 ; /* row 3 */
k6 <<= 2; /* scale up col. 7 */
out[56] = k6 ; /* row 7 */
out[8] = k5 ; /* row 1 */
out[40] = k4 ; /* row 5 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: Block4x4DCT_AANwSub
Date: 7/31/01
Input:
Output: out[64] ==> next block
Purpose: Do subtraction for zero MV first before 4x4 DCT
Modified:
**************************************************************************/
Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x0188053A;
Int mask;
Int tmp, tmp2;
Int abs_sum;
Int ColTh;
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
/* assuming the block is word-aligned */
mask = 0x1FE;
tmp = *((Int*) cur); /* contains 4 pixels */
tmp2 = *((Int*) pred); /* prediction 4 pixels */
k0 = tmp2 & 0xFF;
k1 = mask & (tmp << 1);
k0 = k1 - (k0 << 1);
k1 = (tmp2 >> 8) & 0xFF;
k2 = mask & (tmp >> 7);
k1 = k2 - (k1 << 1);
k2 = (tmp2 >> 16) & 0xFF;
k3 = mask & (tmp >> 15);
k2 = k3 - (k2 << 1);
k3 = (tmp2 >> 24) & 0xFF;
k4 = mask & (tmp >> 23);
k3 = k4 - (k3 << 1);
tmp = *((Int*)(cur + 4)); /* another 4 pixels */
tmp2 = *((Int*)(pred + 4));
k4 = tmp2 & 0xFF;
k5 = mask & (tmp << 1);
k4 = k5 - (k4 << 1);
k5 = (tmp2 >> 8) & 0xFF;
k6 = mask & (tmp >> 7);
k5 = k6 - (k5 << 1);
k6 = (tmp2 >> 16) & 0xFF;
k7 = mask & (tmp >> 15);
k6 = k7 - (k6 << 1);
k7 = (tmp2 >> 24) & 0xFF;
tmp = mask & (tmp >> 23);
k7 = tmp - (k7 << 1);
cur += width;
pred += 16;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
dst[0] = k0;
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
/********/
dst[2] = k2; /* col. 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k7 = k7 - k4;
k5 = k5 + k6;
/********/
dst[1] = k5; /* col. 1 */
dst[3] = k7; /* col. 3 */
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 4;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
/********/
out[16] = k2; /* row 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k7 = k7 - k4 ;
k5 = k5 + k6;
/********/
out[24] = k7 ; /* row 3 */
out[8] = k5 ; /* row 1 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: Block2x2DCT_AANwSub
Date: 7/31/01
Input:
Output: out[64] ==> next block
Purpose: Do subtraction for zero MV first before 2x2 DCT
Modified:
**************************************************************************/
Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x018803B2;
Int mask;
Int tmp, tmp2;
Int abs_sum;
Int ColTh;
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
/* assuming the block is word-aligned */
mask = 0x1FE;
tmp = *((Int*) cur); /* contains 4 pixels */
tmp2 = *((Int*) pred); /* prediction 4 pixels */
k0 = tmp2 & 0xFF;
k1 = mask & (tmp << 1);
k0 = k1 - (k0 << 1);
k1 = (tmp2 >> 8) & 0xFF;
k2 = mask & (tmp >> 7);
k1 = k2 - (k1 << 1);
k2 = (tmp2 >> 16) & 0xFF;
k3 = mask & (tmp >> 15);
k2 = k3 - (k2 << 1);
k3 = (tmp2 >> 24) & 0xFF;
k4 = mask & (tmp >> 23);
k3 = k4 - (k3 << 1);
tmp = *((Int*)(cur + 4)); /* another 4 pixels */
tmp2 = *((Int*)(pred + 4));
k4 = tmp2 & 0xFF;
k5 = mask & (tmp << 1);
k4 = k5 - (k4 << 1);
k5 = (tmp2 >> 8) & 0xFF;
k6 = mask & (tmp >> 7);
k5 = k6 - (k5 << 1);
k6 = (tmp2 >> 16) & 0xFF;
k7 = mask & (tmp >> 15);
k6 = k7 - (k6 << 1);
k7 = (tmp2 >> 24) & 0xFF;
tmp = mask & (tmp >> 23);
k7 = tmp - (k7 << 1);
cur += width;
pred += 16;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
dst[0] = k0;
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k5 = k1 >> FDCT_SHIFT;
/*****************/
/********/
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k1 = mla392(k4, k14, round);
k1 = mla946(k6, k14, k1);
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k5 = k5 + k6;
/********/
dst[1] = k5;
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 2;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k5 = k1 >> FDCT_SHIFT;
/*****************/
/********/
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k1 = mla392(k4, k14, round);
k1 = mla946(k6, k14, k1);
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k5 = k5 + k6;
/********/
out[8] = k5 ; /* row 1 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: BlockDCT_AANIntra
Date: 8/9/01
Input: rec
Output: out[64] ==> next block
Purpose: Input directly from rec frame.
Modified:
**************************************************************************/
Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x0188053A;
Int abs_sum;
Int mask;
Int *curInt, tmp;
Int ColTh;
OSCL_UNUSED_ARG(dummy2);
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
mask = 0x1FE;
curInt = (Int*) cur;
tmp = curInt[0]; /* contains 4 pixels */
k0 = mask & (tmp << 1);
k1 = mask & (tmp >> 7);
k2 = mask & (tmp >> 15);
k3 = mask & (tmp >> 23);
tmp = curInt[1]; /* another 4 pixels */
k4 = mask & (tmp << 1);
k5 = mask & (tmp >> 7);
k6 = mask & (tmp >> 15);
k7 = mask & (tmp >> 23);
cur += width;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
k1 = k0 - (k1 << 1);
/**********/
dst[0] = k0;
dst[4] = k1; /* col. 4 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
k3 = (k3 << 1) - k2;
/********/
dst[2] = k2; /* col. 2 */
k3 <<= 1; /* scale up col. 6 */
dst[6] = k3; /* col. 6 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k4 = k4 + k7;
k7 = (k7 << 1) - k4;
k5 = k5 + k6;
k4 <<= 1; /* scale up col.5 */
k6 = k5 - (k6 << 1);
/********/
dst[5] = k4; /* col. 5 */
k6 <<= 2; /* scale up col. 7 */
dst[1] = k5; /* col. 1 */
dst[7] = k6; /* col. 7 */
dst[3] = k7; /* col. 3 */
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 8;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
/* deadzone thresholding for column */
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
k1 = k0 - (k1 << 1);
/**********/
out[32] = k1; /* row 4 */
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
k3 = (k3 << 1) - k2;
k3 <<= 1; /* scale up col. 6 */
/********/
out[48] = k3; /* row 6 */
out[16] = k2; /* row 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k4 = k4 + k7;
k7 = (k7 << 1) - k4;
k5 = k5 + k6;
k4 <<= 1; /* scale up col. 5 */
k6 = k5 - (k6 << 1);
/********/
out[24] = k7 ; /* row 3 */
k6 <<= 2; /* scale up col. 7 */
out[56] = k6 ; /* row 7 */
out[8] = k5 ; /* row 1 */
out[40] = k4 ; /* row 5 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: Block4x4DCT_AANIntra
Date: 8/9/01
Input: prev
Output: out[64] ==> next block
Purpose: Input directly from prev frame. output 2x2 DCT
Modified:
**************************************************************************/
Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x0188053A;
Int mask;
Int *curInt, tmp;
Int abs_sum;
Int ColTh;
OSCL_UNUSED_ARG(dummy2);
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
mask = 0x1FE;
curInt = (Int*) cur;
tmp = curInt[0]; /* contains 4 pixels */
k0 = mask & (tmp << 1);
k1 = mask & (tmp >> 7);
k2 = mask & (tmp >> 15);
k3 = mask & (tmp >> 23);
tmp = curInt[1]; /* another 4 pixels */
k4 = mask & (tmp << 1);
k5 = mask & (tmp >> 7);
k6 = mask & (tmp >> 15);
k7 = mask & (tmp >> 23);
cur += width;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
dst[0] = k0;
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
/********/
dst[2] = k2; /* col. 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k7 = k7 - k4;
k5 = k5 + k6;
/********/
dst[1] = k5; /* col. 1 */
dst[3] = k7; /* col. 3 */
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 4;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
k2 = k2 + k3;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k0 = mla724(k12, k2, round);
k5 = k1 >> FDCT_SHIFT;
k2 = k0 >> FDCT_SHIFT;
/*****************/
k2 = k2 + k3;
/********/
out[16] = k2; /* row 2 */
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k0 = k4 - k6;
k1 = mla392(k0, k14, round);
k0 = mla554(k4, k12, k1);
k1 = mla1338(k6, k14, k1);
k4 = k0 >> FDCT_SHIFT;
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k7 = (k7 << 1) - k5;
k7 = k7 - k4 ;
k5 = k5 + k6;
/********/
out[24] = k7 ; /* row 3 */
out[8] = k5 ; /* row 1 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: Block2x2DCT_AANIntra
Date: 8/9/01
Input: prev
Output: out[64] ==> next block
Purpose: Input directly from prev frame. output 2x2 DCT
Modified:
**************************************************************************/
Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
{
Short *dst;
Int k0, k1, k2, k3, k4, k5, k6, k7;
Int round;
Int k12 = 0x022A02D4;
Int k14 = 0x018803B2;
Int mask;
Int *curInt, tmp;
Int abs_sum;
Int ColTh;
OSCL_UNUSED_ARG(dummy2);
dst = out + 64 ;
ColTh = *dst;
out += 128;
round = 1 << (FDCT_SHIFT - 1);
do /* fdct_nextrow */
{
mask = 0x1FE;
curInt = (Int*) cur;
tmp = curInt[0]; /* contains 4 pixels */
k0 = mask & (tmp << 1);
k1 = mask & (tmp >> 7);
k2 = mask & (tmp >> 15);
k3 = mask & (tmp >> 23);
tmp = curInt[1]; /* another 4 pixels */
k4 = mask & (tmp << 1);
k5 = mask & (tmp >> 7);
k6 = mask & (tmp >> 15);
k7 = mask & (tmp >> 23);
cur += width;
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
dst[0] = k0;
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k5 = k1 >> FDCT_SHIFT;
/*****************/
/********/
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k1 = mla392(k4, k14, round);
k1 = mla946(k6, k14, k1);
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k5 = k5 + k6;
/********/
dst[1] = k5;
dst += 8;
}
while (dst < out);
out -= 64;
dst = out + 2;
/* Vertical Block Loop */
do /* Vertical 8xDCT loop */
{
k0 = out[0];
k1 = out[8];
k2 = out[16];
k3 = out[24];
k4 = out[32];
k5 = out[40];
k6 = out[48];
k7 = out[56];
abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7);
if (abs_sum < ColTh)
{
out[0] = 0x7fff;
out++;
continue;
}
/* fdct_1 */
k0 = k0 + k7;
k7 = k0 - (k7 << 1);
k1 = k1 + k6;
k6 = k1 - (k6 << 1);
k2 = k2 + k5;
k5 = k2 - (k5 << 1);
k3 = k3 + k4;
k4 = k3 - (k4 << 1);
k0 = k0 + k3;
k3 = k0 - (k3 << 1);
k1 = k1 + k2;
k2 = k1 - (k2 << 1);
k0 = k0 + k1;
/**********/
out[0] = k0; /* row 0 */
/* fdct_2 */
k4 = k4 + k5;
k5 = k5 + k6;
k6 = k6 + k7;
/* MUL2C k2,k5,724,FDCT_SHIFT */
/* k0, k1 become scratch */
/* assume FAST MULTIPLY */
k1 = mla724(k12, k5, round);
k5 = k1 >> FDCT_SHIFT;
/*****************/
/********/
/* fdct_3 */
/* ROTATE k4,k6,392,946, FDCT_SHIFT */
/* assume FAST MULTIPLY */
/* k0, k1 are output */
k1 = mla392(k4, k14, round);
k1 = mla946(k6, k14, k1);
k6 = k1 >> FDCT_SHIFT;
/***********************/
k5 = k5 + k7;
k5 = k5 + k6;
/********/
out[8] = k5 ; /* row 1 */
out++;
}
while ((uintptr_t)out < (uintptr_t)dst) ;
return ;
}
/**************************************************************************/
/* Function: Block1x1DCTwSub
Date: 8/9/01
Input: block
Output: y
Purpose: Compute DC value only
Modified:
**************************************************************************/
void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width)
{
UChar *end;
Int temp = 0;
Int offset2;
offset2 = width - 8;
end = pred + (16 << 3);
do
{
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
temp += (*cur++ - *pred++);
cur += offset2;
pred += 8;
}
while (pred < end) ;
out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
out[0] = temp >> 3;
return ;
}
/**************************************************************************/
/* Function: Block1x1DCTIntra
Date: 8/9/01
Input: prev
Output: out
Purpose: Compute DC value only
Modified:
**************************************************************************/
void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width)
{
UChar *end;
Int temp = 0;
ULong word;
OSCL_UNUSED_ARG(dummy2);
end = cur + (width << 3);
do
{
word = *((ULong*)cur);
temp += (word >> 24);
temp += ((word >> 16) & 0xFF);
temp += ((word >> 8) & 0xFF);
temp += (word & 0xFF);
word = *((ULong*)(cur + 4));
temp += (word >> 24);
temp += ((word >> 16) & 0xFF);
temp += ((word >> 8) & 0xFF);
temp += (word & 0xFF);
cur += width;
}
while (cur < end) ;
out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0;
out[0] = temp >> 3;
return ;
}
#ifdef __cplusplus
}
#endif