| /****************************************************************************** |
| * |
| * Copyright 2014 The Android Open Source Project |
| * Copyright 2003 - 2004 Open Interface North America, Inc. All rights |
| * reserved. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ******************************************************************************/ |
| |
| /******************************************************************************* |
| $Revision: #1 $ |
| ******************************************************************************/ |
| |
| /** @file |
| @ingroup codec_internal |
| */ |
| |
| /**@addgroup codec_internal*/ |
| /**@{*/ |
| |
| /* |
| * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima |
| * factorization. The scaling factors are folded into the windowing |
| * constants. 29 adds and 5 16x32 multiplies per 8 samples. |
| */ |
| |
| #include "oi_codec_sbc_private.h" |
| |
| #define AAN_C4_FIX (759250125) /* S1.30 759250125 0.707107*/ |
| |
| #define AAN_C6_FIX (410903207) /* S1.30 410903207 0.382683*/ |
| |
| #define AAN_Q0_FIX (581104888) /* S1.30 581104888 0.541196*/ |
| |
| #define AAN_Q1_FIX (1402911301) /* S1.30 1402911301 1.306563*/ |
| |
| /** Scales x by y bits to the right, adding a rounding factor. |
| */ |
| #ifndef SCALE |
| #define SCALE(x, y) (((x) + (1 << ((y)-1))) >> (y)) |
| #endif |
| |
| /** |
| * Default C language implementation of a 32x32->32 multiply. This function may |
| * be replaced by a platform-specific version for speed. |
| * |
| * @param u A signed 32-bit multiplicand |
| * @param v A signed 32-bit multiplier |
| |
| * @return A signed 32-bit value corresponding to the 32 most significant bits |
| * of the 64-bit product of u and v. |
| */ |
| INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) { |
| uint32_t u0, v0; |
| int32_t u1, v1, w1, w2, t; |
| |
| u0 = u & 0xFFFF; |
| u1 = u >> 16; |
| v0 = v & 0xFFFF; |
| v1 = v >> 16; |
| t = u0 * v0; |
| t = u1 * v0 + ((uint32_t)t >> 16); |
| w1 = t & 0xFFFF; |
| w2 = t >> 16; |
| w1 = u0 * v1 + w1; |
| return u1 * v1 + w2 + (w1 >> 16); |
| } |
| |
| #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y) |
| |
| #ifdef DEBUG_DCT |
| PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) { |
| #define FIX(x, bits) \ |
| (((int)floor(0.5f + ((x) * ((float)(1 << bits))))) / ((float)(1 << bits))) |
| #define FLOAT_BUTTERFLY(x, y) \ |
| x += y; \ |
| y = x - (y * 2); \ |
| OI_ASSERT(VALID_INT32(x)); \ |
| OI_ASSERT(VALID_INT32(y)); |
| #define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * sample) |
| #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y)))) |
| |
| double L00, L01, L02, L03, L04, L05, L06, L07; |
| double L25; |
| |
| double in0, in1, in2, in3; |
| double in4, in5, in6, in7; |
| |
| in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in0)); |
| in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in1)); |
| in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in2)); |
| in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in3)); |
| in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in4)); |
| in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in5)); |
| in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in6)); |
| in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); |
| OI_ASSERT(VALID_INT32(in7)); |
| |
| L00 = (in0 + in7); |
| OI_ASSERT(VALID_INT32(L00)); |
| L01 = (in1 + in6); |
| OI_ASSERT(VALID_INT32(L01)); |
| L02 = (in2 + in5); |
| OI_ASSERT(VALID_INT32(L02)); |
| L03 = (in3 + in4); |
| OI_ASSERT(VALID_INT32(L03)); |
| |
| L04 = (in3 - in4); |
| OI_ASSERT(VALID_INT32(L04)); |
| L05 = (in2 - in5); |
| OI_ASSERT(VALID_INT32(L05)); |
| L06 = (in1 - in6); |
| OI_ASSERT(VALID_INT32(L06)); |
| L07 = (in0 - in7); |
| OI_ASSERT(VALID_INT32(L07)); |
| |
| FLOAT_BUTTERFLY(L00, L03); |
| FLOAT_BUTTERFLY(L01, L02); |
| |
| L02 += L03; |
| OI_ASSERT(VALID_INT32(L02)); |
| |
| L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); |
| OI_ASSERT(VALID_INT32(L02)); |
| |
| FLOAT_BUTTERFLY(L00, L01); |
| |
| out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); |
| OI_ASSERT(VALID_INT16(out[0])); |
| out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); |
| OI_ASSERT(VALID_INT16(out[4])); |
| |
| FLOAT_BUTTERFLY(L03, L02); |
| out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); |
| OI_ASSERT(VALID_INT16(out[6])); |
| out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); |
| OI_ASSERT(VALID_INT16(out[2])); |
| |
| L04 += L05; |
| OI_ASSERT(VALID_INT32(L04)); |
| L05 += L06; |
| OI_ASSERT(VALID_INT32(L05)); |
| L06 += L07; |
| OI_ASSERT(VALID_INT32(L06)); |
| |
| L04 /= 2; |
| L05 /= 2; |
| L06 /= 2; |
| L07 /= 2; |
| |
| L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); |
| OI_ASSERT(VALID_INT32(L05)); |
| |
| L25 = L06 - L04; |
| OI_ASSERT(VALID_INT32(L25)); |
| L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); |
| OI_ASSERT(VALID_INT32(L25)); |
| |
| L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); |
| OI_ASSERT(VALID_INT32(L04)); |
| L04 -= L25; |
| OI_ASSERT(VALID_INT32(L04)); |
| |
| L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); |
| OI_ASSERT(VALID_INT32(L06)); |
| L06 -= L25; |
| OI_ASSERT(VALID_INT32(L25)); |
| |
| FLOAT_BUTTERFLY(L07, L05); |
| |
| FLOAT_BUTTERFLY(L05, L04); |
| out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1)); |
| OI_ASSERT(VALID_INT16(out[3])); |
| out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1)); |
| OI_ASSERT(VALID_INT16(out[5])); |
| |
| FLOAT_BUTTERFLY(L07, L06); |
| out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1)); |
| OI_ASSERT(VALID_INT16(out[7])); |
| out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1)); |
| OI_ASSERT(VALID_INT16(out[1])); |
| } |
| #undef BUTTERFLY |
| #endif |
| |
| /* |
| * This function calculates the AAN DCT. Its inputs are in S16.15 format, as |
| * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38 |
| * (1244918057 integer). The function it computes is an approximation to the |
| * array defined by: |
| * |
| * diag(aan_s) * AAN= C2 |
| * |
| * or |
| * |
| * AAN = diag(1/aan_s) * C2 |
| * |
| * where C2 is as it is defined in the comment at the head of this file, and |
| * |
| * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1; |
| * |
| * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ] |
| * |
| * The output ranges are shown as follows: |
| * |
| * Let Y[0..7] = AAN * X[0..7] |
| * |
| * Without loss of generality, assume the input vector X consists of elements |
| * between -1 and 1. The maximum possible value of a given output element occurs |
| * with some particular combination of input vector elements each of which is -1 |
| * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y |
| * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a |
| * positive contribution to the sum. Equivalently, one may simply sum |
| * abs(AAN)[t,i] over t to get the maximum possible value of Y[i]. |
| * |
| * This yields approximately: |
| * [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00] |
| * |
| * Given the maximum magnitude sensible input value of +/-37992, this yields the |
| * following vector of maximum output magnitudes: |
| * |
| * [ 303936 381820 367003 323692 303936 216555 151968 75984 ] |
| * |
| * Ultimately, these values must fit into 16 bit signed integers, so they must |
| * be scaled. A non-uniform scaling helps maximize the kept precision. The |
| * relative number of extra bits of precision maintainable with respect to the |
| * largest value is given here: |
| * |
| * [ 0 0 0 0 0 0 1 2 ] |
| * |
| */ |
| PRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) { |
| #define BUTTERFLY(x, y) \ |
| x += (y); \ |
| (y) = (x) - ((y) << 1); |
| #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2) |
| |
| int32_t L00, L01, L02, L03, L04, L05, L06, L07; |
| int32_t L25; |
| |
| int32_t in0, in1, in2, in3; |
| int32_t in4, in5, in6, in7; |
| |
| #if DCTII_8_SHIFT_IN != 0 |
| in0 = SCALE(in[0], DCTII_8_SHIFT_IN); |
| in1 = SCALE(in[1], DCTII_8_SHIFT_IN); |
| in2 = SCALE(in[2], DCTII_8_SHIFT_IN); |
| in3 = SCALE(in[3], DCTII_8_SHIFT_IN); |
| in4 = SCALE(in[4], DCTII_8_SHIFT_IN); |
| in5 = SCALE(in[5], DCTII_8_SHIFT_IN); |
| in6 = SCALE(in[6], DCTII_8_SHIFT_IN); |
| in7 = SCALE(in[7], DCTII_8_SHIFT_IN); |
| #else |
| in0 = in[0]; |
| in1 = in[1]; |
| in2 = in[2]; |
| in3 = in[3]; |
| in4 = in[4]; |
| in5 = in[5]; |
| in6 = in[6]; |
| in7 = in[7]; |
| #endif |
| |
| L00 = in0 + in7; |
| L01 = in1 + in6; |
| L02 = in2 + in5; |
| L03 = in3 + in4; |
| |
| L04 = in3 - in4; |
| L05 = in2 - in5; |
| L06 = in1 - in6; |
| L07 = in0 - in7; |
| |
| BUTTERFLY(L00, L03); |
| BUTTERFLY(L01, L02); |
| |
| L02 += L03; |
| |
| L02 = FIX_MULT_DCT(AAN_C4_FIX, L02); |
| |
| BUTTERFLY(L00, L01); |
| |
| out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0); |
| out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4); |
| |
| BUTTERFLY(L03, L02); |
| out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6); |
| out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2); |
| |
| L04 += L05; |
| L05 += L06; |
| L06 += L07; |
| |
| L04 /= 2; |
| L05 /= 2; |
| L06 /= 2; |
| L07 /= 2; |
| |
| L05 = FIX_MULT_DCT(AAN_C4_FIX, L05); |
| |
| L25 = L06 - L04; |
| L25 = FIX_MULT_DCT(AAN_C6_FIX, L25); |
| |
| L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04); |
| L04 -= L25; |
| |
| L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06); |
| L06 -= L25; |
| |
| BUTTERFLY(L07, L05); |
| |
| BUTTERFLY(L05, L04); |
| out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1); |
| out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1); |
| |
| BUTTERFLY(L07, L06); |
| out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1); |
| out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1); |
| #undef BUTTERFLY |
| |
| #ifdef DEBUG_DCT |
| { |
| float float_out[8]; |
| float_dct2_8(float_out, in); |
| } |
| #endif |
| } |
| |
| /**@}*/ |