| /* ------------------------------------------------------------------ |
| * Copyright (C) 1998-2009 PacketVideo |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
| * express or implied. |
| * See the License for the specific language governing permissions |
| * and limitations under the License. |
| * ------------------------------------------------------------------- |
| */ |
| /* |
| |
| Pathname: ./src/fft_rx4_short.c |
| Funtions: fft_rx4_short |
| |
| ------------------------------------------------------------------------------ |
| REVISION HISTORY |
| |
| Description: |
| (1) Eliminated search for max in the main loop. |
| (2) Simplified the function by eliminating different conditions |
| for exp. |
| (3) Reduced precision on w_64rx4 from Q15 to Q12, so now the |
| input can be as high as 1.0 and saturation will not occurre |
| because the accumulation times the new Q12 format will never |
| exceed 31 bits. |
| |
| Description: |
| (1) Added comment to explain max search elimination and |
| Q format during multiplications |
| (2) Increased down shift from 1 to 2, to ensure that 32-bit |
| numbers will not overflow when 2 consecutive adds are done |
| This was found during code review. |
| |
| Who: Date: |
| Description: |
| |
| ------------------------------------------------------------------------------ |
| INPUT AND OUTPUT DEFINITIONS |
| |
| Inputs: |
| Data = Input complex vector, arranged in the following order: |
| real, imag, real, imag... |
| This is a complex vector whose elements (real and Imag) are |
| Int32. |
| type Int32 * |
| |
| peak_value = Input, peak value of the input vector |
| Output, peak value of the resulting vector |
| type Int32 * |
| |
| Local Stores/Buffers/Pointers Needed: |
| None |
| |
| Global Stores/Buffers/Pointers Needed: |
| None |
| |
| Outputs: |
| exponent returns a shift to compensate the scaling introduced by |
| overflow protection |
| |
| Pointers and Buffers Modified: |
| calculation are done in-place and returned in Data |
| |
| Local Stores Modified: |
| None |
| |
| Global Stores Modified: |
| None |
| |
| ------------------------------------------------------------------------------ |
| FUNCTION DESCRIPTION |
| |
| Fast Fourier Transform, radix 4 with Decimation in Frequency and block |
| floating point arithmetic. |
| The radix-4 FFT simply divides the FFT into four smaller FFTs. Each of |
| the smaller FFTs is then further divided into smaller ones and so on. |
| It consists of log 4 N stages and each stage consists of N/4 dragonflies. |
| |
| An FFT is nothing but a bundle of multiplications and summations which |
| may overflow during calculations. |
| |
| |
| This routine uses a scheme to test and scale the result output from |
| each FFT stage in order to fix the accumulation overflow. |
| |
| The Input Data should be in Q13 format to get the highest precision. |
| At the end of each dragonfly calculation, a test for possible bit growth |
| is made, if bit growth is possible the Data is scale down back to Q13. |
| |
| |
| ------------------------------------------------------------------------------ |
| REQUIREMENTS |
| |
| This function should provide a fixed point FFT for an input array |
| of size 64. |
| |
| ------------------------------------------------------------------------------ |
| REFERENCES |
| |
| [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling, |
| C. Nikias, Macmillan Pub. Co. |
| |
| ------------------------------------------------------------------------------ |
| PSEUDO-CODE |
| |
| |
| MODIFY( x[] ) |
| RETURN( exponent ) |
| |
| ------------------------------------------------------------------------------ |
| RESOURCES USED |
| When the code is written for a specific target processor the |
| the resources used should be documented below. |
| |
| STACK USAGE: [stack count for this module] + [variable to represent |
| stack usage for each subroutine called] |
| |
| where: [stack usage variable] = stack usage for [subroutine |
| name] (see [filename].ext) |
| |
| DATA MEMORY USED: x words |
| |
| PROGRAM MEMORY USED: x words |
| |
| CLOCK CYCLES: [cycle count equation for this module] + [variable |
| used to represent cycle count for each subroutine |
| called] |
| |
| where: [cycle count variable] = cycle count for [subroutine |
| name] (see [filename].ext) |
| |
| ------------------------------------------------------------------------------ |
| */ |
| /*---------------------------------------------------------------------------- |
| ; INCLUDES |
| ----------------------------------------------------------------------------*/ |
| |
| #include "pv_audio_type_defs.h" |
| #include "fft_rx4.h" |
| #include "pv_normalize.h" |
| #include "fxp_mul32.h" |
| |
| /*---------------------------------------------------------------------------- |
| ; MACROS |
| ; Define module specific macros here |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; DEFINES |
| ; Include all pre-processor statements here. Include conditional |
| ; compile variables also. |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; LOCAL FUNCTION DEFINITIONS |
| ; Function Prototype declaration |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; LOCAL VARIABLE DEFINITIONS |
| ; Variable declaration - defined here and used outside this module |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; EXTERNAL FUNCTION REFERENCES |
| ; Declare functions defined elsewhere and referenced in this module |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; EXTERNAL VARIABLES REFERENCES |
| ; Declare variables used in this module but defined elsewhere |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES |
| ; Declare variables used in this module but defined elsewhere |
| ----------------------------------------------------------------------------*/ |
| |
| /*---------------------------------------------------------------------------- |
| ; FUNCTION CODE |
| ----------------------------------------------------------------------------*/ |
| |
| Int fft_rx4_short( |
| Int32 Data[], |
| Int32 *peak_value) |
| |
| { |
| Int n1; |
| Int n2; |
| Int n3; |
| Int j; |
| Int k; |
| Int i; |
| Int32 exp_jw1; |
| Int32 exp_jw2; |
| Int32 exp_jw3; |
| |
| |
| Int32 t1; |
| Int32 t2; |
| Int32 r1; |
| Int32 r2; |
| Int32 r3; |
| Int32 s1; |
| Int32 s2; |
| Int32 s3; |
| |
| Int32 *pData1; |
| Int32 *pData2; |
| Int32 *pData3; |
| Int32 *pData4; |
| const Int32 *pw; |
| Int32 temp1; |
| Int32 temp2; |
| Int32 temp3; |
| Int32 temp4; |
| Int32 max; |
| Int exp; |
| Int exponent = 0; |
| Int shift; |
| |
| |
| max = *peak_value; |
| exp = 0; |
| |
| if (max > 0x008000) |
| { |
| exp = 8 - pv_normalize(max); /* use 24 bits */ |
| |
| exponent = exp; /* keeps track of # of shifts */ |
| |
| } |
| |
| n2 = FFT_RX4_SHORT; |
| |
| pw = W_64rx4; |
| |
| |
| /* shift down to avoid possible overflow in first pass of the loop */ |
| shift = 2; |
| |
| for (k = FFT_RX4_SHORT; k > 4; k >>= 2) |
| { |
| |
| n1 = n2; |
| n2 >>= 2; |
| n3 = n1 >> 1; |
| |
| exp -= 2; |
| |
| for (i = 0; i < FFT_RX4_SHORT; i += n1) |
| { |
| pData1 = &Data[ i<<1]; |
| pData3 = pData1 + n3; |
| pData2 = pData1 + n1; |
| pData4 = pData3 + n1; |
| |
| temp1 = *(pData1); |
| temp2 = *(pData2); |
| temp1 >>= shift; |
| temp2 >>= shift; |
| |
| r1 = temp1 + temp2; |
| r2 = temp1 - temp2; |
| |
| temp3 = *(pData3++); |
| temp4 = *(pData4++); |
| temp3 >>= shift; |
| temp4 >>= shift; |
| |
| t1 = temp3 + temp4; |
| t2 = temp3 - temp4; |
| |
| *(pData1++) = (r1 + t1) >> exp; |
| *(pData2++) = (r1 - t1) >> exp; |
| |
| temp1 = *pData1; |
| temp2 = *pData2; |
| temp1 >>= shift; |
| temp2 >>= shift; |
| |
| s1 = temp1 + temp2; |
| s2 = temp1 - temp2; |
| |
| temp3 = *pData3; |
| temp4 = *pData4; |
| temp3 >>= shift; |
| temp4 >>= shift; |
| |
| t1 = temp3 + temp4; |
| r1 = temp3 - temp4; |
| |
| *pData1 = (s1 + t1) >> exp; |
| *pData2 = (s1 - t1) >> exp; |
| |
| *pData4-- = (s2 + t2) >> exp; |
| *pData4 = (r2 - r1) >> exp; |
| |
| *pData3-- = (s2 - t2) >> exp; |
| *pData3 = (r2 + r1) >> exp; |
| |
| |
| } /* i */ |
| |
| for (j = 1; j < n2; j++) |
| { |
| exp_jw1 = *pw++; |
| exp_jw2 = *pw++; |
| exp_jw3 = *pw++; |
| |
| |
| for (i = j; i < FFT_RX4_SHORT; i += n1) |
| { |
| pData1 = &Data[ i<<1]; |
| pData3 = pData1 + n3; |
| pData2 = pData1 + n1; |
| pData4 = pData3 + n1; |
| |
| temp1 = *(pData1); |
| temp2 = *(pData2++); |
| temp1 >>= shift; |
| temp2 >>= shift; |
| |
| r1 = temp1 + temp2; |
| r2 = temp1 - temp2; |
| temp3 = *(pData3++); |
| temp4 = *(pData4++); |
| temp3 >>= shift; |
| temp4 >>= shift; |
| |
| t1 = temp3 + temp4; |
| t2 = temp3 - temp4; |
| |
| *(pData1++) = (r1 + t1) >> exp; |
| r1 = (r1 - t1) >> exp; |
| |
| temp1 = *pData1; |
| temp2 = *pData2; |
| temp1 >>= shift; |
| temp2 >>= shift; |
| |
| s1 = temp1 + temp2; |
| s2 = temp1 - temp2; |
| |
| s3 = (s2 + t2) >> exp; |
| s2 = (s2 - t2) >> exp; |
| |
| temp3 = *pData3; |
| temp4 = *pData4 ; |
| temp3 >>= shift; |
| temp4 >>= shift; |
| |
| t1 = temp3 + temp4; |
| t2 = temp3 - temp4; |
| |
| *pData1 = (s1 + t1) >> exp; |
| s1 = (s1 - t1) >> exp; |
| |
| |
| *pData2-- = cmplx_mul32_by_16(s1, -r1, exp_jw2) << 1; |
| *pData2 = cmplx_mul32_by_16(r1, s1, exp_jw2) << 1; |
| |
| r3 = ((r2 - t2) >> exp); |
| r2 = ((r2 + t2) >> exp); |
| |
| *pData3-- = cmplx_mul32_by_16(s2, -r2, exp_jw1) << 1; |
| *pData3 = cmplx_mul32_by_16(r2, s2, exp_jw1) << 1; |
| |
| *pData4-- = cmplx_mul32_by_16(s3, -r3, exp_jw3) << 1; |
| *pData4 = cmplx_mul32_by_16(r3, s3, exp_jw3) << 1; |
| |
| } /* i */ |
| |
| } /* j */ |
| |
| /* |
| * this will reset exp and shift to zero for the second pass of the |
| * loop |
| */ |
| exp = 2; |
| shift = 0; |
| |
| } /* k */ |
| |
| |
| max = 0; |
| |
| pData1 = Data - 7; |
| |
| for (i = ONE_FOURTH_FFT_RX4_SHORT; i != 0 ; i--) |
| { |
| pData1 += 7; |
| |
| pData3 = pData1 + 2; |
| pData2 = pData1 + 4; |
| pData4 = pData1 + 6; |
| |
| temp1 = *pData1; |
| temp2 = *pData2++; |
| |
| r1 = temp1 + temp2; |
| r2 = temp1 - temp2; |
| |
| temp1 = *pData3++; |
| temp2 = *pData4++; |
| |
| t1 = temp1 + temp2; |
| t2 = temp1 - temp2; |
| |
| temp1 = (r1 + t1); |
| r1 = (r1 - t1); |
| *(pData1++) = temp1; |
| max |= (temp1 >> 31) ^ temp1; |
| |
| |
| |
| temp1 = *pData1; |
| temp2 = *pData2; |
| |
| s1 = temp1 + temp2; |
| s2 = temp1 - temp2; |
| |
| s3 = (s2 + t2); |
| s2 = (s2 - t2); |
| |
| temp1 = *pData3; |
| temp2 = *pData4; |
| |
| t1 = temp1 + temp2; |
| t2 = temp1 - temp2; |
| |
| temp1 = (s1 + t1); |
| temp2 = (s1 - t1); |
| *pData1 = temp1; |
| *pData2-- = temp2; |
| max |= (temp1 >> 31) ^ temp1; |
| max |= (temp2 >> 31) ^ temp2; |
| |
| *pData2 = r1; |
| *pData3-- = s2; |
| *pData4-- = s3; |
| max |= (r1 >> 31) ^ r1; |
| max |= (s2 >> 31) ^ s2; |
| max |= (s3 >> 31) ^ s3; |
| |
| temp1 = (r2 - t2); |
| temp2 = (r2 + t2); |
| *pData4 = temp1; |
| *pData3 = temp2; |
| max |= (temp1 >> 31) ^ temp1; |
| max |= (temp2 >> 31) ^ temp2; |
| |
| } /* i */ |
| |
| *peak_value = max; |
| |
| |
| return (exponent); |
| |
| } |