| ;// |
| ;// |
| ;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s |
| ;// OpenMAX DL: v1.0.2 |
| ;// Revision: 12290 |
| ;// Date: Wednesday, April 9, 2008 |
| ;// |
| ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. |
| ;// |
| ;// |
| ;// |
| |
| INCLUDE omxtypes_s.h |
| INCLUDE armCOMM_s.h |
| |
| M_VARIANTS CortexA8 |
| |
| IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe |
| IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe |
| |
| IF CortexA8 |
| |
| ;// Loop control word, loaded into XY before the loops start. |
| ;// Every pass ends with ADDS XY, XY, XY (a one-bit left shift), so the bit |
| ;// pattern 0101 0101 in the top byte is consumed one bit per pass: |
| ;// carry clear -> BCC LoopX repeats the inner loop |
| ;// carry set -> the inner loop ends; BNE LoopY then repeats the outer |
| ;// loop as long as the shifted value is still non-zero |
| ;// With 0x55000000 this yields 2 inner passes per outer pass and 4 outer |
| ;// passes (8 passes in total) without needing separate counters. |
| LOOP_COUNT EQU 0x55000000 |
| |
| |
| ;// Function arguments |
| ;// The first four arguments are taken from r0-r3. |
| |
| pSrcDst RN 0 |
| srcdstStep RN 1 |
| pAlpha RN 2 |
| pBeta RN 3 |
| |
| ;// pThresholds and pBS are filled from the two stack arguments |
| ;// (ppThresholds / ppBS) with M_LDR in the function prologue. |
| ;// bS10 receives one halfword (two bytes) read from pBS per loop pass. |
| pThresholds RN 5 |
| pBS RN 4 |
| bS10 RN 12 |
| |
| ;// pAlpha_0 / pBeta_0 are the same registers as pAlpha / pBeta and are used |
| ;// for the initial dAlpha / dBeta load. |
| pAlpha_0 RN 2 |
| pBeta_0 RN 3 |
| |
| ;// pAlpha_1 / pBeta_1 are set to pAlpha_0 + 1 / pBeta_0 + 1 and are used to |
| ;// reload dAlpha / dBeta at ExitLoopY. |
| pAlpha_1 RN 7 |
| pBeta_1 RN 8 |
| |
| |
| |
| ;// Loop |
| ;// XY holds the shifting loop control word (initialised from LOOP_COUNT). |
| |
| XY RN 9 |
| |
| ;// pTmp is a second row pointer (pSrcDst + srcdstStep) used alternately with |
| ;// pSrcDst while loading rows; it is reused as the column base while storing. |
| ;// step is 2 * srcdstStep, i.e. the distance each pointer advances per load. |
| pTmp RN 6 |
| step RN 10 |
| |
| ;// Pixels |
| ;// Each D register holds 8 bytes of one row. The function loads them from |
| ;// consecutive rows in the order P_3, P_2, P_1, P_0, Q_0, Q_1, Q_2, Q_3. |
| dP_0 DN D4.U8 |
| dP_1 DN D5.U8 |
| dP_2 DN D6.U8 |
| dP_3 DN D7.U8 |
| dQ_0 DN D8.U8 |
| dQ_1 DN D9.U8 |
| dQ_2 DN D10.U8 |
| dQ_3 DN D11.U8 |
| |
| |
| ;// Filtering Decision |
| ;// dAlpha / dBeta hold one byte from pAlpha / pBeta replicated to all lanes. |
| dAlpha DN D0.U8 |
| dBeta DN D2.U8 |
| |
| ;// Per-byte masks produced by the function body. |
| ;// NOTE: dAqflg shares D12 with dAp1p0, and dApflg shares D17 with dAq2q0, |
| ;// so the body has to finish reading the difference value before it writes |
| ;// the flag that lives in the same register. |
| dFilt DN D16.U8 |
| dAqflg DN D12.U8 |
| dApflg DN D17.U8 |
| |
| ;// Absolute differences: dAxy = |x - y| per byte (computed with VABD). |
| dAp0q0 DN D13.U8 |
| dAp1p0 DN D12.U8 |
| dAq1q0 DN D18.U8 |
| dAp2p0 DN D19.U8 |
| dAq2q0 DN D17.U8 |
| |
| ;// bSLT4 |
| ;// The dTC* and dTemp names are not referenced by the function in this file. |
| ;// NOTE(review): presumably they mirror the register use of |
| ;// armVCM4P10_DeblockingLumabSLT4_unsafe - confirm against that routine. |
| ;// They overlap D18 / D19, which also hold dAq1q0 / dAp2p0. |
| dTC0 DN D18.U8 |
| dTC1 DN D19.U8 |
| dTC01 DN D18.U8 |
| |
| dTCs DN D31.S8 |
| dTC DN D31.U8 |
| |
| ;// dMask_0 / dMask_1 are set to all-bytes 0 and all-bytes 1 before the loops |
| ;// and are not read again in this file. |
| ;// NOTE(review): presumably constants expected by the helper routines. |
| dMask_0 DN D14.U8 |
| dMask_1 DN D15.U8 |
| |
| ;// Core register holding 0; moved into a 32-bit lane of dFilt to clear it. |
| Mask_0 RN 11 |
| |
| dTemp DN D19.U8 |
| |
| ;// Of the names declared from here down to the function header, only the |
| ;// result registers dP_0n, dP_1n, dP_2n, dQ_0n, dQ_1n and dQ_2n are |
| ;// referenced by the function in this file (it stores them after the helper |
| ;// calls return). |
| ;// NOTE(review): the remaining names presumably document the register use of |
| ;// the imported armVCM4P10_DeblockingLumabSLT4_unsafe / ...bSGE4_unsafe |
| ;// helpers - confirm against those routines. |
| ;// Reminder: Qn overlaps D(2n) and D(2n+1), e.g. Q12 is D24:D25. |
| |
| ;// Computing P0,Q0 |
| qDq0p0 QN Q10.S16 |
| qDp1q1 QN Q11.S16 |
| qDelta QN Q10.S16 ; reuse qDq0p0 |
| dDelta DN D20.S8 |
| |
| |
| ;// Computing P1,Q1 |
| dRp0q0 DN D24.U8 |
| |
| dMaxP DN D23.U8 |
| dMinP DN D22.U8 |
| |
| dMaxQ DN D19.U8 |
| dMinQ DN D21.U8 |
| |
| dDeltaP DN D26.U8 |
| dDeltaQ DN D27.U8 |
| |
| qP_0n QN Q14.S16 |
| qQ_0n QN Q12.S16 |
| |
| ;// Result registers stored by the bS < 4 path. |
| dQ_0n DN D24.U8 |
| dQ_1n DN D25.U8 |
| dP_0n DN D29.U8 |
| dP_1n DN D30.U8 |
| |
| ;// bSGE4 |
| |
| qSp0q0 QN Q10.U16 |
| |
| qSp2q1 QN Q11.U16 |
| qSp0q0p1 QN Q12.U16 |
| qSp3p2 QN Q13.U16 |
| dHSp0q1 DN D28.U8 |
| |
| qSq2p1 QN Q11.U16 |
| qSp0q0q1 QN Q12.U16 |
| qSq3q2 QN Q13.U16 ;!! |
| dHSq0p1 DN D28.U8 ;!! |
| |
| qTemp1 QN Q11.U16 ;!!;qSp2q1 |
| qTemp2 QN Q12.U16 ;!!;qSp0q0p1 |
| |
| dP_0t DN D28.U8 ;!!;dHSp0q1 |
| dQ_0t DN D22.U8 ;!!;Temp1 |
| |
| ;// Result registers stored by the bS >= 4 path. dP_0n, dP_1n, dQ_0n and |
| ;// dQ_1n repeat the earlier definitions with the same registers; dP_2n and |
| ;// dQ_2n are additional. |
| dP_0n DN D29.U8 |
| dP_1n DN D30.U8 |
| dP_2n DN D31.U8 |
| |
| dQ_0n DN D24.U8 ;!!;Temp2 |
| dQ_1n DN D25.U8 ;!!;Temp2 |
| dQ_2n DN D28.U8 ;!!;dQ_0t |
| |
| ;// Function header |
| ;// |
| ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I |
| ;// |
| ;// Filters, in place, the rows around four row positions that are |
| ;// 4 * srcdstStep apart, starting at pSrcDst. Each position is handled as two |
| ;// groups of 8 bytes (16 bytes per row in total). For every group the code |
| ;// loads the 4 rows before and the 4 rows from the position onward, builds |
| ;// per-byte filter masks from alpha / beta, calls one of two external helper |
| ;// routines and writes the modified rows back. |
| ;// |
| ;// In: r0 pSrcDst - pointer to the first row at/after the first edge |
| ;// r1 srcdstStep - distance in bytes between rows |
| ;// r2 pAlpha - 2 bytes: [0] used for the first row position, |
| ;// [1] for the remaining three |
| ;// r3 pBeta - 2 bytes, used like pAlpha |
| ;// stack pThresholds - byte array, 2 entries consumed per 8-byte group |
| ;// stack pBS - byte array, 2 entries consumed per 8-byte group |
| ;// Out: r0 = OMX_Sts_NoErr (no argument checking is done in this routine) |
| ;// |
| ;// NOTE(review): M_START / M_ARG / M_LDR / M_END come from armCOMM_s.h. The |
| ;// "r11, d15" operands presumably request saving of the core registers up to |
| ;// r11 and the NEON registers up to d15 - confirm against the macro. |
| M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15 |
| |
| ;//Arguments on the stack |
| M_ARG ppThresholds, 4 |
| M_ARG ppBS, 4 |
| |
| ;// d0-dAlpha_0 |
| ;// d2-dBeta_0 |
| |
| ;// Pointers to the second alpha / beta byte, used at ExitLoopY. |
| ADD pAlpha_1, pAlpha_0, #1 |
| ADD pBeta_1, pBeta_0, #1 |
| |
| ;// Replicate the first alpha / beta byte to every lane and move pSrcDst |
| ;// four rows back so that it addresses the P_3 row. |
| VLD1 {dAlpha[]}, [pAlpha_0] |
| SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 |
| VLD1 {dBeta[]}, [pBeta_0] |
| |
| ;// Fetch the two stack arguments. |
| M_LDR pBS, ppBS |
| M_LDR pThresholds, ppThresholds |
| |
| ;// Zero value used to clear 32-bit lanes of dFilt. |
| MOV Mask_0,#0 |
| |
| ;dMask_0-14 |
| ;dMask_1-15 |
| |
| ;// Byte constants 0 and 1; not read again in this routine. |
| ;// NOTE(review): presumably inputs expected by the helper routines. |
| VMOV dMask_0, #0 |
| VMOV dMask_1, #1 |
| |
| ;// step = two rows; pSrcDst and pTmp each advance by this while loading. |
| ADD step, srcdstStep, srcdstStep |
| |
| ;// Loop control word: see LOOP_COUNT (2 inner x 4 outer passes). |
| LDR XY,=LOOP_COUNT |
| |
| ;// p0-p3 - d4-d7 |
| ;// q0-q3 - d8-d11 |
| ;// LoopY: one pass per row position. LoopX: one pass per 8-byte group. |
| LoopY |
| LoopX |
| ;// Read two bS bytes (pBS += 2); pTmp addresses the row after pSrcDst. |
| ;// If both bS bytes are zero nothing is loaded or filtered. |
| LDRH bS10, [pBS], #2 |
| ADD pTmp, pSrcDst, srcdstStep |
| CMP bS10, #0 |
| BEQ NoFilterBS0 |
| |
| ;// Load the eight rows, alternating between the two row pointers, with |
| ;// the first absolute differences interleaved between the loads. |
| VLD1 dP_3, [pSrcDst], step |
| VLD1 dP_2, [pTmp], step |
| VLD1 dP_1, [pSrcDst], step |
| VLD1 dP_0, [pTmp], step |
| VLD1 dQ_0, [pSrcDst], step |
| VABD dAp1p0, dP_0, dP_1 |
| VLD1 dQ_1, [pTmp] |
| VABD dAp0q0, dQ_0, dP_0 |
| VLD1 dQ_2, [pSrcDst], srcdstStep |
| |
| ;// dFilt = alpha > |q0 - p0| |
| VABD dAq1q0, dQ_1, dQ_0 |
| VABD dAp2p0, dP_2, dP_0 |
| VCGT dFilt, dAlpha, dAp0q0 |
| |
| ;// Low bS byte == 0 -> clear 32-bit lane 0 of dFilt (done by the VMOVEQ |
| ;// below; the NEON instructions in between do not alter the flags). |
| ;// dAp1p0 becomes max(|q1 - q0|, |p1 - p0|). |
| TST bS10, #0xff |
| VMAX dAp1p0, dAq1q0, dAp1p0 |
| VABD dAq2q0, dQ_2, dQ_0 |
| |
| VMOVEQ.U32 dFilt[0], Mask_0 |
| ;// High bS byte == 0 -> clear 32-bit lane 1 of dFilt (second VMOVEQ). |
| TST bS10, #0xff00 |
| |
| ;// Compare the remaining differences against beta. |
| VCGT dAp2p0, dBeta, dAp2p0 |
| VCGT dAp1p0, dBeta, dAp1p0 |
| |
| VMOVEQ.U32 dFilt[1], Mask_0 |
| |
| ;// pSrcDst now addresses the Q_3 row (7 rows after the P_3 row). |
| ;// dFilt &= beta > max(|q1 - q0|, |p1 - p0|) |
| VCGT dAq2q0, dBeta, dAq2q0 |
| VLD1 dQ_3, [pSrcDst] |
| VAND dFilt, dFilt, dAp1p0 |
| ;// Select the helper: only bit 2 of the LOW bS byte is tested, so both |
| ;// 4-byte halves of the group take the same path. The flags are consumed |
| ;// by BNE bSGE4 after the two VANDs. |
| TST bS10, #4 |
| |
| ;// dAqflg = dFilt & (beta > |q2 - q0|), dApflg = dFilt & (beta > |p2 - p0|). |
| ;// Order matters: dApflg is written to D17, which still holds dAq2q0 |
| ;// until dAqflg has been computed. |
| VAND dAqflg, dFilt, dAq2q0 |
| VAND dApflg, dFilt, dAp2p0 |
| |
| BNE bSGE4 |
| bSLT4 |
| ;// bS < 4 Filtering |
| ;// Step back 5 rows from the Q_3 row so pSrcDst addresses the P_1 row. |
| SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 |
| SUB pSrcDst, pSrcDst, srcdstStep |
| |
| ;// NOTE(review): external helper. It is expected to leave its results in |
| ;// dP_1n, dP_0n, dQ_0n and dQ_1n; since this path does not adjust |
| ;// pThresholds itself (the other two paths add 2), the helper presumably |
| ;// advances pThresholds by 2 - confirm against the helper source. |
| BL armVCM4P10_DeblockingLumabSLT4_unsafe |
| |
| ;// Result Storage |
| ;// Writes rows P_1, P_0, Q_0, Q_1. After the first two stores pSrcDst is on |
| ;// the Q_0 row, so pTmp = pSrcDst - 4 rows is the P_3 row of this group and |
| ;// pTmp + 8 is the P_3 row of the next 8-byte group. |
| ;// ADDS shifts the loop word; its C flag drives BCC LoopX and its Z flag |
| ;// drives BNE LoopY at ExitLoopY (nothing in between changes the flags). |
| VST1 dP_1n, [pSrcDst], srcdstStep |
| VST1 dP_0n, [pSrcDst], srcdstStep |
| SUB pTmp, pSrcDst, srcdstStep, LSL #2 |
| VST1 dQ_0n, [pSrcDst], srcdstStep |
| ADDS XY, XY, XY |
| VST1 dQ_1n, [pSrcDst] |
| ADD pSrcDst, pTmp, #8 |
| |
| BCC LoopX |
| B ExitLoopY |
| |
| ;// Both bS bytes are zero: move on to the next 8-byte group, skip the two |
| ;// matching threshold bytes and shift the loop word. |
| NoFilterBS0 |
| ADD pSrcDst, pSrcDst, #8 |
| ADDS XY, XY, XY |
| ADD pThresholds, pThresholds, #2 |
| BCC LoopX |
| B ExitLoopY |
| bSGE4 |
| ;// bS >= 4 Filtering |
| ;// Step back 6 rows from the Q_3 row so pSrcDst addresses the P_2 row. |
| SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 |
| SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 |
| ;// NOTE(review): external helper, expected to leave its results in |
| ;// dP_2n, dP_1n, dP_0n, dQ_0n, dQ_1n and dQ_2n - confirm against the helper. |
| BL armVCM4P10_DeblockingLumabSGE4_unsafe |
| |
| ;// Result Storage |
| ;// Writes rows P_2 .. Q_2. After three stores pSrcDst is on the Q_0 row, so |
| ;// pTmp = pSrcDst - 4 rows is again the P_3 row of this group. |
| ;// pThresholds is advanced by 2 here explicitly. |
| VST1 dP_2n, [pSrcDst], srcdstStep |
| VST1 dP_1n, [pSrcDst], srcdstStep |
| VST1 dP_0n, [pSrcDst], srcdstStep |
| SUB pTmp, pSrcDst, srcdstStep, LSL #2 |
| VST1 dQ_0n, [pSrcDst], srcdstStep |
| ADDS XY,XY,XY |
| VST1 dQ_1n, [pSrcDst], srcdstStep |
| ADD pThresholds, pThresholds, #2 |
| VST1 dQ_2n, [pSrcDst] |
| |
| ADD pSrcDst, pTmp, #8 |
| BCC LoopX |
| |
| ;// End of one row position: go back 16 bytes to the first column and move |
| ;// down 4 rows, switch alpha / beta to their second entries, and repeat |
| ;// while the loop word left by the last ADDS is non-zero. |
| ExitLoopY |
| |
| SUB pSrcDst, pSrcDst, #16 |
| VLD1 {dAlpha[]}, [pAlpha_1] |
| ADD pSrcDst, pSrcDst, srcdstStep, LSL #2 |
| VLD1 {dBeta[]}, [pBeta_1] |
| BNE LoopY |
| |
| ;// Always report success. |
| MOV r0, #OMX_Sts_NoErr |
| |
| M_END |
| |
| ENDIF |
| |
| |
| |
| |
| END |
| |
| |