| ;// |
| ;// Copyright (C) 2007-2008 ARM Limited |
| ;// |
| ;// Licensed under the Apache License, Version 2.0 (the "License"); |
| ;// you may not use this file except in compliance with the License. |
| ;// You may obtain a copy of the License at |
| ;// |
| ;// http://www.apache.org/licenses/LICENSE-2.0 |
| ;// |
| ;// Unless required by applicable law or agreed to in writing, software |
| ;// distributed under the License is distributed on an "AS IS" BASIS, |
| ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ;// See the License for the specific language governing permissions and |
| ;// limitations under the License. |
| ;// |
| ;// |
| ;// |
| ;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s |
| ;// OpenMAX DL: v1.0.2 |
| ;// Revision: 12290 |
| ;// Date: Wednesday, April 9, 2008 |
| ;// |
| ;// |
| ;// |
| ;// |
| |
| INCLUDE omxtypes_s.h |
| INCLUDE armCOMM_s.h |
| |
| M_VARIANTS CortexA8 |
| |
| |
| IF CortexA8 |
| |
| pAlpha RN 2 |
| pBeta RN 3 |
| |
| pThresholds RN 5 |
| pBS RN 4 |
| bS3210 RN 6 |
| |
| ;// Pixels: p0-p3 live in D4-D7 and q0-q3 in D8-D11, viewed as unsigned bytes |
| dP_0 DN D4.U8 |
| dP_1 DN D5.U8 |
| dP_2 DN D6.U8 |
| dP_3 DN D7.U8 |
| dQ_0 DN D8.U8 |
| dQ_1 DN D9.U8 |
| dQ_2 DN D10.U8 |
| dQ_3 DN D11.U8 |
| |
| |
| ;// Filtering decision: alpha/beta thresholds (D0/D2) and the filt/aqflg/apflg masks |
| dAlpha DN D0.U8 |
| dBeta DN D2.U8 |
| |
| dFilt DN D16.U8 |
| dAqflg DN D12.U8 |
| dApflg DN D17.U8 |
| |
| dAp0q0 DN D13.U8 |
| |
| ;// bSLT4 path: tC values (D31 is named both as S8 and as U8), masks and scratch. dMask_* are never written in this file; dDummy shares D17 with dApflg |
| dTC3210 DN D18.U8 |
| dTCs DN D31.S8 |
| dTC DN D31.U8 |
| |
| dMask_0 DN D14.U8 |
| dMask_1 DN D15.U8 |
| dMask_4 DN D26.U16 |
| |
| dTemp DN D28.U8 |
| dDummy DN D17.U8 |
| |
| ;// bSLT4 path: signed 16-bit differences used to build delta for P0/Q0 |
| qDq0p0 QN Q10.S16 |
| qDp1q1 QN Q11.S16 |
| qDelta QN Q10.S16 ; same register as qDq0p0 (Q10): delta overwrites the q0-p0 difference |
| dDelta DN D20.S8 |
| |
| |
| ;// bSLT4 path: 16-bit working copies of P0/Q0 (qP_0n, qQ_0n) and their narrowed 8-bit results (dP_0n, dQ_0n) |
| qP_0n QN Q14.S16 |
| qQ_0n QN Q12.S16 |
| |
| dQ_0n DN D24.U8 |
| dP_0n DN D29.U8 |
| |
| ;// bSGE4 path: half-sum temporaries and the filtered P0/Q0 that replace them |
| |
| dHSp0q1 DN D13.U8 |
| dHSq0p1 DN D31.U8 |
| |
| dBS3210 DN D28.U16 |
| |
| dP_0t DN D13.U8 ; same register as dHSp0q1 (D13) |
| dQ_0t DN D31.U8 ; same register as dHSq0p1 (D31) |
| |
| dP_0n DN D29.U8 |
| dQ_0n DN D24.U8 ; dP_0n/dQ_0n repeat the definitions given earlier (same registers D29/D24) |
| |
| ;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe |
| ;// |
| ;// Inputs - Pixels - p0: D4, p1: D5, q0: D8, q1: D9 (p2/p3/q2/q3 are not read here) |
| ;// - Filter masks - filt: D16; dMask_0: D14 and dMask_1: D15 are also read and must be set up by the caller |
| ;// - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3 |
| ;// |
| ;// Outputs - Pixels - P0: D29, Q0: D24; alpha and beta are reloaded (replicated) into D0 and D2 |
| ;// - Additional Params - pThresholds: r5, post-incremented past the 4 bytes loaded |
| |
| ;// Registers written here - D0, D2, D18, D20-D25, D28, D29, D31 (callers may conservatively treat D18-D31 as corrupted) |
| |
| |
| M_START armVCM4P10_DeblockingChromabSLT4_unsafe |
| |
| |
| ;// Load 4 threshold bytes (presumably tC0, one per bS value - confirm against caller) into the low word of dTC3210 (D18) |
| ;// and advance pThresholds by the 4 bytes read |
| |
| VLD1 d18.U32[0], [pThresholds]! ; d18 is dTC3210; lane 0 only, with pointer post-increment |
| |
| ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3; |
| ;// computed here as (((p1-q1) >> 2) + (q0-p0) + 1) >> 1 using VSHR followed by VRHADD |
| |
| ;// qDp1q1 (Q11) = p1 - q1 and qDq0p0 (Q10) = q0 - p0, both widened to 16 bits |
| ;// VMOV + VZIP.8 interleave D18 with a copy of itself, so each threshold byte covers two adjacent lanes |
| VSUBL qDp1q1, dP_1, dQ_1 |
| VMOV dTemp, dTC3210 |
| VSUBL qDq0p0, dQ_0, dP_0 |
| VSHR qDp1q1, qDp1q1, #2 |
| VZIP.8 dTC3210, dTemp |
| |
| ;// qDelta shares Q10 with qDq0p0 |
| |
| ;// dTC = dTC3210 + dMask_1, after VBIF has replaced dTC3210 with dMask_0 wherever dFilt bits are clear |
| |
| ;// NOTE(review): dMask_0 / dMask_1 are not set in this file; presumably 0 and 1 in every lane - confirm in the caller |
| ;// dTC3210 - D18, dTemp - D28, dTC/dTCs - D31 |
| ;// VRHADD: qDelta = (qDp1q1 + qDq0p0 + 1) >> 1 per 16-bit lane |
| VBIF dTC3210, dMask_0, dFilt |
| VRHADD qDelta, qDp1q1, qDq0p0 |
| VADD dTC, dTC3210, dMask_1 |
| VQMOVN dDelta, qDelta |
| ;// dDelta (D20) = qDelta saturated to signed 8 bits |
| |
| ;// dDelta = armClip(-tC, tC, dDelta): VMIN against dTCs, VNEG dTCs in place, then VMAX against the negated value |
| VLD1 {dAlpha[]}, [pAlpha] |
| VMIN dDelta, dDelta, dTCs |
| VNEG dTCs, dTCs |
| VLD1 {dBeta[]}, [pBeta] |
| ;// The alpha/beta loads are interleaved with the clip; D0/D2 are not read again in this routine |
| VMAX dDelta, dDelta, dTCs |
| |
| ;// dP_0n - D29, dQ_0n - D24 |
| ;// qP_0n - Q14, qQ_0n - Q12 |
| |
| ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta); |
| ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta); |
| |
| ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta); |
| ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta); |
| |
| ;// Widen p0/q0 to 16 bits so that applying the signed delta cannot wrap, |
| ;// then narrow back to unsigned 8 bits with saturation (VQMOVUN) |
| |
| VMOVL qP_0n, dP_0 |
| VMOVL qQ_0n, dQ_0 |
| |
| ;// Apply +delta to p0 and -delta to q0 |
| VADDW qP_0n, qP_0n, dDelta |
| VSUBW qQ_0n, qQ_0n, dDelta |
| |
| VQMOVUN dP_0n, qP_0n |
| VQMOVUN dQ_0n, qQ_0n |
| |
| M_END |
| |
| ;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe() |
| ;// |
| ;// Inputs - Pixels - p0: D4, p1: D5, q0: D8, q1: D9 (p2/p3/q2/q3 are not read here) |
| ;// - Additional Params - pAlpha: r2, pBeta: r3, pThresholds: r5 |
| ;// - The filt/aqflg/apflg masks and dMask_1 are not read by this routine |
| ;// |
| ;// Outputs - Pixels - P0: D13 (dP_0t), Q0: D31 (dQ_0t); alpha/beta reloaded (replicated) into D0/D2; pThresholds advanced by 4 |
| |
| ;// Registers written here - D0, D2, D13, D31 (callers may conservatively treat D18-D31 as corrupted) |
| |
| M_START armVCM4P10_DeblockingChromabSGE4_unsafe |
| |
| ;// dHSp0q1 (D13) = (p0 + q1) >> 1, dHSq0p1 (D31) = (q0 + p1) >> 1 |
| ;// VHADD is a truncating halving add on the unsigned byte lanes |
| VHADD dHSp0q1, dP_0, dQ_1 |
| VHADD dHSq0p1, dQ_0, dP_1 |
| |
| ;// Reload alpha/beta and advance pThresholds by 4 (no thresholds are read on this path); these are interleaved with the filter arithmetic |
| |
| ;// P0' = (((p0 + q1) >> 1) + p1 + 1) >> 1, which equals (2*p1 + p0 + q1 + 2) >> 2 |
| ;// Q0' = (((q0 + p1) >> 1) + q1 + 1) >> 1, which equals (2*q1 + q0 + p1 + 2) >> 2 |
| ;// dP_0t reuses D13 (dHSp0q1); dQ_0t reuses D31 (dHSq0p1) |
| ;// VRHADD is the rounding halving add that supplies the final +1 and >> 1 |
| VLD1 {dAlpha[]}, [pAlpha] |
| ADD pThresholds, pThresholds, #4 |
| VLD1 {dBeta[]}, [pBeta] |
| |
| VRHADD dP_0t, dHSp0q1, dP_1 |
| VRHADD dQ_0t, dHSq0p1, dQ_1 |
| |
| M_END |
| |
| ENDIF |
| |
| END |