| ;// |
| ;// Copyright (C) 2007-2008 ARM Limited |
| ;// |
| ;// Licensed under the Apache License, Version 2.0 (the "License"); |
| ;// you may not use this file except in compliance with the License. |
| ;// You may obtain a copy of the License at |
| ;// |
| ;// http://www.apache.org/licenses/LICENSE-2.0 |
| ;// |
| ;// Unless required by applicable law or agreed to in writing, software |
| ;// distributed under the License is distributed on an "AS IS" BASIS, |
| ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ;// See the License for the specific language governing permissions and |
| ;// limitations under the License. |
| ;// |
| ;// |
| ;// |
| ;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s |
| ;// OpenMAX DL: v1.0.2 |
| ;// Revision: 12290 |
| ;// Date: Wednesday, April 9, 2008 |
| ;// |
| ;// |
| ;// |
| ;// |
| |
| INCLUDE omxtypes_s.h |
| INCLUDE armCOMM_s.h |
| |
| EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe |
| |
| M_VARIANTS CortexA8 |
| |
| IF CortexA8 |
| M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11 |
| |
| ;// Declare input registers |
| pSrc RN 0 |
| srcStep RN 1 |
| pDst RN 2 |
| dstStep RN 3 |
| |
| ;// Declare Neon registers |
| dTCoeff5 DN 30.U8 |
| dTCoeff20 DN 31.U8 |
| dCoeff5 DN 30.S16 |
| dCoeff20 DN 31.S16 |
| |
| qSrcA01 QN 0.U8 |
| qSrcB23 QN 1.U8 |
| qSrcC45 QN 2.U8 |
| qSrcD67 QN 3.U8 |
| qSrcE89 QN 4.U8 |
| qSrcF1011 QN 5.U8 |
| qSrcG1213 QN 6.U8 |
| qSrcH1415 QN 7.U8 |
| qSrcI1617 QN 8.U8 |
| |
| dSrcA0 DN 0.U8 |
| dSrcB2 DN 2.U8 |
| dSrcC4 DN 4.U8 |
| dSrcD6 DN 6.U8 |
| dSrcE8 DN 8.U8 |
| dSrcF10 DN 10.U8 |
| dSrcG12 DN 12.U8 |
| dSrcH14 DN 14.U8 |
| dSrcI16 DN 16.U8 |
| |
| dSrcA1 DN 1.U8 |
| dSrcB3 DN 3.U8 |
| dSrcC5 DN 5.U8 |
| dSrcD7 DN 7.U8 |
| dSrcE9 DN 9.U8 |
| dSrcF11 DN 11.U8 |
| dSrcG13 DN 13.U8 |
| dSrcH15 DN 15.U8 |
| dSrcI17 DN 17.U8 |
| |
| qTempP01 QN 9.S16 |
| qTempQ01 QN 10.S16 |
| qTempR01 QN 11.S16 |
| qTempS01 QN 12.S16 |
| |
| qTempP23 QN 0.S16 |
| qTempQ23 QN 1.S16 |
| qTempR23 QN 2.S16 |
| qTempS23 QN 3.S16 |
| |
| dTempP0 DN 18.S16 |
| dTempP1 DN 19.S16 |
| dTempP2 DN 0.S16 |
| |
| dTempQ0 DN 20.S16 |
| dTempQ1 DN 21.S16 |
| dTempQ2 DN 2.S16 |
| |
| dTempR0 DN 22.S16 |
| dTempR1 DN 23.S16 |
| dTempR2 DN 4.S16 |
| |
| dTempS0 DN 24.S16 |
| dTempS1 DN 25.S16 |
| dTempS2 DN 6.S16 |
| |
| dTempB0 DN 26.S16 |
| dTempC0 DN 27.S16 |
| dTempD0 DN 28.S16 |
| dTempF0 DN 29.S16 |
| |
| dTempAcc0 DN 0.U16 |
| dTempAcc1 DN 2.U16 |
| dTempAcc2 DN 4.U16 |
| dTempAcc3 DN 6.U16 |
| |
| dAcc0 DN 0.U8 |
| dAcc1 DN 2.U8 |
| dAcc2 DN 4.U8 |
| dAcc3 DN 6.U8 |
| |
| qAcc0 QN 0.S32 |
| qAcc1 QN 1.S32 |
| qAcc2 QN 2.S32 |
| qAcc3 QN 3.S32 |
| |
| qTAcc0 QN 0.U16 |
| qTAcc1 QN 1.U16 |
| qTAcc2 QN 2.U16 |
| qTAcc3 QN 3.U16 |
| |
| qTmp QN 4.S16 |
| dTmp DN 8.S16 |
| |
| VLD1 qSrcA01, [pSrc], srcStep ;// [a0 a1 a2 a3 .. a15] |
| ADD r12, pSrc, srcStep, LSL #2 |
| VMOV dTCoeff5, #5 |
| VMOV dTCoeff20, #20 |
| VLD1 qSrcF1011, [r12], srcStep |
| VLD1 qSrcB23, [pSrc], srcStep ;// [b0 b1 b2 b3 .. b15] |
| |
| VLD1 qSrcG1213, [r12], srcStep |
| VADDL qTempP01, dSrcA0, dSrcF10 |
| VLD1 qSrcC45, [pSrc], srcStep ;// [c0 c1 c2 c3 .. c15] |
| VADDL qTempP23, dSrcA1, dSrcF11 |
| VLD1 qSrcD67, [pSrc], srcStep |
| VADDL qTempQ01, dSrcB2, dSrcG12 |
| VLD1 qSrcE89, [pSrc], srcStep |
| |
| ;//t0 |
| VMLAL qTempP01, dSrcC4, dTCoeff20 |
| |
| VLD1 qSrcH1415, [r12], srcStep |
| |
| VMLAL qTempP23, dSrcC5, dTCoeff20 |
| |
| VLD1 qSrcI1617, [r12], srcStep ;// [i0 i1 i2 i3 .. ] |
| |
| VMLAL qTempP01, dSrcD6, dTCoeff20 |
| VMLAL qTempQ01, dSrcD6, dTCoeff20 |
| VMLSL qTempP23, dSrcB3, dTCoeff5 |
| |
| VADDL qTempR01, dSrcC4, dSrcH14 |
| |
| VMLSL qTempP01, dSrcB2, dTCoeff5 |
| |
| VADDL qTempQ23, dSrcB3, dSrcG13 |
| |
| VMLAL qTempP23, dSrcD7, dTCoeff20 |
| VMLAL qTempQ01, dSrcE8, dTCoeff20 |
| |
| VMLSL qTempP01, dSrcE8, dTCoeff5 |
| VMLAL qTempQ23, dSrcD7, dTCoeff20 |
| |
| VMLSL qTempP23, dSrcE9, dTCoeff5 |
| |
| ;//t1 |
| |
| VMLAL qTempR01, dSrcE8, dTCoeff20 |
| VMLSL qTempQ01, dSrcC4, dTCoeff5 |
| VMLSL qTempQ23, dSrcC5, dTCoeff5 |
| VADDL qTempR23, dSrcC5, dSrcH15 |
| |
| VMLAL qTempR01, dSrcF10, dTCoeff20 |
| VMLSL qTempQ01, dSrcF10, dTCoeff5 |
| VMLAL qTempQ23, dSrcE9, dTCoeff20 |
| VMLAL qTempR23, dSrcE9, dTCoeff20 |
| VADDL qTempS01, dSrcD6, dSrcI16 |
| |
| |
| VMLSL qTempR01, dSrcD6, dTCoeff5 |
| VMLSL qTempQ23, dSrcF11, dTCoeff5 |
| VMLSL qTempR23, dSrcD7, dTCoeff5 |
| |
| ;//t2 |
| VADDL qTempS23, dSrcD7, dSrcI17 |
| VMLAL qTempS01, dSrcF10, dTCoeff20 |
| VMLSL qTempR01, dSrcG12, dTCoeff5 |
| VMLSL qTempR23, dSrcG13, dTCoeff5 |
| |
| VMLAL qTempS23, dSrcF11, dTCoeff20 |
| VMLAL qTempS01, dSrcG12, dTCoeff20 |
| VEXT dTempB0, dTempP0, dTempP1, #1 |
| VMLAL qTempR23, dSrcF11, dTCoeff20 |
| |
| |
| ;//t3 |
| VMLAL qTempS23, dSrcG13, dTCoeff20 |
| VMLSL qTempS01, dSrcE8, dTCoeff5 |
| VEXT dTempC0, dTempP0, dTempP1, #2 |
| VMOV dCoeff20, #20 |
| VMLSL qTempS23, dSrcE9, dTCoeff5 |
| VMLSL qTempS01, dSrcH14, dTCoeff5 |
| VEXT dTempF0, dTempP1, dTempP2, #1 |
| VEXT dTempD0, dTempP0, dTempP1, #3 |
| VMLSL qTempS23, dSrcH15, dTCoeff5 |
| |
| VADDL qAcc0, dTempP0, dTempF0 |
| VADD dTempC0, dTempC0, dTempD0 |
| ;//h |
| VMOV dCoeff5, #5 |
| |
| ;// res0 |
| VADD dTempB0, dTempB0, dTempP1 |
| VMLAL qAcc0, dTempC0, dCoeff20 |
| VEXT dTempC0, dTempQ0, dTempQ1, #2 |
| VEXT dTempD0, dTempQ0, dTempQ1, #3 |
| VEXT dTempF0, dTempQ1, dTempQ2, #1 |
| VMLSL qAcc0, dTempB0, dCoeff5 |
| |
| ;// res1 |
| VEXT dTempB0, dTempQ0, dTempQ1, #1 |
| VADDL qAcc1, dTempQ0, dTempF0 |
| VADD dTempC0, dTempC0, dTempD0 |
| VADD dTempB0, dTempB0, dTempQ1 |
| VEXT dTempD0, dTempR0, dTempR1, #3 |
| VMLAL qAcc1, dTempC0, dCoeff20 |
| VEXT dTempF0, dTempR1, dTempR2, #1 |
| VEXT dTempC0, dTempR0, dTempR1, #2 |
| VEXT dTmp, dTempR0, dTempR1, #1 |
| VADDL qAcc2, dTempR0, dTempF0 |
| VMLSL qAcc1, dTempB0, dCoeff5 |
| ; VEXT dTempB0, dTempR0, dTempR1, #1 |
| VADD dTempC0, dTempC0, dTempD0 |
| |
| ;// res2 |
| VADD dTempB0, dTmp, dTempR1 |
| VEXT dTempD0, dTempS0, dTempS1, #3 |
| VMLAL qAcc2, dTempC0, dCoeff20 |
| ; VADD dTempB0, dTempB0, dTempR1 |
| |
| ;// res3 |
| VEXT dTempC0, dTempS0, dTempS1, #2 |
| VEXT dTempF0, dTempS1, dTempS2, #1 |
| VADD dTempC0, dTempC0, dTempD0 |
| VEXT dTmp, dTempS0, dTempS1, #1 |
| VADDL qAcc3, dTempS0, dTempF0 |
| VMLSL qAcc2, dTempB0, dCoeff5 |
| VMLAL qAcc3, dTempC0, dCoeff20 |
| VADD dTmp, dTmp, dTempS1 |
| VMLSL qAcc3, dTmp, dCoeff5 |
| |
| VQRSHRUN dTempAcc0, qAcc0, #10 |
| VQRSHRUN dTempAcc1, qAcc1, #10 |
| VQRSHRUN dTempAcc2, qAcc2, #10 |
| VQRSHRUN dTempAcc3, qAcc3, #10 |
| |
| VQMOVN dAcc0, qTAcc0 |
| VQMOVN dAcc1, qTAcc1 |
| VQMOVN dAcc2, qTAcc2 |
| VQMOVN dAcc3, qTAcc3 |
| |
| M_END |
| |
| ENDIF |
| |
| |
| |
| |
| |
| END |
| |