| /* |
| * Copyright 2024 The LibYuv Project Authors. All rights reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "libyuv/row.h" |
| |
| #ifdef __cplusplus |
| namespace libyuv { |
| extern "C" { |
| #endif |
| |
| #if !defined(LIBYUV_DISABLE_SVE) && defined(__aarch64__) |
| |
// Asm fragment: loads Y, U and V bytes for a 4:4:4 row under predicate p1,
// each byte zero-extended into a 16-bit lane (z0 = Y, z1 = U, z2 = V).
// All three source pointers then advance by %[vl] bytes and a prefetch is
// issued 448 bytes ahead of each. The trn1 on z0 copies every Y byte into the
// upper byte of its own 16-bit lane.
// The enclosing asm block must supply the operands src_y, src_u, src_v and vl
// and must have set up p1.
| #define READYUV444_SVE \ |
| "ld1b {z0.h}, p1/z, [%[src_y]] \n" \ |
| "ld1b {z1.h}, p1/z, [%[src_u]] \n" \ |
| "ld1b {z2.h}, p1/z, [%[src_v]] \n" \ |
| "add %[src_y], %[src_y], %[vl] \n" \ |
| "add %[src_u], %[src_u], %[vl] \n" \ |
| "add %[src_v], %[src_v], %[vl] \n" \ |
| "prfm pldl1keep, [%[src_y], 448] \n" \ |
| "prfm pldl1keep, [%[src_u], 448] \n" \ |
| "trn1 z0.b, z0.b, z0.b \n" \ |
| "prfm pldl1keep, [%[src_v], 448] \n" |
| |
// Asm fragment: 4:2:2 counterpart of READYUV444_SVE. Y bytes are loaded into
// 16-bit lanes of z0, while U and V bytes are loaded into 32-bit lanes of
// z1/z2, i.e. half as many chroma samples as luma samples per vector.
// The pointers advance by the matching element counts (inch for Y, incw for
// U and V). The trn1 .h steps copy each chroma sample into both 16-bit halves
// of its 32-bit lane so that z1/z2 line up with the 16-bit Y lanes; the
// trn1 .b step copies every Y byte into the upper byte of its 16-bit lane.
// The enclosing asm block must supply src_y, src_u, src_v and a valid p1.
| #define READYUV422_SVE \ |
| "ld1b {z0.h}, p1/z, [%[src_y]] \n" \ |
| "ld1b {z1.s}, p1/z, [%[src_u]] \n" \ |
| "ld1b {z2.s}, p1/z, [%[src_v]] \n" \ |
| "inch %[src_y] \n" \ |
| "incw %[src_u] \n" \ |
| "incw %[src_v] \n" \ |
| "prfm pldl1keep, [%[src_y], 448] \n" \ |
| "prfm pldl1keep, [%[src_u], 128] \n" \ |
| "prfm pldl1keep, [%[src_v], 128] \n" \ |
| "trn1 z0.b, z0.b, z0.b \n" \ |
| "trn1 z1.h, z1.h, z1.h \n" \ |
| "trn1 z2.h, z2.h, z2.h \n" |
| |
// Asm fragment: broadcasts the conversion constants into z24-z31 under
// predicate p0. Bytes 0..3 at %[kUVCoeff] are replicated into the 16-bit lanes
// of z28..z31, and the 16-bit values at byte offsets 0, 2, 4 and 6 of
// %[kRGBCoeffBias] are replicated into z24..z27.
// The enclosing asm block must supply the operands kUVCoeff and kRGBCoeffBias
// and must have initialised p0 beforehand.
| #define YUVTORGB_SVE_SETUP \ |
| "ld1rb {z28.h}, p0/z, [%[kUVCoeff], #0] \n" \ |
| "ld1rb {z29.h}, p0/z, [%[kUVCoeff], #1] \n" \ |
| "ld1rb {z30.h}, p0/z, [%[kUVCoeff], #2] \n" \ |
| "ld1rb {z31.h}, p0/z, [%[kUVCoeff], #3] \n" \ |
| "ld1rh {z24.h}, p0/z, [%[kRGBCoeffBias], #0] \n" \ |
| "ld1rh {z25.h}, p0/z, [%[kRGBCoeffBias], #2] \n" \ |
| "ld1rh {z26.h}, p0/z, [%[kRGBCoeffBias], #4] \n" \ |
| "ld1rh {z27.h}, p0/z, [%[kRGBCoeffBias], #6] \n" |
| |
// Asm fragment: YUV -> RGB arithmetic on 16-bit lanes.
// Inputs: z0 = Y, z1 = U, z2 = V as left by the READYUV*_SVE fragments, plus
// the constants loaded by YUVTORGB_SVE_SETUP (z24-z31) and an all-true p0.
// Outputs: z16 = B, z17 = G, z18 = R, still as 16-bit values; the unsigned
// saturating subtracts clamp results that would go below zero.
// Scratch registers written: z4, z5, z6.
| #define I4XXTORGB_SVE \ |
| "umulh z0.h, z24.h, z0.h \n" /* Y */ \ |
| "mul z6.h, z30.h, z1.h \n" \ |
| "mul z4.h, z28.h, z1.h \n" /* DB */ \ |
| "mul z5.h, z29.h, z2.h \n" /* DR */ \ |
| "mla z6.h, p0/m, z31.h, z2.h \n" /* DG */ \ |
| "add z17.h, z0.h, z26.h \n" /* G */ \ |
| "add z16.h, z0.h, z4.h \n" /* B */ \ |
| "add z18.h, z0.h, z5.h \n" /* R */ \ |
| "uqsub z17.h, z17.h, z6.h \n" /* G */ \ |
| "uqsub z16.h, z16.h, z25.h \n" /* B */ \ |
| "uqsub z18.h, z18.h, z27.h \n" /* R */ |
| |
| // Convert from 2.14 fixed point RGB to 8 bit RGBA, interleaving as BG and RA |
| // pairs to allow us to use ST2 for storing rather than ST4. |
// Inputs: z16 = B, z17 = G, z18 = R in 16-bit lanes, and alpha in the even
// bytes of z19. Each colour is narrowed with a saturating right shift by 6.
// Outputs: z16 holds B in its even bytes and G in its odd bytes; z17 holds R
// in its even bytes and alpha in its odd bytes.
| #define RGBTORGBA8_SVE \ |
| "uqshrnb z16.b, z16.h, #6 \n" \ |
| "uqshrnb z18.b, z18.h, #6 \n" \ |
| "uqshrnt z16.b, z17.h, #6 \n" \ |
| "trn1 z17.b, z18.b, z19.b \n" |
| |
// Vector and predicate registers declared as clobbered by the asm blocks of
// the I4xx -> ARGB row functions in this file.
| #define YUVTORGB_SVE_REGS \ |
| "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z16", "z17", "z18", "z19", \ |
| "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", "p0", "p1" |
| |
// Converts one row of planar 4:4:4 YUV to 8-bit ARGB with SVE2.
//   src_y, src_u, src_v: source planes, one byte per pixel each.
//   dst_argb: destination row; four bytes (B, G, R, A) are stored per pixel,
//     with A fixed at 255.
//   yuvconstants: provides the kUVCoeff and kRGBCoeffBias tables.
//   width: number of pixels to convert.
// Whole vectors are processed in the main loop under an all-true predicate;
// any remaining pixels get one extra iteration predicated by whilelt.
// The asm is volatile because none of its output operands are read after the
// statement and its only useful effect is the store through dst_argb, so the
// compiler must not treat it as removable. This also matches
// ARGBToUVMatrixRow_SVE2 in this file.
| void I444ToARGBRow_SVE2(const uint8_t* src_y, |
| const uint8_t* src_u, |
| const uint8_t* src_v, |
| uint8_t* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| uint64_t vl; |
| asm volatile("cnth %[vl] \n" |
| "ptrue p0.b \n" YUVTORGB_SVE_SETUP |
| "dup z19.b, #255 \n" /* A */ |
| "subs %w[width], %w[width], %w[vl] \n" |
| "b.lt 2f \n" |
| |
| // Run bulk of computation with an all-true predicate to avoid predicate |
| // generation overhead. |
| "ptrue p1.h \n" |
| "1: \n" READYUV444_SVE |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "subs %w[width], %w[width], %w[vl] \n" |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| "add %[dst_argb], %[dst_argb], %[vl], lsl #2 \n" |
| "b.ge 1b \n" |
| |
| "2: \n" |
| "adds %w[width], %w[width], %w[vl] \n" |
| "b.eq 99f \n" |
| |
| // Calculate a predicate for the final iteration to deal with the tail. |
| "whilelt p1.h, wzr, %w[width] \n" READYUV444_SVE |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| |
| "99: \n" |
| : [src_y] "+r"(src_y), // %[src_y] |
| [src_u] "+r"(src_u), // %[src_u] |
| [src_v] "+r"(src_v), // %[src_v] |
| [dst_argb] "+r"(dst_argb), // %[dst_argb] |
| [width] "+r"(width), // %[width] |
| [vl] "=&r"(vl) // %[vl] |
| : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] |
| [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] |
| : "cc", "memory", YUVTORGB_SVE_REGS); |
| } |
| |
// Converts one row of planar 4:2:2 YUV to 8-bit ARGB with SVE2.
//   src_y: luma plane, one byte per pixel.
//   src_u, src_v: chroma planes, one byte per pair of pixels.
//   dst_argb: destination row; four bytes (B, G, R, A) are stored per pixel,
//     with A fixed at 255.
//   yuvconstants: provides the kUVCoeff and kRGBCoeffBias tables.
//   width: number of pixels to convert.
// Whole vectors are processed in the main loop under an all-true predicate;
// any remaining pixels get one extra iteration predicated by whilelt.
// The asm is volatile because none of its output operands are read after the
// statement and its only useful effect is the store through dst_argb, so the
// compiler must not treat it as removable. This also matches
// ARGBToUVMatrixRow_SVE2 in this file.
| void I422ToARGBRow_SVE2(const uint8_t* src_y, |
| const uint8_t* src_u, |
| const uint8_t* src_v, |
| uint8_t* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| uint64_t vl; |
| asm volatile("cnth %[vl] \n" |
| "ptrue p0.b \n" YUVTORGB_SVE_SETUP |
| "dup z19.b, #255 \n" /* A */ |
| "subs %w[width], %w[width], %w[vl] \n" |
| "b.lt 2f \n" |
| |
| // Run bulk of computation with an all-true predicate to avoid predicate |
| // generation overhead. |
| "ptrue p1.h \n" |
| "1: \n" READYUV422_SVE |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "subs %w[width], %w[width], %w[vl] \n" |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| "add %[dst_argb], %[dst_argb], %[vl], lsl #2 \n" |
| "b.ge 1b \n" |
| |
| "2: \n" |
| "adds %w[width], %w[width], %w[vl] \n" |
| "b.eq 99f \n" |
| |
| // Calculate a predicate for the final iteration to deal with the tail. |
| "whilelt p1.h, wzr, %w[width] \n" READYUV422_SVE |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| |
| "99: \n" |
| : [src_y] "+r"(src_y), // %[src_y] |
| [src_u] "+r"(src_u), // %[src_u] |
| [src_v] "+r"(src_v), // %[src_v] |
| [dst_argb] "+r"(dst_argb), // %[dst_argb] |
| [width] "+r"(width), // %[width] |
| [vl] "=&r"(vl) // %[vl] |
| : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] |
| [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] |
| : "cc", "memory", YUVTORGB_SVE_REGS); |
| } |
| |
// Converts one row of planar 4:4:4 YUV plus a separate alpha plane to 8-bit
// ARGB with SVE2.
//   src_y, src_u, src_v: source planes, one byte per pixel each.
//   src_a: alpha plane, one byte per pixel, copied into the A byte.
//   dst_argb: destination row; four bytes (B, G, R, A) are stored per pixel.
//   yuvconstants: provides the kUVCoeff and kRGBCoeffBias tables.
//   width: number of pixels to convert.
// Whole vectors are processed in the main loop under an all-true predicate;
// any remaining pixels get one extra iteration predicated by whilelt.
// The asm is volatile because none of its output operands are read after the
// statement and its only useful effect is the store through dst_argb, so the
// compiler must not treat it as removable. This also matches
// ARGBToUVMatrixRow_SVE2 in this file.
| void I444AlphaToARGBRow_SVE2(const uint8_t* src_y, |
| const uint8_t* src_u, |
| const uint8_t* src_v, |
| const uint8_t* src_a, |
| uint8_t* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| uint64_t vl; |
| asm volatile("cnth %[vl] \n" |
| "ptrue p0.b \n" YUVTORGB_SVE_SETUP |
| "subs %w[width], %w[width], %w[vl] \n" |
| "b.lt 2f \n" |
| |
| // Run bulk of computation with an all-true predicate to avoid predicate |
| // generation overhead. |
| "ptrue p1.h \n" |
| "1: \n" READYUV444_SVE |
| "ld1b {z19.h}, p1/z, [%[src_a]] \n" // A |
| "add %[src_a], %[src_a], %[vl] \n" I4XXTORGB_SVE |
| RGBTORGBA8_SVE |
| "subs %w[width], %w[width], %w[vl] \n" |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| "add %[dst_argb], %[dst_argb], %[vl], lsl #2 \n" |
| "b.ge 1b \n" |
| |
| "2: \n" |
| "adds %w[width], %w[width], %w[vl] \n" |
| "b.eq 99f \n" |
| |
| // Calculate a predicate for the final iteration to deal with the tail. |
| "whilelt p1.h, wzr, %w[width] \n" READYUV444_SVE |
| "ld1b {z19.h}, p1/z, [%[src_a]] \n" // A |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| |
| "99: \n" |
| : [src_y] "+r"(src_y), // %[src_y] |
| [src_u] "+r"(src_u), // %[src_u] |
| [src_v] "+r"(src_v), // %[src_v] |
| [src_a] "+r"(src_a), // %[src_a] |
| [dst_argb] "+r"(dst_argb), // %[dst_argb] |
| [width] "+r"(width), // %[width] |
| [vl] "=&r"(vl) // %[vl] |
| : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] |
| [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] |
| : "cc", "memory", YUVTORGB_SVE_REGS); |
| } |
| |
// Converts one row of planar 4:2:2 YUV plus a separate alpha plane to 8-bit
// ARGB with SVE2.
//   src_y: luma plane, one byte per pixel.
//   src_u, src_v: chroma planes, one byte per pair of pixels.
//   src_a: alpha plane, one byte per pixel, copied into the A byte.
//   dst_argb: destination row; four bytes (B, G, R, A) are stored per pixel.
//   yuvconstants: provides the kUVCoeff and kRGBCoeffBias tables.
//   width: number of pixels to convert.
// Whole vectors are processed in the main loop under an all-true predicate;
// any remaining pixels get one extra iteration predicated by whilelt.
// The asm is volatile because none of its output operands are read after the
// statement and its only useful effect is the store through dst_argb, so the
// compiler must not treat it as removable. This also matches
// ARGBToUVMatrixRow_SVE2 in this file.
| void I422AlphaToARGBRow_SVE2(const uint8_t* src_y, |
| const uint8_t* src_u, |
| const uint8_t* src_v, |
| const uint8_t* src_a, |
| uint8_t* dst_argb, |
| const struct YuvConstants* yuvconstants, |
| int width) { |
| uint64_t vl; |
| asm volatile("cnth %[vl] \n" |
| "ptrue p0.b \n" YUVTORGB_SVE_SETUP |
| "subs %w[width], %w[width], %w[vl] \n" |
| "b.lt 2f \n" |
| |
| // Run bulk of computation with an all-true predicate to avoid predicate |
| // generation overhead. |
| "ptrue p1.h \n" |
| "1: \n" READYUV422_SVE |
| "ld1b {z19.h}, p1/z, [%[src_a]] \n" // A |
| "add %[src_a], %[src_a], %[vl] \n" I4XXTORGB_SVE |
| RGBTORGBA8_SVE |
| "subs %w[width], %w[width], %w[vl] \n" |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| "add %[dst_argb], %[dst_argb], %[vl], lsl #2 \n" |
| "b.ge 1b \n" |
| |
| "2: \n" |
| "adds %w[width], %w[width], %w[vl] \n" |
| "b.eq 99f \n" |
| |
| // Calculate a predicate for the final iteration to deal with the tail. |
| "whilelt p1.h, wzr, %w[width] \n" READYUV422_SVE |
| "ld1b {z19.h}, p1/z, [%[src_a]] \n" // A |
| I4XXTORGB_SVE RGBTORGBA8_SVE |
| "st2h {z16.h, z17.h}, p1, [%[dst_argb]] \n" |
| |
| "99: \n" |
| : [src_y] "+r"(src_y), // %[src_y] |
| [src_u] "+r"(src_u), // %[src_u] |
| [src_v] "+r"(src_v), // %[src_v] |
| [src_a] "+r"(src_a), // %[src_a] |
| [dst_argb] "+r"(dst_argb), // %[dst_argb] |
| [width] "+r"(width), // %[width] |
| [vl] "=&r"(vl) // %[vl] |
| : [kUVCoeff] "r"(&yuvconstants->kUVCoeff), // %[kUVCoeff] |
| [kRGBCoeffBias] "r"(&yuvconstants->kRGBCoeffBias) // %[kRGBCoeffBias] |
| : "cc", "memory", YUVTORGB_SVE_REGS); |
| } |
| |
| // Dot-product constants are stored as four-tuples with the two innermost |
| // elements flipped to account for the interleaving nature of the widening |
| // addition instructions. |
// Each table holds eight int16 values. ARGBToUVMatrixRow_SVE2 broadcasts the
// first four (byte offset 0) as the weights of the U dot product and the last
// four (byte offset 8) as the weights of the V dot product. The comment inside
// each table names the coefficient stored in every slot.
| |
// Passed by ARGBToUVRow_SVE2.
| static const int16_t kArgbToUvArr[] = { |
| // UB, -UR, -UG, 0, -VB, VR, -VG, 0 |
| 56, -19, -37, 0, -9, 56, -47, 0, |
| }; |
| |
// Passed by RGBAToUVRow_SVE2.
| static const int16_t kRgbaToUvArr[] = { |
| // 0, -UG, UB, -UR, 0, -VG, -VB, VR |
| 0, -37, 56, -19, 0, -47, -9, 56, |
| }; |
| |
// Passed by BGRAToUVRow_SVE2.
| static const int16_t kBgraToUvArr[] = { |
| // 0, -UG, -UR, UB, 0, -VG, VR, -VB |
| 0, -37, -19, 56, 0, -47, 56, -9, |
| }; |
| |
// Passed by ABGRToUVRow_SVE2.
| static const int16_t kAbgrToUvArr[] = { |
| // -UR, UB, -UG, 0, VR, -VB, -VG, 0 |
| -19, 56, -37, 0, 56, -9, -47, 0, |
| }; |
| |
// Passed by ARGBToUVJRow_SVE2.
| static const int16_t kArgbToUvjArr[] = { |
| // UB, -UR, -UG, 0, -VB, VR, -VG, 0 |
| 63, -21, -42, 0, -10, 63, -53, 0, |
| }; |
| |
// Passed by ABGRToUVJRow_SVE2.
| static const int16_t kAbgrToUvjArr[] = { |
| // -UR, UB, -UG, 0, VR, -VB, -VG, 0 |
| -21, 63, -42, 0, 63, -10, -53, 0, |
| }; |
| |
// Computes one row of 2x2-subsampled U and V bytes from two adjacent rows of
// 4-byte pixels with SVE2.
//   src_argb: first source row; the second row is read from
//     src_argb + src_stride_argb.
//   src_stride_argb: byte distance between the two source rows.
//   dst_u, dst_v: output rows, one byte per horizontal pair of source pixels.
//   width: number of source pixels per row.
//   uvconstants: eight int16 weights; the first four drive the U dot product
//     and the last four the V dot product.
// The main loop consumes four vectors from each row per iteration with the
// all-true predicate p0. Leftover pixels are handled one vector at a time
// under p1, which whilelt derives from the remaining width.
// The tail stores are governed by p1 as well: a pixel pair is written only if
// its first pixel lies inside the row, so a partial final vector never writes
// past the end of dst_u / dst_v. p1 is listed as clobbered because the tail
// loop overwrites it.
| void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb, |
| int src_stride_argb, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width, |
| const int16_t* uvconstants) { |
| const uint8_t* src_argb_1 = src_argb + src_stride_argb; |
| uint64_t vl; |
| asm volatile( |
| "ptrue p0.b \n" |
| "ld1rd {z24.d}, p0/z, [%[uvconstants]] \n" |
| "ld1rd {z25.d}, p0/z, [%[uvconstants], #8] \n" |
| "mov z26.b, #0x80 \n" |
| |
| "cntb %[vl] \n" |
| "subs %w[width], %w[width], %w[vl] \n" |
| "b.lt 2f \n" |
| |
| // Process 4x vectors from each input row per iteration. |
| // Cannot use predication here due to unrolling. |
| "1: \n" // e.g. |
| "ld1b {z0.b}, p0/z, [%[src0], #0, mul vl] \n" // bgrabgra |
| "ld1b {z4.b}, p0/z, [%[src1], #0, mul vl] \n" // bgrabgra |
| "ld1b {z1.b}, p0/z, [%[src0], #1, mul vl] \n" // bgrabgra |
| "ld1b {z5.b}, p0/z, [%[src1], #1, mul vl] \n" // bgrabgra |
| "ld1b {z2.b}, p0/z, [%[src0], #2, mul vl] \n" // bgrabgra |
| "ld1b {z6.b}, p0/z, [%[src1], #2, mul vl] \n" // bgrabgra |
| "ld1b {z3.b}, p0/z, [%[src0], #3, mul vl] \n" // bgrabgra |
| "ld1b {z7.b}, p0/z, [%[src1], #3, mul vl] \n" // bgrabgra |
| "incb %[src0], all, mul #4 \n" |
| "incb %[src1], all, mul #4 \n" |
| |
| "uaddlb z16.h, z0.b, z4.b \n" // brbrbrbr |
| "uaddlt z17.h, z0.b, z4.b \n" // gagagaga |
| "uaddlb z18.h, z1.b, z5.b \n" // brbrbrbr |
| "uaddlt z19.h, z1.b, z5.b \n" // gagagaga |
| "uaddlb z20.h, z2.b, z6.b \n" // brbrbrbr |
| "uaddlt z21.h, z2.b, z6.b \n" // gagagaga |
| "uaddlb z22.h, z3.b, z7.b \n" // brbrbrbr |
| "uaddlt z23.h, z3.b, z7.b \n" // gagagaga |
| |
| "trn1 z0.s, z16.s, z17.s \n" // brgabgra |
| "trn2 z1.s, z16.s, z17.s \n" // brgabgra |
| "trn1 z2.s, z18.s, z19.s \n" // brgabgra |
| "trn2 z3.s, z18.s, z19.s \n" // brgabgra |
| "trn1 z4.s, z20.s, z21.s \n" // brgabgra |
| "trn2 z5.s, z20.s, z21.s \n" // brgabgra |
| "trn1 z6.s, z22.s, z23.s \n" // brgabgra |
| "trn2 z7.s, z22.s, z23.s \n" // brgabgra |
| |
| "subs %w[width], %w[width], %w[vl] \n" // 4*VL per loop |
| |
| "urhadd z0.h, p0/m, z0.h, z1.h \n" // brgabrga |
| "urhadd z2.h, p0/m, z2.h, z3.h \n" // brgabrga |
| "urhadd z4.h, p0/m, z4.h, z5.h \n" // brgabrga |
| "urhadd z6.h, p0/m, z6.h, z7.h \n" // brgabrga |
| |
| "movi v16.8h, #0 \n" |
| "movi v17.8h, #0 \n" |
| "movi v18.8h, #0 \n" |
| "movi v19.8h, #0 \n" |
| |
| "movi v20.8h, #0 \n" |
| "movi v21.8h, #0 \n" |
| "movi v22.8h, #0 \n" |
| "movi v23.8h, #0 \n" |
| |
| "sdot z16.d, z0.h, z24.h \n" // UUxxxxxx |
| "sdot z17.d, z2.h, z24.h \n" // UUxxxxxx |
| "sdot z18.d, z4.h, z24.h \n" // UUxxxxxx |
| "sdot z19.d, z6.h, z24.h \n" // UUxxxxxx |
| |
| "sdot z20.d, z0.h, z25.h \n" // VVxxxxxx |
| "sdot z21.d, z2.h, z25.h \n" // VVxxxxxx |
| "sdot z22.d, z4.h, z25.h \n" // VVxxxxxx |
| "sdot z23.d, z6.h, z25.h \n" // VVxxxxxx |
| |
| "uzp1 z16.s, z16.s, z17.s \n" // UUxx |
| "uzp1 z18.s, z18.s, z19.s \n" // UUxx |
| "uzp1 z20.s, z20.s, z21.s \n" // VVxx |
| "uzp1 z22.s, z22.s, z23.s \n" // VVxx |
| |
| "uzp1 z16.h, z16.h, z18.h \n" // UU |
| "uzp1 z20.h, z20.h, z22.h \n" // VV |
| |
| "addhnb z16.b, z16.h, z26.h \n" // U |
| "addhnb z20.b, z20.h, z26.h \n" // V |
| |
| "st1b {z16.h}, p0, [%[dst_u]] \n" // U |
| "st1b {z20.h}, p0, [%[dst_v]] \n" // V |
| "inch %[dst_u] \n" |
| "inch %[dst_v] \n" |
| |
| "b.ge 1b \n" |
| |
| "2: \n" |
| "adds %w[width], %w[width], %w[vl] \n" // VL per loop |
| "b.le 99f \n" |
| |
| // Process remaining pixels from each input row. |
| // Use predication to do one vector from each input array, so may loop up |
| // to three iterations. |
| "cntw %x[vl] \n" |
| |
| "3: \n" |
| "whilelt p1.s, wzr, %w[width] \n" |
| "ld1d {z0.d}, p1/z, [%[src0]] \n" // bgrabgra |
| "ld1d {z4.d}, p1/z, [%[src1]] \n" // bgrabgra |
| "incb %[src0] \n" |
| "incb %[src1] \n" |
| |
| "uaddlb z16.h, z0.b, z4.b \n" // brbrbrbr |
| "uaddlt z17.h, z0.b, z4.b \n" // gagagaga |
| |
| "trn1 z0.s, z16.s, z17.s \n" // brgabgra |
| "trn2 z1.s, z16.s, z17.s \n" // brgabgra |
| |
| "urhadd z0.h, p0/m, z0.h, z1.h \n" // brgabrga |
| |
| "subs %w[width], %w[width], %w[vl] \n" // VL per loop |
| |
| "movi v16.8h, #0 \n" |
| "movi v20.8h, #0 \n" |
| |
| "sdot z16.d, z0.h, z24.h \n" |
| "sdot z20.d, z0.h, z25.h \n" |
| |
| "addhnb z16.b, z16.h, z26.h \n" // U |
| "addhnb z20.b, z20.h, z26.h \n" // V |
| |
| "st1b {z16.d}, p1, [%[dst_u]] \n" // U, only pairs inside the row |
| "st1b {z20.d}, p1, [%[dst_v]] \n" // V, only pairs inside the row |
| "incd %[dst_u] \n" |
| "incd %[dst_v] \n" |
| "b.gt 3b \n" |
| |
| "99: \n" |
| : [src0] "+r"(src_argb), // %[src0] |
| [src1] "+r"(src_argb_1), // %[src1] |
| [dst_u] "+r"(dst_u), // %[dst_u] |
| [dst_v] "+r"(dst_v), // %[dst_v] |
| [width] "+r"(width), // %[width] |
| [vl] "=&r"(vl) // %[vl] |
| : [uvconstants] "r"(uvconstants) |
| : "cc", "memory", "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z16", |
| "z17", "z18", "z19", "z20", "z21", "z22", "z23", "z24", "z25", "z26", |
| "p0", "p1"); |
| } |
| |
// Entry point for ARGB input: runs ARGBToUVMatrixRow_SVE2 on the given rows
// with the kArgbToUvArr weight table. All other arguments are forwarded
// unchanged.
| void ARGBToUVRow_SVE2(const uint8_t* src_argb, |
| int src_stride_argb, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width) { |
| const int16_t* const uv_weights = kArgbToUvArr; |
| ARGBToUVMatrixRow_SVE2(src_argb, src_stride_argb, dst_u, dst_v, width, |
| uv_weights); |
| } |
| |
// Entry point for ARGB input with the "J" weights: runs
// ARGBToUVMatrixRow_SVE2 on the given rows with the kArgbToUvjArr table. All
// other arguments are forwarded unchanged.
| void ARGBToUVJRow_SVE2(const uint8_t* src_argb, |
| int src_stride_argb, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width) { |
| const int16_t* const uv_weights = kArgbToUvjArr; |
| ARGBToUVMatrixRow_SVE2(src_argb, src_stride_argb, dst_u, dst_v, width, |
| uv_weights); |
| } |
| |
// Entry point for ABGR input with the "J" weights: runs
// ARGBToUVMatrixRow_SVE2 on the given rows with the kAbgrToUvjArr table. All
// other arguments are forwarded unchanged.
| void ABGRToUVJRow_SVE2(const uint8_t* src_abgr, |
| int src_stride_abgr, |
| uint8_t* dst_uj, |
| uint8_t* dst_vj, |
| int width) { |
| const int16_t* const uv_weights = kAbgrToUvjArr; |
| ARGBToUVMatrixRow_SVE2(src_abgr, src_stride_abgr, dst_uj, dst_vj, width, |
| uv_weights); |
| } |
| |
// Entry point for BGRA input: runs ARGBToUVMatrixRow_SVE2 on the given rows
// with the kBgraToUvArr weight table. All other arguments are forwarded
// unchanged.
| void BGRAToUVRow_SVE2(const uint8_t* src_bgra, |
| int src_stride_bgra, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width) { |
| const int16_t* const uv_weights = kBgraToUvArr; |
| ARGBToUVMatrixRow_SVE2(src_bgra, src_stride_bgra, dst_u, dst_v, width, |
| uv_weights); |
| } |
| |
// Entry point for ABGR input: runs ARGBToUVMatrixRow_SVE2 on the given rows
// with the kAbgrToUvArr weight table. All other arguments are forwarded
// unchanged.
| void ABGRToUVRow_SVE2(const uint8_t* src_abgr, |
| int src_stride_abgr, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width) { |
| const int16_t* const uv_weights = kAbgrToUvArr; |
| ARGBToUVMatrixRow_SVE2(src_abgr, src_stride_abgr, dst_u, dst_v, width, |
| uv_weights); |
| } |
| |
// Entry point for RGBA input: runs ARGBToUVMatrixRow_SVE2 on the given rows
// with the kRgbaToUvArr weight table. All other arguments are forwarded
// unchanged.
| void RGBAToUVRow_SVE2(const uint8_t* src_rgba, |
| int src_stride_rgba, |
| uint8_t* dst_u, |
| uint8_t* dst_v, |
| int width) { |
| const int16_t* const uv_weights = kRgbaToUvArr; |
| ARGBToUVMatrixRow_SVE2(src_rgba, src_stride_rgba, dst_u, dst_v, width, |
| uv_weights); |
| } |
| |
| #endif // !defined(LIBYUV_DISABLE_SVE) && defined(__aarch64__) |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| } // namespace libyuv |
| #endif |