| /* |
| * Copyright (C) 2026 Ramiro Polla |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #ifndef SWSCALE_AARCH64_OPS_IMPL_H |
| #define SWSCALE_AARCH64_OPS_IMPL_H |
| |
| #include <assert.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| |
/*
 * Pixel types understood by the AArch64 backend. Similar to SwsPixelType.
 * Values are consecutive starting at zero.
 */
typedef enum SwsAArch64PixelType {
    AARCH64_PIXEL_U8 = 0,
    AARCH64_PIXEL_U16,
    AARCH64_PIXEL_U32,
    AARCH64_PIXEL_F32,

    AARCH64_PIXEL_TYPE_NB, /* count of the entries above */
} SwsAArch64PixelType;
| |
/*
 * Operation types implemented by the AArch64 backend. Similar to SwsOpType.
 * Values are consecutive starting at zero; the blank lines below are only
 * visual grouping and do not affect numbering.
 */
typedef enum SwsAArch64OpType {
    AARCH64_SWS_OP_NONE = 0,

    AARCH64_SWS_OP_PROCESS,
    AARCH64_SWS_OP_PROCESS_RETURN,

    /* Reads */
    AARCH64_SWS_OP_READ_BIT,
    AARCH64_SWS_OP_READ_NIBBLE,
    AARCH64_SWS_OP_READ_PACKED,
    AARCH64_SWS_OP_READ_PLANAR,

    /* Writes */
    AARCH64_SWS_OP_WRITE_BIT,
    AARCH64_SWS_OP_WRITE_NIBBLE,
    AARCH64_SWS_OP_WRITE_PACKED,
    AARCH64_SWS_OP_WRITE_PLANAR,

    /* Remaining operations */
    AARCH64_SWS_OP_SWAP_BYTES,
    AARCH64_SWS_OP_SWIZZLE,
    AARCH64_SWS_OP_UNPACK,
    AARCH64_SWS_OP_PACK,
    AARCH64_SWS_OP_LSHIFT,
    AARCH64_SWS_OP_RSHIFT,
    AARCH64_SWS_OP_CLEAR,
    AARCH64_SWS_OP_CONVERT,
    AARCH64_SWS_OP_EXPAND,
    AARCH64_SWS_OP_MIN,
    AARCH64_SWS_OP_MAX,
    AARCH64_SWS_OP_SCALE,
    AARCH64_SWS_OP_LINEAR,
    AARCH64_SWS_OP_DITHER,

    AARCH64_SWS_OP_TYPE_NB, /* count of the entries above */
} SwsAArch64OpType;
| |
/*
 * Per-component mask: 16 bits split into four nibbles, one nibble per
 * component.
 */
typedef uint16_t SwsAArch64OpMask;

/**
 * Affine coefficient mask for the linear op.
 *
 * A 4x5 matrix is packed in execution order (the offset comes first in each
 * row), using 2 bits per element:
 *   00: m[i][j] == 0
 *   01: m[i][j] == 1
 *   11: m[i][j] is any other coefficient
 */
typedef uint64_t SwsAArch64LinearOpMask;

/* Parameters specific to the linear op. */
typedef struct SwsAArch64LinearOp {
    SwsAArch64LinearOpMask mask; /* 2-bit class of every matrix element */
    uint8_t                fmla; /* NOTE(review): presumably selects fused
                                  * multiply-add code paths; confirm */
} SwsAArch64LinearOp;

/* Parameters specific to the dither op. */
typedef struct SwsAArch64DitherOp {
    uint16_t y_offset;
    uint8_t  size_log2; /* NOTE(review): presumably log2 of the dither
                         * matrix dimension; confirm */
} SwsAArch64DitherOp;
| |
| /** |
| * SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType |
| * operation. It consists of simplified parameters from the SwsOp structure, |
| * with the purpose of being straight-forward to implement and execute. |
| */ |
| typedef struct SwsAArch64OpImplParams { |
| SwsAArch64OpType op; |
| SwsAArch64OpMask mask; |
| SwsAArch64PixelType type; |
| uint8_t block_size; |
| union { |
| uint8_t shift; |
| SwsAArch64OpMask swizzle; |
| SwsAArch64OpMask pack; |
| SwsAArch64PixelType to_type; |
| SwsAArch64LinearOp linear; |
| SwsAArch64DitherOp dither; |
| }; |
| } SwsAArch64OpImplParams; |
| |
/* SwsAArch64OpMask-related helpers. */

/* Extract the 4-bit value stored for component idx. */
#define MASK_GET(mask, idx)      (((mask) >> ((idx) << 2)) & 0xf)
/* OR the low 4 bits of val into the nibble of component idx. Bits already
 * set in that nibble are kept; nothing is cleared first. */
#define MASK_SET(mask, idx, val) do { (mask) |= (((val) & 0xf) << ((idx) << 2)); } while (0)

/*
 * Iterate idx over components 0..3 (LOOP) or 3..0 (LOOP_BWD) and execute the
 * statement that follows only for components whose nibble in mask is
 * non-zero. idx is declared by the macro as an int local to the loop.
 *
 * The expansion ends in an unbraced `if`, so the body must not be followed
 * by an `else`.
 */
#define LOOP(mask, idx)                 \
    for (int idx = 0; idx < 4; idx++)   \
        if (MASK_GET(mask, idx))
#define LOOP_BWD(mask, idx)             \
    for (int idx = 3; idx >= 0; idx--)  \
        if (MASK_GET(mask, idx))

/* Same as LOOP/LOOP_BWD, taking the mask from p->mask. p is parenthesized
 * so that any pointer expression (e.g. &params) can be passed. */
#define LOOP_MASK(p, idx)     LOOP((p)->mask, idx)
#define LOOP_MASK_BWD(p, idx) LOOP_BWD((p)->mask, idx)
| |
/*
 * Accessors for SwsAArch64LinearOpMask. Element (idx, jdx) of the 4x5
 * matrix occupies the 2 bits starting at bit 2 * (5 * idx + jdx).
 * LINEAR_MASK_SET ORs the value in; it does not clear bits already set.
 */
#define LINEAR_MASK_GET(mask, idx, jdx) (((mask) >> (2 * ((5 * (idx) + (jdx))))) & 3)
#define LINEAR_MASK_SET(mask, idx, jdx, val) do {                                       \
    (mask) |= ((((SwsAArch64LinearOpMask) (val)) & 3) << (2 * ((5 * (idx) + (jdx))))); \
} while (0)

/* Element classes stored in the mask. */
#define LINEAR_MASK_0 0 /* coefficient is 0 */
#define LINEAR_MASK_1 1 /* coefficient is 1 */
#define LINEAR_MASK_X 3 /* any other coefficient */

/*
 * Iterate over every (idx, jdx) element of the linear matrix whose row idx
 * is enabled in p->mask and whose class in p->linear.mask is not
 * LINEAR_MASK_0. idx and jdx are declared by the macro. p is parenthesized
 * so that any pointer expression can be passed.
 *
 * The expansion ends in an unbraced `if`, so the body must not be followed
 * by an `else`.
 */
#define LOOP_LINEAR_MASK(p, idx, jdx)                           \
    LOOP_MASK(p, idx)                                           \
        for (int jdx = 0; jdx < 5; jdx++)                       \
            if (LINEAR_MASK_GET((p)->linear.mask, idx, jdx))
| |
| /* Compute number of vector registers needed to store all coefficients. */ |
| static inline int linear_num_vregs(const SwsAArch64OpImplParams *params) |
| { |
| int count = 0; |
| LOOP_LINEAR_MASK(params, i, j) |
| count++; |
| return (count + 3) / 4; |
| } |
| |
/**
 * Translate a column index in execution order into the reordered column
 * index (NOTE(review): presumably the column used by the SwsOp matrix;
 * confirm).
 *
 * Index 0 maps to column 4; indices 1..4 map to columns 0..3.
 *
 * @param idx  column in execution order, must be in 0..4
 * @return     reordered column index
 */
static inline int linear_index_to_sws_op(int idx)
{
    static const int exec_to_op_col[5] = { 4, 0, 1, 2, 3 };

    return exec_to_op_col[idx];
}
| |
/**
 * Tell whether a column index in execution order refers to the offset,
 * which is stored first.
 *
 * @param idx  column in execution order
 * @return     1 if idx is 0, otherwise 0
 */
static inline int linear_index_is_offset(int idx)
{
    return !idx;
}
| |
/**
 * Translate a column index in execution order into a vx index.
 *
 * @param idx  column in execution order
 * @return     idx - 1 for non-offset columns, 0 for the offset column
 */
static inline int linear_index_to_vx(int idx)
{
    /* The offset has no vx of its own; it is mapped to 0 purely to please
     * UBSan. */
    return linear_index_is_offset(idx) ? 0 : idx - 1;
}
| |
/**
 * These values will be used by ops_asmgen to access fields inside of
 * SwsOpExec and SwsOpImpl. The offsets and sizes are checked below when
 * compiling for AArch64 to make sure there is no mismatch.
 */
#define offsetof_exec_in        0
#define offsetof_exec_out       32
#define offsetof_exec_in_bump   128
#define offsetof_exec_out_bump  160
#define offsetof_impl_cont      0
#define offsetof_impl_priv      16
#define sizeof_impl             32

#if ARCH_AARCH64 && HAVE_NEON
static_assert(offsetof_exec_in       == offsetof(SwsOpExec, in),       "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out      == offsetof(SwsOpExec, out),      "SwsOpExec layout mismatch");
static_assert(offsetof_exec_in_bump  == offsetof(SwsOpExec, in_bump),  "SwsOpExec layout mismatch");
static_assert(offsetof_exec_out_bump == offsetof(SwsOpExec, out_bump), "SwsOpExec layout mismatch");
static_assert(offsetof_impl_cont     == offsetof(SwsOpImpl, cont),     "SwsOpImpl layout mismatch");
static_assert(offsetof_impl_priv     == offsetof(SwsOpImpl, priv),     "SwsOpImpl layout mismatch");
/* sizeof_impl is hard-coded as well, so it is verified like the offsets. */
static_assert(sizeof_impl            == sizeof(SwsOpImpl),             "SwsOpImpl layout mismatch");
#endif
| |
| #endif /* SWSCALE_AARCH64_OPS_IMPL_H */ |