/* NOTE(review): removed stray "blob: 8f37af01018b8b751b7e45a1964bc153f296b748 [file]"
 * web-export artifact — it is not valid C and breaks compilation. */
/*
* Copyright (C) 2026 Ramiro Polla
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* NOTE: This file is #include'd directly by both the NEON backend and
* the sws_ops_aarch64 tool.
*/
#include "libavutil/error.h"
#include "libavutil/rational.h"
#include "libswscale/ops.h"
#include "ops_impl.h"
static uint8_t sws_pixel_to_aarch64(SwsPixelType type)
{
    /* Translate a libswscale pixel type into the aarch64 backend's
     * own encoding; any type this backend has no equivalent for maps
     * to 0. */
    if (type == SWS_PIXEL_U8)
        return AARCH64_PIXEL_U8;
    if (type == SWS_PIXEL_U16)
        return AARCH64_PIXEL_U16;
    if (type == SWS_PIXEL_U32)
        return AARCH64_PIXEL_U32;
    if (type == SWS_PIXEL_F32)
        return AARCH64_PIXEL_F32;
    return 0;
}
/**
* The column index order for SwsLinearOp.mask follows the affine transform
* order, where the offset is the last element. SwsAArch64LinearOpMask, on
* the other hand, follows execution order, where the offset is the first
* element.
*/
static int linear_index_from_sws_op(int idx)
{
    /* The mapping { 1, 2, 3, 4, 0 } is a plain rotation: the offset
     * (column 4 in affine order) moves to slot 0, and the coefficient
     * columns each shift up by one. */
    return (idx + 1) % 5;
}
/**
* Convert SwsOp to a SwsAArch64OpImplParams. Read the comments regarding
* SwsAArch64OpImplParams in ops_impl.h for more information.
*/
/**
 * Convert one SwsOp to a SwsAArch64OpImplParams. Read the comments regarding
 * SwsAArch64OpImplParams in ops_impl.h for more information.
 *
 * @param ctx        swscale context; only ctx->flags (SWS_BITEXACT) is read.
 * @param ops        full operation list; op n may peek at op n + 1 for the
 *                   set of components that remain in use after this op.
 * @param n          index of the operation to convert.
 * @param block_size number of pixels processed per kernel invocation.
 * @param out        receives the translated parameters.
 * @return 0 on success, AVERROR(ENOTSUP) if the operation is not supported
 *         by this backend.
 */
static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
                                   int block_size, SwsAArch64OpImplParams *out)
{
    const SwsOp *op   = &ops->ops[n];
    const SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : op;

    out->block_size = block_size;

    /**
     * Most SwsOp work on fields described by next->comps.unused.
     * The few that don't will override this field later.
     */
    out->mask = 0;
    for (int i = 0; i < 4; i++) {
        if (!next->comps.unused[i])
            MASK_SET(out->mask, i, 1);
    }

    out->type = sws_pixel_to_aarch64(op->type);

    /* Map SwsOpType to SwsAArch64OpType */
    switch (op->op) {
    case SWS_OP_READ:
        if (op->rw.filter)
            return AVERROR(ENOTSUP);
        /**
         * The different types of read operations have been split into
         * their own SwsAArch64OpType to simplify the implementation.
         */
        if (op->rw.frac == 1)
            out->op = AARCH64_SWS_OP_READ_NIBBLE;
        else if (op->rw.frac == 3)
            out->op = AARCH64_SWS_OP_READ_BIT;
        else if (op->rw.packed && op->rw.elems != 1)
            out->op = AARCH64_SWS_OP_READ_PACKED;
        else
            out->op = AARCH64_SWS_OP_READ_PLANAR;
        break;
    case SWS_OP_WRITE:
        if (op->rw.filter)
            return AVERROR(ENOTSUP);
        /**
         * The different types of write operations have been split into
         * their own SwsAArch64OpType to simplify the implementation.
         */
        if (op->rw.frac == 1)
            out->op = AARCH64_SWS_OP_WRITE_NIBBLE;
        else if (op->rw.frac == 3)
            out->op = AARCH64_SWS_OP_WRITE_BIT;
        else if (op->rw.packed && op->rw.elems != 1)
            out->op = AARCH64_SWS_OP_WRITE_PACKED;
        else
            out->op = AARCH64_SWS_OP_WRITE_PLANAR;
        break;
    case SWS_OP_SWAP_BYTES: out->op = AARCH64_SWS_OP_SWAP_BYTES; break;
    case SWS_OP_SWIZZLE:    out->op = AARCH64_SWS_OP_SWIZZLE;    break;
    case SWS_OP_UNPACK:     out->op = AARCH64_SWS_OP_UNPACK;     break;
    case SWS_OP_PACK:       out->op = AARCH64_SWS_OP_PACK;       break;
    case SWS_OP_LSHIFT:     out->op = AARCH64_SWS_OP_LSHIFT;     break;
    case SWS_OP_RSHIFT:     out->op = AARCH64_SWS_OP_RSHIFT;     break;
    case SWS_OP_CLEAR:      out->op = AARCH64_SWS_OP_CLEAR;      break;
    case SWS_OP_CONVERT:
        out->op = op->convert.expand ? AARCH64_SWS_OP_EXPAND : AARCH64_SWS_OP_CONVERT;
        break;
    case SWS_OP_MIN:        out->op = AARCH64_SWS_OP_MIN;        break;
    case SWS_OP_MAX:        out->op = AARCH64_SWS_OP_MAX;        break;
    case SWS_OP_SCALE:      out->op = AARCH64_SWS_OP_SCALE;      break;
    case SWS_OP_LINEAR:     out->op = AARCH64_SWS_OP_LINEAR;     break;
    case SWS_OP_DITHER:     out->op = AARCH64_SWS_OP_DITHER;     break;
    default:
        /* Fix: previously an unhandled SwsOpType fell through with
         * out->op left uninitialized, which was then read by the
         * switch below (undefined behavior). Reject it explicitly. */
        return AVERROR(ENOTSUP);
    }

    /* Fill in the per-operation parameters. */
    switch (out->op) {
    case AARCH64_SWS_OP_READ_BIT:
    case AARCH64_SWS_OP_READ_NIBBLE:
    case AARCH64_SWS_OP_READ_PACKED:
    case AARCH64_SWS_OP_READ_PLANAR:
    case AARCH64_SWS_OP_WRITE_BIT:
    case AARCH64_SWS_OP_WRITE_NIBBLE:
    case AARCH64_SWS_OP_WRITE_PACKED:
    case AARCH64_SWS_OP_WRITE_PLANAR:
        /* Reads/writes touch the first rw.elems components, regardless
         * of next->comps.unused. */
        switch (op->rw.elems) {
        case 1: out->mask = 0x0001; break;
        case 2: out->mask = 0x0011; break;
        case 3: out->mask = 0x0111; break;
        case 4: out->mask = 0x1111; break;
        }
        break;
    case AARCH64_SWS_OP_SWAP_BYTES:
        /* Only the element size matters, not the type. */
        if (out->type == AARCH64_PIXEL_F32)
            out->type = AARCH64_PIXEL_U32;
        break;
    case AARCH64_SWS_OP_SWIZZLE:
        /* Only components whose source differs from their position
         * actually need moving. */
        out->mask = 0;
        MASK_SET(out->mask, 0, op->swizzle.in[0] != 0);
        MASK_SET(out->mask, 1, op->swizzle.in[1] != 1);
        MASK_SET(out->mask, 2, op->swizzle.in[2] != 2);
        MASK_SET(out->mask, 3, op->swizzle.in[3] != 3);
        MASK_SET(out->swizzle, 0, op->swizzle.in[0]);
        MASK_SET(out->swizzle, 1, op->swizzle.in[1]);
        MASK_SET(out->swizzle, 2, op->swizzle.in[2]);
        MASK_SET(out->swizzle, 3, op->swizzle.in[3]);
        /* The element size and type don't matter. */
        out->block_size = block_size * ff_sws_pixel_type_size(op->type);
        out->type = AARCH64_PIXEL_U8;
        break;
    case AARCH64_SWS_OP_UNPACK:
        MASK_SET(out->pack, 0, op->pack.pattern[0]);
        MASK_SET(out->pack, 1, op->pack.pattern[1]);
        MASK_SET(out->pack, 2, op->pack.pattern[2]);
        MASK_SET(out->pack, 3, op->pack.pattern[3]);
        break;
    case AARCH64_SWS_OP_PACK:
        /* Pack consumes this op's own live components, not the next
         * op's. */
        out->mask = 0;
        MASK_SET(out->mask, 0, !op->comps.unused[0]);
        MASK_SET(out->mask, 1, !op->comps.unused[1]);
        MASK_SET(out->mask, 2, !op->comps.unused[2]);
        MASK_SET(out->mask, 3, !op->comps.unused[3]);
        MASK_SET(out->pack, 0, op->pack.pattern[0]);
        MASK_SET(out->pack, 1, op->pack.pattern[1]);
        MASK_SET(out->pack, 2, op->pack.pattern[2]);
        MASK_SET(out->pack, 3, op->pack.pattern[3]);
        break;
    case AARCH64_SWS_OP_LSHIFT:
    case AARCH64_SWS_OP_RSHIFT:
        out->shift = op->shift.amount;
        break;
    case AARCH64_SWS_OP_CLEAR:
        /* A zero denominator marks a component that is not cleared. */
        out->mask = 0;
        MASK_SET(out->mask, 0, !!op->clear.value[0].den);
        MASK_SET(out->mask, 1, !!op->clear.value[1].den);
        MASK_SET(out->mask, 2, !!op->clear.value[2].den);
        MASK_SET(out->mask, 3, !!op->clear.value[3].den);
        break;
    case AARCH64_SWS_OP_EXPAND:
    case AARCH64_SWS_OP_CONVERT:
        out->to_type = sws_pixel_to_aarch64(op->convert.to);
        break;
    case AARCH64_SWS_OP_LINEAR:
        /**
         * The out->linear.mask field packs the 4x5 matrix from SwsLinearOp as
         * 2 bits per element:
         * 00: m[i][j] == 0
         * 01: m[i][j] == 1
         * 11: m[i][j] is any other coefficient
         */
        out->mask = 0;
        for (int i = 0; i < 4; i++) {
            /* Skip unused or identity rows */
            if (op->comps.unused[i] || !(op->lin.mask & SWS_MASK_ROW(i)))
                continue;
            MASK_SET(out->mask, i, 1);
            for (int j = 0; j < 5; j++) {
                int jj = linear_index_from_sws_op(j);
                if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1)))
                    LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1);
                else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1)))
                    LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X);
            }
        }
        /* Fused multiply-add is faster but not bit-exact. */
        out->linear.fmla = !(ctx->flags & SWS_BITEXACT);
        break;
    case AARCH64_SWS_OP_DITHER:
        /* A negative y_offset marks a component that is not dithered. */
        out->mask = 0;
        MASK_SET(out->mask, 0, op->dither.y_offset[0] >= 0);
        MASK_SET(out->mask, 1, op->dither.y_offset[1] >= 0);
        MASK_SET(out->mask, 2, op->dither.y_offset[2] >= 0);
        MASK_SET(out->mask, 3, op->dither.y_offset[3] >= 0);
        MASK_SET(out->dither.y_offset, 0, op->dither.y_offset[0]);
        MASK_SET(out->dither.y_offset, 1, op->dither.y_offset[1]);
        MASK_SET(out->dither.y_offset, 2, op->dither.y_offset[2]);
        MASK_SET(out->dither.y_offset, 3, op->dither.y_offset[3]);
        out->dither.size_log2 = op->dither.size_log2;
        break;
    default:
        /* Ops with no extra parameters (e.g. MIN/MAX/SCALE). */
        break;
    }

    return 0;
}