/* NOTE(review): removed stray "blob: 8f37af01018b8b751b7e45a1964bc153f296b748 [file]"
 * web-export artifact — it is not valid C and breaks compilation. */
/*
* Copyright (C) 2026 Ramiro Polla
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* NOTE: This file is #include'd directly by both the NEON backend and
* the sws_ops_aarch64 tool.
*/
#include "libavutil/error.h"
#include "libavutil/rational.h"
#include "libswscale/ops.h"
#include "ops_impl.h"
static uint8_t sws_pixel_to_aarch64(SwsPixelType type)
{
    /* Translate a libswscale pixel type into the aarch64 backend's
     * own encoding; any type this backend has no equivalent for maps
     * to 0. */
    if (type == SWS_PIXEL_U8)
        return AARCH64_PIXEL_U8;
    if (type == SWS_PIXEL_U16)
        return AARCH64_PIXEL_U16;
    if (type == SWS_PIXEL_U32)
        return AARCH64_PIXEL_U32;
    if (type == SWS_PIXEL_F32)
        return AARCH64_PIXEL_F32;
    return 0;
}
/**
* The column index order for SwsLinearOp.mask follows the affine transform
* order, where the offset is the last element. SwsAArch64LinearOpMask, on
* the other hand, follows execution order, where the offset is the first
* element.
*/
static int linear_index_from_sws_op(int idx)
{
    /* The mapping { 1, 2, 3, 4, 0 } is a plain rotation: the offset
     * (column 4 in affine order) moves to slot 0, and the coefficient
     * columns each shift up by one. */
    return (idx + 1) % 5;
}
/**
* Convert SwsOp to a SwsAArch64OpImplParams. Read the comments regarding
* SwsAArch64OpImplParams in ops_impl.h for more information.
*/
/**
 * Convert one SwsOp to a SwsAArch64OpImplParams. Read the comments regarding
 * SwsAArch64OpImplParams in ops_impl.h for more information.
 *
 * @param ctx        swscale context; only ctx->flags (SWS_BITEXACT) is read.
 * @param ops        full operation list; op n may peek at op n + 1 for the
 *                   set of components that remain in use after this op.
 * @param n          index of the operation to convert.
 * @param block_size number of pixels processed per kernel invocation.
 * @param out        receives the translated parameters.
 * @return 0 on success, AVERROR(ENOTSUP) if the operation is not supported
 *         by this backend.
 */
static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
                                   int block_size, SwsAArch64OpImplParams *out)
{
    const SwsOp *op   = &ops->ops[n];
    const SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : op;

    out->block_size = block_size;

    /**
     * Most SwsOp work on fields described by next->comps.unused.
     * The few that don't will override this field later.
     */
    out->mask = 0;
    for (int i = 0; i < 4; i++) {
        if (!next->comps.unused[i])
            MASK_SET(out->mask, i, 1);
    }

    out->type = sws_pixel_to_aarch64(op->type);

    /* Map SwsOpType to SwsAArch64OpType */
    switch (op->op) {
    case SWS_OP_READ:
        if (op->rw.filter)
            return AVERROR(ENOTSUP);
        /**
         * The different types of read operations have been split into
         * their own SwsAArch64OpType to simplify the implementation.
         */
        if (op->rw.frac == 1)
            out->op = AARCH64_SWS_OP_READ_NIBBLE;
        else if (op->rw.frac == 3)
            out->op = AARCH64_SWS_OP_READ_BIT;
        else if (op->rw.packed && op->rw.elems != 1)
            out->op = AARCH64_SWS_OP_READ_PACKED;
        else
            out->op = AARCH64_SWS_OP_READ_PLANAR;
        break;
    case SWS_OP_WRITE:
        if (op->rw.filter)
            return AVERROR(ENOTSUP);
        /**
         * The different types of write operations have been split into
         * their own SwsAArch64OpType to simplify the implementation.
         */
        if (op->rw.frac == 1)
            out->op = AARCH64_SWS_OP_WRITE_NIBBLE;
        else if (op->rw.frac == 3)
            out->op = AARCH64_SWS_OP_WRITE_BIT;
        else if (op->rw.packed && op->rw.elems != 1)
            out->op = AARCH64_SWS_OP_WRITE_PACKED;
        else
            out->op = AARCH64_SWS_OP_WRITE_PLANAR;
        break;
    case SWS_OP_SWAP_BYTES: out->op = AARCH64_SWS_OP_SWAP_BYTES; break;
    case SWS_OP_SWIZZLE:    out->op = AARCH64_SWS_OP_SWIZZLE;    break;
    case SWS_OP_UNPACK:     out->op = AARCH64_SWS_OP_UNPACK;     break;
    case SWS_OP_PACK:       out->op = AARCH64_SWS_OP_PACK;       break;
    case SWS_OP_LSHIFT:     out->op = AARCH64_SWS_OP_LSHIFT;     break;
    case SWS_OP_RSHIFT:     out->op = AARCH64_SWS_OP_RSHIFT;     break;
    case SWS_OP_CLEAR:      out->op = AARCH64_SWS_OP_CLEAR;      break;
    case SWS_OP_CONVERT:
        out->op = op->convert.expand ? AARCH64_SWS_OP_EXPAND : AARCH64_SWS_OP_CONVERT;
        break;
    case SWS_OP_MIN:        out->op = AARCH64_SWS_OP_MIN;        break;
    case SWS_OP_MAX:        out->op = AARCH64_SWS_OP_MAX;        break;
    case SWS_OP_SCALE:      out->op = AARCH64_SWS_OP_SCALE;      break;
    case SWS_OP_LINEAR:     out->op = AARCH64_SWS_OP_LINEAR;     break;
    case SWS_OP_DITHER:     out->op = AARCH64_SWS_OP_DITHER;     break;
    default:
        /* Fix: previously an unhandled SwsOpType fell through with
         * out->op left uninitialized, which was then read by the
         * switch below (undefined behavior). Reject it explicitly. */
        return AVERROR(ENOTSUP);
    }

    /* Fill in the per-operation parameters. */
    switch (out->op) {
    case AARCH64_SWS_OP_READ_BIT:
    case AARCH64_SWS_OP_READ_NIBBLE:
    case AARCH64_SWS_OP_READ_PACKED:
    case AARCH64_SWS_OP_READ_PLANAR:
    case AARCH64_SWS_OP_WRITE_BIT:
    case AARCH64_SWS_OP_WRITE_NIBBLE:
    case AARCH64_SWS_OP_WRITE_PACKED:
    case AARCH64_SWS_OP_WRITE_PLANAR:
        /* Reads/writes touch the first rw.elems components, regardless
         * of next->comps.unused. */
        switch (op->rw.elems) {
        case 1: out->mask = 0x0001; break;
        case 2: out->mask = 0x0011; break;
        case 3: out->mask = 0x0111; break;
        case 4: out->mask = 0x1111; break;
        }
        break;
    case AARCH64_SWS_OP_SWAP_BYTES:
        /* Only the element size matters, not the type. */
        if (out->type == AARCH64_PIXEL_F32)
            out->type = AARCH64_PIXEL_U32;
        break;
    case AARCH64_SWS_OP_SWIZZLE:
        /* Only components whose source differs from their position
         * actually need moving. */
        out->mask = 0;
        MASK_SET(out->mask, 0, op->swizzle.in[0] != 0);
        MASK_SET(out->mask, 1, op->swizzle.in[1] != 1);
        MASK_SET(out->mask, 2, op->swizzle.in[2] != 2);
        MASK_SET(out->mask, 3, op->swizzle.in[3] != 3);
        MASK_SET(out->swizzle, 0, op->swizzle.in[0]);
        MASK_SET(out->swizzle, 1, op->swizzle.in[1]);
        MASK_SET(out->swizzle, 2, op->swizzle.in[2]);
        MASK_SET(out->swizzle, 3, op->swizzle.in[3]);
        /* The element size and type don't matter. */
        out->block_size = block_size * ff_sws_pixel_type_size(op->type);
        out->type = AARCH64_PIXEL_U8;
        break;
    case AARCH64_SWS_OP_UNPACK:
        MASK_SET(out->pack, 0, op->pack.pattern[0]);
        MASK_SET(out->pack, 1, op->pack.pattern[1]);
        MASK_SET(out->pack, 2, op->pack.pattern[2]);
        MASK_SET(out->pack, 3, op->pack.pattern[3]);
        break;
    case AARCH64_SWS_OP_PACK:
        /* Pack consumes this op's own live components, not the next
         * op's. */
        out->mask = 0;
        MASK_SET(out->mask, 0, !op->comps.unused[0]);
        MASK_SET(out->mask, 1, !op->comps.unused[1]);
        MASK_SET(out->mask, 2, !op->comps.unused[2]);
        MASK_SET(out->mask, 3, !op->comps.unused[3]);
        MASK_SET(out->pack, 0, op->pack.pattern[0]);
        MASK_SET(out->pack, 1, op->pack.pattern[1]);
        MASK_SET(out->pack, 2, op->pack.pattern[2]);
        MASK_SET(out->pack, 3, op->pack.pattern[3]);
        break;
    case AARCH64_SWS_OP_LSHIFT:
    case AARCH64_SWS_OP_RSHIFT:
        out->shift = op->shift.amount;
        break;
    case AARCH64_SWS_OP_CLEAR:
        /* A zero denominator marks a component that is not cleared. */
        out->mask = 0;
        MASK_SET(out->mask, 0, !!op->clear.value[0].den);
        MASK_SET(out->mask, 1, !!op->clear.value[1].den);
        MASK_SET(out->mask, 2, !!op->clear.value[2].den);
        MASK_SET(out->mask, 3, !!op->clear.value[3].den);
        break;
    case AARCH64_SWS_OP_EXPAND:
    case AARCH64_SWS_OP_CONVERT:
        out->to_type = sws_pixel_to_aarch64(op->convert.to);
        break;
    case AARCH64_SWS_OP_LINEAR:
        /**
         * The out->linear.mask field packs the 4x5 matrix from SwsLinearOp as
         * 2 bits per element:
         * 00: m[i][j] == 0
         * 01: m[i][j] == 1
         * 11: m[i][j] is any other coefficient
         */
        out->mask = 0;
        for (int i = 0; i < 4; i++) {
            /* Skip unused or identity rows */
            if (op->comps.unused[i] || !(op->lin.mask & SWS_MASK_ROW(i)))
                continue;
            MASK_SET(out->mask, i, 1);
            for (int j = 0; j < 5; j++) {
                int jj = linear_index_from_sws_op(j);
                if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1)))
                    LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1);
                else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1)))
                    LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X);
            }
        }
        /* Fused multiply-add is faster but not bit-exact. */
        out->linear.fmla = !(ctx->flags & SWS_BITEXACT);
        break;
    case AARCH64_SWS_OP_DITHER:
        /* A negative y_offset marks a component that is not dithered. */
        out->mask = 0;
        MASK_SET(out->mask, 0, op->dither.y_offset[0] >= 0);
        MASK_SET(out->mask, 1, op->dither.y_offset[1] >= 0);
        MASK_SET(out->mask, 2, op->dither.y_offset[2] >= 0);
        MASK_SET(out->mask, 3, op->dither.y_offset[3] >= 0);
        MASK_SET(out->dither.y_offset, 0, op->dither.y_offset[0]);
        MASK_SET(out->dither.y_offset, 1, op->dither.y_offset[1]);
        MASK_SET(out->dither.y_offset, 2, op->dither.y_offset[2]);
        MASK_SET(out->dither.y_offset, 3, op->dither.y_offset[3]);
        out->dither.size_log2 = op->dither.size_log2;
        break;
    default:
        /* Ops with no extra parameters (e.g. MIN/MAX/SCALE). */
        break;
    }

    return 0;
}