| /** |
| * Copyright (C) 2026 Niklas Haas |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "libavutil/avassert.h" |
| |
| #include "uops_tmpl.h" |
| |
| /** |
| * We want to disable FP contraction because this is a reference backend that |
| * establishes a bit-exact reference result. |
| * |
| * The request is spelled differently per compiler: clang gets the standard |
| * STDC FP_CONTRACT pragma, GCC 4.8 and newer an optimize pragma, and MSVC its |
| * own fp_contract pragma. A compiler matching none of these branches gets no |
| * pragma at all. |
| * |
| * Independently of the above, GCC 4.4 and newer is also asked to optimize with |
| * "finite-math-only". |
| * NOTE(review): presumably this lets the compiler assume that no NaN or |
| * infinity reaches the kernels -- confirm that this is wanted for a bit-exact |
| * reference. |
| */ |
| #ifdef __clang__ |
| #pragma STDC FP_CONTRACT OFF |
| #elif AV_GCC_VERSION_AT_LEAST(4, 8) |
| #pragma GCC optimize ("fp-contract=off") |
| #elif defined(_MSC_VER) |
| #pragma fp_contract (off) |
| #endif |
| |
| #if AV_GCC_VERSION_AT_LEAST(4, 4) |
| #pragma GCC optimize ("finite-math-only") |
| #endif |
| |
| /* |
| * Integer types: uops_tmpl.c is included three times with IS_FLOAT set to 0, |
| * once for each BIT_DEPTH of 8, 16 and 32. BIT_DEPTH is undefined again after |
| * every inclusion so that the next one can define it afresh. |
| * NOTE(review): presumably each inclusion emits the kernels for one pixel |
| * type (U8, U16, U32) -- confirm against uops_tmpl.c. |
| */ |
| #define IS_FLOAT 0 |
| # define BIT_DEPTH 8 |
| # include "uops_tmpl.c" |
| # undef BIT_DEPTH |
| # define BIT_DEPTH 16 |
| # include "uops_tmpl.c" |
| # undef BIT_DEPTH |
| # define BIT_DEPTH 32 |
| # include "uops_tmpl.c" |
| # undef BIT_DEPTH |
| #undef IS_FLOAT |
| |
| /* |
| * Floating point types: the same template is included once more, this time |
| * with IS_FLOAT set to 1 and BIT_DEPTH set to 32. |
| */ |
| #define IS_FLOAT 1 |
| # define BIT_DEPTH 32 |
| # include "uops_tmpl.c" |
| # undef BIT_DEPTH |
| #undef IS_FLOAT |
| |
| /* |
| * Expanded as new uop types are implemented in the C/template backend. |
| * |
| * REF_ALL_UOPS(TYPE) chains one SWS_FOR(TYPE, <uop>, REF_ENTRY) invocation per |
| * uop kind listed below; it is used to fill op_table.entries for each of the |
| * U8, U16, U32 and F32 types. SWS_FOR and REF_ENTRY are not defined in the |
| * visible part of this file. |
| * NOTE(review): presumably SWS_FOR applies REF_ENTRY to every variant of the |
| * given uop that exists for TYPE -- confirm against its definition. The list |
| * order is kept as is, since it may matter to the table lookup. |
| */ |
| #define REF_ALL_UOPS(TYPE) \ |
| SWS_FOR(TYPE, READ_PLANAR, REF_ENTRY) \ |
| SWS_FOR(TYPE, READ_PLANAR_FV, REF_ENTRY) \ |
| SWS_FOR(TYPE, READ_PLANAR_FH, REF_ENTRY) \ |
| SWS_FOR(TYPE, READ_PACKED, REF_ENTRY) \ |
| SWS_FOR(TYPE, READ_NIBBLE, REF_ENTRY) \ |
| SWS_FOR(TYPE, READ_BIT, REF_ENTRY) \ |
| SWS_FOR(TYPE, PERMUTE, REF_ENTRY) \ |
| SWS_FOR(TYPE, COPY, REF_ENTRY) \ |
| SWS_FOR(TYPE, WRITE_PLANAR, REF_ENTRY) \ |
| SWS_FOR(TYPE, WRITE_PACKED, REF_ENTRY) \ |
| SWS_FOR(TYPE, WRITE_NIBBLE, REF_ENTRY) \ |
| SWS_FOR(TYPE, WRITE_BIT, REF_ENTRY) \ |
| SWS_FOR(TYPE, SWAP_BYTES, REF_ENTRY) \ |
| SWS_FOR(TYPE, EXPAND_BIT, REF_ENTRY) \ |
| SWS_FOR(TYPE, EXPAND_PAIR, REF_ENTRY) \ |
| SWS_FOR(TYPE, EXPAND_QUAD, REF_ENTRY) \ |
| SWS_FOR(TYPE, TO_U8, REF_ENTRY) \ |
| SWS_FOR(TYPE, TO_U16, REF_ENTRY) \ |
| SWS_FOR(TYPE, TO_U32, REF_ENTRY) \ |
| SWS_FOR(TYPE, TO_F32, REF_ENTRY) \ |
| SWS_FOR(TYPE, SCALE, REF_ENTRY) \ |
| SWS_FOR(TYPE, ADD, REF_ENTRY) \ |
| SWS_FOR(TYPE, MIN, REF_ENTRY) \ |
| SWS_FOR(TYPE, MAX, REF_ENTRY) \ |
| SWS_FOR(TYPE, UNPACK, REF_ENTRY) \ |
| SWS_FOR(TYPE, PACK, REF_ENTRY) \ |
| SWS_FOR(TYPE, LSHIFT, REF_ENTRY) \ |
| SWS_FOR(TYPE, RSHIFT, REF_ENTRY) \ |
| SWS_FOR(TYPE, CLEAR, REF_ENTRY) \ |
| SWS_FOR(TYPE, LINEAR, REF_ENTRY) \ |
| SWS_FOR(TYPE, DITHER, REF_ENTRY) \ |
| /* end of macro: this line absorbs the trailing backslash of the last entry */ |
| |
| /** |
| * Lookup table that compile() hands to ff_sws_uop_lookup(). REF_ALL_UOPS is |
| * expanded once for each of the U8, U16, U32 and F32 types, and the resulting |
| * entry list is closed by a NULL entry. |
| */ |
| static const SwsOpTable op_table = { |
|     .entries = { |
|         REF_ALL_UOPS(U8) |
|         REF_ALL_UOPS(U16) |
|         REF_ALL_UOPS(U32) |
|         REF_ALL_UOPS(F32) |
|         NULL |
|     }, |
|     .block_size = SWS_BLOCK_SIZE, |
| }; |
| |
| /** |
|  * Run the compiled chain over a rectangle of blocks. |
|  * |
|  * For every line in [y_start, y_end) the chain is entered once per block index |
|  * in [bx_start, bx_end), with iter->x set to block * SWS_BLOCK_SIZE. After each |
|  * line the four input and output addresses are advanced by the per-plane bumps |
|  * from exec; if exec->in_bump_y is set, the inputs additionally move by |
|  * in_bump_y[y] strides. |
|  * |
|  * @param exec     execution parameters (plane pointers, bumps, strides) |
|  * @param priv     the SwsOpChain stored as private data by compile() |
|  * @param bx_start first block index to process |
|  * @param y_start  first line to process |
|  * @param bx_end   block index at which to stop (exclusive) |
|  * @param y_end    line at which to stop (exclusive) |
|  */ |
| static void process(const SwsOpExec *exec, const void *priv, |
|                     const int bx_start, const int y_start, |
|                     int bx_end, int y_end) |
| { |
|     const SwsOpChain *chain = priv; |
|     /* not referenced directly below; presumably consumed by CONTINUE() */ |
|     const SwsOpImpl *impl = chain->impl; |
|     block_t x, y, z, w; /* scratch storage big enough for any intermediate */ |
| |
|     SwsOpIter state; |
|     SwsOpIter *iter = &state; /* CONTINUE() needs a pointer named iter */ |
|     for (int plane = 0; plane < 4; plane++) { |
|         iter->in[plane]  = (uintptr_t) exec->in[plane]; |
|         iter->out[plane] = (uintptr_t) exec->out[plane]; |
|     } |
|     iter->exec = exec; |
| |
|     iter->y = y_start; |
|     while (iter->y < y_end) { |
|         /* one pass through the chain per block of the current line */ |
|         for (int bx = bx_start; bx < bx_end; bx++) { |
|             iter->x = bx * SWS_BLOCK_SIZE; |
|             CONTINUE(&x, &y, &z, &w); |
|         } |
| |
|         /* optional per-line input adjustment, counted in strides */ |
|         int extra_lines = 0; |
|         if (exec->in_bump_y) |
|             extra_lines = exec->in_bump_y[iter->y]; |
| |
|         for (int plane = 0; plane < 4; plane++) { |
|             iter->in[plane]  += exec->in_bump[plane] + extra_lines * exec->in_stride[plane]; |
|             iter->out[plane] += exec->out_bump[plane]; |
|         } |
| |
|         iter->y++; |
|     } |
| } |
| |
| /** |
|  * Compile an operation list for the C backend. |
|  * |
|  * The list is translated into micro-ops with ff_sws_ops_translate(); each |
|  * micro-op is then looked up in op_table via ff_sws_uop_lookup(), which is |
|  * handed the chain under construction. On success *out is filled in with |
|  * process() as the entry point and the chain as private data (released through |
|  * ff_sws_op_chain_free_cb), and the micro-op names are logged at debug level. |
|  * The temporary micro-op list is freed on every path past its allocation; the |
|  * chain is freed here only on failure. |
|  * |
|  * @param ctx context passed to translation, lookup and logging |
|  * @param ops operation list to compile |
|  * @param out receives the compiled operation on success |
|  * @return 0 on success, AVERROR(ENOMEM) if an allocation fails, or the |
|  *         negative error code returned by translation or lookup |
|  */ |
| static int compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out) |
| { |
|     SwsUOpList *uops = NULL; |
|     int ret; |
| |
|     SwsOpChain *chain = ff_sws_op_chain_alloc(); |
|     if (!chain) |
|         return AVERROR(ENOMEM); |
| |
|     uops = ff_sws_uop_list_alloc(); |
|     if (!uops) { |
|         ret = AVERROR(ENOMEM); |
|         goto done; |
|     } |
| |
|     ret = ff_sws_ops_translate(ctx, ops, 0, uops); |
|     if (ret < 0) |
|         goto done; |
| |
|     av_assert0(uops->num_ops > 0); |
|     for (int n = 0; n < uops->num_ops; n++) { |
|         const SwsOpTable *table = &op_table; |
|         ret = ff_sws_uop_lookup(ctx, &table, 1, &uops->ops[n], |
|                                 SWS_BLOCK_SIZE, chain); |
|         if (ret < 0) |
|             goto done; |
|     } |
| |
|     *out = (SwsCompiledOp) { |
|         .func        = process, |
|         .priv        = chain, |
|         .free        = ff_sws_op_chain_free_cb, |
|         .cpu_flags   = chain->cpu_flags, |
|         .block_size  = SWS_BLOCK_SIZE, |
|         .slice_align = 1, |
|     }; |
| |
|     memcpy(out->over_read,  chain->over_read,  sizeof(out->over_read)); |
|     memcpy(out->over_write, chain->over_write, sizeof(out->over_write)); |
| |
|     av_log(ctx, AV_LOG_DEBUG, "Compiled micro-ops:\n"); |
|     for (int n = 0; n < uops->num_ops; n++) { |
|         char name[SWS_UOP_NAME_MAX]; |
|         ff_sws_uop_name(&uops->ops[n], name); |
|         av_log(ctx, AV_LOG_DEBUG, " %s\n", name); |
|     } |
| |
|     /* the last lookup result is not propagated; success is always 0 */ |
|     ret = 0; |
| |
| done: |
|     ff_sws_uop_list_free(&uops); |
|     if (ret < 0) |
|         ff_sws_op_chain_free(chain); /* ownership passed to *out otherwise */ |
|     return ret; |
| } |
| |
| /** |
|  * Descriptor of the C reference backend: named "c", flagged SWS_BACKEND_C, |
|  * compiled through compile() above and not tied to any hardware pixel format. |
|  */ |
| const SwsOpBackend backend_c = { |
|     .name      = "c", |
|     .compile   = compile, |
|     .flags     = SWS_BACKEND_C, |
|     .hw_format = AV_PIX_FMT_NONE, |
| }; |