| /** |
| * Copyright (C) 2025 Niklas Haas |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "libavutil/avassert.h" |
| #include "libavutil/mem.h" |
| #include "libavutil/rational.h" |
| |
| #include "ops_chain.h" |
| |
/* Build the AVRational N/1 from an integer expression N. */
#define Q(N) ((AVRational) { .num = (N), .den = 1 })
| |
| SwsOpChain *ff_sws_op_chain_alloc(void) |
| { |
| return av_mallocz(sizeof(SwsOpChain)); |
| } |
| |
| void ff_sws_op_chain_free_cb(void *ptr) |
| { |
| if (!ptr) |
| return; |
| |
| SwsOpChain *chain = ptr; |
| for (int i = 0; i < chain->num_impl + 1; i++) { |
| if (chain->free[i]) |
| chain->free[i](chain->impl[i].priv.ptr); |
| } |
| |
| av_free(chain); |
| } |
| |
| int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
| void (*free)(void *), const SwsOpPriv *priv) |
| { |
| const int idx = chain->num_impl; |
| if (idx == SWS_MAX_OPS) |
| return AVERROR(EINVAL); |
| |
| av_assert1(func); |
| chain->impl[idx].cont = func; |
| chain->impl[idx + 1].priv = *priv; |
| chain->free[idx + 1] = free; |
| chain->num_impl++; |
| return 0; |
| } |
| |
| /** |
| * Match an operation against a reference operation. Returns a score for how |
| * well the reference matches the operation, or 0 if there is no match. |
| * |
| * If `ref->comps` has any flags set, they must be set in `op` as well. |
| * Likewise, if `ref->comps` has any components marked as unused, they must be |
| * marked as as unused in `ops` as well. |
| * |
| * For SWS_OP_LINEAR, `ref->linear.mask` must be a strict superset of |
| * `op->linear.mask`, but may not contain any columns explicitly ignored by |
| * `op->comps.unused`. |
| * |
| * For SWS_OP_READ, SWS_OP_WRITE, SWS_OP_SWAP_BYTES and SWS_OP_SWIZZLE, the |
| * exact type is not checked, just the size. |
| * |
| * Components set in `next.unused` are ignored when matching. If `flexible` |
| * is true, the op body is ignored - only the operation, pixel type, and |
| * component masks are checked. |
| */ |
| static int op_match(const SwsOp *op, const SwsOpEntry *entry, const SwsComps next) |
| { |
| int score = 10; |
| if (op->op != entry->op) |
| return 0; |
| |
| switch (op->op) { |
| case SWS_OP_READ: |
| case SWS_OP_WRITE: |
| case SWS_OP_SWAP_BYTES: |
| case SWS_OP_SWIZZLE: |
| /* Only the size matters for these operations */ |
| if (ff_sws_pixel_type_size(op->type) != ff_sws_pixel_type_size(entry->type)) |
| return 0; |
| break; |
| default: |
| if (op->type != entry->type) |
| return 0; |
| break; |
| } |
| |
| for (int i = 0; i < 4; i++) { |
| if (entry->unused[i]) { |
| if (op->comps.unused[i]) |
| score += 1; /* Operating on fewer components is better .. */ |
| else |
| return 0; /* .. but not too few! */ |
| } |
| } |
| |
| if (op->op == SWS_OP_CLEAR) { |
| /* Clear pattern must match exactly, regardless of `entry->flexible` */ |
| for (int i = 0; i < 4; i++) { |
| if (!next.unused[i] && entry->unused[i] != !!op->c.q4[i].den) |
| return 0; |
| } |
| } |
| |
| /* Flexible variants always match, but lower the score to prioritize more |
| * specific implementations if they exist */ |
| if (entry->flexible) |
| return score - 5; |
| |
| switch (op->op) { |
| case SWS_OP_INVALID: |
| return 0; |
| case SWS_OP_READ: |
| case SWS_OP_WRITE: |
| if (op->rw.elems != entry->rw.elems || |
| op->rw.frac != entry->rw.frac || |
| (op->rw.elems > 1 && op->rw.packed != entry->rw.packed)) |
| return 0; |
| return score; |
| case SWS_OP_SWAP_BYTES: |
| return score; |
| case SWS_OP_PACK: |
| case SWS_OP_UNPACK: |
| for (int i = 0; i < 4 && op->pack.pattern[i]; i++) { |
| if (op->pack.pattern[i] != entry->pack.pattern[i]) |
| return 0; |
| } |
| return score; |
| case SWS_OP_CLEAR: |
| for (int i = 0; i < 4; i++) { |
| if (!op->c.q4[i].den) |
| continue; |
| if (av_cmp_q(op->c.q4[i], Q(entry->clear_value)) && !next.unused[i]) |
| return 0; |
| } |
| return score; |
| case SWS_OP_LSHIFT: |
| case SWS_OP_RSHIFT: |
| av_assert1(entry->flexible); |
| return score; |
| case SWS_OP_SWIZZLE: |
| for (int i = 0; i < 4; i++) { |
| if (op->swizzle.in[i] != entry->swizzle.in[i] && !next.unused[i]) |
| return 0; |
| } |
| return score; |
| case SWS_OP_CONVERT: |
| if (op->convert.to != entry->convert.to || |
| op->convert.expand != entry->convert.expand) |
| return 0; |
| return score; |
| case SWS_OP_DITHER: |
| return op->dither.size_log2 == entry->dither_size ? score : 0; |
| case SWS_OP_MIN: |
| case SWS_OP_MAX: |
| av_assert1(entry->flexible); |
| return score; |
| case SWS_OP_LINEAR: |
| /* All required elements must be present */ |
| if (op->lin.mask & ~entry->linear_mask) |
| return 0; |
| /* To avoid operating on possibly undefined memory, filter out |
| * implementations that operate on more input components */ |
| for (int i = 0; i < 4; i++) { |
| if ((entry->linear_mask & SWS_MASK_COL(i)) && op->comps.unused[i]) |
| return 0; |
| } |
| /* Prioritize smaller implementations */ |
| score += av_popcount(SWS_MASK_ALL ^ entry->linear_mask); |
| return score; |
| case SWS_OP_SCALE: |
| return score; |
| case SWS_OP_TYPE_NB: |
| break; |
| } |
| |
| av_unreachable("Invalid operation type!"); |
| return 0; |
| } |
| |
| int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, |
| SwsOpList *ops, const int block_size, |
| SwsOpChain *chain) |
| { |
| static const SwsOp dummy = { .comps.unused = { true, true, true, true }}; |
| const SwsOp *next = ops->num_ops > 1 ? &ops->ops[1] : &dummy; |
| const unsigned cpu_flags = av_get_cpu_flags(); |
| const SwsOpEntry *best = NULL; |
| const SwsOp *op = &ops->ops[0]; |
| int ret, best_score = 0, best_cpu_flags; |
| SwsOpPriv priv = {0}; |
| |
| for (int n = 0; n < num_tables; n++) { |
| const SwsOpTable *table = tables[n]; |
| if (table->block_size && table->block_size != block_size || |
| table->cpu_flags & ~cpu_flags) |
| continue; |
| |
| for (int i = 0; table->entries[i]; i++) { |
| const SwsOpEntry *entry = table->entries[i]; |
| int score = op_match(op, entry, next->comps); |
| if (score > best_score) { |
| best_score = score; |
| best_cpu_flags = table->cpu_flags; |
| best = entry; |
| } |
| } |
| } |
| |
| if (!best) |
| return AVERROR(ENOTSUP); |
| |
| if (best->setup) { |
| ret = best->setup(op, &priv); |
| if (ret < 0) |
| return ret; |
| } |
| |
| chain->cpu_flags |= best_cpu_flags; |
| ret = ff_sws_op_chain_append(chain, best->func, best->free, &priv); |
| if (ret < 0) { |
| if (best->free) |
| best->free(priv.ptr); |
| return ret; |
| } |
| |
| ops->ops++; |
| ops->num_ops--; |
| return ops->num_ops ? AVERROR(EAGAIN) : 0; |
| } |
| |
/*
 * Convert the rational `q` to a pixel value of the given type; a rational
 * with a zero denominator yields 0.
 *
 * The cast binds to the numerator, not to the quotient: for floating point
 * types this gives a floating point division, while for integer types the
 * numerator is converted to `type` before the division takes place.
 * `q` is evaluated more than once.
 */
#define q2pixel(type, q) (!(q).den ? 0 : (type) (q).num / (q).den)
| |
| int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out) |
| { |
| out->u8[0] = op->c.u; |
| return 0; |
| } |
| |
| int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out) |
| { |
| switch (op->type) { |
| case SWS_PIXEL_U8: out->u8[0] = op->c.u; return 0; |
| case SWS_PIXEL_U16: out->u16[0] = op->c.u; return 0; |
| case SWS_PIXEL_U32: out->u32[0] = op->c.u; return 0; |
| case SWS_PIXEL_F32: out->f32[0] = op->c.u; return 0; |
| default: return AVERROR(EINVAL); |
| } |
| } |
| |
| int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out) |
| { |
| switch (op->type) { |
| case SWS_PIXEL_U8: out->u8[0] = q2pixel(uint8_t, op->c.q); return 0; |
| case SWS_PIXEL_U16: out->u16[0] = q2pixel(uint16_t, op->c.q); return 0; |
| case SWS_PIXEL_U32: out->u32[0] = q2pixel(uint32_t, op->c.q); return 0; |
| case SWS_PIXEL_F32: out->f32[0] = q2pixel(float, op->c.q); return 0; |
| default: return AVERROR(EINVAL); |
| } |
| |
| return 0; |
| } |
| |
| int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out) |
| { |
| for (int i = 0; i < 4; i++) { |
| switch (op->type) { |
| case SWS_PIXEL_U8: out->u8[i] = q2pixel(uint8_t, op->c.q4[i]); break; |
| case SWS_PIXEL_U16: out->u16[i] = q2pixel(uint16_t, op->c.q4[i]); break; |
| case SWS_PIXEL_U32: out->u32[i] = q2pixel(uint32_t, op->c.q4[i]); break; |
| case SWS_PIXEL_F32: out->f32[i] = q2pixel(float, op->c.q4[i]); break; |
| default: return AVERROR(EINVAL); |
| } |
| } |
| |
| return 0; |
| } |