| /** |
| * Copyright (C) 2025 Niklas Haas |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "libavutil/avassert.h" |
| #include "libavutil/bswap.h" |
| #include "libavutil/rational.h" |
| |
| #include "ops.h" |
| #include "ops_internal.h" |
| |
/* Evaluate x once and store the result in the caller's local `ret`; if that
 * result is negative, return it from the enclosing function immediately.
 * The enclosing function must declare a variable named `ret`. */
#define RET(x)                                                                 \
    do {                                                                       \
        ret = (x);                                                             \
        if (ret < 0)                                                           \
            return ret;                                                        \
    } while (0)
| |
| /* Returns true for operations that are independent per channel. These can |
| * usually be commuted freely other such operations. */ |
| static bool op_type_is_independent(SwsOpType op) |
| { |
| switch (op) { |
| case SWS_OP_SWAP_BYTES: |
| case SWS_OP_LSHIFT: |
| case SWS_OP_RSHIFT: |
| case SWS_OP_CONVERT: |
| case SWS_OP_DITHER: |
| case SWS_OP_MIN: |
| case SWS_OP_MAX: |
| case SWS_OP_SCALE: |
| return true; |
| case SWS_OP_INVALID: |
| case SWS_OP_READ: |
| case SWS_OP_WRITE: |
| case SWS_OP_SWIZZLE: |
| case SWS_OP_CLEAR: |
| case SWS_OP_LINEAR: |
| case SWS_OP_PACK: |
| case SWS_OP_UNPACK: |
| return false; |
| case SWS_OP_TYPE_NB: |
| break; |
| } |
| |
| av_unreachable("Invalid operation type!"); |
| return false; |
| } |
| |
| /* merge_comp_flags() forms a monoid with flags_identity as the null element */ |
| static const unsigned flags_identity = SWS_COMP_ZERO | SWS_COMP_EXACT; |
| static unsigned merge_comp_flags(unsigned a, unsigned b) |
| { |
| const unsigned flags_or = SWS_COMP_GARBAGE; |
| const unsigned flags_and = SWS_COMP_ZERO | SWS_COMP_EXACT; |
| return ((a & b) & flags_and) | ((a | b) & flags_or); |
| } |
| |
| /* Infer + propagate known information about components */ |
| void ff_sws_op_list_update_comps(SwsOpList *ops) |
| { |
| SwsComps next = { .unused = {true, true, true, true} }; |
| SwsComps prev = { .flags = { |
| SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, SWS_COMP_GARBAGE, |
| }}; |
| |
| /* Forwards pass, propagates knowledge about the incoming pixel values */ |
| for (int n = 0; n < ops->num_ops; n++) { |
| SwsOp *op = &ops->ops[n]; |
| |
| /* Prefill min/max values automatically; may have to be fixed in |
| * special cases */ |
| memcpy(op->comps.min, prev.min, sizeof(prev.min)); |
| memcpy(op->comps.max, prev.max, sizeof(prev.max)); |
| |
| if (op->op != SWS_OP_SWAP_BYTES) { |
| ff_sws_apply_op_q(op, op->comps.min); |
| ff_sws_apply_op_q(op, op->comps.max); |
| } |
| |
| switch (op->op) { |
| case SWS_OP_READ: |
| for (int i = 0; i < op->rw.elems; i++) { |
| if (ff_sws_pixel_type_is_int(op->type)) { |
| int bits = 8 * ff_sws_pixel_type_size(op->type); |
| if (!op->rw.packed && ops->src.desc) { |
| /* Use legal value range from pixdesc if available; |
| * we don't need to do this for packed formats because |
| * non-byte-aligned packed formats will necessarily go |
| * through SWS_OP_UNPACK anyway */ |
| for (int c = 0; c < 4; c++) { |
| if (ops->src.desc->comp[c].plane == i) { |
| bits = ops->src.desc->comp[c].depth; |
| break; |
| } |
| } |
| } |
| |
| op->comps.flags[i] = SWS_COMP_EXACT; |
| op->comps.min[i] = Q(0); |
| op->comps.max[i] = Q((1ULL << bits) - 1); |
| } |
| } |
| for (int i = op->rw.elems; i < 4; i++) |
| op->comps.flags[i] = prev.flags[i]; |
| break; |
| case SWS_OP_WRITE: |
| for (int i = 0; i < op->rw.elems; i++) |
| av_assert1(!(prev.flags[i] & SWS_COMP_GARBAGE)); |
| /* fall through */ |
| case SWS_OP_SWAP_BYTES: |
| case SWS_OP_LSHIFT: |
| case SWS_OP_RSHIFT: |
| case SWS_OP_MIN: |
| case SWS_OP_MAX: |
| /* Linearly propagate flags per component */ |
| for (int i = 0; i < 4; i++) |
| op->comps.flags[i] = prev.flags[i]; |
| break; |
| case SWS_OP_DITHER: |
| /* Strip zero flag because of the nonzero dithering offset */ |
| for (int i = 0; i < 4; i++) |
| op->comps.flags[i] = prev.flags[i] & ~SWS_COMP_ZERO; |
| break; |
| case SWS_OP_UNPACK: |
| for (int i = 0; i < 4; i++) { |
| if (op->pack.pattern[i]) |
| op->comps.flags[i] = prev.flags[0]; |
| else |
| op->comps.flags[i] = SWS_COMP_GARBAGE; |
| } |
| break; |
| case SWS_OP_PACK: { |
| unsigned flags = flags_identity; |
| for (int i = 0; i < 4; i++) { |
| if (op->pack.pattern[i]) |
| flags = merge_comp_flags(flags, prev.flags[i]); |
| if (i > 0) /* clear remaining comps for sanity */ |
| op->comps.flags[i] = SWS_COMP_GARBAGE; |
| } |
| op->comps.flags[0] = flags; |
| break; |
| } |
| case SWS_OP_CLEAR: |
| for (int i = 0; i < 4; i++) { |
| if (op->c.q4[i].den) { |
| if (op->c.q4[i].num == 0) { |
| op->comps.flags[i] = SWS_COMP_ZERO | SWS_COMP_EXACT; |
| } else if (op->c.q4[i].den == 1) { |
| op->comps.flags[i] = SWS_COMP_EXACT; |
| } |
| } else { |
| op->comps.flags[i] = prev.flags[i]; |
| } |
| } |
| break; |
| case SWS_OP_SWIZZLE: |
| for (int i = 0; i < 4; i++) |
| op->comps.flags[i] = prev.flags[op->swizzle.in[i]]; |
| break; |
| case SWS_OP_CONVERT: |
| for (int i = 0; i < 4; i++) { |
| op->comps.flags[i] = prev.flags[i]; |
| if (ff_sws_pixel_type_is_int(op->convert.to)) |
| op->comps.flags[i] |= SWS_COMP_EXACT; |
| } |
| break; |
| case SWS_OP_LINEAR: |
| for (int i = 0; i < 4; i++) { |
| unsigned flags = flags_identity; |
| AVRational min = Q(0), max = Q(0); |
| for (int j = 0; j < 4; j++) { |
| const AVRational k = op->lin.m[i][j]; |
| AVRational mink = av_mul_q(prev.min[j], k); |
| AVRational maxk = av_mul_q(prev.max[j], k); |
| if (k.num) { |
| flags = merge_comp_flags(flags, prev.flags[j]); |
| if (k.den != 1) /* fractional coefficient */ |
| flags &= ~SWS_COMP_EXACT; |
| if (k.num < 0) |
| FFSWAP(AVRational, mink, maxk); |
| min = av_add_q(min, mink); |
| max = av_add_q(max, maxk); |
| } |
| } |
| if (op->lin.m[i][4].num) { /* nonzero offset */ |
| flags &= ~SWS_COMP_ZERO; |
| if (op->lin.m[i][4].den != 1) /* fractional offset */ |
| flags &= ~SWS_COMP_EXACT; |
| min = av_add_q(min, op->lin.m[i][4]); |
| max = av_add_q(max, op->lin.m[i][4]); |
| } |
| op->comps.flags[i] = flags; |
| op->comps.min[i] = min; |
| op->comps.max[i] = max; |
| } |
| break; |
| case SWS_OP_SCALE: |
| for (int i = 0; i < 4; i++) { |
| op->comps.flags[i] = prev.flags[i]; |
| if (op->c.q.den != 1) /* fractional scale */ |
| op->comps.flags[i] &= ~SWS_COMP_EXACT; |
| if (op->c.q.num < 0) |
| FFSWAP(AVRational, op->comps.min[i], op->comps.max[i]); |
| } |
| break; |
| |
| case SWS_OP_INVALID: |
| case SWS_OP_TYPE_NB: |
| av_unreachable("Invalid operation type!"); |
| } |
| |
| prev = op->comps; |
| } |
| |
| /* Backwards pass, solves for component dependencies */ |
| for (int n = ops->num_ops - 1; n >= 0; n--) { |
| SwsOp *op = &ops->ops[n]; |
| |
| switch (op->op) { |
| case SWS_OP_READ: |
| case SWS_OP_WRITE: |
| for (int i = 0; i < op->rw.elems; i++) |
| op->comps.unused[i] = op->op == SWS_OP_READ; |
| for (int i = op->rw.elems; i < 4; i++) |
| op->comps.unused[i] = next.unused[i]; |
| break; |
| case SWS_OP_SWAP_BYTES: |
| case SWS_OP_LSHIFT: |
| case SWS_OP_RSHIFT: |
| case SWS_OP_CONVERT: |
| case SWS_OP_DITHER: |
| case SWS_OP_MIN: |
| case SWS_OP_MAX: |
| case SWS_OP_SCALE: |
| for (int i = 0; i < 4; i++) |
| op->comps.unused[i] = next.unused[i]; |
| break; |
| case SWS_OP_UNPACK: { |
| bool unused = true; |
| for (int i = 0; i < 4; i++) { |
| if (op->pack.pattern[i]) |
| unused &= next.unused[i]; |
| op->comps.unused[i] = i > 0; |
| } |
| op->comps.unused[0] = unused; |
| break; |
| } |
| case SWS_OP_PACK: |
| for (int i = 0; i < 4; i++) { |
| if (op->pack.pattern[i]) |
| op->comps.unused[i] = next.unused[0]; |
| else |
| op->comps.unused[i] = true; |
| } |
| break; |
| case SWS_OP_CLEAR: |
| for (int i = 0; i < 4; i++) { |
| if (op->c.q4[i].den) |
| op->comps.unused[i] = true; |
| else |
| op->comps.unused[i] = next.unused[i]; |
| } |
| break; |
| case SWS_OP_SWIZZLE: { |
| bool unused[4] = { true, true, true, true }; |
| for (int i = 0; i < 4; i++) |
| unused[op->swizzle.in[i]] &= next.unused[i]; |
| for (int i = 0; i < 4; i++) |
| op->comps.unused[i] = unused[i]; |
| break; |
| } |
| case SWS_OP_LINEAR: |
| for (int j = 0; j < 4; j++) { |
| bool unused = true; |
| for (int i = 0; i < 4; i++) { |
| if (op->lin.m[i][j].num) |
| unused &= next.unused[i]; |
| } |
| op->comps.unused[j] = unused; |
| } |
| break; |
| } |
| |
| next = op->comps; |
| } |
| } |
| |
/* Returns log2(x) when x is an exact power of two, and 0 in every other
 * case (including x <= 0). Note that x == 1 also yields 0. */
static int exact_log2(const int x)
{
    if (x > 0) {
        const int msb = av_log2(x);
        if (x == (1 << msb))
            return msb;
    }

    return 0;
}
| |
| static int exact_log2_q(const AVRational x) |
| { |
| if (x.den == 1) |
| return exact_log2(x.num); |
| else if (x.num == 1) |
| return -exact_log2(x.den); |
| else |
| return 0; |
| } |
| |
| /** |
| * If a linear operation can be reduced to a scalar multiplication, returns |
| * the corresponding scaling factor, or 0 otherwise. |
| */ |
| static bool extract_scalar(const SwsLinearOp *c, SwsComps prev, SwsComps next, |
| SwsConst *out_scale) |
| { |
| SwsConst scale = {0}; |
| |
| /* There are components not on the main diagonal */ |
| if (c->mask & ~SWS_MASK_DIAG4) |
| return false; |
| |
| for (int i = 0; i < 4; i++) { |
| const AVRational s = c->m[i][i]; |
| if ((prev.flags[i] & SWS_COMP_ZERO) || next.unused[i]) |
| continue; |
| if (scale.q.den && av_cmp_q(s, scale.q)) |
| return false; |
| scale.q = s; |
| } |
| |
| if (scale.q.den) |
| *out_scale = scale; |
| return scale.q.den; |
| } |
| |
| /* Extracts an integer clear operation (subset) from the given linear op. */ |
| static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev, |
| SwsConst *out_clear) |
| { |
| SwsConst clear = {0}; |
| bool ret = false; |
| |
| for (int i = 0; i < 4; i++) { |
| bool const_row = c->m[i][4].den == 1; /* offset is integer */ |
| for (int j = 0; j < 4; j++) { |
| const_row &= c->m[i][j].num == 0 || /* scalar is zero */ |
| (prev.flags[j] & SWS_COMP_ZERO); /* input is zero */ |
| } |
| if (const_row && (c->mask & SWS_MASK_ROW(i))) { |
| clear.q4[i] = c->m[i][4]; |
| for (int j = 0; j < 5; j++) |
| c->m[i][j] = Q(i == j); |
| c->mask &= ~SWS_MASK_ROW(i); |
| ret = true; |
| } |
| } |
| |
| if (ret) |
| *out_clear = clear; |
| return ret; |
| } |
| |
| /* Unswizzle a linear operation by aligning single-input rows with |
| * their corresponding diagonal */ |
| static bool extract_swizzle(SwsLinearOp *op, SwsComps prev, SwsSwizzleOp *out_swiz) |
| { |
| SwsSwizzleOp swiz = SWS_SWIZZLE(0, 1, 2, 3); |
| SwsLinearOp c = *op; |
| |
| for (int i = 0; i < 4; i++) { |
| int idx = -1; |
| for (int j = 0; j < 4; j++) { |
| if (!c.m[i][j].num || (prev.flags[j] & SWS_COMP_ZERO)) |
| continue; |
| if (idx >= 0) |
| return false; /* multiple inputs */ |
| idx = j; |
| } |
| |
| if (idx >= 0 && idx != i) { |
| /* Move coefficient to the diagonal */ |
| c.m[i][i] = c.m[i][idx]; |
| c.m[i][idx] = Q(0); |
| swiz.in[i] = idx; |
| } |
| } |
| |
| if (swiz.mask == SWS_SWIZZLE(0, 1, 2, 3).mask) |
| return false; /* no swizzle was identified */ |
| |
| c.mask = ff_sws_linear_mask(c); |
| *out_swiz = swiz; |
| *op = c; |
| return true; |
| } |
| |
| int ff_sws_op_list_optimize(SwsOpList *ops) |
| { |
| int ret; |
| |
| retry: |
| ff_sws_op_list_update_comps(ops); |
| |
| for (int n = 0; n < ops->num_ops;) { |
| SwsOp dummy = {0}; |
| SwsOp *op = &ops->ops[n]; |
| SwsOp *prev = n ? &ops->ops[n - 1] : &dummy; |
| SwsOp *next = n + 1 < ops->num_ops ? &ops->ops[n + 1] : &dummy; |
| |
| /* common helper variable */ |
| bool noop = true; |
| |
| switch (op->op) { |
| case SWS_OP_READ: |
| /* Optimized further into refcopy / memcpy */ |
| if (next->op == SWS_OP_WRITE && |
| next->rw.elems == op->rw.elems && |
| next->rw.packed == op->rw.packed && |
| next->rw.frac == op->rw.frac) |
| { |
| ff_sws_op_list_remove_at(ops, n, 2); |
| av_assert1(ops->num_ops == 0); |
| return 0; |
| } |
| |
| /* Skip reading extra unneeded components */ |
| if (!op->rw.packed) { |
| int needed = op->rw.elems; |
| while (needed > 0 && next->comps.unused[needed - 1]) |
| needed--; |
| if (op->rw.elems != needed) { |
| op->rw.elems = needed; |
| goto retry; |
| } |
| } |
| break; |
| |
| case SWS_OP_SWAP_BYTES: |
| /* Redundant (double) swap */ |
| if (next->op == SWS_OP_SWAP_BYTES) { |
| ff_sws_op_list_remove_at(ops, n, 2); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_UNPACK: |
| /* Redundant unpack+pack */ |
| if (next->op == SWS_OP_PACK && next->type == op->type && |
| next->pack.pattern[0] == op->pack.pattern[0] && |
| next->pack.pattern[1] == op->pack.pattern[1] && |
| next->pack.pattern[2] == op->pack.pattern[2] && |
| next->pack.pattern[3] == op->pack.pattern[3]) |
| { |
| ff_sws_op_list_remove_at(ops, n, 2); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_LSHIFT: |
| case SWS_OP_RSHIFT: |
| /* Two shifts in the same direction */ |
| if (next->op == op->op) { |
| op->c.u += next->c.u; |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| |
| /* No-op shift */ |
| if (!op->c.u) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_CLEAR: |
| for (int i = 0; i < 4; i++) { |
| if (!op->c.q4[i].den) |
| continue; |
| |
| if ((prev->comps.flags[i] & SWS_COMP_ZERO) && |
| !(prev->comps.flags[i] & SWS_COMP_GARBAGE) && |
| op->c.q4[i].num == 0) |
| { |
| /* Redundant clear-to-zero of zero component */ |
| op->c.q4[i].den = 0; |
| } else if (next->comps.unused[i]) { |
| /* Unnecessary clear of unused component */ |
| op->c.q4[i] = (AVRational) {0, 0}; |
| } else if (op->c.q4[i].den) { |
| noop = false; |
| } |
| } |
| |
| if (noop) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| |
| /* Transitive clear */ |
| if (next->op == SWS_OP_CLEAR) { |
| for (int i = 0; i < 4; i++) { |
| if (next->c.q4[i].den) |
| op->c.q4[i] = next->c.q4[i]; |
| } |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| |
| /* Prefer to clear as late as possible, to avoid doing |
| * redundant work */ |
| if ((op_type_is_independent(next->op) && next->op != SWS_OP_SWAP_BYTES) || |
| next->op == SWS_OP_SWIZZLE) |
| { |
| if (next->op == SWS_OP_CONVERT) |
| op->type = next->convert.to; |
| ff_sws_apply_op_q(next, op->c.q4); |
| FFSWAP(SwsOp, *op, *next); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_SWIZZLE: { |
| bool seen[4] = {0}; |
| bool has_duplicates = false; |
| for (int i = 0; i < 4; i++) { |
| if (next->comps.unused[i]) |
| continue; |
| if (op->swizzle.in[i] != i) |
| noop = false; |
| has_duplicates |= seen[op->swizzle.in[i]]; |
| seen[op->swizzle.in[i]] = true; |
| } |
| |
| /* Identity swizzle */ |
| if (noop) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| |
| /* Transitive swizzle */ |
| if (next->op == SWS_OP_SWIZZLE) { |
| const SwsSwizzleOp orig = op->swizzle; |
| for (int i = 0; i < 4; i++) |
| op->swizzle.in[i] = orig.in[next->swizzle.in[i]]; |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| |
| /* Try to push swizzles with duplicates towards the output */ |
| if (has_duplicates && op_type_is_independent(next->op)) { |
| if (next->op == SWS_OP_CONVERT) |
| op->type = next->convert.to; |
| if (next->op == SWS_OP_MIN || next->op == SWS_OP_MAX) { |
| /* Un-swizzle the next operation */ |
| const SwsConst c = next->c; |
| for (int i = 0; i < 4; i++) { |
| if (!next->comps.unused[i]) |
| next->c.q4[op->swizzle.in[i]] = c.q4[i]; |
| } |
| } |
| FFSWAP(SwsOp, *op, *next); |
| goto retry; |
| } |
| |
| /* Move swizzle out of the way between two converts so that |
| * they may be merged */ |
| if (prev->op == SWS_OP_CONVERT && next->op == SWS_OP_CONVERT) { |
| op->type = next->convert.to; |
| FFSWAP(SwsOp, *op, *next); |
| goto retry; |
| } |
| break; |
| } |
| |
| case SWS_OP_CONVERT: |
| /* No-op conversion */ |
| if (op->type == op->convert.to) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| |
| /* Transitive conversion */ |
| if (next->op == SWS_OP_CONVERT && |
| op->convert.expand == next->convert.expand) |
| { |
| av_assert1(op->convert.to == next->type); |
| op->convert.to = next->convert.to; |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| |
| /* Conversion followed by integer expansion */ |
| if (next->op == SWS_OP_SCALE && !op->convert.expand && |
| !av_cmp_q(next->c.q, ff_sws_pixel_expand(op->type, op->convert.to))) |
| { |
| op->convert.expand = true; |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_MIN: |
| for (int i = 0; i < 4; i++) { |
| if (next->comps.unused[i] || !op->c.q4[i].den) |
| continue; |
| if (av_cmp_q(op->c.q4[i], prev->comps.max[i]) < 0) |
| noop = false; |
| } |
| |
| if (noop) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_MAX: |
| for (int i = 0; i < 4; i++) { |
| if (next->comps.unused[i] || !op->c.q4[i].den) |
| continue; |
| if (av_cmp_q(prev->comps.min[i], op->c.q4[i]) < 0) |
| noop = false; |
| } |
| |
| if (noop) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_DITHER: |
| for (int i = 0; i < 4; i++) { |
| noop &= (prev->comps.flags[i] & SWS_COMP_EXACT) || |
| next->comps.unused[i]; |
| } |
| |
| if (noop) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| break; |
| |
| case SWS_OP_LINEAR: { |
| SwsSwizzleOp swizzle; |
| SwsConst c; |
| |
| /* No-op (identity) linear operation */ |
| if (!op->lin.mask) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| |
| if (next->op == SWS_OP_LINEAR) { |
| /* 5x5 matrix multiplication after appending [ 0 0 0 0 1 ] */ |
| const SwsLinearOp m1 = op->lin; |
| const SwsLinearOp m2 = next->lin; |
| for (int i = 0; i < 4; i++) { |
| for (int j = 0; j < 5; j++) { |
| AVRational sum = Q(0); |
| for (int k = 0; k < 4; k++) |
| sum = av_add_q(sum, av_mul_q(m2.m[i][k], m1.m[k][j])); |
| if (j == 4) /* m1.m[4][j] == 1 */ |
| sum = av_add_q(sum, m2.m[i][4]); |
| op->lin.m[i][j] = sum; |
| } |
| } |
| op->lin.mask = ff_sws_linear_mask(op->lin); |
| ff_sws_op_list_remove_at(ops, n + 1, 1); |
| goto retry; |
| } |
| |
| /* Optimize away zero columns */ |
| for (int j = 0; j < 4; j++) { |
| const uint32_t col = SWS_MASK_COL(j); |
| if (!(prev->comps.flags[j] & SWS_COMP_ZERO) || !(op->lin.mask & col)) |
| continue; |
| for (int i = 0; i < 4; i++) |
| op->lin.m[i][j] = Q(i == j); |
| op->lin.mask &= ~col; |
| goto retry; |
| } |
| |
| /* Optimize away unused rows */ |
| for (int i = 0; i < 4; i++) { |
| const uint32_t row = SWS_MASK_ROW(i); |
| if (!next->comps.unused[i] || !(op->lin.mask & row)) |
| continue; |
| for (int j = 0; j < 5; j++) |
| op->lin.m[i][j] = Q(i == j); |
| op->lin.mask &= ~row; |
| goto retry; |
| } |
| |
| /* Convert constant rows to explicit clear instruction */ |
| if (extract_constant_rows(&op->lin, prev->comps, &c)) { |
| RET(ff_sws_op_list_insert_at(ops, n + 1, &(SwsOp) { |
| .op = SWS_OP_CLEAR, |
| .type = op->type, |
| .comps = op->comps, |
| .c = c, |
| })); |
| goto retry; |
| } |
| |
| /* Multiplication by scalar constant */ |
| if (extract_scalar(&op->lin, prev->comps, next->comps, &c)) { |
| op->op = SWS_OP_SCALE; |
| op->c = c; |
| goto retry; |
| } |
| |
| /* Swizzle by fixed pattern */ |
| if (extract_swizzle(&op->lin, prev->comps, &swizzle)) { |
| RET(ff_sws_op_list_insert_at(ops, n, &(SwsOp) { |
| .op = SWS_OP_SWIZZLE, |
| .type = op->type, |
| .swizzle = swizzle, |
| })); |
| goto retry; |
| } |
| break; |
| } |
| |
| case SWS_OP_SCALE: { |
| const int factor2 = exact_log2_q(op->c.q); |
| |
| /* No-op scaling */ |
| if (op->c.q.num == 1 && op->c.q.den == 1) { |
| ff_sws_op_list_remove_at(ops, n, 1); |
| goto retry; |
| } |
| |
| /* Scaling by integer before conversion to int */ |
| if (op->c.q.den == 1 && |
| next->op == SWS_OP_CONVERT && |
| ff_sws_pixel_type_is_int(next->convert.to)) |
| { |
| op->type = next->convert.to; |
| FFSWAP(SwsOp, *op, *next); |
| goto retry; |
| } |
| |
| /* Scaling by exact power of two */ |
| if (factor2 && ff_sws_pixel_type_is_int(op->type)) { |
| op->op = factor2 > 0 ? SWS_OP_LSHIFT : SWS_OP_RSHIFT; |
| op->c.u = FFABS(factor2); |
| goto retry; |
| } |
| break; |
| } |
| } |
| |
| /* No optimization triggered, move on to next operation */ |
| n++; |
| } |
| |
| return 0; |
| } |
| |
| int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[], |
| int size, uint8_t clear_val, |
| int *read_bytes, int *write_bytes) |
| { |
| const SwsOp read = ops->ops[0]; |
| const int read_size = ff_sws_pixel_type_size(read.type); |
| uint32_t mask[4] = {0}; |
| |
| if (!ops->num_ops || read.op != SWS_OP_READ) |
| return AVERROR(EINVAL); |
| if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1)) |
| return AVERROR(ENOTSUP); |
| |
| for (int i = 0; i < read.rw.elems; i++) |
| mask[i] = 0x01010101 * i * read_size + 0x03020100; |
| |
| for (int opidx = 1; opidx < ops->num_ops; opidx++) { |
| const SwsOp *op = &ops->ops[opidx]; |
| switch (op->op) { |
| case SWS_OP_SWIZZLE: { |
| uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] }; |
| for (int i = 0; i < 4; i++) |
| mask[i] = orig[op->swizzle.in[i]]; |
| break; |
| } |
| |
| case SWS_OP_SWAP_BYTES: |
| for (int i = 0; i < 4; i++) { |
| switch (ff_sws_pixel_type_size(op->type)) { |
| case 2: mask[i] = av_bswap16(mask[i]); break; |
| case 4: mask[i] = av_bswap32(mask[i]); break; |
| } |
| } |
| break; |
| |
| case SWS_OP_CLEAR: |
| for (int i = 0; i < 4; i++) { |
| if (!op->c.q4[i].den) |
| continue; |
| if (op->c.q4[i].num != 0 || !clear_val) |
| return AVERROR(ENOTSUP); |
| mask[i] = 0x1010101ul * clear_val; |
| } |
| break; |
| |
| case SWS_OP_CONVERT: { |
| if (!op->convert.expand) |
| return AVERROR(ENOTSUP); |
| for (int i = 0; i < 4; i++) { |
| switch (ff_sws_pixel_type_size(op->type)) { |
| case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF); break; |
| case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break; |
| } |
| } |
| break; |
| } |
| |
| case SWS_OP_WRITE: { |
| if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1)) |
| return AVERROR(ENOTSUP); |
| |
| /* Initialize to no-op */ |
| memset(shuffle, clear_val, size); |
| |
| const int write_size = ff_sws_pixel_type_size(op->type); |
| const int read_chunk = read.rw.elems * read_size; |
| const int write_chunk = op->rw.elems * write_size; |
| const int num_groups = size / FFMAX(read_chunk, write_chunk); |
| for (int n = 0; n < num_groups; n++) { |
| const int base_in = n * read_chunk; |
| const int base_out = n * write_chunk; |
| for (int i = 0; i < op->rw.elems; i++) { |
| const int offset = base_out + i * write_size; |
| for (int b = 0; b < write_size; b++) { |
| const uint8_t idx = mask[i] >> (b * 8); |
| if (idx != clear_val) |
| shuffle[offset + b] = base_in + idx; |
| } |
| } |
| } |
| |
| *read_bytes = num_groups * read_chunk; |
| *write_bytes = num_groups * write_chunk; |
| return num_groups; |
| } |
| |
| default: |
| return AVERROR(ENOTSUP); |
| } |
| } |
| |
| return AVERROR(EINVAL); |
| } |