| /* |
| * Copyright (c) Lynne |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "vulkan_filter.h" |
| #include "libavutil/vulkan_loader.h" |
| |
| int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, |
| AVBufferRef *frames_ref, |
| int width, int height, enum AVPixelFormat sw_format) |
| { |
| int err; |
| AVHWFramesContext *frames_ctx; |
| AVHWDeviceContext *device_ctx; |
| AVVulkanFramesContext *vk_frames; |
| AVVulkanDeviceContext *vk_dev; |
| AVBufferRef *device_ref = avctx->hw_device_ctx; |
| |
| /* Check if context is reusable as-is */ |
| if (frames_ref) { |
| int no_storage = 0; |
| FFVulkanFunctions *vk; |
| const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); |
| |
| frames_ctx = (AVHWFramesContext *)frames_ref->data; |
| device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; |
| vk_frames = frames_ctx->hwctx; |
| vk_dev = device_ctx->hwctx; |
| |
| /* Width and height mismatch */ |
| if (width != frames_ctx->width || |
| height != frames_ctx->height) |
| goto skip; |
| |
| /* Format mismatch */ |
| if (sw_format != frames_ctx->sw_format) |
| goto skip; |
| |
| /* Unusual tiling mismatch. Don't let linear through either. */ |
| if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) |
| goto skip; |
| |
| /* Usage mismatch */ |
| if ((vk_frames->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) != |
| (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) |
| goto skip; |
| |
| s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, |
| vk_dev->nb_enabled_dev_extensions); |
| err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); |
| if (err < 0) |
| return err; |
| vk = &s->vkfn; |
| |
| /* Check if the subformats can do storage */ |
| for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { |
| VkFormatProperties2 prop = { |
| .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, |
| }; |
| vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], |
| &prop); |
| |
| if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) { |
| no_storage |= !(prop.formatProperties.linearTilingFeatures & |
| VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); |
| } else { |
| no_storage |= !(prop.formatProperties.optimalTilingFeatures & |
| VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); |
| } |
| } |
| |
| /* Check if it's usable */ |
| if (no_storage) { |
| skip: |
| device_ref = frames_ctx->device_ref; |
| frames_ref = NULL; |
| } else { |
| frames_ref = av_buffer_ref(frames_ref); |
| if (!frames_ref) |
| return AVERROR(ENOMEM); |
| } |
| } |
| |
| if (!frames_ref) { |
| if (!device_ref) { |
| av_log(avctx, AV_LOG_ERROR, |
| "Vulkan filtering requires a device context!\n"); |
| return AVERROR(EINVAL); |
| } |
| |
| frames_ref = av_hwframe_ctx_alloc(device_ref); |
| |
| frames_ctx = (AVHWFramesContext *)frames_ref->data; |
| frames_ctx->format = AV_PIX_FMT_VULKAN; |
| frames_ctx->sw_format = sw_format; |
| frames_ctx->width = width; |
| frames_ctx->height = height; |
| |
| vk_frames = frames_ctx->hwctx; |
| vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; |
| vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | |
| VK_IMAGE_USAGE_STORAGE_BIT | |
| VK_IMAGE_USAGE_TRANSFER_SRC_BIT | |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT; |
| |
| err = av_hwframe_ctx_init(frames_ref); |
| if (err < 0) { |
| av_buffer_unref(&frames_ref); |
| return err; |
| } |
| |
| device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; |
| vk_dev = device_ctx->hwctx; |
| } |
| |
| s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, |
| vk_dev->nb_enabled_dev_extensions); |
| |
| /** |
| * libplacebo does not use descriptor buffers. |
| */ |
| if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && |
| strcmp(avctx->filter->name, "libplacebo")) { |
| av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that " |
| "the %s extension is supported!\n", |
| VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME); |
| av_buffer_unref(&frames_ref); |
| return AVERROR(EINVAL); |
| } |
| |
| err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); |
| if (err < 0) { |
| av_buffer_unref(&frames_ref); |
| return err; |
| } |
| |
| s->frames_ref = frames_ref; |
| s->frames = frames_ctx; |
| s->hwfc = vk_frames; |
| s->device = device_ctx; |
| s->hwctx = device_ctx->hwctx; |
| |
| err = ff_vk_load_props(s); |
| if (err < 0) |
| av_buffer_unref(&s->frames_ref); |
| |
| return err; |
| } |
| |
| int ff_vk_filter_config_input(AVFilterLink *inlink) |
| { |
| AVHWFramesContext *input_frames; |
| AVFilterContext *avctx = inlink->dst; |
| FFVulkanContext *s = inlink->dst->priv; |
| |
| if (!inlink->hw_frames_ctx) { |
| av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " |
| "hardware frames context on the input.\n"); |
| return AVERROR(EINVAL); |
| } |
| |
| input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; |
| if (input_frames->format != AV_PIX_FMT_VULKAN) |
| return AVERROR(EINVAL); |
| |
| /* Extract the device and default output format from the first input. */ |
| if (avctx->inputs[0] != inlink) |
| return 0; |
| |
| /* Save the ref, without reffing it */ |
| s->input_frames_ref = inlink->hw_frames_ctx; |
| |
| /* Defaults */ |
| s->input_format = input_frames->sw_format; |
| s->output_format = input_frames->sw_format; |
| s->output_width = inlink->w; |
| s->output_height = inlink->h; |
| |
| return 0; |
| } |
| |
| int ff_vk_filter_config_output(AVFilterLink *outlink) |
| { |
| int err; |
| FFVulkanContext *s = outlink->src->priv; |
| |
| av_buffer_unref(&outlink->hw_frames_ctx); |
| |
| err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, |
| s->output_width, s->output_height, |
| s->output_format); |
| if (err < 0) |
| return err; |
| |
| outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); |
| if (!outlink->hw_frames_ctx) |
| return AVERROR(ENOMEM); |
| |
| outlink->w = s->output_width; |
| outlink->h = s->output_height; |
| |
| return err; |
| } |
| |
| int ff_vk_filter_init(AVFilterContext *avctx) |
| { |
| FFVulkanContext *s = avctx->priv; |
| |
| s->output_format = AV_PIX_FMT_NONE; |
| |
| return 0; |
| } |
| |
| int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, |
| FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, |
| VkSampler sampler, void *push_src, size_t push_size) |
| { |
| int err = 0; |
| FFVulkanFunctions *vk = &vkctx->vkfn; |
| VkImageView in_views[AV_NUM_DATA_POINTERS]; |
| VkImageView out_views[AV_NUM_DATA_POINTERS]; |
| VkImageMemoryBarrier2 img_bar[37]; |
| int nb_img_bar = 0; |
| |
| /* Update descriptors and init the exec context */ |
| FFVkExecContext *exec = ff_vk_exec_get(e); |
| ff_vk_exec_start(vkctx, exec); |
| |
| ff_vk_exec_bind_pipeline(vkctx, exec, pl); |
| |
| if (push_src) |
| ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, |
| 0, push_size, push_src); |
| |
| if (in_f) { |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f)); |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| sampler); |
| ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_READ_BIT, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| } |
| |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_NULL_HANDLE); |
| ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_WRITE_BIT, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| |
| vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { |
| .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, |
| .pImageMemoryBarriers = img_bar, |
| .imageMemoryBarrierCount = nb_img_bar, |
| }); |
| |
| vk->CmdDispatch(exec->buf, |
| FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], |
| FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], |
| pl->wg_size[2]); |
| |
| return ff_vk_exec_submit(vkctx, exec); |
| fail: |
| ff_vk_exec_discard_deps(vkctx, exec); |
| return err; |
| } |
| |
| int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, |
| FFVulkanPipeline *pls[2], |
| AVFrame *out, AVFrame *tmp, AVFrame *in, |
| VkSampler sampler, void *push_src, size_t push_size) |
| { |
| int err = 0; |
| FFVulkanFunctions *vk = &vkctx->vkfn; |
| VkImageView in_views[AV_NUM_DATA_POINTERS]; |
| VkImageView tmp_views[AV_NUM_DATA_POINTERS]; |
| VkImageView out_views[AV_NUM_DATA_POINTERS]; |
| VkImageMemoryBarrier2 img_bar[37]; |
| int nb_img_bar = 0; |
| |
| /* Update descriptors and init the exec context */ |
| FFVkExecContext *exec = ff_vk_exec_get(e); |
| ff_vk_exec_start(vkctx, exec); |
| |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| |
| RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); |
| RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp)); |
| RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); |
| |
| ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_READ_BIT, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_WRITE_BIT, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| |
| vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { |
| .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, |
| .pImageMemoryBarriers = img_bar, |
| .imageMemoryBarrierCount = nb_img_bar, |
| }); |
| |
| for (int i = 0; i < 2; i++) { |
| FFVulkanPipeline *pl = pls[i]; |
| AVFrame *src_f = !i ? in : tmp; |
| AVFrame *dst_f = !i ? tmp : out; |
| VkImageView *src_views = !i ? in_views : tmp_views; |
| VkImageView *dst_views = !i ? tmp_views : out_views; |
| |
| ff_vk_exec_bind_pipeline(vkctx, exec, pl); |
| |
| if (push_src) |
| ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, |
| 0, push_size, push_src); |
| |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, |
| !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : |
| VK_IMAGE_LAYOUT_GENERAL, |
| sampler); |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_NULL_HANDLE); |
| |
| vk->CmdDispatch(exec->buf, |
| FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], |
| FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], |
| pl->wg_size[2]); |
| } |
| |
| return ff_vk_exec_submit(vkctx, exec); |
| fail: |
| ff_vk_exec_discard_deps(vkctx, exec); |
| return err; |
| } |
| |
| int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, |
| FFVulkanPipeline *pl, |
| AVFrame *out, AVFrame *in[], int nb_in, |
| VkSampler sampler, void *push_src, size_t push_size) |
| { |
| int err = 0; |
| FFVulkanFunctions *vk = &vkctx->vkfn; |
| VkImageView in_views[16][AV_NUM_DATA_POINTERS]; |
| VkImageView out_views[AV_NUM_DATA_POINTERS]; |
| VkImageMemoryBarrier2 img_bar[128]; |
| int nb_img_bar = 0; |
| |
| /* Update descriptors and init the exec context */ |
| FFVkExecContext *exec = ff_vk_exec_get(e); |
| ff_vk_exec_start(vkctx, exec); |
| |
| /* Inputs */ |
| for (int i = 0; i < nb_in; i++) { |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i])); |
| |
| ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_READ_BIT, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| } |
| |
| /* Output */ |
| RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); |
| RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); |
| ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, |
| VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, |
| VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, |
| VK_ACCESS_SHADER_WRITE_BIT, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_QUEUE_FAMILY_IGNORED); |
| |
| vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { |
| .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, |
| .pImageMemoryBarriers = img_bar, |
| .imageMemoryBarrierCount = nb_img_bar, |
| }); |
| |
| ff_vk_exec_bind_pipeline(vkctx, exec, pl); |
| |
| if (push_src) |
| ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, |
| 0, push_size, push_src); |
| |
| for (int i = 0; i < nb_in; i++) |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| sampler); |
| |
| ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, |
| VK_IMAGE_LAYOUT_GENERAL, |
| VK_NULL_HANDLE); |
| |
| vk->CmdDispatch(exec->buf, |
| FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], |
| FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], |
| pl->wg_size[2]); |
| |
| return ff_vk_exec_submit(vkctx, exec); |
| fail: |
| ff_vk_exec_discard_deps(vkctx, exec); |
| return err; |
| } |