| /* |
| * Copyright (c) 2018 Sergey Lavrushkin |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| /** |
| * @file |
| * DNN inference engine interface. |
| */ |
| |
| #ifndef AVFILTER_DNN_INTERFACE_H |
| #define AVFILTER_DNN_INTERFACE_H |
| |
| #include <stdint.h> |
| #include "libavutil/frame.h" |
| #include "avfilter.h" |
| |
/** Generic DNN error code, built as the four-character tag 'D','N','N','!' via FFERRTAG. */
#define DNN_GENERIC_ERROR FFERRTAG('D', 'N', 'N', '!')
| |
/**
 * Identifies the inference backend. Numbering starts at 1, so 0 is not a
 * valid backend value.
 */
typedef enum {
    DNN_TF = 1, // NOTE(review): presumably TensorFlow (cf. CONFIG_LIBTENSORFLOW) - confirm
    DNN_OV = 2, // NOTE(review): presumably OpenVINO (cf. CONFIG_LIBOPENVINO) - confirm
    DNN_TH = 3, // NOTE(review): presumably LibTorch (cf. CONFIG_LIBTORCH) - confirm
} DNNBackendType;
| |
/**
 * Element type of the data described by a DNNData.
 *
 * NOTE(review): the explicit values 1 and 4 look deliberate (possibly
 * mirroring a backend's type codes) - confirm before renumbering.
 */
typedef enum {
    DNN_FLOAT = 1,
    DNN_UINT8 = 4,
} DNNDataType;
| |
/** Order of the color channels; DCO_NONE (0) means no order is specified. */
typedef enum {
    DCO_NONE = 0,
    DCO_BGR  = 1,
    DCO_RGB  = 2,
} DNNColorOrder;
| |
/** Status reported when retrieving an inference result (see DNNModule.get_result). */
typedef enum {
    DAST_FAIL        = 0, // an error occurred
    DAST_EMPTY_QUEUE = 1, // there is no further inference result to fetch
    DAST_NOT_READY   = 2, // queued inferences have not finished yet
    DAST_SUCCESS     = 3  // a result frame was obtained successfully
} DNNAsyncStatusType;
| |
/** Kind of work a model is used for. */
typedef enum {
    DFT_NONE               = 0,
    DFT_PROCESS_FRAME      = 1, // the whole frame is processed
    DFT_ANALYTICS_DETECT   = 2, // detection runs on the whole frame
    DFT_ANALYTICS_CLASSIFY = 3, // classification runs once per bounding box
} DNNFunctionType;
| |
/** Axis ordering of 4-D data; DL_NONE (0) means no layout is specified. */
typedef enum {
    DL_NONE = 0,
    DL_NCHW = 1,
    DL_NHWC = 2,
} DNNLayout;
| |
| typedef struct DNNData{ |
| void *data; |
| int dims[4]; |
| // dt and order together decide the color format |
| DNNDataType dt; |
| DNNColorOrder order; |
| DNNLayout layout; |
| float scale; |
| float mean; |
| } DNNData; |
| |
| typedef struct DNNExecBaseParams { |
| const char *input_name; |
| const char **output_names; |
| uint32_t nb_output; |
| AVFrame *in_frame; |
| AVFrame *out_frame; |
| } DNNExecBaseParams; |
| |
| typedef struct DNNExecClassificationParams { |
| DNNExecBaseParams base; |
| const char *target; |
| } DNNExecClassificationParams; |
| |
| typedef int (*FramePrePostProc)(AVFrame *frame, DNNData *model, AVFilterContext *filter_ctx); |
| typedef int (*DetectPostProc)(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx); |
| typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx); |
| |
| typedef struct DNNModel{ |
| // Stores model that can be different for different backends. |
| void *model; |
| // Stores FilterContext used for the interaction between AVFrame and DNNData |
| AVFilterContext *filter_ctx; |
| // Stores function type of the model |
| DNNFunctionType func_type; |
| // Gets model input information |
| // Just reuse struct DNNData here, actually the DNNData.data field is not needed. |
| int (*get_input)(void *model, DNNData *input, const char *input_name); |
| // Gets model output width/height with given input w/h |
| int (*get_output)(void *model, const char *input_name, int input_width, int input_height, |
| const char *output_name, int *output_width, int *output_height); |
| // set the pre process to transfer data from AVFrame to DNNData |
| // the default implementation within DNN is used if it is not provided by the filter |
| FramePrePostProc frame_pre_proc; |
| // set the post process to transfer data from DNNData to AVFrame |
| // the default implementation within DNN is used if it is not provided by the filter |
| FramePrePostProc frame_post_proc; |
| // set the post process to interpret detect result from DNNData |
| DetectPostProc detect_post_proc; |
| // set the post process to interpret classify result from DNNData |
| ClassifyPostProc classify_post_proc; |
| } DNNModel; |
| |
| typedef struct TFOptions{ |
| const AVClass *clazz; |
| |
| char *sess_config; |
| } TFOptions; |
| |
| typedef struct OVOptions { |
| const AVClass *clazz; |
| |
| int batch_size; |
| int input_resizable; |
| DNNLayout layout; |
| float scale; |
| float mean; |
| } OVOptions; |
| |
| typedef struct THOptions { |
| const AVClass *clazz; |
| int optimize; |
| } THOptions; |
| |
/* Forward declaration: DnnContext refers to DNNModule before struct DNNModule is defined. */
typedef struct DNNModule DNNModule;
| |
| typedef struct DnnContext { |
| const AVClass *clazz; |
| |
| DNNModel *model; |
| |
| char *model_filename; |
| DNNBackendType backend_type; |
| char *model_inputname; |
| char *model_outputnames_string; |
| char *backend_options; |
| int async; |
| |
| char **model_outputnames; |
| uint32_t nb_outputs; |
| const DNNModule *dnn_module; |
| |
| int nireq; |
| char *device; |
| |
| #if CONFIG_LIBTENSORFLOW |
| TFOptions tf_option; |
| #endif |
| |
| #if CONFIG_LIBOPENVINO |
| OVOptions ov_option; |
| #endif |
| #if CONFIG_LIBTORCH |
| THOptions torch_option; |
| #endif |
| } DnnContext; |
| |
| // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. |
| struct DNNModule { |
| const AVClass clazz; |
| // Loads model and parameters from given file. Returns NULL if it is not possible. |
| DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx); |
| // Executes model with specified input and output. Returns the error code otherwise. |
| int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params); |
| // Retrieve inference result. |
| DNNAsyncStatusType (*get_result)(const DNNModel *model, AVFrame **in, AVFrame **out); |
| // Flush all the pending tasks. |
| int (*flush)(const DNNModel *model); |
| // Frees memory allocated for model. |
| void (*free_model)(DNNModel **model); |
| }; |
| |
| // Initializes DNNModule depending on chosen backend. |
| const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx); |
| |
| void ff_dnn_init_child_class(DnnContext *ctx); |
| void *ff_dnn_child_next(DnnContext *obj, void *prev); |
| const AVClass *ff_dnn_child_class_iterate(void **iter); |
| |
| static inline int dnn_get_width_idx_by_layout(DNNLayout layout) |
| { |
| return layout == DL_NHWC ? 2 : 3; |
| } |
| |
| static inline int dnn_get_height_idx_by_layout(DNNLayout layout) |
| { |
| return layout == DL_NHWC ? 1 : 2; |
| } |
| |
| static inline int dnn_get_channel_idx_by_layout(DNNLayout layout) |
| { |
| return layout == DL_NHWC ? 3 : 1; |
| } |
| |
| #endif |