| /* |
| * Copyright (c) 2020 |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #include "dnn_io_proc.h" |
| #include "libavutil/imgutils.h" |
| #include "libavutil/mem.h" |
| #include "libswscale/swscale.h" |
| #include "libavutil/avassert.h" |
| #include "libavutil/detection_bbox.h" |
| |
| static int get_datatype_size(DNNDataType dt) |
| { |
| switch (dt) |
| { |
| case DNN_FLOAT: |
| return sizeof(float); |
| case DNN_UINT8: |
| return sizeof(uint8_t); |
| default: |
| av_assert0(!"not supported yet."); |
| return 1; |
| } |
| } |
| |
| int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) |
| { |
| struct SwsContext *sws_ctx; |
| int ret = 0; |
| int linesize[4] = { 0 }; |
| void **dst_data = NULL; |
| void *middle_data = NULL; |
| uint8_t *planar_data[4] = { 0 }; |
| int plane_size = frame->width * frame->height * sizeof(uint8_t); |
| enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE; |
| int src_datatype_size = get_datatype_size(output->dt); |
| |
| int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); |
| if (bytewidth < 0) { |
| return AVERROR(EINVAL); |
| } |
| /* scale == 1 and mean == 0 and dt == UINT8: passthrough */ |
| if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8) |
| src_fmt = AV_PIX_FMT_GRAY8; |
| /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */ |
| else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) && |
| fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT) |
| src_fmt = AV_PIX_FMT_GRAYF32; |
| else { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 " |
| "scale: %f, mean: %f\n", output->scale, output->mean); |
| return AVERROR(ENOSYS); |
| } |
| |
| dst_data = (void **)frame->data; |
| linesize[0] = frame->linesize[0]; |
| if (output->layout == DL_NCHW) { |
| middle_data = av_malloc(plane_size * output->dims[1]); |
| if (!middle_data) { |
| ret = AVERROR(ENOMEM); |
| goto err; |
| } |
| dst_data = &middle_data; |
| linesize[0] = frame->width * 3; |
| } |
| |
| switch (frame->format) { |
| case AV_PIX_FMT_RGB24: |
| case AV_PIX_FMT_BGR24: |
| sws_ctx = sws_getContext(frame->width * 3, |
| frame->height, |
| src_fmt, |
| frame->width * 3, |
| frame->height, |
| AV_PIX_FMT_GRAY8, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height, |
| av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, |
| (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height, |
| (uint8_t * const*)dst_data, linesize); |
| sws_freeContext(sws_ctx); |
| // convert data from planar to packed |
| if (output->layout == DL_NCHW) { |
| sws_ctx = sws_getContext(frame->width, |
| frame->height, |
| AV_PIX_FMT_GBRP, |
| frame->width, |
| frame->height, |
| frame->format, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height, |
| av_get_pix_fmt_name(frame->format),frame->width, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| if (frame->format == AV_PIX_FMT_RGB24) { |
| planar_data[0] = (uint8_t *)middle_data + plane_size; |
| planar_data[1] = (uint8_t *)middle_data + plane_size * 2; |
| planar_data[2] = (uint8_t *)middle_data; |
| } else if (frame->format == AV_PIX_FMT_BGR24) { |
| planar_data[0] = (uint8_t *)middle_data + plane_size; |
| planar_data[1] = (uint8_t *)middle_data; |
| planar_data[2] = (uint8_t *)middle_data + plane_size * 2; |
| } |
| sws_scale(sws_ctx, (const uint8_t * const *)planar_data, |
| (const int [4]){frame->width * sizeof(uint8_t), |
| frame->width * sizeof(uint8_t), |
| frame->width * sizeof(uint8_t), 0}, |
| 0, frame->height, frame->data, frame->linesize); |
| sws_freeContext(sws_ctx); |
| } |
| break; |
| case AV_PIX_FMT_GRAYF32: |
| av_image_copy_plane(frame->data[0], frame->linesize[0], |
| output->data, bytewidth, |
| bytewidth, frame->height); |
| break; |
| case AV_PIX_FMT_YUV420P: |
| case AV_PIX_FMT_YUV422P: |
| case AV_PIX_FMT_YUV444P: |
| case AV_PIX_FMT_YUV410P: |
| case AV_PIX_FMT_YUV411P: |
| case AV_PIX_FMT_GRAY8: |
| case AV_PIX_FMT_NV12: |
| sws_ctx = sws_getContext(frame->width, |
| frame->height, |
| AV_PIX_FMT_GRAYF32, |
| frame->width, |
| frame->height, |
| AV_PIX_FMT_GRAY8, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(src_fmt), frame->width, frame->height, |
| av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, |
| (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height, |
| (uint8_t * const*)frame->data, frame->linesize); |
| sws_freeContext(sws_ctx); |
| break; |
| default: |
| avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format)); |
| ret = AVERROR(ENOSYS); |
| goto err; |
| } |
| |
| err: |
| av_free(middle_data); |
| return ret; |
| } |
| |
| int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) |
| { |
| struct SwsContext *sws_ctx; |
| int ret = 0; |
| int linesize[4] = { 0 }; |
| void **src_data = NULL; |
| void *middle_data = NULL; |
| uint8_t *planar_data[4] = { 0 }; |
| int plane_size = frame->width * frame->height * sizeof(uint8_t); |
| enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE; |
| int dst_datatype_size = get_datatype_size(input->dt); |
| int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); |
| if (bytewidth < 0) { |
| return AVERROR(EINVAL); |
| } |
| /* scale == 1 and mean == 0 and dt == UINT8: passthrough */ |
| if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8) |
| dst_fmt = AV_PIX_FMT_GRAY8; |
| /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */ |
| else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) && |
| fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT) |
| dst_fmt = AV_PIX_FMT_GRAYF32; |
| else { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 " |
| "scale: %f, mean: %f\n", input->scale, input->mean); |
| return AVERROR(ENOSYS); |
| } |
| |
| src_data = (void **)frame->data; |
| linesize[0] = frame->linesize[0]; |
| if (input->layout == DL_NCHW) { |
| middle_data = av_malloc(plane_size * input->dims[1]); |
| if (!middle_data) { |
| ret = AVERROR(ENOMEM); |
| goto err; |
| } |
| src_data = &middle_data; |
| linesize[0] = frame->width * 3; |
| } |
| |
| switch (frame->format) { |
| case AV_PIX_FMT_RGB24: |
| case AV_PIX_FMT_BGR24: |
| // convert data from planar to packed |
| if (input->layout == DL_NCHW) { |
| sws_ctx = sws_getContext(frame->width, |
| frame->height, |
| frame->format, |
| frame->width, |
| frame->height, |
| AV_PIX_FMT_GBRP, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(frame->format), frame->width, frame->height, |
| av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| if (frame->format == AV_PIX_FMT_RGB24) { |
| planar_data[0] = (uint8_t *)middle_data + plane_size; |
| planar_data[1] = (uint8_t *)middle_data + plane_size * 2; |
| planar_data[2] = (uint8_t *)middle_data; |
| } else if (frame->format == AV_PIX_FMT_BGR24) { |
| planar_data[0] = (uint8_t *)middle_data + plane_size; |
| planar_data[1] = (uint8_t *)middle_data; |
| planar_data[2] = (uint8_t *)middle_data + plane_size * 2; |
| } |
| sws_scale(sws_ctx, (const uint8_t * const *)frame->data, |
| frame->linesize, 0, frame->height, planar_data, |
| (const int [4]){frame->width * sizeof(uint8_t), |
| frame->width * sizeof(uint8_t), |
| frame->width * sizeof(uint8_t), 0}); |
| sws_freeContext(sws_ctx); |
| } |
| sws_ctx = sws_getContext(frame->width * 3, |
| frame->height, |
| AV_PIX_FMT_GRAY8, |
| frame->width * 3, |
| frame->height, |
| dst_fmt, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height, |
| av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| sws_scale(sws_ctx, (const uint8_t **)src_data, |
| linesize, 0, frame->height, |
| (uint8_t * const [4]){input->data, 0, 0, 0}, |
| (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0}); |
| sws_freeContext(sws_ctx); |
| break; |
| case AV_PIX_FMT_GRAYF32: |
| av_image_copy_plane(input->data, bytewidth, |
| frame->data[0], frame->linesize[0], |
| bytewidth, frame->height); |
| break; |
| case AV_PIX_FMT_YUV420P: |
| case AV_PIX_FMT_YUV422P: |
| case AV_PIX_FMT_YUV444P: |
| case AV_PIX_FMT_YUV410P: |
| case AV_PIX_FMT_YUV411P: |
| case AV_PIX_FMT_GRAY8: |
| case AV_PIX_FMT_NV12: |
| sws_ctx = sws_getContext(frame->width, |
| frame->height, |
| AV_PIX_FMT_GRAY8, |
| frame->width, |
| frame->height, |
| dst_fmt, |
| 0, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height, |
| av_get_pix_fmt_name(dst_fmt),frame->width, frame->height); |
| ret = AVERROR(EINVAL); |
| goto err; |
| } |
| sws_scale(sws_ctx, (const uint8_t **)frame->data, |
| frame->linesize, 0, frame->height, |
| (uint8_t * const [4]){input->data, 0, 0, 0}, |
| (const int [4]){frame->width * dst_datatype_size, 0, 0, 0}); |
| sws_freeContext(sws_ctx); |
| break; |
| default: |
| avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format)); |
| ret = AVERROR(ENOSYS); |
| goto err; |
| } |
| err: |
| av_free(middle_data); |
| return ret; |
| } |
| |
| static enum AVPixelFormat get_pixel_format(DNNData *data) |
| { |
| if (data->dt == DNN_UINT8) { |
| switch (data->order) { |
| case DCO_BGR: |
| return AV_PIX_FMT_BGR24; |
| case DCO_RGB: |
| return AV_PIX_FMT_RGB24; |
| default: |
| av_assert0(!"unsupported data pixel format.\n"); |
| return AV_PIX_FMT_BGR24; |
| } |
| } |
| |
| av_assert0(!"unsupported data type.\n"); |
| return AV_PIX_FMT_BGR24; |
| } |
| |
| int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx) |
| { |
| const AVPixFmtDescriptor *desc; |
| int offsetx[4], offsety[4]; |
| uint8_t *bbox_data[4]; |
| struct SwsContext *sws_ctx; |
| int linesizes[4]; |
| int ret = 0; |
| enum AVPixelFormat fmt; |
| int left, top, width, height; |
| int width_idx, height_idx; |
| const AVDetectionBBoxHeader *header; |
| const AVDetectionBBox *bbox; |
| AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES); |
| int max_step[4] = { 0 }; |
| av_assert0(sd); |
| |
| /* (scale != 1 and scale != 0) or mean != 0 */ |
| if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) || |
| fabsf(input->mean) > 1e-6f) { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support " |
| "scale: %f, mean: %f\n", input->scale, input->mean); |
| return AVERROR(ENOSYS); |
| } |
| |
| if (input->layout == DL_NCHW) { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n"); |
| return AVERROR(ENOSYS); |
| } |
| |
| width_idx = dnn_get_width_idx_by_layout(input->layout); |
| height_idx = dnn_get_height_idx_by_layout(input->layout); |
| |
| header = (const AVDetectionBBoxHeader *)sd->data; |
| bbox = av_get_detection_bbox(header, bbox_index); |
| |
| left = bbox->x; |
| width = bbox->w; |
| top = bbox->y; |
| height = bbox->h; |
| |
| fmt = get_pixel_format(input); |
| sws_ctx = sws_getContext(width, height, frame->format, |
| input->dims[width_idx], |
| input->dims[height_idx], fmt, |
| SWS_FAST_BILINEAR, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(frame->format), width, height, |
| av_get_pix_fmt_name(fmt), |
| input->dims[width_idx], |
| input->dims[height_idx]); |
| return AVERROR(EINVAL); |
| } |
| |
| ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]); |
| if (ret < 0) { |
| av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes"); |
| sws_freeContext(sws_ctx); |
| return ret; |
| } |
| |
| desc = av_pix_fmt_desc_get(frame->format); |
| offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w); |
| offsetx[0] = offsetx[3] = left; |
| |
| offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h); |
| offsety[0] = offsety[3] = top; |
| |
| av_image_fill_max_pixsteps(max_step, NULL, desc); |
| for (int k = 0; frame->data[k]; k++) |
| bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k]; |
| |
| sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize, |
| 0, height, |
| (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes); |
| |
| sws_freeContext(sws_ctx); |
| |
| return ret; |
| } |
| |
| int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx) |
| { |
| struct SwsContext *sws_ctx; |
| int linesizes[4]; |
| int ret = 0, width_idx, height_idx; |
| enum AVPixelFormat fmt = get_pixel_format(input); |
| |
| /* (scale != 1 and scale != 0) or mean != 0 */ |
| if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) || |
| fabsf(input->mean) > 1e-6f) { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support " |
| "scale: %f, mean: %f\n", input->scale, input->mean); |
| return AVERROR(ENOSYS); |
| } |
| |
| if (input->layout == DL_NCHW) { |
| av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n"); |
| return AVERROR(ENOSYS); |
| } |
| |
| width_idx = dnn_get_width_idx_by_layout(input->layout); |
| height_idx = dnn_get_height_idx_by_layout(input->layout); |
| |
| sws_ctx = sws_getContext(frame->width, frame->height, frame->format, |
| input->dims[width_idx], |
| input->dims[height_idx], fmt, |
| SWS_FAST_BILINEAR, NULL, NULL, NULL); |
| if (!sws_ctx) { |
| av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " |
| "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", |
| av_get_pix_fmt_name(frame->format), frame->width, frame->height, |
| av_get_pix_fmt_name(fmt), input->dims[width_idx], |
| input->dims[height_idx]); |
| return AVERROR(EINVAL); |
| } |
| |
| ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]); |
| if (ret < 0) { |
| av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes"); |
| sws_freeContext(sws_ctx); |
| return ret; |
| } |
| |
| sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height, |
| (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes); |
| |
| sws_freeContext(sws_ctx); |
| return ret; |
| } |