| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "codec_adapter_ffmpeg_decoder.h" |
| |
| #include <limits> |
| |
| extern "C" { |
| #include "libavutil/imgutils.h" |
| } |
| |
| #include <lib/async/cpp/task.h> |
| #include <lib/fit/defer.h> |
| #include <lib/media/codec_impl/codec_buffer.h> |
| #include <lib/media/codec_impl/fourcc.h> |
| |
| namespace { |
| |
| AVPixelFormat FourccToPixelFormat(uint32_t fourcc) { |
| switch (fourcc) { |
| case make_fourcc('Y', 'V', '1', '2'): |
| return AV_PIX_FMT_YUV420P; |
| default: |
| return AV_PIX_FMT_NONE; |
| } |
| } |
| |
| } // namespace |
| |
namespace {

// A client using the min shouldn't necessarily expect performance to be
// acceptable when running higher bit-rates.
constexpr uint32_t kInputPerPacketBufferBytesMin = 8 * 1024;
// This is an arbitrary cap for now.
constexpr uint32_t kInputPerPacketBufferBytesMax = 4 * 1024 * 1024;

// Arbitrary limit; specific value is historical.
//
// Note: `static` is redundant on namespace-scope constexpr variables inside an
// anonymous namespace (they already have internal linkage), so it's omitted
// here for consistency with the constants above.
constexpr uint32_t kMaxOutputBufferCount = 34;
// Arbitrary limit.
constexpr uint32_t kMaxInputBufferCount = 256;

// FFMPEG requires at least this many padding bytes after each output frame, to allow for optimized
// code to do less bounds checking (an understandable tradeoff). Without this, FFMPEG can sometimes
// read (and maybe sometimes write though I haven't personally observed that) beyond the end of the
// output buffer which can crash the isolate process.
constexpr uint32_t kFfmpegOutputFramePaddingBytes = 16;

}  // namespace
| |
// Forwards construction to the shared software-codec base class; this
// subclass adds only the ffmpeg-specific decode behavior.
CodecAdapterFfmpegDecoder::CodecAdapterFfmpegDecoder(std::mutex& lock,
                                                     CodecAdapterEvents* codec_adapter_events)
    : CodecAdapterSW(lock, codec_adapter_events) {}
| |
| CodecAdapterFfmpegDecoder::~CodecAdapterFfmpegDecoder() = default; |
| |
| void CodecAdapterFfmpegDecoder::ProcessInputLoop() { |
| std::optional<CodecInputItem> maybe_input_item; |
| while ((maybe_input_item = input_queue_.WaitForElement())) { |
| CodecInputItem input_item = std::move(maybe_input_item.value()); |
| if (input_item.is_format_details()) { |
| if (avcodec_context_) { |
| events_->onCoreCodecFailCodec("Midstream input format change is not supported."); |
| return; |
| } |
| auto maybe_avcodec_context = AvCodecContext::CreateDecoder( |
| input_item.format_details(), |
| [this](const AvCodecContext::FrameBufferRequest& frame_buffer_request, |
| AVCodecContext* avcodec_context, AVFrame* frame, int flags) { |
| return GetBuffer(frame_buffer_request, avcodec_context, frame, flags); |
| }); |
| if (!maybe_avcodec_context) { |
| events_->onCoreCodecFailCodec("Failed to create ffmpeg decoder."); |
| return; |
| } |
| avcodec_context_ = std::move(maybe_avcodec_context.value()); |
| } else if (input_item.is_end_of_stream()) { |
| ZX_ASSERT(avcodec_context_); |
| avcodec_context_->EndStream(); |
| DecodeFrames(); |
| } else if (input_item.is_packet()) { |
| ZX_DEBUG_ASSERT(avcodec_context_); |
| int result = avcodec_context_->SendPacket(input_item.packet()); |
| if (result < 0) { |
| events_->onCoreCodecFailCodec("Failed to decode input packet with ffmpeg error: %s", |
| av_err2str(result)); |
| return; |
| } |
| |
| events_->onCoreCodecInputPacketDone(input_item.packet()); |
| |
| DecodeFrames(); |
| } |
| } |
| } |
| |
| void CodecAdapterFfmpegDecoder::CleanUpAfterStream() { avcodec_context_ = nullptr; } |
| |
| std::pair<fuchsia::media::FormatDetails, size_t> CodecAdapterFfmpegDecoder::OutputFormatDetails() { |
| std::lock_guard<std::mutex> lock(lock_); |
| ZX_ASSERT(decoded_output_info_.has_value()); |
| |
| auto& [uncompressed_format, per_packet_buffer_bytes] = decoded_output_info_.value(); |
| |
| fuchsia::media::FormatDetails format_details; |
| |
| format_details.set_mime_type("video/raw"); |
| |
| fuchsia::media::VideoFormat video_format; |
| video_format.set_uncompressed(fidl::Clone(uncompressed_format)); |
| |
| format_details.mutable_domain()->set_video(std::move(video_format)); |
| |
| return {std::move(format_details), per_packet_buffer_bytes}; |
| } |
| |
| void CodecAdapterFfmpegDecoder::FfmpegFreeBufferCallback(void* ctx, uint8_t* base) { |
| auto* self = reinterpret_cast<CodecAdapterFfmpegDecoder*>(ctx); |
| self->output_buffer_pool_.FreeBuffer(base); |
| } |
| |
| int CodecAdapterFfmpegDecoder::GetBuffer( |
| const AvCodecContext::FrameBufferRequest& decoded_output_info, AVCodecContext* avcodec_context, |
| AVFrame* frame, int flags) { |
| bool should_config_output = false; |
| bool output_increased_in_size = false; |
| bool need_new_buffers = false; |
| { |
| std::lock_guard<std::mutex> lock(lock_); |
| need_new_buffers = !decoded_output_info_; |
| if (!decoded_output_info_ || |
| !fidl::Equals((*decoded_output_info_).format, decoded_output_info.format)) { |
| output_increased_in_size = |
| decoded_output_info_.has_value() && |
| decoded_output_info.buffer_bytes_needed > (*decoded_output_info_).buffer_bytes_needed; |
| decoded_output_info_ = {.format = fidl::Clone(decoded_output_info.format), |
| .buffer_bytes_needed = decoded_output_info.buffer_bytes_needed}; |
| should_config_output = true; |
| } |
| } |
| |
| if (output_increased_in_size) { |
| events_->onCoreCodecFailCodec( |
| "Midstream output config change to larger format is not supported."); |
| return avcodec_default_get_buffer2(avcodec_context, frame, flags); |
| } |
| |
| if (should_config_output) { |
| events_->onCoreCodecMidStreamOutputConstraintsChange( |
| /*output_re_config_required=*/need_new_buffers); |
| } |
| |
| auto buffer = output_buffer_pool_.AllocateBuffer(decoded_output_info.buffer_bytes_needed); |
| if (!buffer) { |
| // This stream is stopping. We let ffmpeg allocate just so it can exit |
| // cleanly. |
| return avcodec_default_get_buffer2(avcodec_context, frame, flags); |
| } |
| |
| AVPixelFormat pix_fmt = FourccToPixelFormat(decoded_output_info.format.fourcc); |
| if (pix_fmt == AV_PIX_FMT_NONE) { |
| events_->onCoreCodecFailCodec("Unsupported format: %d", pix_fmt); |
| return -1; |
| } |
| |
| AVBufferRef* buffer_ref = av_buffer_create(buffer->base(), static_cast<int>(buffer->size()), |
| FfmpegFreeBufferCallback, this, flags); |
| |
| int fill_arrays_status = |
| av_image_fill_arrays(frame->data, frame->linesize, buffer_ref->data, pix_fmt, |
| decoded_output_info.format.primary_width_pixels, |
| decoded_output_info.format.primary_height_pixels, 1); |
| if (fill_arrays_status < 0) { |
| events_->onCoreCodecFailCodec("Ffmpeg fill arrays failed: %d", fill_arrays_status); |
| return -1; |
| } |
| |
| // IYUV is not YV12. Ffmpeg only decodes into IYUV. The difference between |
| // YV12 and IYUV is the order of the U and V planes. Here we trick Ffmpeg |
| // into writing them in YV12 order relative to one another. |
| std::swap(frame->data[1], frame->data[2]); |
| |
| frame->buf[0] = buffer_ref; |
| // ffmpeg says to set extended_data to data if we're not using extended_data |
| frame->extended_data = frame->data; |
| |
| return 0; |
| } |
| |
// Pulls every currently-available decoded frame out of ffmpeg and emits each
// one as an output packet. Runs only on the input processing thread. Returns
// when the decoder needs more input (EAGAIN), at end of stream, on decode
// error, or when the free-packet queue reports the stream is stopping.
void CodecAdapterFfmpegDecoder::DecodeFrames() {
  ZX_DEBUG_ASSERT(thrd_current() == input_processing_thread_);
  ZX_DEBUG_ASSERT(avcodec_context_);

  while (true) {
    auto [error, frame] = avcodec_context_->ReceiveFrame();
    if (error == AVERROR(EAGAIN)) {
      // Decoder needs more input before it can produce another frame.
      return;
    } else if (error == AVERROR_EOF) {
      events_->onCoreCodecOutputEndOfStream(/*error_detected_before=*/false);
      return;
    } else if (error < 0) {
      events_->onCoreCodecFailCodec("DecodeFrames(): Failed to decode frame: %s",
                                    av_err2str(error));
      return;
    }

    // Waits for a free output packet; an empty result means the queue was
    // stopped, so just drop the frame and bail.
    std::optional<CodecPacket*> maybe_output_packet = free_output_packets_.WaitForElement();
    if (!maybe_output_packet) {
      return;
    }
    auto output_packet = *maybe_output_packet;
    // If we bail out before handing the packet to the client, return the
    // packet to the free list.
    auto release_buffer =
        fit::defer([this, &output_packet]() { free_output_packets_.Push(output_packet); });

    // frame->data[0] is the base address of the pool buffer that GetBuffer()
    // handed to ffmpeg, so it doubles as the pool lookup key.
    auto buffer_alloc = output_buffer_pool_.FindBufferByBase(frame->data[0]);
    ZX_ASSERT(buffer_alloc);

    if (buffer_alloc->bytes_used > std::numeric_limits<uint32_t>::max()) {
      events_->onCoreCodecFailCodec("Could not represent bytes_used as uint32_t");
      return;
    }

    output_packet->SetBuffer(buffer_alloc->buffer);
    output_packet->SetStartOffset(0);
    output_packet->SetValidLengthBytes(static_cast<uint32_t>(buffer_alloc->bytes_used));
    // (The "Timstamp" spelling is part of the CodecPacket API.)
    output_packet->SetTimstampIsh(frame->pts);

    {
      // Keep the frame alive while the client owns the packet; the frame's
      // buf[0] ref in turn keeps the pool buffer alive (see GetBuffer()).
      std::lock_guard<std::mutex> lock(lock_);
      ZX_DEBUG_ASSERT(in_use_by_client_.find(output_packet) == in_use_by_client_.end());
      in_use_by_client_.emplace(output_packet, std::move(frame));
    }

    // The packet is now owned by the client path; don't recycle it here.
    release_buffer.cancel();

    events_->onCoreCodecOutputPacket(output_packet,
                                     /*error_detected_before=*/false,
                                     /*error_detected_during=*/false);
  }
}
| |
// Builds the sysmem2 constraints that CodecImpl aggregates with the client's
// constraints when allocating input or output buffer collections.
//
// |port| selects input vs. output. For the output port, decoded_output_info_
// must already be populated (the first GetBuffer() callback sets it), since
// the output buffer size and required image dimensions derive from it.
fuchsia_sysmem2::BufferCollectionConstraints
CodecAdapterFfmpegDecoder::CoreCodecGetBufferCollectionConstraints2(
    CodecPort port, const fuchsia::media::StreamBufferConstraints& stream_buffer_constraints,
    const fuchsia::media::StreamBufferPartialSettings& partial_settings) {
  std::lock_guard<std::mutex> lock(lock_);

  fuchsia_sysmem2::BufferCollectionConstraints result;

  // The CodecImpl won't hand us the sysmem token, so we shouldn't expect to
  // have the token here.
  ZX_DEBUG_ASSERT(!partial_settings.has_sysmem_token());

  // TODO(https://fxbug.dev/42084949): plumb/permit range of buffer count from further down,
  // instead of single number frame_count, and set this to the actual
  // stream-required # of reference frames + # that can concurrently decode.
  // Packets and buffers are not the same thing, and we should permit the # of
  // packets to be >= the # of buffers. We shouldn't be
  // allocating buffers on behalf of the client here, but until we plumb the
  // range of frame_count and are more flexible on # of allocated buffers, we
  // have to make sure there are at least as many buffers as packets. We
  // categorize the buffers as for camping and for slack. This should change to
  // be just the buffers needed for camping and maybe 1 for shared slack. If
  // the client wants more buffers the client can demand buffers in its own
  // fuchsia::sysmem::BufferCollection::SetConstraints().
  if (port == kOutputPort) {
    result.min_buffer_count_for_camping() = kMinOutputBufferCountForCamping;
  } else {
    result.min_buffer_count_for_camping() = kMinInputBufferCountForCamping;
  }

  // We don't ask for dedicated or shared slack buffers.
  ZX_DEBUG_ASSERT(!result.min_buffer_count_for_dedicated_slack().has_value());
  ZX_DEBUG_ASSERT(!result.min_buffer_count_for_shared_slack().has_value());

  if (port == kOutputPort) {
    result.max_buffer_count() = kMaxOutputBufferCount;
  } else {
    result.max_buffer_count() = kMaxInputBufferCount;
  }

  uint32_t per_packet_buffer_bytes_min;
  uint32_t per_packet_buffer_bytes_max;
  if (port == kInputPort) {
    per_packet_buffer_bytes_min = kInputPerPacketBufferBytesMin;
    per_packet_buffer_bytes_max = kInputPerPacketBufferBytesMax;
  } else {
    ZX_ASSERT(decoded_output_info_.has_value());
    auto& [uncompressed_format, per_packet_buffer_bytes] = decoded_output_info_.value();

    ZX_DEBUG_ASSERT(port == kOutputPort);
    // YV12 (12 bits/pixel: full-res Y plane plus two quarter-res chroma
    // planes), based on min stride, plus the padding ffmpeg needs after each
    // frame.
    per_packet_buffer_bytes_min = uncompressed_format.primary_line_stride_bytes *
                                      uncompressed_format.primary_height_pixels * 3 / 2 +
                                  kFfmpegOutputFramePaddingBytes;
    // At least for now, don't cap the per-packet buffer size for output. The
    // HW only cares about the portion we set up for output anyway, and the
    // client has no way to force output to occur into portions of the output
    // buffer beyond what's implied by the max supported image dimensions.
    per_packet_buffer_bytes_max = 0xFFFFFFFF;
  }

  auto& bmc = result.buffer_memory_constraints().emplace();
  bmc.min_size_bytes() = per_packet_buffer_bytes_min;
  bmc.max_size_bytes() = per_packet_buffer_bytes_max;

  // These are all false because SW decode.
  bmc.physically_contiguous_required() = false;
  bmc.secure_required() = false;

  if (port == kOutputPort) {
    ZX_ASSERT(decoded_output_info_.has_value());
    auto& [uncompressed_format, per_packet_buffer_bytes] = decoded_output_info_.value();

    auto& image_constraints = result.image_format_constraints().emplace().emplace_back();
    image_constraints.pixel_format() = fuchsia_images2::PixelFormat::kYv12;

    // TODO(https://fxbug.dev/42084950): confirm that REC709 is always what we want here, or plumb
    // actual YUV color space if it can ever be REC601_*. Since 2020 and 2100
    // are minimum 10 bits per Y sample and we're outputting YV12, 601 is the
    // only other potential possibility here.
    image_constraints.color_spaces() = {fuchsia_images2::ColorSpace::kRec709};

    // The non-"required_" fields indicate the decoder's ability to potentially
    // output frames at various dimensions as coded in the stream. Aside from
    // the current stream being somewhere in these bounds, these have nothing to
    // do with the current stream in particular.
    image_constraints.min_size() = {16, 16};

    // This intentionally isn't the height of a 4k frame. See
    // max_coded_width_times_coded_height. We intentionally constrain the max
    // dimension in width or height to the width of a 4k frame. While the HW
    // might be able to go bigger than that as long as the other dimension is
    // smaller to compensate, we don't really need to enable any larger than
    // 4k's width in either dimension, so we don't.
    image_constraints.max_size() = {3840, 3840};
    image_constraints.min_bytes_per_row() = 16;

    // no hard-coded max stride, at least for now
    ZX_DEBUG_ASSERT(!image_constraints.max_bytes_per_row().has_value());
    image_constraints.max_width_times_height() = 3840 * 2160;
    image_constraints.size_alignment() = {16, 16};
    image_constraints.bytes_per_row_divisor() = 16;

    // TODO(dustingreen): Since this is a producer that will always produce at
    // offset 0 of a physical page, we don't really care if this field is
    // consistent with any constraints re. what the HW can do.
    image_constraints.start_offset_divisor() = 1;

    // Odd display dimensions are permitted, but these don't imply odd YV12
    // dimensions - those are constrained by coded_width_divisor and
    // coded_height_divisor which are both 16.
    image_constraints.display_rect_alignment() = {1, 1};

    // The decoder is producing frames and the decoder has no choice but to
    // produce frames at their coded size. The decoder wants to potentially be
    // able to support a stream with dynamic resolution, potentially including
    // dimensions both less than and greater than the dimensions that led to the
    // current need to allocate a BufferCollection. For this reason, the
    // required_ fields are set to the exact current dimensions, and the
    // permitted (non-required_) fields is set to the full potential range that
    // the decoder could potentially output. If an initiator wants to require a
    // larger range of dimensions that includes the required range indicated
    // here (via a-priori knowledge of the potential stream dimensions), an
    // initiator is free to do so.
    image_constraints.required_min_size() = {uncompressed_format.primary_width_pixels,
                                             uncompressed_format.primary_height_pixels};
    image_constraints.required_max_size() = {uncompressed_format.primary_width_pixels,
                                             uncompressed_format.primary_height_pixels};

    // Sysmem2 doesn't have required_min_bytes_per_row or
    // required_max_bytes_per_row (at least for now). If those later prove to be
    // worth adding to sysmem2, we'd set those to
    // uncompressed_format.primary_line_stride_bytes here.
    //
    // The way we'd know is CoreCodecSetBufferCollectionInfo would fail the
    // codec when primary_line_stride_bytes isn't within
    // [min_bytes_per_row..max_bytes_per_row].
    //
    // The decoder is the producer. We're implicitly relying on the consumer(s)
    // to not overly constrain the bytes_per_row using min_bytes_per_row and
    // max_bytes_per_row, in order for decode of the current stream dimensions
    // to be possible.
  } else {
    ZX_DEBUG_ASSERT(!result.image_format_constraints().has_value());
  }

  // We don't have to fill out usage - CodecImpl takes care of that.
  ZX_DEBUG_ASSERT(!result.usage().has_value());

  return result;
}
| |
| void CodecAdapterFfmpegDecoder::CoreCodecSetBufferCollectionInfo( |
| CodecPort port, const fuchsia_sysmem2::BufferCollectionInfo& buffer_collection_info) { |
| std::optional<uint32_t> bytes_per_row; |
| uint32_t min_bytes_per_row; |
| uint32_t max_bytes_per_row; |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| |
| if (port == kInputPort) { |
| ZX_DEBUG_ASSERT(buffer_collection_info.buffers()->size() >= kMinInputBufferCountForCamping); |
| } else { |
| ZX_DEBUG_ASSERT(buffer_collection_info.buffers()->size() >= kMinOutputBufferCountForCamping); |
| } |
| |
| if (decoded_output_info_.has_value()) { |
| auto& [uncompressed_format, per_packet_buffer_bytes] = decoded_output_info_.value(); |
| bytes_per_row = uncompressed_format.primary_line_stride_bytes; |
| } |
| } |
| |
| if (bytes_per_row.has_value()) { |
| auto& ifc = *buffer_collection_info.settings()->image_format_constraints(); |
| min_bytes_per_row = *ifc.min_bytes_per_row(); |
| max_bytes_per_row = *ifc.max_bytes_per_row(); |
| |
| if (bytes_per_row < min_bytes_per_row) { |
| events_->onCoreCodecFailCodec("bytes_per_row < *ifc.min_bytes_per_row()"); |
| return; |
| } |
| if (bytes_per_row > max_bytes_per_row) { |
| events_->onCoreCodecFailCodec("bytes_per_row > *ifc.max_bytes_per_row()"); |
| return; |
| } |
| } |
| } |