| // Copyright 2022 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_ |
| #define SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_ |
| |
| #include <lib/async-loop/cpp/loop.h> |
| #include <lib/async-loop/default.h> |
| #include <lib/async/cpp/task.h> |
| #include <lib/media/codec_impl/codec_adapter.h> |
| #include <lib/media/codec_impl/codec_buffer.h> |
| #include <lib/media/codec_impl/codec_input_item.h> |
| #include <lib/media/codec_impl/codec_packet.h> |
| #include <lib/trace/event.h> |
| #include <threads.h> |
| |
| #include <optional> |
| #include <queue> |
| |
| #include <fbl/algorithm.h> |
| #include <va/va.h> |
| |
| #include "buffer_pool.h" |
| #include "media/gpu/accelerated_video_decoder.h" |
| #include "src/lib/fxl/macros.h" |
| #include "src/lib/fxl/synchronization/thread_annotations.h" |
| #include "src/media/lib/mpsc_queue/mpsc_queue.h" |
| #include "src/media/third_party/chromium_media/media/gpu/gpu_video_encode_accelerator_helpers.h" |
| #include "vaapi_utils.h" |
| |
| class CodecAdapterVaApiEncoder; |
| |
| namespace media { |
| class VaapiVideoEncoderDelegate; |
| class VaapiWrapper; |
| } // namespace media |
| |
| class VaApiEncoderOutput { |
| public: |
| VaApiEncoderOutput() = default; |
| VaApiEncoderOutput(uint8_t* base_address, CodecAdapterVaApiEncoder* adapter) |
| : base_address_(base_address), adapter_(adapter) {} |
| VaApiEncoderOutput(const VaApiEncoderOutput&) = delete; |
| VaApiEncoderOutput(VaApiEncoderOutput&& other) noexcept; |
| |
| ~VaApiEncoderOutput(); |
| |
| VaApiEncoderOutput& operator=(VaApiEncoderOutput&& other) noexcept; |
| VaApiEncoderOutput& operator=(VaApiEncoderOutput& other) noexcept = delete; |
| |
| private: |
| uint8_t* base_address_ = nullptr; |
| CodecAdapterVaApiEncoder* adapter_ = nullptr; |
| }; |
| |
// CodecAdapter that encodes raw NV12 input frames to H.264 ("video/h264")
// using VA-API (libva). Input items are drained on a dedicated
// input_processing_loop_ thread; encoded output is handed to the client as
// CodecPackets whose backing buffers are tracked via VaApiEncoderOutput and
// returned through CoreCodecRecycleOutputPacket().
class CodecAdapterVaApiEncoder : public CodecAdapter {
 public:
  CodecAdapterVaApiEncoder(std::mutex& lock, CodecAdapterEvents* codec_adapter_events);

  ~CodecAdapterVaApiEncoder() override;

  // The output format is fixed (H.264), so no format-detection pass is needed
  // before output buffers can be configured.
  bool IsCoreCodecRequiringOutputConfigForFormatDetection() override { return false; }

  // Both ports are accessed with the CPU (input upload, output readback), so
  // mapped buffers are useful on either port.
  bool IsCoreCodecMappedBufferUseful(CodecPort port) override { return true; }

  // NOTE(review): returns false even though VA-API encoding is GPU-backed —
  // presumably because buffers are CPU-domain (see
  // CoreCodecGetBufferCollectionConstraints2); confirm against CodecAdapter's
  // contract for this method.
  bool IsCoreCodecHwBased(CodecPort port) override { return false; }

  void CoreCodecInit(const fuchsia::media::FormatDetails& initial_input_format_details) override;

  // Output buffers are staged until configuration finishes (see
  // LoadStagedOutputBuffers()); input buffers need no per-buffer tracking here.
  void CoreCodecAddBuffer(CodecPort port, const CodecBuffer* buffer) override {
    if (port != kOutputPort) {
      return;
    }

    staged_output_buffers_.push_back(buffer);
  }

  // Populates free_output_packets_ with all output packets. The order is
  // shuffled so that nothing downstream can accidentally depend on packet
  // ordering.
  void CoreCodecConfigureBuffers(
      CodecPort port, const std::vector<std::unique_ptr<CodecPacket>>& packets) override {
    if (port != kOutputPort) {
      return;
    }
    std::vector<CodecPacket*> all_packets;
    for (auto& packet : packets) {
      all_packets.push_back(packet.get());
    }
    std::shuffle(all_packets.begin(), all_packets.end(), not_for_security_prng_);
    for (CodecPacket* packet : all_packets) {
      free_output_packets_.Push(packet);
    }
  }

  // Resets per-stream queues/pools (keeping the configured packets/buffers)
  // and kicks off ProcessInputLoop() on the input processing thread.
  void CoreCodecStartStream() override {
    // It's ok for RecycleInputPacket to make a packet free anywhere in this
    // sequence. Nothing else ought to be happening during CoreCodecStartStream
    // (in this or any other thread).
    input_queue_.Reset();
    free_output_packets_.Reset(/*keep_data=*/true);
    output_buffer_pool_.Reset(/*keep_data=*/true);
    LoadStagedOutputBuffers();

    zx_status_t post_result =
        async::PostTask(input_processing_loop_.dispatcher(), [this] { ProcessInputLoop(); });
    ZX_ASSERT_MSG(post_result == ZX_OK,
                  "async::PostTask() failed to post input processing loop - result: %d\n",
                  post_result);

    TRACE_INSTANT("codec_runner", "Media:Start", TRACE_SCOPE_THREAD);
  }

  // Format changes are queued in-band with packets so they're applied in
  // order by ProcessInputLoop().
  void CoreCodecQueueInputFormatDetails(
      const fuchsia::media::FormatDetails& per_stream_override_format_details) override {
    input_queue_.Push(CodecInputItem::FormatDetails(per_stream_override_format_details));
  }

  void CoreCodecQueueInputPacket(CodecPacket* packet) override {
    TRACE_INSTANT("codec_runner", "Media:PacketReceived", TRACE_SCOPE_THREAD);
    input_queue_.Push(CodecInputItem::Packet(packet));
  }

  void CoreCodecQueueInputEndOfStream() override {
    input_queue_.Push(CodecInputItem::EndOfStream());
  }

  // Unblocks all queues, waits for ProcessInputLoop() to finish, releases
  // stream resources, then returns any still-queued input packets to the
  // client via onCoreCodecInputPacketDone().
  void CoreCodecStopStream() override {
    input_queue_.StopAllWaits();
    free_output_packets_.StopAllWaits();
    output_buffer_pool_.StopAllWaits();

    WaitForInputProcessingLoopToEnd();
    CleanUpAfterStream();

    auto queued_input_items = BlockingMpscQueue<CodecInputItem>::Extract(std::move(input_queue_));
    while (!queued_input_items.empty()) {
      CodecInputItem input_item = std::move(queued_input_items.front());
      queued_input_items.pop();
      if (input_item.is_packet()) {
        events_->onCoreCodecInputPacketDone(input_item.packet());
      }
    }

    TRACE_INSTANT("codec_runner", "Media:Stop", TRACE_SCOPE_THREAD);
  }

  // Returns a client-released output packet (and its buffer, if any) to the
  // free pools. Dropping the VaApiEncoderOutput held in in_use_by_client_ is
  // what returns the buffer to output_buffer_pool_.
  void CoreCodecRecycleOutputPacket(CodecPacket* packet) override {
    if (packet->is_new()) {
      // CoreCodecConfigureBuffers() took care of initially populating
      // free_output_packets_ (in shuffled order), so ignore new packets.
      ZX_DEBUG_ASSERT(!packet->buffer());
      packet->SetIsNew(false);
      return;
    }
    if (packet->buffer()) {
      VaApiEncoderOutput local_output;
      {
        std::lock_guard<std::mutex> lock(lock_);
        ZX_DEBUG_ASSERT(in_use_by_client_.find(packet) != in_use_by_client_.end());
        local_output = std::move(in_use_by_client_[packet]);
        in_use_by_client_.erase(packet);
      }

      // ~ local_output, which may trigger a buffer free callback.
    }
    free_output_packets_.Push(std::move(packet));
  }

  // Forgets all buffers/packets for |port|. For the output port this drops
  // every client-held buffer first so the in-use assertion below holds.
  void CoreCodecEnsureBuffersNotConfigured(CodecPort port) override {
    buffer_settings_[port] = std::nullopt;
    if (port != kOutputPort) {
      // We don't do anything with input buffers.
      return;
    }

    { // scope to_drop
      std::map<CodecPacket*, VaApiEncoderOutput> to_drop;
      {
        std::lock_guard<std::mutex> lock(lock_);
        std::swap(to_drop, in_use_by_client_);
      }
      // ~to_drop
    }

    // The ~to_drop returns all buffers to the output_buffer_pool_.
    ZX_DEBUG_ASSERT(!output_buffer_pool_.has_buffers_in_use());

    // VMO handles for the old output buffers may still exist, but the
    // decoder doesn't know about those, and buffer_lifetime_ordinal will
    // prevent us calling output_buffer_pool_.FreeBuffer() for any of the old
    // buffers. So forget about the old buffers here.
    output_buffer_pool_.Reset();
    staged_output_buffers_.clear();

    free_output_packets_.Reset();
  }

  void CoreCodecMidStreamOutputBufferReConfigPrepare() override {
    // Nothing to do here.
  }

  void CoreCodecMidStreamOutputBufferReConfigFinish() override { LoadStagedOutputBuffers(); }

  // Builds the (single) output constraints message; output buffers must be
  // configured in response, hence buffer_constraints_action_required.
  std::unique_ptr<const fuchsia::media::StreamOutputConstraints> CoreCodecBuildNewOutputConstraints(
      uint64_t stream_lifetime_ordinal, uint64_t new_output_buffer_constraints_version_ordinal,
      bool buffer_constraints_action_required) override {
    auto config = std::make_unique<fuchsia::media::StreamOutputConstraints>();

    config->set_stream_lifetime_ordinal(stream_lifetime_ordinal);

    // For the moment, there will be only one StreamOutputConstraints, and it'll
    // need output buffers configured for it.
    ZX_DEBUG_ASSERT(buffer_constraints_action_required);
    config->set_buffer_constraints_action_required(buffer_constraints_action_required);
    auto* constraints = config->mutable_buffer_constraints();
    constraints->set_buffer_constraints_version_ordinal(
        new_output_buffer_constraints_version_ordinal);

    return config;
  }

  // Reports the fixed output format: compressed "video/h264".
  fuchsia::media::StreamOutputFormat CoreCodecGetOutputFormat(
      uint64_t stream_lifetime_ordinal,
      uint64_t new_output_format_details_version_ordinal) override {
    fuchsia::media::StreamOutputFormat result;
    result.set_stream_lifetime_ordinal(stream_lifetime_ordinal);
    result.mutable_format_details()->set_format_details_version_ordinal(
        new_output_format_details_version_ordinal);

    result.mutable_format_details()->set_mime_type("video/h264");

    fuchsia::media::VideoFormat video_format;

    auto compressed_format = fuchsia::media::VideoCompressedFormat();
    compressed_format.set_temp_field_todo_remove(0);
    video_format.set_compressed(std::move(compressed_format));

    result.mutable_format_details()->mutable_domain()->set_video(std::move(video_format));
    return result;
  }

  // Sysmem constraints: output needs a single bitstream buffer sized for the
  // coded size; input is CPU-domain NV12 (Rec.709) bounded to 4k-width-class
  // dimensions.
  fuchsia_sysmem2::BufferCollectionConstraints CoreCodecGetBufferCollectionConstraints2(
      CodecPort port, const fuchsia::media::StreamBufferConstraints& stream_buffer_constraints,
      const fuchsia::media::StreamBufferPartialSettings& partial_settings) override {
    fuchsia_sysmem2::BufferCollectionConstraints constraints;
    auto& bmc = constraints.buffer_memory_constraints().emplace();

    if (port == kOutputPort) {
      constraints.min_buffer_count_for_camping() = 1;

      // The Intel GPU supports CPU domain buffer collections, so we don't really need to support
      // RAM domain.
      bmc.cpu_domain_supported() = true;
      // display_size_ must have been set (via format handling) before sysmem
      // constraints are requested.
      ZX_ASSERT(display_size_.width() > 0);
      ZX_ASSERT(display_size_.height() > 0);

      // The encoder doesn't support splitting output across buffers.
      bmc.min_size_bytes() =
          static_cast<uint32_t>(media::GetEncodeBitstreamBufferSize(coded_size_));
    } else {
      ZX_DEBUG_ASSERT(port == kInputPort);
      constraints.min_buffer_count_for_camping() = 1;
      bmc.cpu_domain_supported() = true;
      auto& image_constraints = constraints.image_format_constraints().emplace().emplace_back();
      image_constraints.pixel_format() = fuchsia_images2::PixelFormat::kNv12;

      // TODO(https://fxbug.dev/42051379): Add support for more colorspaces.
      image_constraints.color_spaces() = {fuchsia_images2::ColorSpace::kRec709};

      // The non-"required_" fields indicate the encoder's ability to accept
      // input frames at various dimensions. The input frames need to be within
      // these bounds.
      image_constraints.min_size() = {16, 16};

      // This intentionally isn't the height of a 4k frame. See
      // max_coded_width_times_coded_height. We intentionally constrain the max
      // dimension in width or height to the width of a 4k frame. While the HW
      // might be able to go bigger than that as long as the other dimension is
      // smaller to compensate, we don't really need to enable any larger than
      // 4k's width in either dimension, so we don't.
      image_constraints.max_size() = {3840, 3840};
      image_constraints.min_bytes_per_row() = 16;

      // no hard-coded max stride, at least for now
      ZX_DEBUG_ASSERT(!image_constraints.max_bytes_per_row().has_value());
      image_constraints.max_width_times_height() = 3840 * 2160;
      image_constraints.size_alignment() = {2, 2};
      image_constraints.bytes_per_row_divisor() = 2;
      image_constraints.start_offset_divisor() = 1;

      // Odd display dimensions are permitted, but these don't imply odd YV12
      // dimensions - those are constrainted by coded_width_divisor and
      // coded_height_divisor which are both 2.
      image_constraints.display_rect_alignment() = {1, 1};

      // The required sizes aren't initialized, since
      // CoreCodecGetBufferCollectionConstraints won't be re-triggered when the
      // input format is changed.
    }

    return constraints;
  }

  // Records the negotiated sysmem settings for later use (per port).
  void CoreCodecSetBufferCollectionInfo(
      CodecPort port,
      const fuchsia_sysmem2::BufferCollectionInfo& buffer_collection_info) override {
    buffer_settings_[port] = *buffer_collection_info.settings();
  }

  // VA context used for encode operations. Only valid after the context has
  // been created (context_id_ is set).
  VAContextID context_id() { return context_id_->id(); }

  // Returns a DPB surface for reference-frame use; see surfaces_ below.
  scoped_refptr<VASurface> GetVASurface();

 private:
  friend class VaApiEncoderOutput;
  // Blocks until a task posted to the input processing dispatcher has run,
  // guaranteeing any previously posted ProcessInputLoop() work has finished.
  // Must not be called from the input processing thread itself (would
  // deadlock), hence the assert.
  void WaitForInputProcessingLoopToEnd() {
    ZX_DEBUG_ASSERT(thrd_current() != input_processing_thread_);

    std::condition_variable stream_stopped_condition;
    bool stream_stopped = false;
    zx_status_t post_result = async::PostTask(input_processing_loop_.dispatcher(),
                                              [this, &stream_stopped, &stream_stopped_condition] {
                                                {
                                                  std::lock_guard<std::mutex> lock(lock_);
                                                  stream_stopped = true;
                                                  // Under lock since
                                                  // WaitForInputProcessingLoopToEnd()
                                                  // may otherwise return too soon deleting
                                                  // stream_stopped_condition too soon.
                                                  stream_stopped_condition.notify_all();
                                                }
                                              });
    ZX_ASSERT_MSG(post_result == ZX_OK,
                  "async::PostTask() failed to post input processing loop - result: %d\n",
                  post_result);

    std::unique_lock<std::mutex> lock(lock_);
    stream_stopped_condition.wait(lock, [&stream_stopped] { return stream_stopped; });
  }
  // Applies a new input format; |initial| is true for the first format of a
  // stream. Returns false on failure.
  bool HandleInputFormatChange(const fuchsia::media::FormatDetails& input_format_details,
                               bool initial);

  // We don't give the codec any buffers in its output pool until
  // configuration is finished or a stream starts. Until finishing
  // configuration we stage all the buffers. Here we load all the staged
  // buffers so the codec can make output.
  void LoadStagedOutputBuffers() {
    std::vector<const CodecBuffer*> to_add = std::move(staged_output_buffers_);
    for (auto buffer : to_add) {
      output_buffer_pool_.AddBuffer(buffer);
    }
  }

  // Processes input in a loop. Should only execute on input_processing_thread_.
  // Loops for the lifetime of a stream.
  void ProcessInputLoop();

  // Encodes one input packet. Returns false on failure.
  bool ProcessPacket(CodecPacket* packet);
  // Releases any resources from the just-ended stream.
  void CleanUpAfterStream();

  // Cross-thread queues: producers are the CoreCodecQueue* / recycle entry
  // points, consumer is the input processing thread.
  BlockingMpscQueue<CodecInputItem> input_queue_{};
  BlockingMpscQueue<CodecPacket*> free_output_packets_{};

  VAProfile va_profile_ = VAProfileH264High;
  // VAEntrypointEncSlice should also work, but LP is supported on Intel and more efficient.
  VAEntrypoint va_entrypoint_ = VAEntrypointEncSliceLP;
  std::optional<ScopedConfigID> config_;

  // The order of output_buffer_pool_ and in_use_by_client_ matters, so that
  // destruction of in_use_by_client_ happens first, because those destructing
  // will return buffers to output_buffer_pool_.
  BufferPool output_buffer_pool_;
  // Output buffers currently held by the client, keyed by the packet that
  // carries them; see CoreCodecRecycleOutputPacket().
  std::map<CodecPacket*, VaApiEncoderOutput> in_use_by_client_ FXL_GUARDED_BY(lock_);

  // Buffers the client has added but that we cannot use until configuration is
  // complete.
  std::vector<const CodecBuffer*> staged_output_buffers_;

  uint64_t input_format_details_version_ordinal_;
  media::VideoEncodeAccelerator::Config accelerator_config_;

  // Negotiated sysmem settings per port (indexed by CodecPort).
  std::optional<fuchsia_sysmem2::SingleBufferSettings> buffer_settings_[kPortCount];

  // DPB surfaces.
  std::mutex surfaces_lock_;
  // Incremented whenever new surfaces are allocated and old surfaces should be released.
  uint64_t surface_generation_ FXL_GUARDED_BY(surfaces_lock_) = {};
  gfx::Size surface_size_ FXL_GUARDED_BY(surfaces_lock_);
  // These surfaces are used to hold reference frames.
  std::vector<ScopedSurfaceID> surfaces_ FXL_GUARDED_BY(surfaces_lock_);

  // The input frame is uploaded into this surface, which is used only while encoding.
  std::optional<ScopedSurfaceID> input_surface_;

  std::optional<ScopedContextID> context_id_;

  std::shared_ptr<media::VaapiWrapper> vaapi_wrapper_;
  std::unique_ptr<media::VaapiVideoEncoderDelegate> encoder_;

  gfx::Size display_size_;
  gfx::Size coded_size_;
  // When true, the next encoded frame is forced to be a keyframe.
  bool next_frame_keyframe_ = false;

  // Loop backing the input processing thread; not attached to the creating
  // thread's default dispatcher.
  async::Loop input_processing_loop_{&kAsyncLoopConfigNoAttachToCurrentThread};
  thrd_t input_processing_thread_;
};
| |
| #endif // SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_ |