blob: d0fb17ee76cef0de67c24a5e51d6c1b931fefda8 [file] [log] [blame]
// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_
#define SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_
#include <lib/async-loop/cpp/loop.h>
#include <lib/async-loop/default.h>
#include <lib/async/cpp/task.h>
#include <lib/media/codec_impl/codec_adapter.h>
#include <lib/media/codec_impl/codec_buffer.h>
#include <lib/media/codec_impl/codec_input_item.h>
#include <lib/media/codec_impl/codec_packet.h>
#include <lib/trace/event.h>
#include <threads.h>
#include <optional>
#include <queue>
#include <fbl/algorithm.h>
#include <va/va.h>
#include "buffer_pool.h"
#include "media/gpu/accelerated_video_decoder.h"
#include "src/lib/fxl/macros.h"
#include "src/lib/fxl/synchronization/thread_annotations.h"
#include "src/media/lib/mpsc_queue/mpsc_queue.h"
#include "src/media/third_party/chromium_media/media/gpu/gpu_video_encode_accelerator_helpers.h"
#include "vaapi_utils.h"
class CodecAdapterVaApiEncoder;
namespace media {
class VaapiVideoEncoderDelegate;
class VaapiWrapper;
} // namespace media
// Move-only handle that ties the base address of an output buffer to the
// CodecAdapterVaApiEncoder it came from.
//
// A default-constructed instance is empty (both pointers are nullptr). The
// destructor and the move operations are defined out of line.
// NOTE(review): CodecAdapterVaApiEncoder's comments say that destroying one of
// these "may trigger a buffer free callback", so a non-empty instance
// presumably hands its buffer back to the adapter on destruction - confirm
// against the .cc file.
class VaApiEncoderOutput {
 public:
  // Creates an empty output that refers to no buffer.
  VaApiEncoderOutput() = default;

  // Creates an output for the buffer starting at |base_address| that belongs to
  // |adapter|. Neither pointer is owned by this object.
  VaApiEncoderOutput(uint8_t* base_address, CodecAdapterVaApiEncoder* adapter)
      : base_address_(base_address), adapter_(adapter) {}

  ~VaApiEncoderOutput();

  // Not copyable: at most one VaApiEncoderOutput may refer to a given buffer.
  // The copy-assignment operator takes the canonical const reference so that
  // assignment from const lvalues is rejected by this declaration as well.
  VaApiEncoderOutput(const VaApiEncoderOutput&) = delete;
  VaApiEncoderOutput& operator=(const VaApiEncoderOutput&) = delete;

  // Movable; the definitions live out of line.
  VaApiEncoderOutput(VaApiEncoderOutput&& other) noexcept;
  VaApiEncoderOutput& operator=(VaApiEncoderOutput&& other) noexcept;

 private:
  // Start of the referenced output buffer, or nullptr when empty.
  uint8_t* base_address_ = nullptr;
  // Adapter the buffer belongs to, or nullptr when empty. Not owned.
  CodecAdapterVaApiEncoder* adapter_ = nullptr;
};
// CodecAdapter implementation that encodes video through VA-API and reports
// its output as "video/h264" (see CoreCodecGetOutputFormat()).
//
// Input items are pushed onto |input_queue_| by the CoreCodecQueueInput*()
// entry points and consumed by ProcessInputLoop(), which CoreCodecStartStream()
// posts to |input_processing_loop_|'s dispatcher. Output buffers are staged
// until configuration finishes or a stream starts, and are then handed to
// |output_buffer_pool_|.
class CodecAdapterVaApiEncoder : public CodecAdapter {
 public:
  CodecAdapterVaApiEncoder(std::mutex& lock, CodecAdapterEvents* codec_adapter_events);
  ~CodecAdapterVaApiEncoder() override;

  bool IsCoreCodecRequiringOutputConfigForFormatDetection() override { return false; }

  bool IsCoreCodecMappedBufferUseful(CodecPort port) override { return true; }

  // NOTE(review): this reports "not HW based" even though encoding goes through
  // VA-API - confirm this is intentional.
  bool IsCoreCodecHwBased(CodecPort port) override { return false; }

  void CoreCodecInit(const fuchsia::media::FormatDetails& initial_input_format_details) override;

  // Stages an output buffer until LoadStagedOutputBuffers() runs. Input buffers
  // are not tracked.
  void CoreCodecAddBuffer(CodecPort port, const CodecBuffer* buffer) override {
    if (port != kOutputPort) {
      return;
    }
    staged_output_buffers_.push_back(buffer);
  }

  // Populates |free_output_packets_| with every output packet, in shuffled
  // order. Input packets are ignored.
  void CoreCodecConfigureBuffers(
      CodecPort port, const std::vector<std::unique_ptr<CodecPacket>>& packets) override {
    if (port != kOutputPort) {
      return;
    }
    std::vector<CodecPacket*> all_packets;
    all_packets.reserve(packets.size());
    for (const auto& packet : packets) {
      all_packets.push_back(packet.get());
    }
    std::shuffle(all_packets.begin(), all_packets.end(), not_for_security_prng_);
    for (CodecPacket* packet : all_packets) {
      free_output_packets_.Push(packet);
    }
  }

  // Resets the queues and the buffer pool, loads any staged output buffers and
  // posts ProcessInputLoop() to the input processing dispatcher.
  void CoreCodecStartStream() override {
    // It's ok for RecycleInputPacket to make a packet free anywhere in this
    // sequence. Nothing else ought to be happening during CoreCodecStartStream
    // (in this or any other thread).
    input_queue_.Reset();
    free_output_packets_.Reset(/*keep_data=*/true);
    output_buffer_pool_.Reset(/*keep_data=*/true);
    LoadStagedOutputBuffers();

    zx_status_t post_result =
        async::PostTask(input_processing_loop_.dispatcher(), [this] { ProcessInputLoop(); });
    ZX_ASSERT_MSG(post_result == ZX_OK,
                  "async::PostTask() failed to post input processing loop - result: %d\n",
                  post_result);

    TRACE_INSTANT("codec_runner", "Media:Start", TRACE_SCOPE_THREAD);
  }

  void CoreCodecQueueInputFormatDetails(
      const fuchsia::media::FormatDetails& per_stream_override_format_details) override {
    input_queue_.Push(CodecInputItem::FormatDetails(per_stream_override_format_details));
  }

  void CoreCodecQueueInputPacket(CodecPacket* packet) override {
    TRACE_INSTANT("codec_runner", "Media:PacketReceived", TRACE_SCOPE_THREAD);
    input_queue_.Push(CodecInputItem::Packet(packet));
  }

  void CoreCodecQueueInputEndOfStream() override {
    input_queue_.Push(CodecInputItem::EndOfStream());
  }

  // Stops all blocking waits, waits for the input processing loop to finish,
  // releases per-stream resources and reports every still-queued input packet
  // as done.
  void CoreCodecStopStream() override {
    input_queue_.StopAllWaits();
    free_output_packets_.StopAllWaits();
    output_buffer_pool_.StopAllWaits();

    WaitForInputProcessingLoopToEnd();
    CleanUpAfterStream();

    // Drain whatever was never processed so the input packets are not lost.
    auto queued_input_items = BlockingMpscQueue<CodecInputItem>::Extract(std::move(input_queue_));
    while (!queued_input_items.empty()) {
      CodecInputItem input_item = std::move(queued_input_items.front());
      queued_input_items.pop();
      if (input_item.is_packet()) {
        events_->onCoreCodecInputPacketDone(input_item.packet());
      }
    }

    TRACE_INSTANT("codec_runner", "Media:Stop", TRACE_SCOPE_THREAD);
  }

  // Returns |packet| to |free_output_packets_|, first dropping the
  // VaApiEncoderOutput (if any) that was associated with it while the client
  // held the packet.
  void CoreCodecRecycleOutputPacket(CodecPacket* packet) override {
    if (packet->is_new()) {
      // CoreCodecConfigureBuffers() took care of initially populating
      // free_output_packets_ (in shuffled order), so ignore new packets.
      ZX_DEBUG_ASSERT(!packet->buffer());
      packet->SetIsNew(false);
      return;
    }
    if (packet->buffer()) {
      VaApiEncoderOutput local_output;
      {
        std::lock_guard<std::mutex> lock(lock_);
        // Single lookup; avoids operator[] default-inserting an entry when the
        // packet is unexpectedly missing and debug asserts are compiled out.
        auto it = in_use_by_client_.find(packet);
        ZX_DEBUG_ASSERT(it != in_use_by_client_.end());
        if (it != in_use_by_client_.end()) {
          local_output = std::move(it->second);
          in_use_by_client_.erase(it);
        }
      }
      // ~local_output runs here, outside |lock_|, which may trigger a buffer
      // free callback.
    }
    free_output_packets_.Push(packet);
  }

  // Forgets the buffer settings for |port|; for the output port also drops all
  // client-held outputs, staged buffers, pooled buffers and free packets.
  void CoreCodecEnsureBuffersNotConfigured(CodecPort port) override {
    buffer_settings_[port] = std::nullopt;
    if (port != kOutputPort) {
      // We don't do anything with input buffers.
      return;
    }
    {  // scope to_drop
      std::map<CodecPacket*, VaApiEncoderOutput> to_drop;
      {
        std::lock_guard<std::mutex> lock(lock_);
        std::swap(to_drop, in_use_by_client_);
      }
      // ~to_drop runs outside |lock_|.
    }
    // The ~to_drop returns all buffers to the output_buffer_pool_.
    ZX_DEBUG_ASSERT(!output_buffer_pool_.has_buffers_in_use());

    // VMO handles for the old output buffers may still exist, but the
    // encoder doesn't know about those, and buffer_lifetime_ordinal will
    // prevent us calling output_buffer_pool_.FreeBuffer() for any of the old
    // buffers. So forget about the old buffers here.
    output_buffer_pool_.Reset();
    staged_output_buffers_.clear();
    free_output_packets_.Reset();
  }

  void CoreCodecMidStreamOutputBufferReConfigPrepare() override {
    // Nothing to do here.
  }

  void CoreCodecMidStreamOutputBufferReConfigFinish() override { LoadStagedOutputBuffers(); }

  // Builds the output constraints for the given stream and constraints version.
  // |buffer_constraints_action_required| is expected to be true.
  std::unique_ptr<const fuchsia::media::StreamOutputConstraints> CoreCodecBuildNewOutputConstraints(
      uint64_t stream_lifetime_ordinal, uint64_t new_output_buffer_constraints_version_ordinal,
      bool buffer_constraints_action_required) override {
    auto config = std::make_unique<fuchsia::media::StreamOutputConstraints>();
    config->set_stream_lifetime_ordinal(stream_lifetime_ordinal);

    // For the moment, there will be only one StreamOutputConstraints, and it'll
    // need output buffers configured for it.
    ZX_DEBUG_ASSERT(buffer_constraints_action_required);
    config->set_buffer_constraints_action_required(buffer_constraints_action_required);

    auto* constraints = config->mutable_buffer_constraints();
    constraints->set_buffer_constraints_version_ordinal(
        new_output_buffer_constraints_version_ordinal);
    return config;
  }

  // Describes the output as compressed "video/h264" for the given stream and
  // format details version.
  fuchsia::media::StreamOutputFormat CoreCodecGetOutputFormat(
      uint64_t stream_lifetime_ordinal,
      uint64_t new_output_format_details_version_ordinal) override {
    fuchsia::media::StreamOutputFormat result;
    result.set_stream_lifetime_ordinal(stream_lifetime_ordinal);
    result.mutable_format_details()->set_format_details_version_ordinal(
        new_output_format_details_version_ordinal);
    result.mutable_format_details()->set_mime_type("video/h264");

    fuchsia::media::VideoFormat video_format;
    auto compressed_format = fuchsia::media::VideoCompressedFormat();
    compressed_format.set_temp_field_todo_remove(0);
    video_format.set_compressed(std::move(compressed_format));
    result.mutable_format_details()->mutable_domain()->set_video(std::move(video_format));
    return result;
  }

  // Returns the sysmem constraints for |port|: a minimum bitstream buffer size
  // for the output port, NV12 image format constraints for the input port.
  fuchsia_sysmem2::BufferCollectionConstraints CoreCodecGetBufferCollectionConstraints2(
      CodecPort port, const fuchsia::media::StreamBufferConstraints& stream_buffer_constraints,
      const fuchsia::media::StreamBufferPartialSettings& partial_settings) override {
    fuchsia_sysmem2::BufferCollectionConstraints constraints;
    auto& bmc = constraints.buffer_memory_constraints().emplace();
    if (port == kOutputPort) {
      constraints.min_buffer_count_for_camping() = 1;
      // The Intel GPU supports CPU domain buffer collections, so we don't really need to support
      // RAM domain.
      bmc.cpu_domain_supported() = true;
      ZX_ASSERT(display_size_.width() > 0);
      ZX_ASSERT(display_size_.height() > 0);
      // The encoder doesn't support splitting output across buffers.
      bmc.min_size_bytes() =
          static_cast<uint32_t>(media::GetEncodeBitstreamBufferSize(coded_size_));
    } else {
      ZX_DEBUG_ASSERT(port == kInputPort);
      constraints.min_buffer_count_for_camping() = 1;
      bmc.cpu_domain_supported() = true;

      auto& image_constraints = constraints.image_format_constraints().emplace().emplace_back();
      image_constraints.pixel_format() = fuchsia_images2::PixelFormat::kNv12;
      // TODO(https://fxbug.dev/42051379): Add support for more colorspaces.
      image_constraints.color_spaces() = {fuchsia_images2::ColorSpace::kRec709};

      // The non-"required_" fields indicate the encoder's ability to accept
      // input frames at various dimensions. The input frames need to be within
      // these bounds.
      image_constraints.min_size() = {16, 16};
      // This intentionally isn't the height of a 4k frame. See
      // max_width_times_height. We intentionally constrain the max
      // dimension in width or height to the width of a 4k frame. While the HW
      // might be able to go bigger than that as long as the other dimension is
      // smaller to compensate, we don't really need to enable any larger than
      // 4k's width in either dimension, so we don't.
      image_constraints.max_size() = {3840, 3840};
      image_constraints.min_bytes_per_row() = 16;
      // no hard-coded max stride, at least for now
      ZX_DEBUG_ASSERT(!image_constraints.max_bytes_per_row().has_value());
      image_constraints.max_width_times_height() = 3840 * 2160;
      image_constraints.size_alignment() = {2, 2};
      image_constraints.bytes_per_row_divisor() = 2;
      image_constraints.start_offset_divisor() = 1;
      // Odd display dimensions are permitted, but these don't imply odd NV12
      // dimensions - those are constrained by size_alignment, which is {2, 2}.
      image_constraints.display_rect_alignment() = {1, 1};

      // The required sizes aren't initialized, since
      // CoreCodecGetBufferCollectionConstraints2 won't be re-triggered when the
      // input format is changed.
    }
    return constraints;
  }

  // Remembers the negotiated single-buffer settings for |port|.
  void CoreCodecSetBufferCollectionInfo(
      CodecPort port,
      const fuchsia_sysmem2::BufferCollectionInfo& buffer_collection_info) override {
    buffer_settings_[port] = *buffer_collection_info.settings();
  }

  // Returns the id of the current VA context. |context_id_| must be set.
  VAContextID context_id() {
    ZX_DEBUG_ASSERT(context_id_.has_value());
    return context_id_->id();
  }

  scoped_refptr<VASurface> GetVASurface();

 private:
  friend class VaApiEncoderOutput;

  // Blocks until a marker task posted to the input processing dispatcher has
  // run. Must not be called on |input_processing_thread_|.
  // NOTE(review): presumably the dispatcher runs tasks in posting order on a
  // single thread, so the marker running implies ProcessInputLoop() has
  // returned - confirm.
  void WaitForInputProcessingLoopToEnd() {
    ZX_DEBUG_ASSERT(thrd_current() != input_processing_thread_);

    std::condition_variable stream_stopped_condition;
    bool stream_stopped = false;
    zx_status_t post_result = async::PostTask(input_processing_loop_.dispatcher(),
                                              [this, &stream_stopped, &stream_stopped_condition] {
                                                {
                                                  std::lock_guard<std::mutex> lock(lock_);
                                                  stream_stopped = true;
                                                  // Under lock since
                                                  // WaitForInputProcessingLoopToEnd()
                                                  // may otherwise return too soon deleting
                                                  // stream_stopped_condition too soon.
                                                  stream_stopped_condition.notify_all();
                                                }
                                              });
    ZX_ASSERT_MSG(post_result == ZX_OK,
                  "async::PostTask() failed to post input processing loop - result: %d\n",
                  post_result);

    std::unique_lock<std::mutex> lock(lock_);
    stream_stopped_condition.wait(lock, [&stream_stopped] { return stream_stopped; });
  }

  bool HandleInputFormatChange(const fuchsia::media::FormatDetails& input_format_details,
                               bool initial);

  // We don't give the codec any buffers in its output pool until
  // configuration is finished or a stream starts. Until finishing
  // configuration we stage all the buffers. Here we load all the staged
  // buffers so the codec can make output.
  void LoadStagedOutputBuffers() {
    // Swap with an empty vector rather than moving from the member, so the
    // staging list is guaranteed to be empty afterwards.
    std::vector<const CodecBuffer*> to_add;
    to_add.swap(staged_output_buffers_);
    for (const CodecBuffer* buffer : to_add) {
      output_buffer_pool_.AddBuffer(buffer);
    }
  }

  // Processes input in a loop. Should only execute on input_processing_thread_.
  // Loops for the lifetime of a stream.
  void ProcessInputLoop();

  bool ProcessPacket(CodecPacket* packet);

  // Releases any resources from the just-ended stream.
  void CleanUpAfterStream();

  BlockingMpscQueue<CodecInputItem> input_queue_{};
  BlockingMpscQueue<CodecPacket*> free_output_packets_{};

  VAProfile va_profile_ = VAProfileH264High;
  // VAEntrypointEncSlice should also work, but LP is supported on Intel and more efficient.
  VAEntrypoint va_entrypoint_ = VAEntrypointEncSliceLP;
  std::optional<ScopedConfigID> config_;

  // The order of output_buffer_pool_ and in_use_by_client_ matters, so that
  // destruction of in_use_by_client_ happens first, because those destructing
  // will return buffers to output_buffer_pool_.
  BufferPool output_buffer_pool_;
  std::map<CodecPacket*, VaApiEncoderOutput> in_use_by_client_ FXL_GUARDED_BY(lock_);

  // Buffers the client has added but that we cannot use until configuration is
  // complete.
  std::vector<const CodecBuffer*> staged_output_buffers_;

  // Zero-initialized so it is never read with an indeterminate value.
  uint64_t input_format_details_version_ordinal_ = 0;
  media::VideoEncodeAccelerator::Config accelerator_config_;
  std::optional<fuchsia_sysmem2::SingleBufferSettings> buffer_settings_[kPortCount];

  // DPB surfaces.
  std::mutex surfaces_lock_;
  // Incremented whenever new surfaces are allocated and old surfaces should be released.
  uint64_t surface_generation_ FXL_GUARDED_BY(surfaces_lock_) = {};
  gfx::Size surface_size_ FXL_GUARDED_BY(surfaces_lock_);
  // These surfaces are used to hold reference frames.
  std::vector<ScopedSurfaceID> surfaces_ FXL_GUARDED_BY(surfaces_lock_);
  // The input frame is uploaded into this surface, which is used only while encoding.
  std::optional<ScopedSurfaceID> input_surface_;
  std::optional<ScopedContextID> context_id_;
  std::shared_ptr<media::VaapiWrapper> vaapi_wrapper_;
  std::unique_ptr<media::VaapiVideoEncoderDelegate> encoder_;
  gfx::Size display_size_;
  gfx::Size coded_size_;
  bool next_frame_keyframe_ = false;

  async::Loop input_processing_loop_{&kAsyncLoopConfigNoAttachToCurrentThread};
  // Value-initialized so the comparison in WaitForInputProcessingLoopToEnd()
  // never reads an indeterminate value.
  thrd_t input_processing_thread_{};
};
#endif // SRC_MEDIA_CODEC_CODECS_VAAPI_CODEC_ADAPTER_VAAPI_ENCODER_H_