blob: d7f2a208c13b87804e3562f0f27708a9406f432e [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_
#define SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_
#include <lib/async-loop/cpp/loop.h>
#include <lib/async/cpp/task.h>
#include <cstdint>
#include <string_view>
#include <vector>
#include "macros.h"
#include "registers.h"
#include "video_decoder.h"
// From libvpx
//
// Only forward declarations appear here; within this header Vp9Decoder refers to these types
// through its std::unique_ptr members loop_filter_info_, loop_filter_ and segmentation_.
struct loop_filter_info_n;
struct loopfilter;
struct segmentation;
namespace amlogic_decoder {
// Used for friend declarations below
//
// Vp9Decoder names each of these classes as a friend, so they are forward-declared here.
namespace test {
class Vp9UnitTest;
class TestVP9;
class TestFrameProvider;
} // namespace test
class Vp9Decoder : public VideoDecoder {
public:
// Describes how compressed input is delivered to the firmware (FW). An InputType is passed to the
// Vp9Decoder constructor and stored in input_type_.
enum class InputType {
// A single stream is decoded at once.
kSingleStream,
// Multiple streams are decoded at once.
//
// This mode is capable of interrupting re. frame headers and decoded frames until all frames
// previously delivered via the ring buffer are exhausted, at which point the FW will interrupt
// re. out of input. This mode isn't very forgiving about delivering additional data that
// doesn't contain at least the rest of a frame - the FW will hit the SW watchdog in that case.
kMultiStream,
// Multiple streams, each with input buffers divided on frame boundaries,
// are decoded at once.
//
// This mode expects frames (originally separate or within a superframe) to be delivered
// separately to the FW. If multiple frames are delivered together, the FW can respond to
// DecodeSlice with the second frame's header, instead of decoding the first frame's header,
// which may not be usable for decoding multiple frames delivered to the FW at once. See
// kMultiStream for that purpose.
kMultiFrameBased
};
// Callback interface handed to the decoder through Vp9Decoder::SetFrameDataProvider().
//
// The decoder only ever holds implementations as a FrameDataProvider*, so the destructor is
// virtual: destroying an implementation through a base pointer is then well-defined.
class FrameDataProvider {
 public:
  virtual ~FrameDataProvider() = default;

  // Called with the decoder locked.
  virtual void ReadMoreInputData(Vp9Decoder* decoder) = 0;
  // Counterpart of ReadMoreInputData() that Vp9Decoder::SwappedIn() invokes.
  virtual void ReadMoreInputDataFromReschedule(Vp9Decoder* decoder) = 0;
  // Default behavior is for the benefit of test code; production implementation overrides all the
  // methods.
  virtual bool HasMoreInputData() { return true; }
  // CodecAdapterVp9 will fwd to CodecImpl which will async stop/start the stream (on
  // StreamControl thread) to continue decoding for the same stream, with the current input packet
  // skipped, if the stream hasn't been obsoleted by the time this request reaches the
  // StreamControl thread.
  //
  // The default implementation panics; see the note on HasMoreInputData() about test code.
  virtual void AsyncResetStreamAfterCurrentFrame() { ZX_PANIC("not impemented"); }
};
// States tracked by Vp9Decoder in its state_ member (which starts out as kSwappedOut).
enum class DecoderState : uint32_t {
// In these two states the decoder is stopped because UpdateDecodeSize needs to be called. The
// difference between these two is how it needs to be restarted.
kInitialWaitingForInput,
kStoppedWaitingForInput,
// A frame was produced and the hardware is waiting for permission to decode another frame.
kFrameJustProduced,
// The hardware is currently processing data. The watchdog should always be running while the
// hardware's in this state.
kRunning,
// The hardware is waiting for reference frames and outputs to be initialized after decoding the
// uncompressed header and before decoding the compressed data.
kPausedAtHeader,
// The hardware is waiting for reference frames, but the special end-of-stream size was
// reached. It can safely be swapped out now, because its state doesn't matter.
kPausedAtEndOfStream,
// The hardware's state doesn't reflect that of the Vp9Decoder.
kSwappedOut,
// Used during watchdog handling, to avoid processing interrupts that occur after watchdog. The
// current decoder is deleted and a new decoder is created to take its place.
kFailed,
};
// Returns a name string for |state|. A pointer to this function is also passed to the
// DiagnosticStateWrapper that holds state_.
static const char* DecoderStateName(DecoderState state);
// Copy construction is disabled.
Vp9Decoder(const Vp9Decoder&) = delete;
// VideoDecoder overrides. Their definitions are not part of this header.
void ForceStopDuringRemoveLocked() override;
~Vp9Decoder() override;
__WARN_UNUSED_RESULT zx_status_t Initialize() override;
__WARN_UNUSED_RESULT zx_status_t InitializeHardware() override;
void HandleInterrupt() override;
// NOTE(review): presumably invoked when a consumer is finished with |frame| (see the comments on
// Frame::frame about muting ReturnFrame()) - confirm against the implementation.
void ReturnFrame(std::shared_ptr<VideoFrame> frame) override;
// Records that a fatal error occurred (have_fatal_error_) and then notifies the client via
// Client::OnError(). The flag is set before the client is called.
void CallErrorHandler() override {
have_fatal_error_ = true;
client_->OnError();
}
// VideoDecoder override; defined outside this header.
// NOTE(review): presumably delivers the newly allocated output |frames| and their dimensions
// after the decoder asked for new frames (see waiting_for_new_frames_) - confirm.
void InitializedFrames(std::vector<CodecFrame> frames, uint32_t width, uint32_t height,
uint32_t stride) override;
// VideoDecoder override; defined outside this header. The comment inside CanBeSwappedOut()
// explains how the two predicates relate for kInitialWaitingForInput.
__WARN_UNUSED_RESULT bool CanBeSwappedIn() override;
// Returns true when the decoder is in a state from which it may be swapped out:
// kFrameJustProduced, kStoppedWaitingForInput or kPausedAtEndOfStream.
__WARN_UNUSED_RESULT bool CanBeSwappedOut() const override {
  // kInitialWaitingForInput is intentionally absent from the checks below: we're only in that
  // state if CanBeSwappedIn(), which means there is input data to give the HW, and we don't want
  // to swap back out before that has happened.
  const DecoderState current = static_cast<DecoderState>(state_);
  if (current == DecoderState::kFrameJustProduced) {
    return true;
  }
  if (current == DecoderState::kStoppedWaitingForInput) {
    return true;
  }
  return current == DecoderState::kPausedAtEndOfStream;
}
bool IsUtilizingHardware() const override {
switch (static_cast<DecoderState>(state_)) {
case DecoderState::kInitialWaitingForInput:
case DecoderState::kRunning:
case DecoderState::kPausedAtHeader:
case DecoderState::kFailed:
return true;
case DecoderState::kStoppedWaitingForInput:
case DecoderState::kFrameJustProduced:
case DecoderState::kPausedAtEndOfStream:
case DecoderState::kSwappedOut:
return false;
}
}
// Marks the decoder as swapped out by setting state_ to kSwappedOut.
void SetSwappedOut() override { state_ = DecoderState::kSwappedOut; }
// Asks the frame data provider for more input via ReadMoreInputDataFromReschedule().
// frame_data_provider_ is dereferenced without a null check, so SetFrameDataProvider() must have
// been called with a non-null provider before this runs.
void SwappedIn() override { frame_data_provider_->ReadMoreInputDataFromReschedule(this); }
// VideoDecoder overrides; defined outside this header.
void OnSignaledWatchdog() override;
zx_status_t SetupProtection() override;
// Stores |provider| in frame_data_provider_. The pointer is kept as a raw pointer; presumably it
// is not owned and must outlive its use by the decoder - confirm with callers.
void SetFrameDataProvider(FrameDataProvider* provider) { frame_data_provider_ = provider; }
// Defined outside this header. The DecoderState comments note that UpdateDecodeSize needs to be
// called while in kInitialWaitingForInput or kStoppedWaitingForInput.
void UpdateDecodeSize(uint32_t size);
// The number of frames that have been emitted from the FW (not necessarily emitted downstream
// however) since the most recent UpdateDecodeSize().
uint32_t FramesSinceUpdateDecodeSize();
// Returns true while the decoder sits in one of the two "waiting for input" states
// (kStoppedWaitingForInput or kInitialWaitingForInput).
__WARN_UNUSED_RESULT bool needs_more_input_data() const {
  const DecoderState current = static_cast<DecoderState>(state_);
  const bool stopped_waiting = current == DecoderState::kStoppedWaitingForInput;
  const bool initial_waiting = current == DecoderState::kInitialWaitingForInput;
  return stopped_waiting || initial_waiting;
}
// Returns true iff state_ is kSwappedOut.
__WARN_UNUSED_RESULT bool swapped_out() const { return state_ == DecoderState::kSwappedOut; }
// Defined outside this header.
void SetPausedAtEndOfStream();
// Test hook: sets reallocate_buffers_next_frame_for_testing_, which is used to force new buffers
// to be allocated without needing a test stream that resizes.
void set_reallocate_buffers_next_frame_for_testing() {
reallocate_buffers_next_frame_for_testing_ = true;
}
// Test hook: sets should_inject_initialization_fault_for_testing_, which forces the next
// InitializeHardware call to fail.
void InjectInitializationFault() { should_inject_initialization_fault_for_testing_ = true; }
private:
// Test classes (forward-declared in namespace test) that get access to private members.
friend class test::Vp9UnitTest;
friend class test::TestVP9;
friend class test::TestFrameProvider;
friend class CodecAdapterVp9;
// Forward declaration: BufferAllocator refers to WorkingBuffer* before WorkingBuffer is defined.
class WorkingBuffer;
// Keeps a list of WorkingBuffer pointers (buffers_) and declares operations over that list.
// WorkingBuffers derives from this class and passes itself as the allocator to each of its
// WorkingBuffer members.
class BufferAllocator {
public:
// NOTE(review): presumably appends |buffer| to buffers_; the definition is not in this header.
void Register(WorkingBuffer* buffer);
// NOTE(review): presumably allocates backing memory for the registered buffers using |decoder|,
// honoring |is_secure| - confirm against the implementation.
zx_status_t AllocateBuffers(VideoDecoder::Owner* decoder, bool is_secure);
void CheckBuffers();
private:
// Non-owning pointers to registered buffers.
std::vector<WorkingBuffer*> buffers_;
};
class WorkingBuffer {
public:
WorkingBuffer(BufferAllocator* allocator, size_t size, bool can_be_protected, const char* name);
~WorkingBuffer();
WorkingBuffer(const WorkingBuffer& to_copy) = delete;
WorkingBuffer& operator=(const WorkingBuffer& to_copy) = delete;
WorkingBuffer(WorkingBuffer&& to_move) = default;
WorkingBuffer& operator=(WorkingBuffer&& to_move) = default;
uint32_t addr32();
size_t size() const { return size_; }
const char* name() const { return name_; }
InternalBuffer& buffer() { return buffer_.value(); }
bool has_buffer() { return buffer_.has_value(); }
bool can_be_protected() const { return can_be_protected_; }
void SetBuffer(InternalBuffer buffer) { buffer_.emplace(std::move(buffer)); }
private:
size_t size_;
bool can_be_protected_;
const char* name_;
std::optional<InternalBuffer> buffer_;
};
// The set of working buffers used by the decoder. Each DEF_BUFFER(name, can_be_protected, size)
// line declares a WorkingBuffer member called |name| whose default member initializer passes
// `this` (as the BufferAllocator base), |size|, |can_be_protected| and the stringified name.
//
// Move-only.
// NOTE(review): the members are constructed with `this` as their allocator while the base class
// stores WorkingBuffer* values; with the defaulted move operations those pointers are moved as-is.
// Confirm the implementation never relies on them after a move.
struct WorkingBuffers : public BufferAllocator {
WorkingBuffers() {}
WorkingBuffers(const WorkingBuffers& to_copy) = delete;
WorkingBuffers& operator=(const WorkingBuffers& to_copy) = delete;
WorkingBuffers(WorkingBuffers&& to_move) = default;
WorkingBuffers& operator=(WorkingBuffers&& to_move) = default;
// Sizes are large enough for 4096x2304.
#define DEF_BUFFER(name, can_be_protected, size) \
WorkingBuffer name = WorkingBuffer(this, size, can_be_protected, #name)
DEF_BUFFER(rpm, false, 0x400 * 2);
DEF_BUFFER(short_term_rps, true, 0x800);
DEF_BUFFER(picture_parameter_set, true, 0x2000);
DEF_BUFFER(swap, true, 0x800);
DEF_BUFFER(swap2, true, 0x800);
DEF_BUFFER(local_memory_dump, false, 0x400 * 2);
DEF_BUFFER(ipp_line_buffer, true, 0x4000);
DEF_BUFFER(sao_up, true, 0x2800);
DEF_BUFFER(scale_lut, true, 0x8000);
// HW/firmware requires first parameters + deblock data to be adjacent in that order.
// Both regions therefore live in the single deblock_parameters buffer declared below.
static constexpr uint32_t kDeblockParametersSize = 0x80000;
static constexpr uint32_t kDeblockDataSize = 0x80000;
DEF_BUFFER(deblock_parameters, true, kDeblockParametersSize + kDeblockDataSize);
DEF_BUFFER(deblock_parameters2, true, 0x80000); // Only used on G12a.
DEF_BUFFER(segment_map, true, 0xd800);
DEF_BUFFER(probability_buffer, false, 0x1000 * 5);
DEF_BUFFER(count_buffer, false, 0x300 * 4 * 4);
DEF_BUFFER(motion_prediction_above, true, 0x10000);
DEF_BUFFER(mmu_vbh, true, 0x5000);
DEF_BUFFER(frame_map_mmu, false, 0x1200 * 4);
// The macro is only meaningful inside this struct, so it is removed again here.
#undef DEF_BUFFER
};
public:
// We allow extracting internal buffers from an old instance and adding internal buffers to a new
// instance, to reduce the cost of switching to a new Vp9Decoder instance without giving up
// the advantages of fully re-initializing the new Vp9Decoder in every other way. In other
// words, we don't want to have a Reset(), because running the actual destructor then constructor
// is much less brittle.
//
// Any buffers that are missing or not big enough will still be reallocated despite transferring
// InternalBuffers from an old instance to a new instance.
//
// The class is move-only and opaque to outside code: all of its state is private and only
// Vp9Decoder (a friend) can read or fill it.
class InternalBuffers {
public:
InternalBuffers() = default;
InternalBuffers(InternalBuffers&& to_move) = default;
InternalBuffers& operator=(InternalBuffers&& to_move) = default;
InternalBuffers(const InternalBuffers& to_copy) = delete;
InternalBuffers& operator=(const InternalBuffers& to_copy) = delete;
private:
friend class Vp9Decoder;
std::optional<WorkingBuffers> working_buffers_;
// per-frame compressed_headers
std::vector<InternalBuffer> compressed_headers_;
// all the MpredBuffer(s); no more than 2-3.
std::vector<InternalBuffer> mpred_buffers_;
};
// Returns an InternalBuffers that can be given to a new Vp9Decoder (see the InternalBuffers
// class comment). Defined outside this header.
InternalBuffers TakeInternalBuffers();
// |internal_buffers| may carry buffers taken from a previous instance via TakeInternalBuffers(),
// or be std::nullopt. |input_type| selects the input delivery mode (see InputType).
Vp9Decoder(Owner* owner, Client* client, InputType input_type,
std::optional<InternalBuffers> internal_buffers, bool use_compressed_output,
bool is_secure);
private:
// Private counterpart of TakeInternalBuffers(); takes |internal_buffers| by value. Defined
// outside this header.
void GiveInternalBuffers(InternalBuffers internal_buffers);
// Per-frame bookkeeping for one entry of frames_. Vp9Decoder refers to Frame objects by address
// (frames_ holds std::unique_ptr<Frame>; last_frame_, current_frame_, reference_frame_map_ and
// current_reference_frames_ hold Frame*).
struct Frame {
  // explicit: a Vp9Decoder* should never silently convert into a Frame.
  explicit Frame(Vp9Decoder* parent);
  ~Frame();
  // Non-copyable. Frame has a user-declared destructor and a raw io_buffer_t member, so a
  // memberwise copy would leave two Frames describing the same resources.
  // NOTE(review): assumes no caller copies Frame, which fits frames_ holding unique_ptr<Frame>.
  Frame(const Frame&) = delete;
  Frame& operator=(const Frame&) = delete;

  Vp9Decoder* parent = nullptr;
  // Index into frames_.
  uint32_t index = 0;
  // This is the count of references from reference_frame_map_, last_frame_, current_frame_, and
  // any buffers the ultimate consumers have outstanding.
  int32_t refcount = 0;
  // Each VideoFrame is managed via shared_ptr<> here and via weak_ptr<> in CodecBuffer. There is
  // a frame.reset() performed under video_decoder_lock_ that essentially signals to the
  // weak_ptr<> in CodecBuffer not to call ReturnFrame() any more for this frame. For this
  // reason, under normal operation (not self-test), it's important that FrameReadyNotifier and
  // weak_ptr<>::lock() not result in keeping any shared_ptr<> reference on VideoFrame that lasts
  // beyond the current video_decoder_lock_ interval, since that could allow calling ReturnFrame()
  // on a frame that the Vp9Decoder doesn't want to hear about any more.
  //
  // TODO(dustingreen): Mute ReturnFrame() a different way; maybe just explicitly. Ideally, we'd
  // use a way that's more similar between decoder self-test and "normal operation".
  //
  // This shared_ptr<> must not actually be shared outside of while video_decoder_lock_ is held.
  // See previous paragraphs.
  std::shared_ptr<VideoFrame> frame;
  // This is a frame that was received from sysmem and will next be decoded into.
  std::shared_ptr<VideoFrame> on_deck_frame;
  // With the MMU enabled the compressed frame header is stored separately from the data itself,
  // allowing the data to be allocated in noncontiguous memory.
  std::optional<InternalBuffer> compressed_header;
  io_buffer_t compressed_data = {};
  // This is decoded_frame_count_ when this frame was decoded into.
  uint32_t decoded_index = 0xffffffff;
  // This is valid even after the VideoFrame is cleared out on resize.
  uint32_t hw_width = 0;
  uint32_t hw_height = 0;
  int32_t client_refcount = 0;

  // Reduces refcount and releases |frame| if it's not necessary anymore.
  void Deref();
  // Releases |frame| if it's not currently being used as a reference frame.
  void ReleaseIfNonreference();
};
// Per-picture data kept for the current and the previous frame (see current_frame_data_ and
// last_frame_data_).
//
// Every field has a default member initializer, so a default-initialized PictureData (which is
// how the Vp9Decoder members are declared) never exposes indeterminate values.
struct PictureData {
  bool keyframe = false;
  bool intra_only = false;
  uint32_t refresh_frame_flags = 0;
  bool show_frame = false;
  bool error_resilient_mode = false;
  // |pts| is only meaningful when |has_pts| is true.
  bool has_pts = false;
  uint64_t pts = 0;
};
// Forward declaration only; the union is not defined in this header. Several helpers below take
// it by pointer.
union HardwareRenderParams;
// Private helpers. All of them are defined outside this header, so only their signatures are
// documented here.
zx_status_t AllocateFrames();
void InitializeHardwarePictureList();
void InitializeParser();
// The return value is not marked __WARN_UNUSED_RESULT, unlike InitializeBuffers() below.
bool FindNewFrameBuffer(HardwareRenderParams* params, bool params_checked_previously);
void InitLoopFilter();
void UpdateLoopFilter(HardwareRenderParams* params);
void ProcessCompletedFrames();
void ShowExistingFrame(HardwareRenderParams* params);
void SkipFrameAfterFirmwareSlow();
// According to the comment on waiting_for_output_ready_, this is re-tried during ReturnFrame().
void PrepareNewFrame(bool params_checked_previously);
void ConfigureFrameOutput(bool bit_depth_8);
void ConfigureMcrcc();
void UpdateLoopFilterThresholds();
void ConfigureMotionPrediction();
void ConfigureReferenceFrameHardware();
void SetRefFrames(HardwareRenderParams* params);
void AdaptProbabilityCoefficients(uint32_t adapt_prob_status);
__WARN_UNUSED_RESULT zx_status_t InitializeBuffers();
void InitializeLoopFilterData();
// Input delivery mode; the constructor takes a matching |input_type| parameter.
InputType input_type_;
// Set via SetFrameDataProvider(); nullptr until then. SwappedIn() dereferences it unconditionally.
FrameDataProvider* frame_data_provider_ = nullptr;
std::optional<WorkingBuffers> working_buffers_;
// Current DecoderState, initially kSwappedOut. The wrapper is also given a callback that calls
// UpdateDiagnostics() and the DecoderStateName() function.
// NOTE(review): presumably the callback runs whenever the state changes - confirm in
// DiagnosticStateWrapper.
DiagnosticStateWrapper<DecoderState> state_{[this]() { UpdateDiagnostics(); },
DecoderState::kSwappedOut, &DecoderStateName};
std::unique_ptr<PowerReference> power_ref_;
// While frames_ always has size() == kMaxFrames, the actual number of valid frames that are fully
// usable is valid_frames_count_. For now we don't remove any Frame from frames_ after
// initialization, mostly for historical reasons at this point.
//
// TODO(dustingreen): Ensure we're getting all contig memory from sysmem, and/or always using
// non-compressed reference frames / zero per-frame contig that isn't part of the buffer
// collection, and if so, consider changing the size of frames_ instead of valid_frames_count_.
uint32_t valid_frames_count_ = 0;
std::vector<std::unique_ptr<Frame>> frames_;
// Non-owning pointers to Frame objects; each counts toward Frame::refcount (see the comment on
// that field).
Frame* last_frame_ = nullptr;
Frame* current_frame_ = nullptr;
// libvpx types, forward-declared at the top of this header.
std::unique_ptr<loop_filter_info_n> loop_filter_info_;
std::unique_ptr<loopfilter> loop_filter_;
std::unique_ptr<segmentation> segmentation_;
// Waiting for an available frame buffer (with reference count 0).
bool waiting_for_empty_frames_ = false;
// Waiting for an available output packet, to avoid show_existing_frame potentially allowing too
// much queued output, as a show_existing_frame output frame doesn't use up a frame buffer - but
// it does use up an output packet. We don't directly track the output packets in the
// h264_decoder, but this bool corresponds to being out of output packets in codec_adapter_vp9.
// We re-try PrepareNewFrame() during ReturnFrame() even if no refcount on any Frame has reached 0.
// NOTE(review): "h264_decoder" above looks like a copy/paste; presumably this decoder
// (Vp9Decoder) is meant - confirm.
bool waiting_for_output_ready_ = false;
// Waiting for InitializedFrames() to be called with a new size.
bool waiting_for_new_frames_ = false;
// This is the count of frames decoded since this object was created.
uint32_t decoded_frame_count_ = 0;
uint32_t frame_done_count_ = 0;
// When we deliver a superframe containing multiple frames to the FW in one submit, the FW
// _sometimes_ emits more than one frame per UpdateDecodeSize() + kVp9CommandNalDecodeDone.
// Then later if we tell the FW to continue decoding with no more frames in the
// previously-submitted data, the FW doesn't interrupt (not even with kVp9CommandNalDecodeDone)
// and we hit the watchdog. So instead, if the FW delivers more than one frame after
// UpdateDecodeSize before kVp9CommandNalDecodeDone, we notice and combine the two first entries
// in queued_frame_sizes_ to essentially remove one future UpdateDecodeSize() that's no longer
// needed.
uint32_t frames_since_update_decode_size_ = 0;
// This is used to force new buffers to be allocated without needing a test stream that
// resizes. Set by set_reallocate_buffers_next_frame_for_testing().
bool reallocate_buffers_next_frame_for_testing_ = false;
// This forces the next InitializeHardware call to fail. Set by InjectInitializationFault().
bool should_inject_initialization_fault_for_testing_ = false;
// PictureData for the previous and the current frame, respectively.
PictureData last_frame_data_;
PictureData current_frame_data_;
// This stores the motion vectors used to decode a frame for use in calculating motion vectors
// for the next frame.
//
// The choice of unique_ptr<> vs std::optional<> here seems fine either way; it'd be moving a few
// fields vs. currently chasing a couple extra pointers per frame. Unlikely to make any notable
// performance difference either way.
std::unique_ptr<InternalBuffer> last_mpred_buffer_;
std::unique_ptr<InternalBuffer> current_mpred_buffer_;
// Previously-used buffers kept around so a new buffer doesn't have to be allocated each frame.
// This will hold no more than 3 buffers shortly after GiveInternalBuffers(), but up to 1 buffer
// in the middle of a stream.
//
// The full properties of a buffer are checked before re-use since these can come from a previous
// Vp9Decoder instance.
std::vector<std::unique_ptr<InternalBuffer>> cached_mpred_buffers_;
// The VP9 specification requires that 8 reference frames can be stored - they're saved in this
// structure. Entries are non-owning Frame pointers and start out as nullptr.
static constexpr uint32_t kReferenceFrameCount = 8u;
Frame* reference_frame_map_[kReferenceFrameCount] = {};
// Each frame that's being decoded can reference 3 of the frames that are in reference_frame_map_.
static constexpr uint32_t kCurrentReferenceFrameCount = 3u;
Frame* current_reference_frames_[kCurrentReferenceFrameCount] = {};
// Value-initialized to false here; the constructor takes a matching |use_compressed_output|
// parameter.
bool use_compressed_output_ = {};
// Set by CallErrorHandler().
bool have_fatal_error_ = false;
bool already_got_watchdog_ = false;
bool has_keyframe_ = false;
// When present, these are InternalBuffer(s) that we can re-use if their properties are suitable.
//
// Also, since we move these into use, not all InternalBuffer(s) within are necessarily present()
// (may have been moved out, but not fully deleted / cleared), so care is taken to check both
// has_value() and present() before re-using an optional InternalBuffer within. While we could
// potentially simplify this by making InternalBuffer be more like std::optional<> to avoid the
// std::optional<> wrapping, that currently doesn't seem like it'd necessarily pay for itself.
std::optional<InternalBuffers> on_deck_internal_buffers_;
};
} // namespace amlogic_decoder
#endif // SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_