| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_ |
| #define SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_ |
| |
| #include <lib/async-loop/cpp/loop.h> |
| #include <lib/async/cpp/task.h> |
| |
| #include <cstdint> |
| #include <string_view> |
| #include <vector> |
| |
| #include "macros.h" |
| #include "registers.h" |
| #include "video_decoder.h" |
| |
// Opaque libvpx types; only forward declarations are needed in this header.
struct segmentation;
struct loopfilter;
struct loop_filter_info_n;
| |
| namespace amlogic_decoder { |
| |
// Test-only types, forward declared so Vp9Decoder can name them in friend declarations.
namespace test {
class TestFrameProvider;
class TestVP9;
class Vp9UnitTest;
}  // namespace test
| |
| class Vp9Decoder : public VideoDecoder { |
| public: |
| enum class InputType { |
| // A single stream is decoded at once |
| kSingleStream, |
| // Multiple streams are decoded at once. |
| // |
| // This mode is capable of interrupting re. frame headers and decoded frames until all frames |
| // previously delivered via the ring buffer are exhausted at which point the FW will interrupt |
| // re. out of input. This mode isn't very forgiving about delivering additional data that |
| // doesn't contain at least the rest of a frame - the FW will hit the SW watchdog in that case. |
| kMultiStream, |
| // Multiple streams, each with input buffers divided on frame boundaries, |
| // are decoded at once. |
| // |
| // This mode expects frames (originally separate or within a superframe) to be delivered |
| // separately to the FW. If multiple frames are delivered together, the FW can respond to |
| // DecodeSlice with the second frame's header, instead of decoding the first frame's header, |
| // which may not be usable for decoding multiple frames delivered to the FW at once. See |
| // kMultiStream for that purpose. |
| kMultiFrameBased |
| }; |
| class FrameDataProvider { |
| public: |
| // Called with the decoder locked. |
| virtual void ReadMoreInputData(Vp9Decoder* decoder) = 0; |
| virtual void ReadMoreInputDataFromReschedule(Vp9Decoder* decoder) = 0; |
| |
| // Default behavior is for the benefit of test code; production implementation overrides all the |
| // methods. |
| virtual bool HasMoreInputData() { return true; } |
| |
| // CodecAdapterVp9 will fwd to CodecImpl which will async stop/start the stream (on |
| // StreamControl thread) to continue decoding for the same stream, with the current input packet |
| // skipped, if the stream hasn't been obsoleted by the time this request reaches the |
| // StreamControl thread. |
| virtual void AsyncResetStreamAfterCurrentFrame() { ZX_PANIC("not impemented"); } |
| }; |
| |
| enum class DecoderState : uint32_t { |
| // In these two states the decoder is stopped because UpdateDecodeSize needs to be called. The |
| // difference between these two is how it needs to be restarted. |
| kInitialWaitingForInput, |
| kStoppedWaitingForInput, |
| |
| // A frame was produced and the hardware is waiting for permission to decode another frame. |
| kFrameJustProduced, |
| |
| // The hardware is currently processing data. The watchdog should always be running while the |
| // hardware's in this state. |
| kRunning, |
| |
| // The hardware is waiting for reference frames and outputs to be initialized after decoding the |
| // uncompressed header and before decoding the compressed data. |
| kPausedAtHeader, |
| |
| // The hardware is waiting for references frames, but the special end-of-stream size was |
| // reached. It can safely be swapped out now, because its state doesn't matter. |
| kPausedAtEndOfStream, |
| |
| // The hardware's state doesn't reflect that of the Vp9Decoder. |
| kSwappedOut, |
| |
| // Used during watchdog handling, to avoid processing interrupts that occur after watchdog. The |
| // current decoder is deleted and a new decoder is created to take its place. |
| kFailed, |
| }; |
| |
| static const char* DecoderStateName(DecoderState state); |
| |
| Vp9Decoder(const Vp9Decoder&) = delete; |
| |
| void ForceStopDuringRemoveLocked() override; |
| ~Vp9Decoder() override; |
| |
| __WARN_UNUSED_RESULT zx_status_t Initialize() override; |
| __WARN_UNUSED_RESULT zx_status_t InitializeHardware() override; |
| void HandleInterrupt() override; |
| void ReturnFrame(std::shared_ptr<VideoFrame> frame) override; |
| void CallErrorHandler() override { |
| have_fatal_error_ = true; |
| client_->OnError(); |
| } |
| void InitializedFrames(std::vector<CodecFrame> frames, uint32_t width, uint32_t height, |
| uint32_t stride) override; |
| __WARN_UNUSED_RESULT bool CanBeSwappedIn() override; |
| __WARN_UNUSED_RESULT bool CanBeSwappedOut() const override { |
| // We don't include kInitialWaitingForInput here, because we're only in that state if |
| // CanBeSwappedIn(), which means we have some input data to give the HW, and we don't want to |
| // be swapping back out before we've done that. |
| return state_ == DecoderState::kFrameJustProduced || |
| state_ == DecoderState::kStoppedWaitingForInput || |
| state_ == DecoderState::kPausedAtEndOfStream; |
| } |
| bool IsUtilizingHardware() const override { |
| switch (static_cast<DecoderState>(state_)) { |
| case DecoderState::kInitialWaitingForInput: |
| case DecoderState::kRunning: |
| case DecoderState::kPausedAtHeader: |
| case DecoderState::kFailed: |
| return true; |
| case DecoderState::kStoppedWaitingForInput: |
| case DecoderState::kFrameJustProduced: |
| case DecoderState::kPausedAtEndOfStream: |
| case DecoderState::kSwappedOut: |
| return false; |
| } |
| } |
| void SetSwappedOut() override { state_ = DecoderState::kSwappedOut; } |
| void SwappedIn() override { frame_data_provider_->ReadMoreInputDataFromReschedule(this); } |
| void OnSignaledWatchdog() override; |
| zx_status_t SetupProtection() override; |
| |
| void SetFrameDataProvider(FrameDataProvider* provider) { frame_data_provider_ = provider; } |
| void UpdateDecodeSize(uint32_t size); |
| // The number of frames that have been emitted from the FW (not necessarily emitted downstream |
| // however) since the most recent UpdateDecodeSize(). |
| uint32_t FramesSinceUpdateDecodeSize(); |
| |
| __WARN_UNUSED_RESULT bool needs_more_input_data() const { |
| return state_ == DecoderState::kStoppedWaitingForInput || |
| state_ == DecoderState::kInitialWaitingForInput; |
| } |
| |
| __WARN_UNUSED_RESULT bool swapped_out() const { return state_ == DecoderState::kSwappedOut; } |
| |
| void SetPausedAtEndOfStream(); |
| |
| void set_reallocate_buffers_next_frame_for_testing() { |
| reallocate_buffers_next_frame_for_testing_ = true; |
| } |
| |
| void InjectInitializationFault() { should_inject_initialization_fault_for_testing_ = true; } |
| |
| private: |
| friend class test::Vp9UnitTest; |
| friend class test::TestVP9; |
| friend class test::TestFrameProvider; |
| friend class CodecAdapterVp9; |
| class WorkingBuffer; |
| |
| class BufferAllocator { |
| public: |
| void Register(WorkingBuffer* buffer); |
| zx_status_t AllocateBuffers(VideoDecoder::Owner* decoder, bool is_secure); |
| void CheckBuffers(); |
| |
| private: |
| std::vector<WorkingBuffer*> buffers_; |
| }; |
| |
| class WorkingBuffer { |
| public: |
| WorkingBuffer(BufferAllocator* allocator, size_t size, bool can_be_protected, const char* name); |
| |
| ~WorkingBuffer(); |
| |
| WorkingBuffer(const WorkingBuffer& to_copy) = delete; |
| WorkingBuffer& operator=(const WorkingBuffer& to_copy) = delete; |
| WorkingBuffer(WorkingBuffer&& to_move) = default; |
| WorkingBuffer& operator=(WorkingBuffer&& to_move) = default; |
| |
| uint32_t addr32(); |
| size_t size() const { return size_; } |
| const char* name() const { return name_; } |
| InternalBuffer& buffer() { return buffer_.value(); } |
| bool has_buffer() { return buffer_.has_value(); } |
| bool can_be_protected() const { return can_be_protected_; } |
| |
| void SetBuffer(InternalBuffer buffer) { buffer_.emplace(std::move(buffer)); } |
| |
| private: |
| size_t size_; |
| bool can_be_protected_; |
| const char* name_; |
| std::optional<InternalBuffer> buffer_; |
| }; |
| |
| struct WorkingBuffers : public BufferAllocator { |
| WorkingBuffers() {} |
| |
| WorkingBuffers(const WorkingBuffers& to_copy) = delete; |
| WorkingBuffers& operator=(const WorkingBuffers& to_copy) = delete; |
| WorkingBuffers(WorkingBuffers&& to_move) = default; |
| WorkingBuffers& operator=(WorkingBuffers&& to_move) = default; |
| |
| // Sizes are large enough for 4096x2304. |
| #define DEF_BUFFER(name, can_be_protected, size) \ |
| WorkingBuffer name = WorkingBuffer(this, size, can_be_protected, #name) |
| DEF_BUFFER(rpm, false, 0x400 * 2); |
| DEF_BUFFER(short_term_rps, true, 0x800); |
| DEF_BUFFER(picture_parameter_set, true, 0x2000); |
| DEF_BUFFER(swap, true, 0x800); |
| DEF_BUFFER(swap2, true, 0x800); |
| DEF_BUFFER(local_memory_dump, false, 0x400 * 2); |
| DEF_BUFFER(ipp_line_buffer, true, 0x4000); |
| DEF_BUFFER(sao_up, true, 0x2800); |
| DEF_BUFFER(scale_lut, true, 0x8000); |
| // HW/firmware requires first parameters + deblock data to be adjacent in that order. |
| static constexpr uint32_t kDeblockParametersSize = 0x80000; |
| static constexpr uint32_t kDeblockDataSize = 0x80000; |
| DEF_BUFFER(deblock_parameters, true, kDeblockParametersSize + kDeblockDataSize); |
| DEF_BUFFER(deblock_parameters2, true, 0x80000); // Only used on G12a. |
| DEF_BUFFER(segment_map, true, 0xd800); |
| DEF_BUFFER(probability_buffer, false, 0x1000 * 5); |
| DEF_BUFFER(count_buffer, false, 0x300 * 4 * 4); |
| DEF_BUFFER(motion_prediction_above, true, 0x10000); |
| DEF_BUFFER(mmu_vbh, true, 0x5000); |
| DEF_BUFFER(frame_map_mmu, false, 0x1200 * 4); |
| #undef DEF_BUFFER |
| }; |
| |
| public: |
| // We allow extracting internal buffers from an old instance and adding internal buffers to a new |
| // instance, to reduce the cost of switching to a new Vp9Decoder instance without giving up |
| // the advantages of fully re-initializing the new Vp9Decoder in every other way. In other |
| // words, we don't want to have a Reset(), because runng the actual destructor then constructor is |
| // much less brittle. |
| // |
| // Any buffers that are missing or not big enough will still be reallocated desipte transferring |
| // InternalBuffers from an old instance to a new instance. |
| class InternalBuffers { |
| public: |
| InternalBuffers() = default; |
| InternalBuffers(InternalBuffers&& to_move) = default; |
| InternalBuffers& operator=(InternalBuffers&& to_move) = default; |
| InternalBuffers(const InternalBuffers& to_copy) = delete; |
| InternalBuffers& operator=(const InternalBuffers& to_copy) = delete; |
| |
| private: |
| friend class Vp9Decoder; |
| std::optional<WorkingBuffers> working_buffers_; |
| // per-frame compressed_headers |
| std::vector<InternalBuffer> compressed_headers_; |
| // all the MpredBuffer(s); no more than 2-3. |
| std::vector<InternalBuffer> mpred_buffers_; |
| }; |
| InternalBuffers TakeInternalBuffers(); |
| Vp9Decoder(Owner* owner, Client* client, InputType input_type, |
| std::optional<InternalBuffers> internal_buffers, bool use_compressed_output, |
| bool is_secure); |
| |
| private: |
| void GiveInternalBuffers(InternalBuffers internal_buffers); |
| |
| struct Frame { |
| Frame(Vp9Decoder* parent); |
| ~Frame(); |
| |
| Vp9Decoder* parent = nullptr; |
| |
| // Index into frames_. |
| uint32_t index = 0; |
| |
| // This is the count of references from reference_frame_map_, last_frame_, current_frame_, and |
| // any buffers the ultimate consumers have outstanding. |
| int32_t refcount = 0; |
| // Each VideoFrame is managed via shared_ptr<> here and via weak_ptr<> in CodecBuffer. There is |
| // a frame.reset() performed under video_decoder_lock_ that essentially signals to the |
| // weak_ptr<> in CodecBuffer not to call ReturnFrame() any more for this frame. For this |
| // reason, under normal operation (not self-test), it's important that FrameReadyNotifier and |
| // weak_ptr<>::lock() not result in keeping any shared_ptr<> reference on VideoFrame that lasts |
| // beyond the current video_decoder_lock_ interval, since that could allow calling ReturnFrame() |
| // on a frame that the Vp9Decoder doesn't want to hear about any more. |
| // |
| // TODO(dustingreen): Mute ReturnFrame() a different way; maybe just explicitly. Ideally, we'd |
| // use a way that's more similar between decoder self-test and "normal operation". |
| // |
| // This shared_ptr<> must not actually be shared outside of while video_decoder_lock_ is held. |
| // See previous paragraphs. |
| std::shared_ptr<VideoFrame> frame; |
| |
| // This is a frame that was received from sysmem and will next be decoded into. |
| std::shared_ptr<VideoFrame> on_deck_frame; |
| // With the MMU enabled the compressed frame header is stored separately from the data itself, |
| // allowing the data to be allocated in noncontiguous memory. |
| std::optional<InternalBuffer> compressed_header; |
| |
| io_buffer_t compressed_data = {}; |
| |
| // This is decoded_frame_count_ when this frame was decoded into. |
| uint32_t decoded_index = 0xffffffff; |
| |
| // This is valid even after the VideoFrame is cleared out on resize. |
| uint32_t hw_width = 0; |
| uint32_t hw_height = 0; |
| int32_t client_refcount = 0; |
| |
| // Redueces refcount and releases |frame| if it's not necessary anymore. |
| void Deref(); |
| |
| // Releases |frame| if it's not currently being used as a reference frame. |
| void ReleaseIfNonreference(); |
| }; |
| |
| struct PictureData { |
| bool keyframe = false; |
| bool intra_only = false; |
| uint32_t refresh_frame_flags = 0; |
| bool show_frame; |
| bool error_resilient_mode; |
| bool has_pts = false; |
| uint64_t pts = 0; |
| }; |
| |
| union HardwareRenderParams; |
| |
| zx_status_t AllocateFrames(); |
| void InitializeHardwarePictureList(); |
| void InitializeParser(); |
| bool FindNewFrameBuffer(HardwareRenderParams* params, bool params_checked_previously); |
| void InitLoopFilter(); |
| void UpdateLoopFilter(HardwareRenderParams* params); |
| void ProcessCompletedFrames(); |
| void ShowExistingFrame(HardwareRenderParams* params); |
| void SkipFrameAfterFirmwareSlow(); |
| void PrepareNewFrame(bool params_checked_previously); |
| void ConfigureFrameOutput(bool bit_depth_8); |
| void ConfigureMcrcc(); |
| void UpdateLoopFilterThresholds(); |
| void ConfigureMotionPrediction(); |
| void ConfigureReferenceFrameHardware(); |
| void SetRefFrames(HardwareRenderParams* params); |
| void AdaptProbabilityCoefficients(uint32_t adapt_prob_status); |
| __WARN_UNUSED_RESULT zx_status_t InitializeBuffers(); |
| void InitializeLoopFilterData(); |
| |
| InputType input_type_; |
| |
| FrameDataProvider* frame_data_provider_ = nullptr; |
| |
| std::optional<WorkingBuffers> working_buffers_; |
| DiagnosticStateWrapper<DecoderState> state_{[this]() { UpdateDiagnostics(); }, |
| DecoderState::kSwappedOut, &DecoderStateName}; |
| std::unique_ptr<PowerReference> power_ref_; |
| |
| // While frames_ always has size() == kMaxFrames, the actual number of valid frames that are fully |
| // usable is valid_frames_count_. For now we don't remove any Frame from frames_ after |
| // initialization, mostly for historical reasons at this point. |
| // |
| // TODO(dustingreen): Ensure we're getting all contig memory from sysmem, and/or always using |
| // non-compressed reference frames / zero per-frame contig that isn't part of the buffer |
| // collection, and if so, consider changing the size of frames_ instead of valid_frames_count_. |
| uint32_t valid_frames_count_ = 0; |
| std::vector<std::unique_ptr<Frame>> frames_; |
| |
| Frame* last_frame_ = nullptr; |
| Frame* current_frame_ = nullptr; |
| std::unique_ptr<loop_filter_info_n> loop_filter_info_; |
| std::unique_ptr<loopfilter> loop_filter_; |
| std::unique_ptr<segmentation> segmentation_; |
| // Waiting for an available frame buffer (with reference count 0). |
| bool waiting_for_empty_frames_ = false; |
| // Waiting for an available output packet, to avoid show_existing_frame potentially allowing too |
| // much queued output, as a show_existing_frame output frame doesn't use up a frame buffer - but |
| // it does use up an output packet. We don't directly track the output packets in the |
| // h264_decoder, but this bool corresponds to being out of output packets in codec_adapter_vp9. |
| // We re-try PrepareNewFrame() during ReturnFrame() even if no refcount on any Frame has reached 0 |
| bool waiting_for_output_ready_ = false; |
| // Waiting for InitializedFrameBuffers to be called with a new size. |
| bool waiting_for_new_frames_ = false; |
| |
| // This is the count of frames decoded since this object was created. |
| uint32_t decoded_frame_count_ = 0; |
| |
| uint32_t frame_done_count_ = 0; |
| |
| // When we deliver a superframe containing multiple frames to the FW in one submit, the FW |
| // _sometimes_ emits more than one frame per UpdateDecodeSize() + kVp9CommandNalDecodeDone. |
| // Then later if we tell the FW to continue decoding with no more frames in the |
| // previously-submitted data, the FW doesn't interrupt (not even with kVp9CommandNalDecodeDone) |
| // and we hit the watchdog. So instead, if the FW delivers more than one frame after |
| // UpdateDecodeSize before kVp9CommandNalDecodeDone, we notice and combine the two first entries |
| // in queued_frame_sizes_ to essentially remove one future UpdateDecodeSize() that's no longer |
| // needed. |
| uint32_t frames_since_update_decode_size_ = 0; |
| |
| // This is used to force new buffers to be allocated without needing a test stream that |
| // resizes. |
| bool reallocate_buffers_next_frame_for_testing_ = false; |
| // This forces the next InitializeHardware call to fail. |
| bool should_inject_initialization_fault_for_testing_ = false; |
| |
| PictureData last_frame_data_; |
| PictureData current_frame_data_; |
| |
| // This stores the motion vectors used to decode a frame for use in calculating motion vectors |
| // for the next frame. |
| // |
| // The choice of unique_ptr<> vs std::optional<> here seems fine either way; it'd be moving a few |
| // fields vs. currently chasing a couple extra pointers per frame. Unlikely to make any notable |
| // performance difference either way. |
| std::unique_ptr<InternalBuffer> last_mpred_buffer_; |
| std::unique_ptr<InternalBuffer> current_mpred_buffer_; |
| |
| // Previously-used buffers kept around so a new buffer doesn't have to be allocated each frame. |
| // This will hold no more than 3 buffers shortly after GiveInternalBuffers(), but up to 1 buffer |
| // in the middle of a stream. |
| // |
| // The full properties of a buffer are checked before re-use since these can come from a previous |
| // Vp9Decoder instance. |
| std::vector<std::unique_ptr<InternalBuffer>> cached_mpred_buffers_; |
| |
| // The VP9 specification requires that 8 reference frames can be stored - they're saved in this |
| // structure. |
| static constexpr uint32_t kReferenceFrameCount = 8u; |
| Frame* reference_frame_map_[kReferenceFrameCount] = {}; |
| |
| // Each frame that's being decoded can reference 3 of the frames that are in reference_frame_map_. |
| static constexpr uint32_t kCurrentReferenceFrameCount = 3u; |
| Frame* current_reference_frames_[kCurrentReferenceFrameCount] = {}; |
| |
| bool use_compressed_output_ = {}; |
| bool have_fatal_error_ = false; |
| |
| bool already_got_watchdog_ = false; |
| |
| bool has_keyframe_ = false; |
| |
| // When present, these are InternalBuffer(s) that we can re-use if their properties are suitable. |
| // |
| // Also, since we move these into use, not all InternalBuffer(s) within are necessarily present() |
| // (may have been moved out, but not fully deleted / cleared), so care is taken to check both |
| // has_value() and present() before re-using an optional InternalBuffer within. While we could |
| // potentially simplify this by making InternalBuffer be more like std::optional<> to avoid the |
| // std::optional<> wrapping, that currently doesn't seem like it'd necessarily pay for itself. |
| std::optional<InternalBuffers> on_deck_internal_buffers_; |
| }; |
| |
| } // namespace amlogic_decoder |
| |
| #endif // SRC_MEDIA_DRIVERS_AMLOGIC_DECODER_VP9_DECODER_H_ |