| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "h264_decoder.h" |
| |
| #include <lib/media/codec_impl/codec_buffer.h> |
| #include <lib/media/codec_impl/codec_frame.h> |
| #include <lib/media/codec_impl/codec_packet.h> |
| #include <lib/zx/vmo.h> |
| |
| #include <fbl/algorithm.h> |
| |
| #include "firmware_blob.h" |
| #include "macros.h" |
| #include "memory_barriers.h" |
| #include "pts_manager.h" |
| #include "util.h" |
| |
| // TODO(35200): |
| // |
| // Change these to InternalBuffer: |
| // |
| // InputContext::buffer - optionally secure |
| // (done) reference_mv_buffer_ - optionally secure |
| // (done) codec_data_ - optionally secure |
| // (done) sei_data_buffer_ - optionally secure |
| // |
| // Plumb is_secure to each of the above. |
| // |
| // Fine as io_buffer_t for now: |
| // * loading firmware uses video_firmware TA when possible, so io_buffer_t when |
| // !is_tee_available_ is fine. |
| // * secondary_firmware_ - same as for main firmware. |
| |
// Alignment applied to the physical base of the codec data and SEI buffers in Initialize():
// 1 << 16 bytes.
constexpr uint32_t kBufferAlignShift = 12 + 4;
constexpr uint32_t kBufferAlign = 1 << kBufferAlignShift;
// Largest number of frame buffers requested from the client; used as max_frame_count in
// InitializeStream().
constexpr uint32_t kMaxActualDPBSize = 24;
| |
| // AvScratch1 |
| class StreamInfo : public TypedRegisterBase<DosRegisterIo, StreamInfo, uint32_t> { |
| public: |
| DEF_FIELD(7, 0, width_in_mbs); |
| DEF_FIELD(23, 8, total_mbs); |
| DEF_FIELD(30, 24, max_reference_size); |
| DEF_BIT(31, mv_size_flag); |
| |
| static auto Get() { return AddrType(0x09c1 * 4); } |
| }; |
| |
| // AvScratch2 |
| class SequenceInfo : public TypedRegisterBase<DosRegisterIo, SequenceInfo, uint32_t> { |
| public: |
| DEF_BIT(0, aspect_ratio_info_present_flag); |
| DEF_BIT(1, timing_info_present_flag); |
| DEF_BIT(4, pic_struct_present_flag); |
| |
| // relatively lower-confidence vs. other bits - not confirmed |
| DEF_BIT(6, fixed_frame_rate_flag); |
| |
| DEF_FIELD(14, 13, chroma_format_idc); |
| DEF_BIT(15, frame_mbs_only_flag); |
| DEF_FIELD(23, 16, aspect_ratio_idc); |
| |
| static auto Get() { return AddrType(0x09c2 * 4); } |
| }; |
| |
| // AvScratch3 |
| class SampleAspectRatioInfo |
| : public TypedRegisterBase<DosRegisterIo, SampleAspectRatioInfo, uint32_t> { |
| public: |
| DEF_FIELD(15, 0, sar_width); |
| DEF_FIELD(31, 16, sar_height); |
| |
| static auto Get() { return AddrType(0x09c3 * 4); } |
| }; |
| |
| // AvScratch6 |
| class CropInfo : public TypedRegisterBase<DosRegisterIo, CropInfo, uint32_t> { |
| public: |
| // All quantities are the number of pixels to be cropped from each side. |
| DEF_FIELD(7, 0, bottom); |
| DEF_FIELD(15, 8, top); // Ignored |
| DEF_FIELD(23, 16, right); |
| DEF_FIELD(31, 24, left); // Ignored |
| |
| static auto Get() { return AddrType(0x09c6 * 4); } |
| }; |
| |
| // AvScratchF |
| class CodecSettings : public TypedRegisterBase<DosRegisterIo, CodecSettings, uint32_t> { |
| public: |
| DEF_BIT(1, trickmode_i); |
| DEF_BIT(2, zeroed0); |
| DEF_BIT(3, drop_b_frames); |
| DEF_BIT(4, error_recovery_mode); |
| DEF_BIT(5, zeroed1); |
| DEF_BIT(6, ip_frames_only); |
| DEF_BIT(7, disable_fast_poc); |
| |
| static auto Get() { return AddrType(0x09cf * 4); } |
| }; |
| |
| // AvScratchInfo1+ |
| class PicInfo : public TypedRegisterBase<DosRegisterIo, PicInfo, uint32_t> { |
| public: |
| DEF_FIELD(4, 0, buffer_index); |
| DEF_BIT(9, error); |
| DEF_BIT(15, eos); |
| DEF_FIELD(31, 16, stream_offset); |
| |
| static auto Get(uint32_t i) { return AddrType((0x09c1 + i) * 4); } |
| }; |
| |
// aspect_ratio_idc value meaning that sar_width / sar_height are given explicitly. (A value of 0
// means "Unspecified".)
constexpr uint32_t kAspectRatioIdcExtendedSar = 255;

// Sample aspect ratios indexed directly by aspect_ratio_idc. The struct type is left unnamed
// because this single static instance is the only thing ever read.
struct {
  const uint8_t sar_width;
  const uint8_t sar_height;
} kSarTable[] = {
    // Index 0 is never read. It is kept anyway: it costs 2 bytes, and subtracting 1 from
    // aspect_ratio_idc at the lookup site would probably cost at least as much code.
    {0, 0},     // 0
    {1, 1},     // 1
    {12, 11},   // 2
    {10, 11},   // 3
    {16, 11},   // 4
    {40, 33},   // 5
    {24, 11},   // 6
    {20, 11},   // 7
    {32, 11},   // 8
    {80, 33},   // 9
    {18, 11},   // 10
    {15, 11},   // 11
    {64, 33},   // 12
    {160, 99},  // 13
    {4, 3},     // 14
    {3, 2},     // 15
    {2, 1},     // 16
};
| |
// Returns how many frames the decoded picture buffer may need to hold for a stream of the given
// level and picture size, capped at 16.
//
// |level_idc| is looked up in Table A-1 of the h.264 spec
// (https://www.itu.int/rec/T-REC-H.264-201704-I/en) to find MaxDpbMbs; that budget is then
// divided by the picture size in macroblocks, rounding up.
//
// Returns 0 when |level_idc| is not a known level or when either dimension is 0. Callers treat 0
// as "invalid".
static uint32_t GetMaxDpbSize(uint32_t level_idc, uint32_t width_in_mbs, uint32_t height_in_mbs) {
  uint32_t max_dpb_mbs;
  switch (level_idc) {
    case 9:  // Level 1b, which has the same MaxDpbMbs as level 1.
    case 10:
      max_dpb_mbs = 396;
      break;
    case 11:
      max_dpb_mbs = 900;
      break;
    case 12:
    case 13:
    case 20:
      max_dpb_mbs = 2376;
      break;
    case 21:
      max_dpb_mbs = 4752;
      break;
    case 22:
    case 30:
      max_dpb_mbs = 8100;
      break;
    case 31:
      max_dpb_mbs = 18000;
      break;
    case 32:
      max_dpb_mbs = 20480;
      break;
    case 40:
    case 41:
      max_dpb_mbs = 32768;
      break;
    case 42:
      max_dpb_mbs = 34816;
      break;
    case 50:
      max_dpb_mbs = 110400;
      break;
    case 51:
    case 52:
      max_dpb_mbs = 184320;
      break;
    case 60:
    case 61:
    case 62:
      max_dpb_mbs = 696320;
      break;
    default:
      return 0;
  }

  // Computed in 64 bits so that absurd dimensions cannot wrap the product (or the rounding
  // addition below) into a small or zero value.
  const uint64_t num_mbs = static_cast<uint64_t>(width_in_mbs) * height_in_mbs;
  if (num_mbs == 0) {
    return 0;
  }
  const uint64_t frames_rounded_up = (max_dpb_mbs + num_mbs - 1) / num_mbs;
  return static_cast<uint32_t>(std::min<uint64_t>(16, frames_rounded_up));
}
| |
| H264Decoder::~H264Decoder() { |
| owner_->core()->StopDecoding(); |
| owner_->core()->WaitForIdle(); |
| BarrierBeforeRelease(); |
| io_buffer_release(&secondary_firmware_); |
| // ~reference_mv_buffer_ |
| // ~sei_data_buffer_ |
| // ~codec_data_ |
| } |
| |
| zx_status_t H264Decoder::ResetHardware() { |
| DosSwReset0::Get().FromValue((1 << 7) | (1 << 6) | (1 << 4)).WriteTo(owner_->dosbus()); |
| DosSwReset0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| |
| // Reads are used for delaying running later code. |
| for (uint32_t i = 0; i < 3; i++) { |
| DosSwReset0::Get().ReadFrom(owner_->dosbus()); |
| } |
| |
| DosSwReset0::Get().FromValue((1 << 7) | (1 << 6) | (1 << 4)).WriteTo(owner_->dosbus()); |
| DosSwReset0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| |
| DosSwReset0::Get().FromValue((1 << 9) | (1 << 8)).WriteTo(owner_->dosbus()); |
| DosSwReset0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| |
| // Reads are used for delaying running later code. |
| for (uint32_t i = 0; i < 3; i++) { |
| DosSwReset0::Get().ReadFrom(owner_->dosbus()); |
| } |
| |
| auto temp = PowerCtlVld::Get().ReadFrom(owner_->dosbus()); |
| temp.set_reg_value(temp.reg_value() | (1 << 9) | (1 << 6)); |
| temp.WriteTo(owner_->dosbus()); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t H264Decoder::LoadSecondaryFirmware(const uint8_t* data, uint32_t firmware_size) { |
| // For some reason, some portions of the firmware aren't loaded into the |
| // hardware directly, but are kept in main memory. |
| constexpr uint32_t kSecondaryFirmwareSize = 4 * 1024; |
| constexpr uint32_t kSecondaryFirmwareBufferSize = kSecondaryFirmwareSize * 5; |
| { |
| zx_status_t status = io_buffer_init_aligned(&secondary_firmware_, owner_->bti()->get(), |
| kSecondaryFirmwareBufferSize, kBufferAlignShift, |
| IO_BUFFER_RW | IO_BUFFER_CONTIG); |
| if (status != ZX_OK) { |
| DECODE_ERROR("Failed to make second firmware buffer: %d", status); |
| return status; |
| } |
| SetIoBufferName(&secondary_firmware_, "H264SecondaryFirmware"); |
| |
| auto addr = static_cast<uint8_t*>(io_buffer_virt(&secondary_firmware_)); |
| // The secondary firmware is in a different order in the file than the main |
| // firmware expects it to have. |
| memcpy(addr + 0, data + 0x4000, kSecondaryFirmwareSize); // header |
| memcpy(addr + 0x1000, data + 0x2000, kSecondaryFirmwareSize); // data |
| memcpy(addr + 0x2000, data + 0x6000, kSecondaryFirmwareSize); // mmc |
| memcpy(addr + 0x3000, data + 0x3000, kSecondaryFirmwareSize); // list |
| memcpy(addr + 0x4000, data + 0x5000, kSecondaryFirmwareSize); // slice |
| } |
| io_buffer_cache_flush(&secondary_firmware_, 0, kSecondaryFirmwareBufferSize); |
| return ZX_OK; |
| } |
| |
| zx_status_t H264Decoder::Initialize() { |
| uint8_t* data; |
| uint32_t firmware_size; |
| zx_status_t status = |
| owner_->SetProtected(VideoDecoder::Owner::ProtectableHardwareUnit::kVdec, is_secure_); |
| if (status != ZX_OK) |
| return status; |
| |
| if (owner_->is_tee_available()) { |
| status = owner_->TeeSmcLoadVideoFirmware(FirmwareBlob::FirmwareType::kDec_H264, |
| FirmwareBlob::FirmwareVdecLoadMode::kCompatible); |
| if (status != ZX_OK) { |
| LOG(ERROR, "owner_->TeeSmcLoadVideoFirmware() failed - status: %d", status); |
| return status; |
| } |
| } else { |
| status = owner_->firmware_blob()->GetFirmwareData(FirmwareBlob::FirmwareType::kDec_H264, &data, |
| &firmware_size); |
| if (status != ZX_OK) |
| return status; |
| status = owner_->core()->LoadFirmware(data, firmware_size); |
| if (status != ZX_OK) |
| return status; |
| |
| status = LoadSecondaryFirmware(data, firmware_size); |
| if (status != ZX_OK) |
| return status; |
| BarrierAfterFlush(); // After secondary_firmware_ cache is flushed to RAM. |
| |
| AvScratchG::Get() |
| .FromValue(truncate_to_32(io_buffer_phys(&secondary_firmware_))) |
| .WriteTo(owner_->dosbus()); |
| } |
| |
| if (!WaitForRegister(std::chrono::milliseconds(100), [this]() { |
| return !(DcacDmaCtrl::Get().ReadFrom(owner_->dosbus()).reg_value() & 0x8000); |
| })) { |
| DECODE_ERROR("Waiting for DCAC DMA timed out\n"); |
| return ZX_ERR_TIMED_OUT; |
| } |
| |
| if (!WaitForRegister(std::chrono::milliseconds(100), [this]() { |
| return !(LmemDmaCtrl::Get().ReadFrom(owner_->dosbus()).reg_value() & 0x8000); |
| })) { |
| DECODE_ERROR("Waiting for LMEM DMA timed out\n"); |
| return ZX_ERR_TIMED_OUT; |
| } |
| |
| status = ResetHardware(); |
| if (status != ZX_OK) |
| return status; |
| |
| PscaleCtrl::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| |
| // TODO(34192): After sysmem has min_base_phys_address_divisor, use that to avoid over-allocating |
| // and rounding up here. |
| const uint32_t kCodecDataSize = 0x1ee000 + kBufferAlign; |
| auto codec_data_create_result = InternalBuffer::Create( |
| "H264CodecData", &owner_->SysmemAllocatorSyncPtr(), owner_->bti(), kCodecDataSize, is_secure_, |
| /*is_writable=*/true, /*is_mapping_needed*/ false); |
| if (!codec_data_create_result.is_ok()) { |
| LOG(ERROR, "Failed to make codec data buffer - status: %d", codec_data_create_result.error()); |
| return codec_data_create_result.error(); |
| } |
| codec_data_.emplace(codec_data_create_result.take_value()); |
| zx_paddr_t aligned_codec_data_phys = fbl::round_up(codec_data_->phys_base(), kBufferAlign); |
| // sysmem ensures that newly allocated buffers are zeroed and flushed, to extent possible, so |
| // codec_data_ doesn't need CacheFlush() here. |
| |
| enum { |
| kBufferStartAddressOffset = 0x1000000, |
| }; |
| |
| // This may wrap if the address is less than the buffer start offset. |
| uint32_t buffer_offset = truncate_to_32(aligned_codec_data_phys) - kBufferStartAddressOffset; |
| AvScratch1::Get().FromValue(buffer_offset).WriteTo(owner_->dosbus()); |
| |
| AvScratch7::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch8::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch9::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| VdecAssistMbox1ClrReg::Get().FromValue(1).WriteTo(owner_->dosbus()); |
| VdecAssistMbox1Mask::Get().FromValue(1).WriteTo(owner_->dosbus()); |
| MdecPicDcCtrl::Get().ReadFrom(owner_->dosbus()).set_nv12_output(true).WriteTo(owner_->dosbus()); |
| CodecSettings::Get() |
| .ReadFrom(owner_->dosbus()) |
| .set_zeroed0(0) |
| .set_drop_b_frames(false) |
| .set_error_recovery_mode(1) |
| .set_zeroed1(0) |
| .set_ip_frames_only(0) |
| .set_disable_fast_poc(0) |
| .WriteTo(owner_->dosbus()); |
| |
| // TODO(34192): After sysmem has min_base_phys_address_divisor, use that to avoid over-allocating |
| // and rounding up here. |
| constexpr uint32_t kSeiBufferSize = 8 * 1024 + kBufferAlign; |
| // Sei data buffer must be readable from CPU (though we don't actually use it |
| // yet). |
| auto sei_create_result = InternalBuffer::Create("H264SeiData", &owner_->SysmemAllocatorSyncPtr(), |
| owner_->bti(), kSeiBufferSize, false, |
| /*is_writable=*/true, /*is_mapping_neede=*/false); |
| if (!sei_create_result.is_ok()) { |
| LOG(ERROR, "Failed to make sei data buffer - status: %d", sei_create_result.error()); |
| return sei_create_result.error(); |
| } |
| sei_data_buffer_.emplace(sei_create_result.take_value()); |
| zx_paddr_t sei_data_buffer_aligned_phys = |
| fbl::round_up(sei_data_buffer_->phys_base(), kBufferAlign); |
| // Sysmem has zeroed sei_data_buffer_, flushed the zeroes, and fenced the flush, to extent |
| // possible. |
| |
| AvScratchI::Get() |
| .FromValue(truncate_to_32(sei_data_buffer_aligned_phys) - buffer_offset) |
| .WriteTo(owner_->dosbus()); |
| AvScratchJ::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| MdecPicDcThresh::Get().FromValue(0x404038aa).WriteTo(owner_->dosbus()); |
| |
| owner_->core()->StartDecoding(); |
| return ZX_OK; |
| } |
| |
| void H264Decoder::InitializedFrames(std::vector<CodecFrame> frames, uint32_t coded_width, |
| uint32_t coded_height, uint32_t stride) { |
| ZX_DEBUG_ASSERT(state_ == DecoderState::kWaitingForNewFrames); |
| ZX_DEBUG_ASSERT(coded_width == stride); |
| uint32_t frame_count = frames.size(); |
| for (uint32_t i = 0; i < frame_count; ++i) { |
| auto frame = std::make_shared<VideoFrame>(); |
| // While we'd like to pass in IO_BUFFER_CONTIG, since we know the VMO was |
| // allocated with zx_vmo_create_contiguous(), the io_buffer_init_vmo() |
| // treats that flag as an invalid argument, so instead we have to pretend as |
| // if it's a non-contiguous VMO, then validate that the VMO is actually |
| // contiguous later in aml_canvas_config() called by |
| // owner_->ConfigureCanvas() below. |
| assert(frames[i].codec_buffer_spec.has_data()); |
| assert(frames[i].codec_buffer_spec.data().is_vmo()); |
| assert(frames[i].codec_buffer_spec.data().vmo().has_vmo_handle()); |
| zx_status_t status = io_buffer_init_vmo( |
| &frame->buffer, owner_->bti()->get(), |
| frames[i].codec_buffer_spec.data().vmo().vmo_handle().get(), 0, IO_BUFFER_RW); |
| if (status != ZX_OK) { |
| DECODE_ERROR("Failed to io_buffer_init_vmo() for frame - status: %d\n", status); |
| OnFatalError(); |
| return; |
| } |
| io_buffer_cache_flush(&frame->buffer, 0, io_buffer_size(&frame->buffer, 0)); |
| |
| BarrierAfterFlush(); |
| |
| frame->hw_width = coded_width; |
| frame->hw_height = coded_height; |
| frame->coded_width = coded_width; |
| frame->coded_height = coded_height; |
| frame->stride = stride; |
| frame->uv_plane_offset = stride * coded_height; |
| frame->display_width = display_width_; |
| frame->display_height = display_height_; |
| frame->index = i; |
| |
| // can be nullptr |
| frame->codec_buffer = frames[i].codec_buffer_ptr; |
| if (frames[i].codec_buffer_ptr) { |
| frames[i].codec_buffer_ptr->SetVideoFrame(frame); |
| } |
| |
| // The ConfigureCanvas() calls validate that the VMO is physically |
| // contiguous, regardless of how the VMO was created. |
| auto y_canvas = |
| owner_->ConfigureCanvas(&frame->buffer, 0, frame->stride, frame->coded_height, 0, 0); |
| auto uv_canvas = owner_->ConfigureCanvas(&frame->buffer, frame->uv_plane_offset, frame->stride, |
| frame->coded_height / 2, 0, 0); |
| if (!y_canvas || !uv_canvas) { |
| OnFatalError(); |
| return; |
| } |
| |
| AncNCanvasAddr::Get(i) |
| .FromValue((uv_canvas->index() << 16) | (uv_canvas->index() << 8) | (y_canvas->index())) |
| .WriteTo(owner_->dosbus()); |
| video_frames_.push_back({std::move(frame), std::move(y_canvas), std::move(uv_canvas)}); |
| } |
| |
| uint32_t actual_dpb_size = frame_count; |
| ZX_DEBUG_ASSERT(actual_dpb_size <= kMaxActualDPBSize); |
| ZX_DEBUG_ASSERT(next_mv_buffer_count_ <= next_max_dpb_size_ + 1); |
| uint32_t av_scratch0 = |
| (next_mv_buffer_count_ << 24) | (actual_dpb_size << 16) | (next_max_dpb_size_ << 8); |
| AvScratch0::Get().FromValue(av_scratch0).WriteTo(owner_->dosbus()); |
| |
| state_ = DecoderState::kRunning; |
| } |
| |
| zx_status_t H264Decoder::InitializeFrames(uint32_t min_frame_count, uint32_t max_frame_count, |
| uint32_t coded_width, uint32_t coded_height, |
| uint32_t display_width, uint32_t display_height, |
| bool has_sar, uint32_t sar_width, uint32_t sar_height) { |
| DLOG("InitializeFrames() display_width: %u display_height: %u", display_width, display_height); |
| video_frames_.clear(); |
| returned_frames_.clear(); |
| |
| uint32_t stride = coded_width; |
| display_width_ = display_width; |
| display_height_ = display_height; |
| |
| // Regardless of local allocation of VMOs or remote allocation of VMOs, we |
| // first represent the frames this way. This representation conveys the |
| // potentially-non-zero offset into the VMO, and allows sharing code further |
| // down. |
| std::vector<CodecFrame> frames; |
| ::zx::bti duplicated_bti; |
| zx_status_t dup_result = owner_->bti()->duplicate(ZX_RIGHT_SAME_RIGHTS, &duplicated_bti); |
| if (dup_result != ZX_OK) { |
| DECODE_ERROR("Failed to duplicate BTI - status: %d\n", dup_result); |
| return dup_result; |
| } |
| zx_status_t initialize_result = client_->InitializeFrames( |
| std::move(duplicated_bti), min_frame_count, max_frame_count, coded_width, coded_height, |
| stride, display_width, display_height, has_sar, sar_width, sar_height); |
| if (initialize_result != ZX_OK) { |
| if (initialize_result != ZX_ERR_STOP) { |
| DECODE_ERROR("initialize_frames_handler_() failed - status: %d\n", initialize_result); |
| } |
| return initialize_result; |
| } |
| |
| return ZX_OK; |
| } |
| |
| void H264Decoder::ReturnFrame(std::shared_ptr<VideoFrame> video_frame) { |
| returned_frames_.push_back(video_frame); |
| TryReturnFrames(); |
| } |
| |
| void H264Decoder::TryReturnFrames() { |
| while (!returned_frames_.empty()) { |
| std::shared_ptr<VideoFrame> frame = returned_frames_.back(); |
| if (frame->index >= video_frames_.size() || frame != video_frames_[frame->index].frame) { |
| // Possible if the stream size changed. |
| returned_frames_.pop_back(); |
| continue; |
| } |
| if (AvScratch7::Get().ReadFrom(owner_->dosbus()).reg_value() == 0) { |
| AvScratch7::Get().FromValue(frame->index + 1).WriteTo(owner_->dosbus()); |
| } else if (AvScratch8::Get().ReadFrom(owner_->dosbus()).reg_value() == 0) { |
| AvScratch8::Get().FromValue(frame->index + 1).WriteTo(owner_->dosbus()); |
| } else { |
| // Neither return slot is free, so give up for now. An interrupt |
| // signaling completion of a frame should cause this to be tried again. |
| // TODO: Try returning frames again after a delay, to ensure this won't |
| // hang forever. |
| return; |
| } |
| returned_frames_.pop_back(); |
| } |
| } |
| |
| zx_status_t H264Decoder::InitializeStream() { |
| LOG(INFO, "H264Decoder::InitializeStream()"); |
| ZX_DEBUG_ASSERT(state_ == DecoderState::kRunning); |
| state_ = DecoderState::kWaitingForNewFrames; |
| BarrierBeforeRelease(); // For reference_mv_buffer_ |
| // Ensure empty; may or may not be set at this point. |
| reference_mv_buffer_.reset(); |
| // StreamInfo AKA AvScratch1. |
| auto stream_info = StreamInfo::Get().ReadFrom(owner_->dosbus()); |
| // SequenceInfo AKA AvScratch2. |
| auto sequence_info = SequenceInfo::Get().ReadFrom(owner_->dosbus()); |
| // SampleAspectRatioInfo AKA AvScratch3 |
| auto sar_info = SampleAspectRatioInfo::Get().ReadFrom(owner_->dosbus()); |
| uint32_t level_idc = AvScratchA::Get().ReadFrom(owner_->dosbus()).reg_value(); |
| uint32_t mb_mv_byte = stream_info.mv_size_flag() ? 24 : 96; |
| uint32_t mb_width = stream_info.width_in_mbs(); |
| if (!mb_width && stream_info.total_mbs()) |
| mb_width = 256; |
| if (!mb_width) { |
| DECODE_ERROR("Width is 0 macroblocks\n"); |
| // Not returning ZX_ERR_IO_DATA_INTEGRITY, because this isn't an explicit |
| // integrity check. |
| return ZX_ERR_INTERNAL; |
| } |
| uint32_t mb_height = stream_info.total_mbs() / mb_width; |
| |
| constexpr uint32_t kMaxDimension = 4096; |
| constexpr uint32_t kMacroblockPixels = 16; |
| |
| if (mb_width > kMaxDimension / kMacroblockPixels || |
| mb_height > kMaxDimension / kMacroblockPixels) { |
| DECODE_ERROR("Unsupported dimensions %dx%d macroblocks\n", mb_width, mb_height); |
| return ZX_ERR_INTERNAL; |
| } |
| |
| uint32_t max_dpb_size = GetMaxDpbSize(level_idc, mb_width, mb_height); |
| if (max_dpb_size == 0) { |
| LOG(WARN, |
| "level_idc, mb_width and/or mb_height invalid? - level_idc: %u mb_width: %u mb_height: %u", |
| level_idc, mb_width, mb_height); |
| return ZX_ERR_INTERNAL; |
| } |
| // GetMaxDpbSize() returns max 16, but kMaxActualDPBSize is 24. |
| ZX_DEBUG_ASSERT(max_dpb_size < kMaxActualDPBSize); |
| |
| // |max_reference_size| comes directly from max_num_ref_frames from the bitstream. Fix it up at |
| // least enough to avoid crashes, but if this value is invalid it's possible anything else in the |
| // bitstream could be broken. |
| uint32_t max_reference_size = stream_info.max_reference_size(); |
| if (max_reference_size > max_dpb_size) { |
| LOG(WARN, |
| "max_reference_size is too large - clamping - max_reference_size: %u max_dpb_size: %u", |
| max_reference_size, max_dpb_size); |
| max_reference_size = max_dpb_size; |
| } else if (max_reference_size == 0) { |
| // This is technically permissible by the h.264 spec, but still try to increase it to avoid |
| // issues. |
| LOG(WARN, "max_reference_size is zero - unexpected - using default: %u", max_dpb_size); |
| max_reference_size = max_dpb_size; |
| } |
| |
| // The HW decoder / firmware seems to require several extra frames or it won't continue decoding |
| // frames. TODO(fxb/43085): Verify whether min_buffer_count_for_camping (as opposed to |
| // min_buffer_count) can be reduced to max_dpb_size + 1, which is what you would expect based on |
| // max_num_reorder_frames from the h.264 spec. |
| constexpr uint32_t kDbpSizeAdj = 6; |
| // Seems needed for decoding bear.h264, but unclear why. |
| constexpr uint32_t kAbsoluteMinBufferCount = 10u; |
| // Technically the max we should need to camp on to decode is max_dpb_size + 1. That's because a |
| // frame is guaranteed to be output when the DPB is full and the hardware tries to insert the |
| // newly-decoding frame into it. That's also the minimum because until the DPB is full we don't |
| // know which frame should be output first (except in certain special cases like IDR frames or SEI |
| // data reducing the limit). |
| // In practice the firmware won't necessarily output frames immediately, so we need to add on some |
| // slack. |max_reference_size| + 6 is what the linux driver does in low memory situations, so it |
| // should normally work. However, when max_dpb_size and max_reference_size are very low (like in |
| // bear.h264) that isn't always enough for the firmware, so we require at least 10. |
| uint32_t min_buffer_count = std::max(std::max(max_reference_size + kDbpSizeAdj, max_dpb_size + 1), |
| kAbsoluteMinBufferCount); |
| ZX_DEBUG_ASSERT(min_buffer_count < kMaxActualDPBSize); |
| |
| // These we pass back to the firmware later, having computed/adjusted as above. |
| next_max_dpb_size_ = max_dpb_size; |
| // We need to store reference MVs for all active reference frames, as well as 1 extra for the |
| // frame that's being decoded into (in case it later becomes a reference frame). |
| next_mv_buffer_count_ = max_reference_size + 1; |
| |
| // Rounding to 4 macroblocks is for matching the linux driver, in case the |
| // hardware happens to round up as well. |
| uint32_t mv_buffer_size = fbl::round_up(mb_height, 4u) * fbl::round_up(mb_width, 4u) * |
| mb_mv_byte * next_mv_buffer_count_; |
| uint32_t mv_buffer_alloc_size = fbl::round_up(mv_buffer_size, ZX_PAGE_SIZE); |
| |
| auto create_result = InternalBuffer::Create("H264ReferenceMvs", &owner_->SysmemAllocatorSyncPtr(), |
| owner_->bti(), mv_buffer_alloc_size, is_secure_, |
| /*is_writable=*/true, /*is_mapping_needed*/ false); |
| if (!create_result.is_ok()) { |
| LOG(ERROR, "Couldn't allocate reference mv buffer - status: %d", create_result.error()); |
| return create_result.error(); |
| } |
| reference_mv_buffer_.emplace(create_result.take_value()); |
| |
| // sysmem ensure that newly allocated buffers are zeroed and flushed to RAM and fenced, to the |
| // degree possible. |
| |
| BarrierAfterFlush(); |
| AvScratch1::Get() |
| .FromValue(truncate_to_32(reference_mv_buffer_->phys_base())) |
| .WriteTo(owner_->dosbus()); |
| // In the linux driver AvScratch3 is used to communicate about the display |
| // canvas. |
| AvScratch3::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch4::Get() |
| .FromValue(truncate_to_32(reference_mv_buffer_->phys_base() + mv_buffer_size)) |
| .WriteTo(owner_->dosbus()); |
| |
| auto crop_info = CropInfo::Get().ReadFrom(owner_->dosbus()); |
| uint32_t display_width = mb_width * 16 - crop_info.right(); |
| uint32_t display_height = mb_height * 16 - crop_info.bottom(); |
| |
| // Canvas width must be a multiple of 32 bytes. |
| uint32_t coded_width = fbl::round_up(mb_width * 16, 32u); |
| uint32_t coded_height = mb_height * 16; |
| |
| // Sample aspect ratio - normalize as sar_width : sar_height. |
| // |
| // The has_sar will be true for any explicitly-specified SAR, and false for |
| // all other cases (both explicitly "Unspecified" and "Reserved" cases that we |
| // don't recognize). |
| bool has_sar = false; |
| uint32_t sar_width = 1; |
| uint32_t sar_height = 1; |
| if (sequence_info.aspect_ratio_info_present_flag()) { |
| uint32_t aspect_ratio_idc = sequence_info.aspect_ratio_idc(); |
| if (aspect_ratio_idc == kAspectRatioIdcExtendedSar) { |
| sar_width = sar_info.sar_width(); |
| sar_height = sar_info.sar_height(); |
| has_sar = true; |
| if (sar_width == 0 || sar_height == 0) { |
| // spec says this condition means "considered unspecified" |
| sar_width = 1; |
| sar_height = 1; |
| has_sar = false; |
| } |
| } else { |
| ZX_DEBUG_ASSERT(aspect_ratio_idc != kAspectRatioIdcExtendedSar); |
| // aspect_ratio_idc == 0 and "Reserved" values are treated the same way as |
| // each other, and both cases don't run the body of the following "if". We |
| // treat "Reserved" the same as "Unspecified" instead of flagging an error |
| // because it seems extremely unlikely that any "Reserved" value in this |
| // context would have meaning beyond specifying sar_width and sar_height. |
| // So for "Reserved" values we just end up with has_sar false, which |
| // should allow _something_ to be displayed even if the displayed frames |
| // have the wrong SAR. |
| if (aspect_ratio_idc >= 1 && aspect_ratio_idc <= 16) { |
| sar_width = kSarTable[aspect_ratio_idc].sar_width; |
| sar_height = kSarTable[aspect_ratio_idc].sar_height; |
| has_sar = true; |
| } |
| ZX_DEBUG_ASSERT(aspect_ratio_idc != 0 || (!has_sar && sar_width == 1 && sar_height == 1)); |
| ZX_DEBUG_ASSERT(has_sar || (sar_width == 1 && sar_height == 1)); |
| ZX_DEBUG_ASSERT(sar_width != 0 && sar_height != 0); |
| } |
| } |
| |
| // The actual # of buffers is determined by sysmem, but constrainted by "max_dpb_size" as the min |
| // # of needed buffers for reference and re-ordering purposes, not counting decode-into buffer. |
| // The "max" means the max the stream might require, so that's actually the min # of buffers we |
| // need. The +1 accounts for the decode-into buffer (AFAICT). Reduce this number at your own |
| // risk - YMMV. |
| LOG(INFO, "max_reference_size: %u max_dpb_size: %u min_buffer_count: %u", max_reference_size, |
| max_dpb_size, min_buffer_count); |
| uint32_t min_frame_count = min_buffer_count; |
| // Also constrained by the maximum number of buffers this driver knows how to track for now, which |
| // is kMaxActualDPBSize (24). |
| uint32_t max_frame_count = kMaxActualDPBSize; |
| zx_status_t status = |
| InitializeFrames(min_frame_count, max_frame_count, coded_width, coded_height, display_width, |
| display_height, has_sar, sar_width, sar_height); |
| if (status != ZX_OK) { |
| if (status != ZX_ERR_STOP) { |
| DECODE_ERROR("InitializeFrames() failed: status: %d\n", status); |
| } |
| return status; |
| } |
| |
| return ZX_OK; |
| } |
| |
| void H264Decoder::ReceivedFrames(uint32_t frame_count) { |
| uint32_t error_count = AvScratchD::Get().ReadFrom(owner_->dosbus()).reg_value(); |
| // This hit_eos is _not_ the same as the is_end_of_stream in PtsOut below. |
| bool hit_eos = false; |
| for (uint32_t i = 0; i < frame_count && !hit_eos; i++) { |
| auto pic_info = PicInfo::Get(i).ReadFrom(owner_->dosbus()); |
| uint32_t buffer_index = pic_info.buffer_index(); |
| uint32_t slice_type = |
| (AvScratchH::Get().ReadFrom(owner_->dosbus()).reg_value() >> (i * 4)) & 0xf; |
| if (pic_info.eos()) |
| hit_eos = true; |
| |
| // TODO(dustingreen): We'll need to bit-extend (nearest wins to allow for |
| // re-ordering) this value to uint64_t, so that PTSs for frames after 4GiB |
| // still work. |
| uint32_t stream_byte_offset = pic_info.stream_offset(); |
| stream_byte_offset |= |
| ((AvScratch::Get(0xa + i / 2).ReadFrom(owner_->dosbus()).reg_value() >> ((i % 2) * 16)) & |
| 0xffff) |
| << 16; |
| |
| PtsManager::LookupResult pts_result = pts_manager_->Lookup(stream_byte_offset); |
| video_frames_[buffer_index].frame->has_pts = pts_result.has_pts(); |
| video_frames_[buffer_index].frame->pts = pts_result.pts(); |
| if (pts_result.is_end_of_stream()) { |
| // TODO(dustingreen): Handle this once we're able to detect this way. For |
| // now, ignore but print an obvious message. |
| printf("##### UNHANDLED END OF STREAM DETECTED #####\n"); |
| break; |
| } |
| |
| client_->OnFrameReady(video_frames_[buffer_index].frame); |
| DLOG("Got buffer %d error %d error_count %d slice_type %d offset %x\n", buffer_index, |
| pic_info.error(), error_count, slice_type, pic_info.stream_offset()); |
| } |
| AvScratch0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| } |
| |
// Command codes found in the low byte of AvScratch0; HandleInterrupt() dispatches on them.
enum {
  kCommandNone = 0,
  kCommandInitializeStream = 1,
  kCommandNewFrames = 2,
  kCommandSwitchStreams = 3,
  kCommandFatalError = 6,
  kCommandGotFirstOffset = 9,
};
| |
| void H264Decoder::SwitchStreams() { |
| // Signal that we're ready to allocate new frames for the new stream. |
| AvScratch7::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch8::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| AvScratch9::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| |
| // Signal firmware that command has been processed. |
| AvScratch0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| } |
| |
| void H264Decoder::HandleInterrupt() { |
| // Stop processing on fatal error. |
| if (fatal_error_) |
| return; |
| |
| VdecAssistMbox1ClrReg::Get().FromValue(1).WriteTo(owner_->dosbus()); |
| |
| // Some returned frames may have been buffered up earlier, so try to return |
| // them now that the firmware had a chance to do some work. |
| TryReturnFrames(); |
| |
| // The core signals the main processor what command to run using AvScratch0. |
| // The main processor returns a result using AvScratch0 to trigger the decoder |
| // to continue (possibly 0, if no result is needed). |
| auto scratch0 = AvScratch0::Get().ReadFrom(owner_->dosbus()); |
| DLOG("Got command: %x", scratch0.reg_value()); |
| uint32_t cpu_command = scratch0.reg_value() & 0xff; |
| switch (cpu_command) { |
| case kCommandNone: |
| // It is possible that the interrupt will fire with no command. This could happen if there is |
| // an SEI message that should be acknowledged. This should not be treated as an error. |
| break; |
| |
| case kCommandInitializeStream: { |
| zx_status_t status = InitializeStream(); |
| if (status != ZX_OK) { |
| OnFatalError(); |
| } |
| } break; |
| |
| case kCommandNewFrames: |
| ReceivedFrames((scratch0.reg_value() >> 8) & 0xff); |
| break; |
| |
| case kCommandSwitchStreams: |
| SwitchStreams(); |
| break; |
| |
| case kCommandFatalError: { |
| auto error_count = AvScratchD::Get().ReadFrom(owner_->dosbus()).reg_value(); |
| DECODE_ERROR("Decoder fatal error %d\n", error_count); |
| owner_->core()->StopDecoding(); |
| // We need to reset the hardware here or for some malformed hardware streams (e.g. |
| // bear_h264[638] = 44) the CPU will hang when trying to isolate VDEC1 power on shutdown. |
| ResetHardware(); |
| OnFatalError(); |
| // Don't write to AvScratch0, so the decoder won't continue. |
| break; |
| } |
| |
| case kCommandGotFirstOffset: { |
| uint32_t first_offset = AvScratch1::Get().ReadFrom(owner_->dosbus()).reg_value(); |
| DLOG("First offset: %d\n", first_offset); |
| AvScratch0::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| break; |
| } |
| |
| default: |
| DECODE_ERROR("Got unknown command: %d\n", cpu_command); |
| } |
| |
| auto sei_itu35_flags = AvScratchJ::Get().ReadFrom(owner_->dosbus()).reg_value(); |
| if (sei_itu35_flags & (1 << 15)) { |
| DLOG("Got Supplemental Enhancement Information buffer"); |
| AvScratchJ::Get().FromValue(0).WriteTo(owner_->dosbus()); |
| } |
| } |
| |
| void H264Decoder::OnFatalError() { |
| if (!fatal_error_) { |
| fatal_error_ = true; |
| client_->OnError(); |
| } |
| } |