| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "codec_adapter_h264.h" |
| |
| #include <lib/fidl/cpp/clone.h> |
| #include <lib/trace/event.h> |
| #include <lib/zx/bti.h> |
| |
| #include <optional> |
| |
| #include "amlogic_codec_adapter.h" |
| #include "device_ctx.h" |
| #include "h264_decoder.h" |
| #include "macros.h" |
| #include "pts_manager.h" |
| #include "src/media/lib/metrics/metrics.cb.h" |
| #include "vdec1.h" |
| |
| namespace amlogic_decoder { |
| |
| // TODO(dustingreen): |
| // * Split InitializeStream() into two parts, one to get the format info from |
| // the HW and send it to the Codec client, the other part to configure |
| // output buffers once the client has configured Codec output config based |
| // on the format info. Wire up so that |
| // onCoreCodecMidStreamOutputConstraintsChange() gets called and so that |
| // CoreCodecBuildNewOutputConstraints() will pick up the correct current |
| // format info (whether still mid-stream, or at the start of a new stream |
| // that's starting before the mid-stream format change was processed for the |
| // old stream). |
| // * Allocate output video buffers contig by setting relevant buffer |
| // constraints to indicate contig to BufferAllocator / BufferCollection. |
| // * On EndOfStream at input, push all remaining data through the HW decoder |
| // and detect when the EndOfStream is appropriate to generate at the output. |
| // * Split video_->Parse() into start/complete and/or switch to feeding the |
| // ring buffer directly, or whatever is wanted by multi-concurrent-stream |
| // mode. |
| // * Detect when there's sufficient space in the ring buffer, and feed in |
| // partial input packets to permit large input packets with many AUs in |
| // them. |
| // * At least when promise_separate_access_units_on_input is set, propagate |
| // timestamp_ish values from input AU to correct output video frame (using |
| // PtsManager). |
| // * Consider if there's a way to get AmlogicVideo to re-use buffers across |
| // a stream switch without over-writing buffers that are still in-use |
| // downstream. |
| |
| namespace { |
| |
| // avconv -f lavfi -i color=c=black:s=42x52 -c:v libx264 -profile:v baseline |
| // -vframes 1 new_stream.h264 |
| // |
| // (The "baseline" part of the above isn't really needed, but neither is a |
| // higher profile really needed for this purpose.) |
| // |
| // bless new_stream.h264, and manually delete the big SEI NAL that has lots of |
| // text in it (the exact encoder settings don't really matter for this purpose), |
| // including its start code, up to just before the next start code, save. |
| // |
| // xxd -i new_stream.h264 |
| // |
| // We push this through the decoder as our "EndOfStream" marker, and detect it |
| // at the output (for now) by its unusual 42x52 resolution during |
| // InitializeStream() _and_ the fact that we've queued this marker. To force |
| // this frame to be handled by the decoder we queue kFlushThroughBytes of 0s |
| // after this data. |
| // |
| // TODO(dustingreen): We don't currently detect the EndOfStream via its stream |
| // offset in PtsManager (for h264), but that would be marginally more robust |
| // than detecting the special resolution. However, to detect via stream offset, |
| // we'd either need to avoid switching resolutions, or switch resolutions using |
| // the same output buffer set (including preserving the free/busy status of each |
| // buffer across the boundary), and delay notifying the client until we're sure |
| // a format change is real, not just the one immediately before a frame whose |
| // stream offset is >= the EndOfStream offset. |
// Bytes of the tiny marker stream that is pushed through the decoder to signal
// EndOfStream.  The data is a sequence of start-code-prefixed NAL units whose
// header bytes are 0x67, 0x68 and 0x65.  Left non-const so existing users of
// the symbol are unaffected.
unsigned char new_stream_h264[] = {
    0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0xc0, 0x0a, 0xd9, 0x0c, 0x9e, 0x49, 0xf0, 0x11, 0x00,
    0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x03, 0x00, 0x32, 0x0f, 0x12, 0x26, 0x48, 0x00, 0x00,
    0x00, 0x01, 0x68, 0xcb, 0x83, 0xcb, 0x20, 0x00, 0x00, 0x01, 0x65, 0x88, 0x84, 0x0a, 0xf2,
    0x62, 0x80, 0x00, 0xa7, 0xbc, 0x9c, 0x9d, 0x75, 0xd7, 0x5d, 0x75, 0xd7, 0x5d, 0x78};
// Derived from the array itself so the length cannot drift out of sync if the
// marker bytes are ever regenerated.
unsigned int new_stream_h264_len = static_cast<unsigned int>(sizeof(new_stream_h264));
| |
// Number of zero bytes queued after the EndOfStream marker data so that the
// marker frame is forced through the decoder.
constexpr uint32_t kFlushThroughBytes{1024};

// Dimensions of the EndOfStream marker frame; the unusual size is how the
// marker is recognized at the output.
constexpr uint32_t kEndOfStreamWidth{42};
constexpr uint32_t kEndOfStreamHeight{52};
| |
// Packs four bytes into a 32-bit code with |a| in the least-significant byte
// and |d| in the most-significant byte.
static inline constexpr uint32_t make_fourcc(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  // Build from the most-significant byte down, shifting one byte at a time.
  uint32_t code = d;
  code = (code << 8) | c;
  code = (code << 8) | b;
  code = (code << 8) | a;
  return code;
}
| |
// Smallest per-packet input buffer size accepted.  A client that picks the
// minimum shouldn't necessarily expect acceptable performance at higher
// bit-rates.
//
// TODO(fxbug.dev/13530): Set this to ~8k or so.  Until fxbug.dev/13530 is
// fixed, the per-packet buffer size has to be large enough to fit the largest
// AUs we expect to decode, in case avcC format is used.
constexpr uint32_t kInputPerPacketBufferBytesMin{512 * 1024};
// Largest per-packet input buffer size accepted.  This is an arbitrary cap for
// now.
constexpr uint32_t kInputPerPacketBufferBytesMax{4 * 1024 * 1024};

// Range of input buffer counts reserved for the codec.
constexpr uint32_t kInputBufferCountForCodecMin{1};
constexpr uint32_t kInputBufferCountForCodecMax{64};
| |
| } // namespace |
| |
| CodecAdapterH264::CodecAdapterH264(std::mutex& lock, CodecAdapterEvents* codec_adapter_events, |
| DeviceCtx* device) |
| : AmlogicCodecAdapter(lock, codec_adapter_events), |
| device_(device), |
| video_(device_->video()), |
| input_processing_loop_(&kAsyncLoopConfigNoAttachToCurrentThread) { |
| ZX_DEBUG_ASSERT(device_); |
| ZX_DEBUG_ASSERT(video_); |
| ZX_DEBUG_ASSERT(secure_memory_mode_[kInputPort] == fuchsia::mediacodec::SecureMemoryMode::OFF); |
| ZX_DEBUG_ASSERT(secure_memory_mode_[kOutputPort] == fuchsia::mediacodec::SecureMemoryMode::OFF); |
| } |
| |
| CodecAdapterH264::~CodecAdapterH264() { |
| input_processing_loop_.Quit(); |
| input_processing_loop_.JoinThreads(); |
| input_processing_loop_.Shutdown(); |
| |
| // nothing else to do here, at least not until we aren't calling PowerOff() in |
| // CoreCodecStopStream(). |
| } |
| |
| std::optional<media_metrics::StreamProcessorEvents2MetricDimensionImplementation> |
| CodecAdapterH264::CoreCodecMetricsImplementation() { |
| // Unspecified because we don't actually need metrics from h264-single, since we always use |
| // h264-multi outside of testing/debugging. |
| return media_metrics::StreamProcessorEvents2MetricDimensionImplementation_Unspecified; |
| } |
| |
| bool CodecAdapterH264::IsCoreCodecRequiringOutputConfigForFormatDetection() { return false; } |
| |
| bool CodecAdapterH264::IsCoreCodecMappedBufferUseful(CodecPort port) { |
| if (port == kInputPort) { |
| // Returning true here essentially means that we may be able to make use of mapped buffers if |
| // they're possible. However if is_secure true, we won't get a mapping and we don't really need |
| // a mapping, other than for avcC. If avcC shows up on input, we'll fail then. |
| // |
| // TODO(fxbug.dev/35200): Add the failure when avcC shows up when is_secure, as described above. |
| return true; |
| } else { |
| ZX_DEBUG_ASSERT(port == kOutputPort); |
| return false; |
| } |
| } |
| |
| bool CodecAdapterH264::IsCoreCodecHwBased(CodecPort port) { return true; } |
| |
| zx::unowned_bti CodecAdapterH264::CoreCodecBti() { return zx::unowned_bti(video_->bti()); } |
| |
| void CodecAdapterH264::CoreCodecInit( |
| const fuchsia::media::FormatDetails& initial_input_format_details) { |
| zx_status_t result = input_processing_loop_.StartThread( |
| "CodecAdapterH264::input_processing_thread_", &input_processing_thread_); |
| if (result != ZX_OK) { |
| events_->onCoreCodecFailCodec( |
| "In CodecAdapterH264::CoreCodecInit(), StartThread() failed (input)"); |
| return; |
| } |
| |
| initial_input_format_details_ = fidl::Clone(initial_input_format_details); |
| latest_input_format_details_ = fidl::Clone(initial_input_format_details); |
| |
| // TODO(dustingreen): We do most of the setup in CoreCodecStartStream() |
| // currently, but we should do more here and less there. |
| } |
| |
| void CodecAdapterH264::CoreCodecSetSecureMemoryMode( |
| CodecPort port, fuchsia::mediacodec::SecureMemoryMode secure_memory_mode) { |
| // TODO(fxbug.dev/40198): Ideally a codec list from the main CodecFactory would avoid reporting |
| // support for secure output or input when !is_tee_available(), which likely will mean reporting |
| // that in list from driver's local codec factory up to main factory. The main CodecFactory could |
| // also avoid handing out a codec that can't do secure output / input when the TEE isn't |
| // available, so we wouldn't end up here. |
| if (secure_memory_mode != fuchsia::mediacodec::SecureMemoryMode::OFF && |
| !video_->is_tee_available()) { |
| events_->onCoreCodecFailCodec( |
| "BUG 40198 - Codec factory should catch earlier when secure requested without TEE."); |
| return; |
| } |
| secure_memory_mode_[port] = secure_memory_mode; |
| } |
| |
| void CodecAdapterH264::OnFrameReady(std::shared_ptr<VideoFrame> frame) { |
| TRACE_DURATION("media", "CodecAdapterH264::OnFrameReady", "index", frame->index); |
| // The Codec interface requires that emitted frames are cache clean |
| // at least for now. We invalidate without skipping over stride-width |
| // per line, at least partly because stride - width is small (possibly |
| // always 0) for this decoder. But we do invalidate the UV section |
| // separately in case uv_plane_offset happens to leave significant |
| // space after the Y section (regardless of whether there's actually |
| // ever much padding there). |
| // |
| // TODO(dustingreen): Probably there's not ever any significant |
| // padding between Y and UV for this decoder, so probably can make one |
| // invalidate call here instead of two with no downsides. |
| // |
| // TODO(dustingreen): Skip this when the buffer isn't map-able. |
| io_buffer_cache_flush_invalidate(&frame->buffer, 0, frame->stride * frame->coded_height); |
| io_buffer_cache_flush_invalidate(&frame->buffer, frame->uv_plane_offset, |
| frame->stride * frame->coded_height / 2); |
| |
| const CodecBuffer* buffer = frame->codec_buffer; |
| ZX_DEBUG_ASSERT(buffer); |
| |
| // We intentionally _don't_ use the packet with same index as the buffer (in |
| // general - it's fine that they sometimes match), to avoid clients building |
| // up inappropriate dependency on buffer index being the same as packet |
| // index (as nice as that would be, VP9, and maybe others, don't get along |
| // with that in general, so ... force clients to treat packet index and |
| // buffer index as separate things). |
| CodecPacket* packet = GetFreePacket(); |
| // With h.264, we know that an emitted buffer implies an available output |
| // packet, because h.264 doesn't put the same output buffer in flight more |
| // than once concurrently, and we have as many output packets as buffers. |
| // This contrasts with VP9 which has unbounded show_existing_frame. |
| ZX_DEBUG_ASSERT(packet); |
| |
| // Associate the packet with the buffer while the packet is in-flight. |
| packet->SetBuffer(buffer); |
| |
| packet->SetStartOffset(0); |
| uint64_t total_size_bytes = frame->stride * frame->coded_height * 3 / 2; |
| packet->SetValidLengthBytes(total_size_bytes); |
| |
| if (frame->has_pts) { |
| packet->SetTimstampIsh(frame->pts); |
| } else { |
| packet->ClearTimestampIsh(); |
| } |
| |
| events_->onCoreCodecOutputPacket(packet, false, false); |
| } |
| |
| void CodecAdapterH264::OnError() { |
| LOG(ERROR, "OnError()"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| } |
| |
| // TODO(dustingreen): A lot of the stuff created in this method should be able |
| // to get re-used from stream to stream. We'll probably want to factor out |
| // create/init from stream init further down. |
| void CodecAdapterH264::CoreCodecStartStream() { |
| zx_status_t status; |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| parsed_video_size_ = 0; |
| is_input_format_details_pending_ = true; |
| // At least until proven otherwise. |
| is_avcc_ = false; |
| is_input_end_of_stream_queued_ = false; |
| is_stream_failed_ = false; |
| } // ~lock |
| |
| // The output port is the one we really care about for is_secure of the |
| // decoder, since the HW can read from secure or non-secure even when in |
| // secure mode, but can only write to secure memory when in secure mode. |
| auto decoder = std::make_unique<H264Decoder>(video_, this, IsOutputSecure()); |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(*video_->video_decoder_lock()); |
| video_->SetDefaultInstance(std::move(decoder), false); |
| status = video_->InitializeStreamBuffer(/*use_parser=*/true, PAGE_SIZE, IsOutputSecure()); |
| if (status != ZX_OK) { |
| events_->onCoreCodecFailCodec("InitializeStreamBuffer() failed"); |
| return; |
| } |
| status = video_->video_decoder()->Initialize(); |
| if (status != ZX_OK) { |
| events_->onCoreCodecFailCodec("video_->video_decoder_->Initialize() failed"); |
| return; |
| } |
| } // ~lock |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| status = video_->InitializeEsParser(); |
| if (status != ZX_OK) { |
| events_->onCoreCodecFailCodec("InitializeEsParser() failed"); |
| return; |
| } |
| } // ~lock |
| } |
| |
| void CodecAdapterH264::CoreCodecQueueInputFormatDetails( |
| const fuchsia::media::FormatDetails& per_stream_override_format_details) { |
| // TODO(dustingreen): Consider letting the client specify profile/level info |
| // in the FormatDetails at least optionally, and possibly sizing input |
| // buffer constraints and/or other buffers based on that. |
| |
| QueueInputItem(CodecInputItem::FormatDetails(per_stream_override_format_details)); |
| } |
| |
| void CodecAdapterH264::CoreCodecQueueInputPacket(CodecPacket* packet) { |
| QueueInputItem(CodecInputItem::Packet(packet)); |
| } |
| |
| void CodecAdapterH264::CoreCodecQueueInputEndOfStream() { |
| // This queues a marker, but doesn't force the HW to necessarily decode all |
| // the way up to the marker, depending on whether the client closes the stream |
| // or switches to a different stream first - in those cases it's fine for the |
| // marker to never show up as output EndOfStream. |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| is_input_end_of_stream_queued_ = true; |
| } // ~lock |
| |
| QueueInputItem(CodecInputItem::EndOfStream()); |
| } |
| |
| // TODO(dustingreen): See comment on CoreCodecStartStream() re. not deleting |
| // creating as much stuff for each stream. |
| void CodecAdapterH264::CoreCodecStopStream() { |
| { // scope lock |
| std::unique_lock<std::mutex> lock(lock_); |
| |
| // This helps any previously-queued ProcessInput() calls return faster, and |
| // is checked before calling WaitForParsingCompleted() in case |
| // TryStartCancelParsing() does nothing. |
| is_cancelling_input_processing_ = true; |
| } |
| |
| // Try to cause WaitForParsingCompleted() to return early. This only cancels |
| // up to one WaitForParsingCompleted() (not queued, not sticky), so it's |
| // relevant that is_cancelling_input_processing_ == true set above is |
| // preventing us from starting another wait. Or if we didn't set |
| // is_cancelling_input_processing_ = true soon enough, then this call does |
| // make WaitForParsingCompleted() return faster. |
| LOG(DEBUG, "TryStartCancelParsing()..."); |
| video_->parser()->TryStartCancelParsing(); |
| LOG(DEBUG, "TryStartCancelParsing() done."); |
| |
| LOG(DEBUG, "stopping input processing thread and recycling input packets..."); |
| { // scope lock |
| std::unique_lock<std::mutex> lock(lock_); |
| std::condition_variable stop_input_processing_condition; |
| // We know there won't be any new queuing of input, so once this posted work |
| // runs, we know all previously-queued ProcessInput() calls have returned. |
| PostToInputProcessingThread([this, &stop_input_processing_condition] { |
| std::list<CodecInputItem> leftover_input_items; |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| ZX_DEBUG_ASSERT(is_cancelling_input_processing_); |
| leftover_input_items = std::move(input_queue_); |
| is_cancelling_input_processing_ = false; |
| } // ~lock |
| for (auto& input_item : leftover_input_items) { |
| if (input_item.is_packet()) { |
| events_->onCoreCodecInputPacketDone(std::move(input_item.packet())); |
| } |
| } |
| stop_input_processing_condition.notify_all(); |
| }); |
| while (is_cancelling_input_processing_) { |
| stop_input_processing_condition.wait(lock); |
| } |
| ZX_DEBUG_ASSERT(!is_cancelling_input_processing_); |
| } // ~lock |
| LOG(DEBUG, "stopping input processing thread and recycling input packets done."); |
| |
| // Stop processing queued frames. |
| if (video_->core()) { |
| LOG(DEBUG, "StopDecoding()..."); |
| video_->core()->StopDecoding(); |
| LOG(DEBUG, "WaitForIdle()..."); |
| video_->core()->WaitForIdle(); |
| } |
| |
| // TODO(dustingreen): Currently, we have to tear down a few pieces of video_, |
| // to make it possible to run all the AmlogicVideo + DecoderCore + |
| // VideoDecoder code that seems necessary to run to ensure that a new stream |
| // will be entirely separate from an old stream, without deleting/creating |
| // AmlogicVideo itself. Probably we can tackle this layer-by-layer, fixing up |
| // AmlogicVideo to be more re-usable without the stuff in this method, then |
| // DecoderCore, then VideoDecoder. |
| |
| LOG(DEBUG, "ClearDecoderInstance()..."); |
| video_->ClearDecoderInstance(); |
| LOG(DEBUG, "ClearDecoderInstance() done."); |
| } |
| |
| void CodecAdapterH264::CoreCodecAddBuffer(CodecPort port, const CodecBuffer* buffer) { |
| if (port != kOutputPort) { |
| return; |
| } |
| ZX_DEBUG_ASSERT(port == kOutputPort); |
| all_output_buffers_.push_back(buffer); |
| } |
| |
| void CodecAdapterH264::CoreCodecConfigureBuffers( |
| CodecPort port, const std::vector<std::unique_ptr<CodecPacket>>& packets) { |
| if (port != kOutputPort) { |
| return; |
| } |
| ZX_DEBUG_ASSERT(port == kOutputPort); |
| // output |
| |
| ZX_DEBUG_ASSERT(all_output_packets_.empty()); |
| ZX_DEBUG_ASSERT(free_output_packets_.empty()); |
| ZX_DEBUG_ASSERT(!all_output_buffers_.empty()); |
| // TODO(dustingreen): Remove this assert - this CodecAdapter needs to stop |
| // forcing this to be true. Or, set packet count based on buffer collection |
| // buffer_count, or enforce that packet count is >= buffer_count. |
| ZX_DEBUG_ASSERT(all_output_buffers_.size() == packets.size()); |
| for (auto& packet : packets) { |
| all_output_packets_.push_back(packet.get()); |
| free_output_packets_.push_back(packet.get()->packet_index()); |
| } |
| // This should prevent any inadvertent dependence by clients on the ordering |
| // of packet_index values in the output stream or any assumptions re. the |
| // relationship between packet_index and buffer_index. |
| std::shuffle(free_output_packets_.begin(), free_output_packets_.end(), not_for_security_prng_); |
| } |
| |
| void CodecAdapterH264::CoreCodecRecycleOutputPacket(CodecPacket* packet) { |
| if (packet->is_new()) { |
| packet->SetIsNew(false); |
| return; |
| } |
| ZX_DEBUG_ASSERT(!packet->is_new()); |
| |
| // A recycled packet will have a buffer set because the packet is in-flight |
| // until put on the free list, and has a buffer associated while in-flight. |
| const CodecBuffer* buffer = packet->buffer(); |
| ZX_DEBUG_ASSERT(buffer); |
| |
| // Getting the buffer is all we needed the packet for. The packet won't get |
| // re-used until it goes back on the free list below. |
| packet->SetBuffer(nullptr); |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| free_output_packets_.push_back(packet->packet_index()); |
| } // ~lock |
| |
| { // scope lock |
| std::lock_guard<std::mutex> lock(*video_->video_decoder_lock()); |
| std::shared_ptr<VideoFrame> frame = buffer->video_frame().lock(); |
| if (!frame) { |
| // EndOfStream seen at the output, or a new InitializeFrames(), can cause |
| // !frame, which is fine. In that case, any new stream will request |
| // allocation of new frames. |
| return; |
| } |
| // Recycle can happen while stopped, but this CodecAdapater has no way yet |
| // to return frames while stopped, or to re-use buffers/frames across a |
| // stream switch. Any new stream will request allocation of new frames. |
| if (!video_->video_decoder()) { |
| return; |
| } |
| video_->video_decoder()->ReturnFrame(frame); |
| } // ~lock |
| } |
| |
| void CodecAdapterH264::CoreCodecEnsureBuffersNotConfigured(CodecPort port) { |
| std::lock_guard<std::mutex> lock(lock_); |
| |
| // This adapter should ensure that zero old CodecPacket* or CodecBuffer* |
| // remain in this adapter (or below). |
| |
| if (port == kInputPort) { |
| // There shouldn't be any queued input at this point, but if there is any, |
| // fail here even in a release build. |
| ZX_ASSERT(input_queue_.empty()); |
| } else { |
| ZX_DEBUG_ASSERT(port == kOutputPort); |
| |
| // The old all_output_buffers_ are no longer valid. |
| all_output_buffers_.clear(); |
| all_output_packets_.clear(); |
| free_output_packets_.clear(); |
| } |
| buffer_settings_[port] = std::nullopt; |
| } |
| |
| std::unique_ptr<const fuchsia::media::StreamOutputConstraints> |
| CodecAdapterH264::CoreCodecBuildNewOutputConstraints( |
| uint64_t stream_lifetime_ordinal, uint64_t new_output_buffer_constraints_version_ordinal, |
| bool buffer_constraints_action_required) { |
| // bear.h264 decodes into 320x192 YUV buffers, but the video display |
| // dimensions are 320x180. A the bottom of the buffer only .25 of the last |
| // 16 height macroblock row is meant to be displayed. |
| // |
| // TODO(dustingreen): Need to plumb video size separately from buffer size so |
| // we can display (for example) a video at 320x180 instead of the buffer's |
| // 320x192. The extra pixels look like don't-care pixels that just let |
| // themselves float essentially (re. past-the-boundary behavior of those |
| // pixels). Such pixels aren't meant to be displayed and look strange. |
| // Presumably the difference is the buffer needing to be a whole macroblock in |
| // width/height (%16==0) vs. the video dimensions being allowed to not use all |
| // of the last macroblock. |
| // |
| // This decoder produces NV12. |
| |
| // Fairly arbitrary. The client should set a higher value if the client needs |
| // to camp on more frames than this. |
| constexpr uint32_t kDefaultPacketCountForClient = 2; |
| |
| uint32_t per_packet_buffer_bytes = min_stride_ * height_ * 3 / 2; |
| |
| auto config = std::make_unique<fuchsia::media::StreamOutputConstraints>(); |
| |
| config->set_stream_lifetime_ordinal(stream_lifetime_ordinal); |
| |
| auto* constraints = config->mutable_buffer_constraints(); |
| auto* default_settings = constraints->mutable_default_settings(); |
| |
| // For the moment, there will be only one StreamOutputConstraints, and it'll |
| // need output buffers configured for it. |
| ZX_DEBUG_ASSERT(buffer_constraints_action_required); |
| config->set_buffer_constraints_action_required(buffer_constraints_action_required); |
| constraints->set_buffer_constraints_version_ordinal( |
| new_output_buffer_constraints_version_ordinal); |
| |
| // 0 is intentionally invalid - the client must fill out this field. |
| default_settings->set_buffer_lifetime_ordinal(0); |
| default_settings->set_buffer_constraints_version_ordinal( |
| new_output_buffer_constraints_version_ordinal); |
| default_settings->set_packet_count_for_server(min_buffer_count_[kOutputPort]); |
| default_settings->set_packet_count_for_client(kDefaultPacketCountForClient); |
| // Packed NV12 (no extra padding, min UV offset, min stride). |
| default_settings->set_per_packet_buffer_bytes(per_packet_buffer_bytes); |
| default_settings->set_single_buffer_mode(false); |
| |
| // For the moment, let's tell the client to allocate this exact size. |
| constraints->set_per_packet_buffer_bytes_min(per_packet_buffer_bytes); |
| constraints->set_per_packet_buffer_bytes_recommended(per_packet_buffer_bytes); |
| constraints->set_per_packet_buffer_bytes_max(per_packet_buffer_bytes); |
| |
| // The hardware only needs min_buffer_count_ buffers - more aren't better. |
| constraints->set_packet_count_for_server_min(min_buffer_count_[kOutputPort]); |
| constraints->set_packet_count_for_server_recommended(min_buffer_count_[kOutputPort]); |
| constraints->set_packet_count_for_server_recommended_max(min_buffer_count_[kOutputPort]); |
| constraints->set_packet_count_for_server_max(min_buffer_count_[kOutputPort]); |
| constraints->set_packet_count_for_client_min(0); |
| // Ensure that if the client allocates its max + the server max that it won't go over the hardware |
| // limit (max_buffer_count). |
| if (max_buffer_count_[kOutputPort] <= min_buffer_count_[kOutputPort]) { |
| events_->onCoreCodecFailCodec("Impossible for client to satisfy buffer counts"); |
| return nullptr; |
| } |
| constraints->set_packet_count_for_client_max( |
| (max_buffer_count_[kOutputPort] - min_buffer_count_[kOutputPort]) / 2); |
| |
| // False because it's not required and not encouraged for a video decoder |
| // output to allow single buffer mode. |
| constraints->set_single_buffer_mode_allowed(false); |
| |
| constraints->set_is_physically_contiguous_required(true); |
| |
| return config; |
| } |
| |
| fuchsia::sysmem::BufferCollectionConstraints |
| CodecAdapterH264::CoreCodecGetBufferCollectionConstraints( |
| CodecPort port, const fuchsia::media::StreamBufferConstraints& stream_buffer_constraints, |
| const fuchsia::media::StreamBufferPartialSettings& partial_settings) { |
| fuchsia::sysmem::BufferCollectionConstraints result; |
| |
| // For now, we didn't report support for single_buffer_mode, and CodecImpl |
| // will have failed the codec already by this point if the client tried to |
| // use single_buffer_mode. |
| // |
| // TODO(dustingreen): Support single_buffer_mode on input (only). |
| ZX_DEBUG_ASSERT(!partial_settings.has_single_buffer_mode() || |
| !partial_settings.single_buffer_mode()); |
| // The CodecImpl won't hand us the sysmem token, so we shouldn't expect to |
| // have the token here. |
| ZX_DEBUG_ASSERT(!partial_settings.has_sysmem_token()); |
| |
| if (port == kInputPort) { |
| // We don't override CoreCodecBuildNewInputConstraints() for now, so use same values as set by |
| // default implementation of CoreCodecBuildNewInputConstraints(). |
| min_buffer_count_[kInputPort] = kInputBufferCountForCodecMin; |
| max_buffer_count_[kInputPort] = kInputBufferCountForCodecMax; |
| } |
| |
| ZX_DEBUG_ASSERT(min_buffer_count_[port] != 0); |
| ZX_DEBUG_ASSERT(max_buffer_count_[port] != 0); |
| |
| result.min_buffer_count_for_camping = min_buffer_count_[port]; |
| |
| // Some slack is nice overall, but avoid having each participant ask for |
| // dedicated slack. Using sysmem the client will ask for it's own buffers for |
| // camping and any slack, so the codec doesn't need to ask for any extra on |
| // behalf of the client. |
| ZX_DEBUG_ASSERT(result.min_buffer_count_for_dedicated_slack == 0); |
| ZX_DEBUG_ASSERT(result.min_buffer_count_for_shared_slack == 0); |
| result.max_buffer_count = max_buffer_count_[port]; |
| |
| uint32_t per_packet_buffer_bytes_min; |
| uint32_t per_packet_buffer_bytes_max; |
| if (port == kInputPort) { |
| per_packet_buffer_bytes_min = kInputPerPacketBufferBytesMin; |
| per_packet_buffer_bytes_max = kInputPerPacketBufferBytesMax; |
| } else { |
| ZX_DEBUG_ASSERT(port == kOutputPort); |
| // NV12, based on min stride. |
| per_packet_buffer_bytes_min = min_stride_ * height_ * 3 / 2; |
| // At least for now, don't cap the per-packet buffer size for output. The |
| // HW only cares about the portion we set up for output anyway, and the |
| // client has no way to force output to occur into portions of the output |
| // buffer beyond what's implied by the max supported image dimensions. |
| per_packet_buffer_bytes_max = 0xFFFFFFFF; |
| } |
| |
| result.has_buffer_memory_constraints = true; |
| result.buffer_memory_constraints.min_size_bytes = per_packet_buffer_bytes_min; |
| result.buffer_memory_constraints.max_size_bytes = per_packet_buffer_bytes_max; |
| // amlogic requires physically contiguous on both input and output |
| result.buffer_memory_constraints.physically_contiguous_required = true; |
| result.buffer_memory_constraints.secure_required = IsPortSecureRequired(port); |
| result.buffer_memory_constraints.cpu_domain_supported = !IsPortSecureRequired(port); |
| result.buffer_memory_constraints.ram_domain_supported = |
| !IsPortSecureRequired(port) && (port == kOutputPort); |
| |
| if (IsPortSecurePermitted(port)) { |
| result.buffer_memory_constraints.inaccessible_domain_supported = true; |
| fuchsia::sysmem::HeapType secure_heap = (port == kInputPort) |
| ? fuchsia::sysmem::HeapType::AMLOGIC_SECURE_VDEC |
| : fuchsia::sysmem::HeapType::AMLOGIC_SECURE; |
| result.buffer_memory_constraints |
| .heap_permitted[result.buffer_memory_constraints.heap_permitted_count++] = secure_heap; |
| } |
| |
| if (!IsPortSecureRequired(port)) { |
| result.buffer_memory_constraints |
| .heap_permitted[result.buffer_memory_constraints.heap_permitted_count++] = |
| fuchsia::sysmem::HeapType::SYSTEM_RAM; |
| } |
| |
| if (port == kOutputPort) { |
| result.image_format_constraints_count = 1; |
| fuchsia::sysmem::ImageFormatConstraints& image_constraints = result.image_format_constraints[0]; |
| image_constraints.pixel_format.type = fuchsia::sysmem::PixelFormatType::NV12; |
| image_constraints.pixel_format.has_format_modifier = true; |
| image_constraints.pixel_format.format_modifier.value = fuchsia::sysmem::FORMAT_MODIFIER_LINEAR; |
| // TODO(fxbug.dev/13532): confirm that REC709 is always what we want here, or plumb |
| // actual YUV color space if it can ever be REC601_*. Since 2020 and 2100 |
| // are minimum 10 bits per Y sample and we're outputting NV12, 601 is the |
| // only other potential possibility here. |
| image_constraints.color_spaces_count = 1; |
| image_constraints.color_space[0].type = fuchsia::sysmem::ColorSpaceType::REC709; |
| |
| // The non-"required_" fields indicate the decoder's ability to potentially |
| // output frames at various dimensions as coded in the stream. Aside from |
| // the current stream being somewhere in these bounds, these have nothing to |
| // do with the current stream in particular. |
| image_constraints.min_coded_width = 16; |
| image_constraints.max_coded_width = 4096; |
| image_constraints.min_coded_height = 16; |
| // This intentionally isn't the _height_ of a 4096x2176 frame, it's |
| // intentionally the _width_ of a 4096x2176 frame assigned to |
| // max_coded_height. |
| // |
| // See max_coded_width_times_coded_height. We intentionally constrain the |
| // max dimension in width or height to the width of a 4096x2176 frame. |
| // While the HW might be able to go bigger than that as long as the other |
| // dimension is smaller to compensate, we don't really need to enable any |
| // larger than 4096x2176's width in either dimension, so we don't. |
| image_constraints.max_coded_height = 4096; |
| image_constraints.min_bytes_per_row = 16; |
| // no hard-coded max stride, at least for now |
| image_constraints.max_bytes_per_row = 0xFFFFFFFF; |
| image_constraints.max_coded_width_times_coded_height = 4096 * 2176; |
| image_constraints.layers = 1; |
| image_constraints.coded_width_divisor = 16; |
| image_constraints.coded_height_divisor = 16; |
| image_constraints.bytes_per_row_divisor = 16; |
| // TODO(dustingreen): Since this is a producer that will always produce at |
| // offset 0 of a physical page, we don't really care if this field is |
| // consistent with any constraints re. what the HW can do. |
| image_constraints.start_offset_divisor = 1; |
| // Odd display dimensions are permitted, but these don't imply odd NV12 |
| // dimensions - those are constrained by coded_width_divisor and |
| // coded_height_divisor which are both 16. |
| image_constraints.display_width_divisor = 1; |
| image_constraints.display_height_divisor = 1; |
| |
| // The decoder is producing frames and the decoder has no choice but to |
| // produce frames at their coded size. The decoder wants to potentially be |
| // able to support a stream with dynamic resolution, potentially including |
| // dimensions both less than and greater than the dimensions that led to the |
| // current need to allocate a BufferCollection. For this reason, the |
| // required_ fields are set to the exact current dimensions, and the |
| // permitted (non-required_) fields are set to the full potential range that |
| // the decoder could potentially output. If an initiator wants to require a |
| // larger range of dimensions that includes the required range indicated |
| // here (via a-priori knowledge of the potential stream dimensions), an |
| // initiator is free to do so. |
| image_constraints.required_min_coded_width = width_; |
| image_constraints.required_max_coded_width = width_; |
| image_constraints.required_min_coded_height = height_; |
| image_constraints.required_max_coded_height = height_; |
| } else { |
| ZX_DEBUG_ASSERT(result.image_format_constraints_count == 0); |
| } |
| |
| // We don't have to fill out usage - CodecImpl takes care of that. |
| ZX_DEBUG_ASSERT(!result.usage.cpu); |
| ZX_DEBUG_ASSERT(!result.usage.display); |
| ZX_DEBUG_ASSERT(!result.usage.vulkan); |
| ZX_DEBUG_ASSERT(!result.usage.video); |
| |
| return result; |
| } |
| |
| void CodecAdapterH264::CoreCodecSetBufferCollectionInfo( |
| CodecPort port, const fuchsia::sysmem::BufferCollectionInfo_2& buffer_collection_info) { |
| ZX_DEBUG_ASSERT(buffer_collection_info.settings.buffer_settings.is_physically_contiguous); |
| if (port == kOutputPort) { |
| ZX_DEBUG_ASSERT(buffer_collection_info.settings.has_image_format_constraints); |
| ZX_DEBUG_ASSERT(buffer_collection_info.settings.image_format_constraints.pixel_format.type == |
| fuchsia::sysmem::PixelFormatType::NV12); |
| } |
| buffer_settings_[port].emplace(buffer_collection_info.settings); |
| ZX_DEBUG_ASSERT(IsPortSecure(port) || !IsPortSecureRequired(port)); |
| ZX_DEBUG_ASSERT(!IsPortSecure(port) || IsPortSecurePermitted(port)); |
| // TODO(dustingreen): Remove after secure video decode works e2e. |
| LOG(DEBUG, "CodecAdapterH264::CoreCodecSetBufferCollectionInfo() - IsPortSecure(): %u port: %u", |
| IsPortSecure(port), port); |
| } |
| |
| fuchsia::media::StreamOutputFormat CodecAdapterH264::CoreCodecGetOutputFormat( |
| uint64_t stream_lifetime_ordinal, uint64_t new_output_format_details_version_ordinal) { |
| fuchsia::media::StreamOutputFormat result; |
| result.set_stream_lifetime_ordinal(stream_lifetime_ordinal); |
| result.mutable_format_details()->set_format_details_version_ordinal( |
| new_output_format_details_version_ordinal); |
| |
| result.mutable_format_details()->set_mime_type("video/raw"); |
| |
| // For the moment, we'll memcpy to NV12 without any extra padding. |
| fuchsia::media::VideoUncompressedFormat video_uncompressed; |
| video_uncompressed.fourcc = make_fourcc('N', 'V', '1', '2'); |
| video_uncompressed.primary_width_pixels = width_; |
| video_uncompressed.primary_height_pixels = height_; |
| video_uncompressed.secondary_width_pixels = width_ / 2; |
| video_uncompressed.secondary_height_pixels = height_ / 2; |
| // TODO(dustingreen): remove this field from the VideoUncompressedFormat or |
| // specify separately for primary / secondary. |
| video_uncompressed.planar = true; |
| video_uncompressed.swizzled = false; |
| video_uncompressed.primary_line_stride_bytes = min_stride_; |
| video_uncompressed.secondary_line_stride_bytes = min_stride_; |
| video_uncompressed.primary_start_offset = 0; |
| video_uncompressed.secondary_start_offset = min_stride_ * height_; |
| video_uncompressed.tertiary_start_offset = min_stride_ * height_ + 1; |
| video_uncompressed.primary_pixel_stride = 1; |
| video_uncompressed.secondary_pixel_stride = 2; |
| video_uncompressed.primary_display_width_pixels = display_width_; |
| video_uncompressed.primary_display_height_pixels = display_height_; |
| video_uncompressed.has_pixel_aspect_ratio = has_sar_; |
| video_uncompressed.pixel_aspect_ratio_width = sar_width_; |
| video_uncompressed.pixel_aspect_ratio_height = sar_height_; |
| |
| video_uncompressed.image_format.pixel_format.type = fuchsia::sysmem::PixelFormatType::NV12; |
| video_uncompressed.image_format.coded_width = width_; |
| video_uncompressed.image_format.coded_height = height_; |
| video_uncompressed.image_format.bytes_per_row = min_stride_; |
| video_uncompressed.image_format.display_width = display_width_; |
| video_uncompressed.image_format.display_height = display_height_; |
| video_uncompressed.image_format.layers = 1; |
| video_uncompressed.image_format.color_space.type = fuchsia::sysmem::ColorSpaceType::REC709; |
| video_uncompressed.image_format.has_pixel_aspect_ratio = has_sar_; |
| video_uncompressed.image_format.pixel_aspect_ratio_width = sar_width_; |
| video_uncompressed.image_format.pixel_aspect_ratio_height = sar_height_; |
| |
| fuchsia::media::VideoFormat video_format; |
| video_format.set_uncompressed(std::move(video_uncompressed)); |
| |
| result.mutable_format_details()->mutable_domain()->set_video(std::move(video_format)); |
| |
| return result; |
| } |
| |
| void CodecAdapterH264::CoreCodecMidStreamOutputBufferReConfigPrepare() { |
| // For this adapter, the core codec just needs us to get new frame buffers |
| // set up, so nothing to do here. |
| // |
| // CoreCodecEnsureBuffersNotConfigured() will run soon. |
| } |
| |
| void CodecAdapterH264::CoreCodecMidStreamOutputBufferReConfigFinish() { |
| // Now that the client has configured output buffers, we need to hand those |
| // back to the core codec via InitializedFrames. |
| |
| std::vector<CodecFrame> frames; |
| uint32_t width; |
| uint32_t height; |
| uint32_t stride; |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| // Now we need to populate the frames_out vector. |
| for (uint32_t i = 0; i < all_output_buffers_.size(); i++) { |
| ZX_DEBUG_ASSERT(all_output_buffers_[i]->index() == i); |
| frames.emplace_back(*all_output_buffers_[i]); |
| } |
| width = width_; |
| height = height_; |
| stride = min_stride_; |
| } // ~lock |
| { // scope lock |
| std::lock_guard<std::mutex> lock(*video_->video_decoder_lock()); |
| video_->video_decoder()->InitializedFrames(std::move(frames), width, height, stride); |
| } // ~lock |
| } |
| |
| void CodecAdapterH264::PostSerial(async_dispatcher_t* dispatcher, fit::closure to_run) { |
| zx_status_t post_result = async::PostTask(dispatcher, std::move(to_run)); |
| ZX_ASSERT_MSG(post_result == ZX_OK, "async::PostTask() failed - result: %d\n", post_result); |
| } |
| |
| void CodecAdapterH264::PostToInputProcessingThread(fit::closure to_run) { |
| PostSerial(input_processing_loop_.dispatcher(), std::move(to_run)); |
| } |
| |
| void CodecAdapterH264::QueueInputItem(CodecInputItem input_item) { |
| bool is_trigger_needed = false; |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| // For now we don't worry about avoiding a trigger if we happen to queue |
| // when ProcessInput() has removed the last item but ProcessInput() is still |
| // running. |
| if (!is_process_input_queued_) { |
| is_trigger_needed = input_queue_.empty(); |
| is_process_input_queued_ = is_trigger_needed; |
| } |
| input_queue_.emplace_back(std::move(input_item)); |
| } // ~lock |
| if (is_trigger_needed) { |
| PostToInputProcessingThread(fit::bind_member(this, &CodecAdapterH264::ProcessInput)); |
| } |
| } |
| |
| CodecInputItem CodecAdapterH264::DequeueInputItem() { |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| if (is_stream_failed_ || is_cancelling_input_processing_ || input_queue_.empty()) { |
| return CodecInputItem::Invalid(); |
| } |
| CodecInputItem to_ret = std::move(input_queue_.front()); |
| input_queue_.pop_front(); |
| return to_ret; |
| } // ~lock |
| } |
| |
| void CodecAdapterH264::ProcessInput() { |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| is_process_input_queued_ = false; |
| } // ~lock |
| while (true) { |
| CodecInputItem item = DequeueInputItem(); |
| if (!item.is_valid()) { |
| return; |
| } |
| |
| if (item.is_format_details()) { |
| // TODO(dustingreen): Be more strict about what the input format actually |
| // is, and less strict about it matching the initial format. |
| ZX_ASSERT(fidl::Equals(item.format_details(), initial_input_format_details_)); |
| |
| latest_input_format_details_ = fidl::Clone(item.format_details()); |
| |
| // Even if the new item.format_details() are the same as |
| // initial_input_format_details_, this CodecAdapter doesn't notice any |
| // in-band SPS/PPS info, so the new oob_bytes still need to be |
| // (converted and) re-delivered to the core codec in case any in-band |
| // SPS/PPS changes have been seen by the core codec since the previous |
| // time. |
| // |
| // Or maybe we have no oob_bytes in which case this is irrelevant |
| // but harmless. |
| // |
| // Or maybe the oob_bytes changed. Either way, the core codec will |
| // want that info, but in-band. We delay sending the info to the core |
| // codec until we see the first input data, to more consistently handle |
| // the oob_bytes that we get initially during Codec creation. |
| is_input_format_details_pending_ = true; |
| continue; |
| } |
| |
| if (item.is_end_of_stream()) { |
| video_->pts_manager()->SetEndOfStreamOffset(parsed_video_size_); |
| if (!ParseVideoAnnexB(nullptr, &new_stream_h264[0], new_stream_h264_len)) { |
| // This can happen when switching streams. |
| LOG(DEBUG, "!ParseVideoAnnexB(new_stream_h264)"); |
| return; |
| } |
| auto bytes = std::make_unique<uint8_t[]>(kFlushThroughBytes); |
| memset(bytes.get(), 0, kFlushThroughBytes); |
| if (!ParseVideoAnnexB(nullptr, bytes.get(), kFlushThroughBytes)) { |
| // This can happen when switching streams. |
| LOG(DEBUG, "!ParseVideoAnnexB(kFlushThroughBytes)"); |
| return; |
| } |
| continue; |
| } |
| |
| ZX_DEBUG_ASSERT(item.is_packet()); |
| auto return_input_packet = |
| fit::defer([this, &item] { events_->onCoreCodecInputPacketDone(item.packet()); }); |
| |
| if (is_input_format_details_pending_) { |
| is_input_format_details_pending_ = false; |
| if (!ParseAndDeliverCodecOobBytes()) { |
| return; |
| } |
| } |
| |
| uint8_t* data = item.packet()->buffer()->base() + item.packet()->start_offset(); |
| uint32_t len = item.packet()->valid_length_bytes(); |
| |
| video_->pts_manager()->InsertPts(parsed_video_size_, item.packet()->has_timestamp_ish(), |
| item.packet()->timestamp_ish()); |
| |
| // This call is the main reason the current thread exists, as this call can |
| // wait synchronously until there are empty output frames available to |
| // decode into, which can require the shared_fidl_thread() to get those free |
| // frames to the Codec server. |
| // |
| // TODO(dustingreen): This call could be split into a start and complete. |
| // |
| // TODO(dustingreen): The current wait duration within ParseVideo() assumes |
| // that free output frames will become free on an ongoing basis, which isn't |
| // really what'll happen when video output is paused. |
| if (!ParseVideo(item.packet()->buffer(), data, len)) { |
| return; |
| } |
| |
| // At this point CodecInputItem is holding a packet pointer which may get |
| // re-used in a new CodecInputItem, but that's ok since CodecInputItem is |
| // going away here. |
| // |
| // ~return_input_packet, ~item |
| } |
| } |
| |
| bool CodecAdapterH264::ParseAndDeliverCodecOobBytes() { |
| // Our latest oob_bytes may contain SPS/PPS info. If we have any |
| // such info, the core codec needs it (possibly converted first). |
| |
| // If there's no OOB info, then there's nothing to do, as all such info will |
| // be in-band in normal packet-based AnnexB NALs (including start codes and |
| // start code emulation prevention bytes). |
| if (!latest_input_format_details_.has_oob_bytes() || |
| latest_input_format_details_.oob_bytes().empty()) { |
| // success |
| return true; |
| } |
| |
| const std::vector<uint8_t>* oob = &latest_input_format_details_.oob_bytes(); |
| |
| // We need to deliver Annex B style SPS/PPS to this core codec, regardless of |
| // what format the oob_bytes is in. |
| |
| // The oob_bytes can be in two different forms, which can be detected by |
| // the value of the first byte: |
| // |
| // 0 - Annex B form already. The 0 is the first byte of a start code. |
| // 1 - AVCC form, which we'll convert to Annex B form. AVCC version 1. There |
| // is no AVCC version 0. |
| // anything else - fail. |
| // |
| // In addition, we need to know if AVCC or not since we need to know whether |
| // to add start code emulation prevention bytes or not. And if it's AVCC, |
| // how many bytes long the pseudo_nal_length field is - that field is before |
| // each input NAL. |
| |
| // We already checked empty() above. |
| ZX_DEBUG_ASSERT(oob->size() >= 1); |
| switch ((*oob)[0]) { |
| case 0: |
| is_avcc_ = false; |
| // This ParseVideo() consumes AnnexB oob data directly. We don't |
| // presently check if the oob data has only SPS/PPS. This data is just |
| // logically pre-pended to the stream. |
| if (!ParseVideo(nullptr, oob->data(), oob->size())) { |
| return false; |
| } |
| return true; |
| case 1: { |
| // This applies to both the oob data and the input packet payload data. |
| // Both are AVCC, or both are AnnexB. |
| is_avcc_ = true; |
| |
| /* |
| AVCC OOB data layout (bits): |
| [0] (8) - version 1 |
| [1] (8) - h264 profile # |
| [2] (8) - compatible profile bits |
| [3] (8) - h264 level (eg. 31 == "3.1") |
| [4] (6) - reserved, can be set to all 1s |
| (2) - pseudo_nal_length_field_bytes_ - 1 |
| [5] (3) - reserved, can be set to all 1s |
| (5) - sps_count |
| (16) - sps_bytes |
| (8*sps_bytes) - SPS nal_unit_type (that byte) + SPS data as RBSP. |
| (8) - pps_count |
| (16) - pps_bytes |
| (8*pps_bytes) - PPS nal_unit_type (that byte) + PPS data as RBSP. |
| */ |
| |
| // We accept 0 SPS and/or 0 PPS, but typically there's one of each. At |
| // minimum the oob buffer needs to be large enough to contain both the |
| // sps_count and pps_count fields, which is a min of 7 bytes. |
| if (oob->size() < 7) { |
| LOG(ERROR, "oob->size() < 7"); |
| ; |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| uint32_t stashed_pseudo_nal_length_bytes = ((*oob)[4] & 0x3) + 1; |
| // Temporarily, the pseudo_nal_length_field_bytes_ is 2 so we can |
| // ParseVideo() directly out of "oob". |
| pseudo_nal_length_field_bytes_ = 2; |
| uint32_t sps_count = (*oob)[5] & 0x1F; |
| uint32_t offset = 6; |
| for (uint32_t i = 0; i < sps_count; ++i) { |
| if (offset + 2 > oob->size()) { |
| LOG(ERROR, "offset + 2 > oob->size()"); |
| ; |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| uint32_t sps_length = (*oob)[offset] * 256 + (*oob)[offset + 1]; |
| if (offset + 2 + sps_length > oob->size()) { |
| LOG(ERROR, "offset + 2 + sps_length > oob->size()"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| if (!ParseVideo(nullptr, &oob->data()[offset], 2 + sps_length)) { |
| return false; |
| } |
| offset += 2 + sps_length; |
| } |
| if (offset + 1 > oob->size()) { |
| LOG(ERROR, "offset + 1 > oob->size()"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| uint32_t pps_count = (*oob)[offset++]; |
| for (uint32_t i = 0; i < pps_count; ++i) { |
| if (offset + 2 > oob->size()) { |
| LOG(ERROR, "offset + 2 > oob->size()"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| uint32_t pps_length = (*oob)[offset] * 256 + (*oob)[offset + 1]; |
| if (offset + 2 + pps_length > oob->size()) { |
| LOG(ERROR, "offset + 2 + pps_length > oob->size()"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| if (!ParseVideo(nullptr, &oob->data()[offset], 2 + pps_length)) { |
| return false; |
| } |
| offset += 2 + pps_length; |
| } |
| // All pseudo-NALs in input packet payloads will use the |
| // parsed count of bytes of the length field. |
| pseudo_nal_length_field_bytes_ = stashed_pseudo_nal_length_bytes; |
| return true; |
| } |
| default: |
| LOG(ERROR, "unexpected first oob byte"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::INVALID_INPUT_FORMAT_DETAILS); |
| return false; |
| } |
| } |
| |
| bool CodecAdapterH264::ParseVideo(const CodecBuffer* buffer, const uint8_t* data, uint32_t length) { |
| if (is_avcc_) { |
| ZX_DEBUG_ASSERT(!buffer); |
| return ParseVideoAvcc(data, length); |
| } else { |
| return ParseVideoAnnexB(buffer, data, length); |
| } |
| } |
| |
| bool CodecAdapterH264::ParseVideoAvcc(const uint8_t* data, uint32_t length) { |
| // We don't necessarily know that is_avcc_ is true on entry to this method. |
| // We use this method to send the decoder a bunch of 0x00 sometimes, which |
| // will call this method regardless of is_avcc_ or not. |
| |
| // So far, the "avcC"/"AVCC" we've seen has emulation prevention bytes on it |
| // already. So we don't add those here. But if we did need to add them, we'd |
| // add them here. |
| |
| // For now we assume the heap is pretty fast and doesn't mind the size thrash, |
| // but maybe we'll want to keep a buffer around (we'll optimize only if/when |
| // we determine this is actually a problem). We only actually use this buffer |
| // if is_avcc_ (which is not uncommon). |
| |
| // We do parse more than one pseudo_nal per input packet. |
| // |
| // No splitting NALs across input packets, for now. |
| // |
| // TODO(dustingreen): Allow splitting NALs across input packets (not a small |
| // change). Probably also move into a source_set for sharing with other |
| // CodecAdapter(s). |
| |
| // Count the input pseudo_nal(s) |
| uint32_t pseudo_nal_count = 0; |
| uint32_t i = 0; |
| while (i < length) { |
| if (i + pseudo_nal_length_field_bytes_ > length) { |
| LOG(ERROR, "i + pseudo_nal_length_field_bytes_ > length"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| // Read pseudo_nal_length field, which is a field which can be 1-4 bytes |
| // long because AVCC/avcC. |
| uint32_t pseudo_nal_length = 0; |
| for (uint32_t length_byte = 0; length_byte < pseudo_nal_length_field_bytes_; ++length_byte) { |
| pseudo_nal_length = pseudo_nal_length * 256 + data[i + length_byte]; |
| } |
| i += pseudo_nal_length_field_bytes_; |
| if (i + pseudo_nal_length > length) { |
| LOG(ERROR, "i + pseudo_nal_length > length"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| i += pseudo_nal_length; |
| ++pseudo_nal_count; |
| } |
| |
| static constexpr uint32_t kStartCodeBytes = 4; |
| uint32_t local_length = length - pseudo_nal_count * pseudo_nal_length_field_bytes_ + |
| pseudo_nal_count * kStartCodeBytes; |
| auto local_buffer = std::make_unique<uint8_t[]>(local_length); |
| uint8_t* local_data = local_buffer.get(); |
| |
| i = 0; |
| uint32_t o = 0; |
| while (i < length) { |
| if (i + pseudo_nal_length_field_bytes_ > length) { |
| LOG(ERROR, "i + pseudo_nal_length_field_bytes_ > length"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| uint32_t pseudo_nal_length = 0; |
| for (uint32_t length_byte = 0; length_byte < pseudo_nal_length_field_bytes_; ++length_byte) { |
| pseudo_nal_length = pseudo_nal_length * 256 + data[i + length_byte]; |
| } |
| i += pseudo_nal_length_field_bytes_; |
| if (i + pseudo_nal_length > length) { |
| LOG(ERROR, "i + pseudo_nal_length > length"); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| |
| local_data[o++] = 0; |
| local_data[o++] = 0; |
| local_data[o++] = 0; |
| local_data[o++] = 1; |
| |
| memcpy(&local_data[o], &data[i], pseudo_nal_length); |
| o += pseudo_nal_length; |
| i += pseudo_nal_length; |
| } |
| ZX_DEBUG_ASSERT(o == local_length); |
| ZX_DEBUG_ASSERT(i == length); |
| |
| return ParseVideoAnnexB(nullptr, local_data, local_length); |
| } |
| |
| bool CodecAdapterH264::ParseVideoAnnexB(const CodecBuffer* buffer, const uint8_t* data, |
| uint32_t length) { |
| // We don't need to check is_cancelling_input_processing_ here, because we |
| // check further down before waiting (see comment there re. why the check |
| // there after video_->ParseVideo() is important), and because returning false |
| // from this method for the first time will prevent further calls to this |
| // method thanks to propagation of false returns under ProcessInput() and a |
| // check of is_cancelling_input_processing_ in DequeueInputItem() relevant to |
| // any subsequent ProcessInput() while we're still stopping. So checking here |
| // would only be redundant. |
| |
| // Parse AnnexB data, with start codes and start code emulation prevention |
| // bytes present. |
| // |
| // The data won't be modified by ParseVideo() or ParseVideoPhysical(). |
| zx_status_t status; |
| if (buffer) { |
| // CodecImpl will Pin() the buffer if the CodecAdapter is HW-based and |
| // provides a BTI; CodecAdapterH264 does. |
| ZX_DEBUG_ASSERT(buffer->is_pinned()); |
| // Convert data from vaddr to paddr. All the input buffers are pinned |
| // continuously. |
| zx_paddr_t data_paddr = buffer->physical_base() + (data - buffer->base()); |
| status = video_->parser()->ParseVideoPhysical(data_paddr, length); |
| if (status != ZX_OK) { |
| LOG(ERROR, "ParseVideoPhysical() failed - status: %d", status); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| } else { |
| status = video_->parser()->ParseVideo(static_cast<void*>(const_cast<uint8_t*>(data)), length); |
| if (status != ZX_OK) { |
| LOG(ERROR, "ParseVideo() failed - status: %d", status); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| } |
| parsed_video_size_ += length; |
| |
| // Once we're cancelling, we're cancelling until we're done stopping. This |
| // snap of is_cancelling_input_processing_ either notices the transition to |
| // cancelling or doesn't, but doesn't have to worry about |
| // is_cancelling_input_processing_ becoming false again too soon because that |
| // doesn't happen until after this method has returned. |
| // |
| // If is_cancelling does notice is_cancelling_input_processing_ true: |
| // |
| // It's important that we snap after calling video_->ParseVideo() above so |
| // that this check occurs after parser_running_ becomes true, in case |
| // is_cancelling_input_processing_ became true and TryStartCancelParsing() ran |
| // before parser_running_ became true. In that case TryStartCancelParsing() |
| // did nothing - this cancelation check avoids calling |
| // WaitForParsingCompleted() at all in that case, which avoids waiting for 10 |
| // seconds. |
| // |
| // If is_cancelling doesn't notice is_cancelling_input_processing_ true: |
| // |
| // If on the other hand we miss is_cancelling_input_processing_ changing to |
| // true, then that means TryStartCancelParsing() will take care of canceling |
| // WaitForParsingCompleted(), which avoids waiting for 10 seconds. |
| bool is_cancelling; |
| { // scope lock |
| std::unique_lock<std::mutex> lock(lock_); |
| is_cancelling = is_cancelling_input_processing_; |
| } // ~lock |
| |
| if (is_cancelling || ZX_OK != (status = video_->parser()->WaitForParsingCompleted(ZX_SEC(10)))) { |
| DLOG("is_cancelling: %u status: %d", is_cancelling, status); |
| video_->parser()->CancelParsing(); |
| if (is_cancelling || status == ZX_ERR_CANCELED) { |
| LOG(DEBUG, "Parsing was cancelled - is_cancelling: %d status: %d", is_cancelling, status); |
| // Don't fail the current stream in this case. The current stream is already obsolete. While |
| // CodecImpl will tolerate this without causing the codec to fail or an extraneous |
| // OnStreamFailed(), it's better for the core codec to not fail a stream that's being stopped |
| // via CoreCodecStopStream(). |
| return false; |
| } |
| ZX_DEBUG_ASSERT(!is_cancelling && status != ZX_ERR_CANCELED); |
| LOG(ERROR, "WaitForParsingCompleted() failed - status: %d", status); |
| OnCoreCodecFailStream(fuchsia::media::StreamError::DECODER_UNKNOWN); |
| return false; |
| } |
| return true; |
| } |
| |
| zx_status_t CodecAdapterH264::InitializeFrames(uint32_t min_frame_count, uint32_t max_frame_count, |
| uint32_t width, uint32_t height, uint32_t stride, |
| uint32_t display_width, uint32_t display_height, |
| bool has_sar, uint32_t sar_width, |
| uint32_t sar_height) { |
| // First handle the special case of EndOfStream marker showing up at the output. |
| if (display_width == kEndOfStreamWidth && display_height == kEndOfStreamHeight) { |
| bool is_output_end_of_stream = false; |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| if (is_input_end_of_stream_queued_) { |
| is_output_end_of_stream = true; |
| } |
| } // ~lock |
| if (is_output_end_of_stream) { |
| events_->onCoreCodecOutputEndOfStream(false); |
| return ZX_ERR_STOP; |
| } |
| } |
| |
| // This is called on a core codec thread, ordered with respect to emitted |
| // output frames. This method needs to block until either: |
| // * Format details have been delivered to the Codec client and the Codec |
| // client has configured corresponding output buffers. |
| // * The client has moved on by closing the current stream, in which case |
| // this method needs to fail quickly so the core codec can be stopped. |
| // |
| // The video_decoder_lock_ is held during this method. We don't release the |
| // video_decoder_lock_ while waiting for the client, because we want close of |
| // the current stream to wait for this method to return before starting the |
| // portion of stream close protected by video_decoder_lock_. |
| // |
| // The signalling to un-block this thread uses lock_. |
| // |
| // TODO(dustingreen): It can happen that the current set of buffers is already |
| // suitable for use under the new buffer constraints. However, some of the |
| // buffers can still be populated with data and used by other parts of the |
| // system, so to re-use buffers, we'll need a way to communicate which buffers |
| // are not presently available to decode into, even for what h264_decoder.cc |
| // sees as a totally new set of buffers. The h264_decoder.cc doesn't seem to |
| // separate configuration of a buffer from marking that buffer ready to fill. |
| // It seems like "new" buffers are immediately ready to fill. At the moment, |
| // the AmlogicVideo code doesn't appear to show any way to tell the HW which |
| // frames are presently still in use (not yet available to decode into), |
| // during InitializeStream(). Maybe delaying configuring of a canvas would |
| // work, but in that case would the delayed configuring adversely impact |
| // decoding performance consistency? If we can do this, detect when we can, |
| // and call onCoreCodecMidStreamOutputConstraintsChange() but pass false |
| // instead of true, and don't expect a response or block in here. Still have |
| // to return the vector of buffers, and will need to indicate which are |
| // actually available to decode into. The rest will get indicated via |
| // CoreCodecRecycleOutputPacket(), despite not necessarily getting signalled |
| // to the HW by H264Decoder::ReturnFrame further down. For now, we always |
| // re-allocate buffers. Old buffers still active elsewhere in the system can |
| // continue to be referenced by those parts of the system - the important |
| // thing for now is we avoid overwriting the content of those buffers by using |
| // an entirely new set of buffers for each stream for now. |
| |
| // First stash some format and buffer count info needed to initialize frames |
| // before triggering mid-stream format change. Later, frames satisfying these |
| // stashed parameters will be handed to the decoder via InitializedFrames(), |
| // unless CoreCodecStopStream() happens first. |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| |
| min_buffer_count_[kOutputPort] = min_frame_count; |
| max_buffer_count_[kOutputPort] = max_frame_count; |
| width_ = width; |
| height_ = height; |
| min_stride_ = stride; |
| display_width_ = display_width; |
| display_height_ = display_height; |
| has_sar_ = has_sar; |
| sar_width_ = sar_width; |
| sar_height_ = sar_height; |
| } // ~lock |
| |
| // This will snap the current stream_lifetime_ordinal_, and call |
| // CoreCodecMidStreamOutputBufferReConfigPrepare() and |
| // CoreCodecMidStreamOutputBufferReConfigFinish() from the StreamControl |
| // thread, _iff_ the client hasn't already moved on to a new stream by then. |
| events_->onCoreCodecMidStreamOutputConstraintsChange(true); |
| |
| return ZX_OK; |
| } |
| |
| void CodecAdapterH264::OnCoreCodecFailStream(fuchsia::media::StreamError error) { |
| { // scope lock |
| std::lock_guard<std::mutex> lock(lock_); |
| is_stream_failed_ = true; |
| } |
| LOG(ERROR, "calling events_->onCoreCodecFailStream()"); |
| events_->onCoreCodecFailStream(error); |
| } |
| |
| CodecPacket* CodecAdapterH264::GetFreePacket() { |
| std::lock_guard<std::mutex> lock(lock_); |
| // The h264 decoder won't repeatedly output a buffer multiple times |
| // concurrently, so a free buffer (for which the caller needs a packet) |
| // implies a free packet. |
| ZX_DEBUG_ASSERT(!free_output_packets_.empty()); |
| uint32_t free_index = free_output_packets_.back(); |
| free_output_packets_.pop_back(); |
| return all_output_packets_[free_index]; |
| } |
| |
| bool CodecAdapterH264::IsPortSecureRequired(CodecPort port) { |
| return secure_memory_mode_[port] == fuchsia::mediacodec::SecureMemoryMode::ON; |
| } |
| |
| bool CodecAdapterH264::IsPortSecurePermitted(CodecPort port) { |
| return secure_memory_mode_[port] != fuchsia::mediacodec::SecureMemoryMode::OFF; |
| } |
| |
| bool CodecAdapterH264::IsPortSecure(CodecPort port) { |
| ZX_DEBUG_ASSERT(buffer_settings_[port]); |
| return buffer_settings_[port]->buffer_settings.is_secure; |
| } |
| |
| bool CodecAdapterH264::IsOutputSecure() { |
| // We need to know whether output is secure or not before we start accepting input, which means |
| // we need to know before output buffers are allocated, which means we can't rely on the result |
| // of sysmem BufferCollection allocation is_secure for output. |
| ZX_DEBUG_ASSERT(IsPortSecurePermitted(kOutputPort) == IsPortSecureRequired(kOutputPort)); |
| return IsPortSecureRequired(kOutputPort); |
| } |
| |
| } // namespace amlogic_decoder |