// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "use_video_decoder.h"
#include <inttypes.h>
#include <lib/async-loop/cpp/loop.h>
#include <lib/async-loop/default.h>
#include <lib/fdio/directory.h>
#include <lib/fidl/cpp/clone.h>
#include <lib/fit/defer.h>
#include <lib/media/codec_impl/fourcc.h>
#include <lib/media/test/codec_client.h>
#include <lib/media/test/frame_sink.h>
#include <lib/media/test/one_shot_event.h>
#include <lib/syslog/cpp/macros.h>
#include <stdint.h>
#include <string.h>
#include <zircon/time.h>
#include <algorithm>
#include <atomic>
#include <limits>
#include <optional>
#include <thread>
#include <vector>
#include <fbl/algorithm.h>
#include <src/media/lib/raw_video_writer/raw_video_writer.h>
#include <tee-client-api/tee-client-types.h>
#include "in_stream_peeker.h"
#include "input_copier.h"
#include "lib/zx/time.h"
#include "util.h"
namespace {
// In most cases secure output can't be read for verification, but under some testing
// circumstances it's possible.
constexpr bool kVerifySecureOutput = false;
// Queue SPS and PPS separately from the subsequent picture slice.
constexpr bool kH264SeparateSpsPps = true;
// Force some splitting of frames across packet boundaries (currently disabled; see the
// commented-out use of this constant in queue_access_unit). The remainder of the frame data
// goes in subsequent packets.
constexpr size_t kMaxFrameBytesPerPacket = 4ul * 1024;
constexpr zx::duration kInStreamDeadlineDuration = zx::sec(30);
// The first stream_lifetime_ordinal is 1. When test params request looping
// (loop_stream_count > 1), the Codec instance is re-used for additional streams with
// stream_lifetime_ordinals 3, 5, and so on.
constexpr uint64_t kStreamLifetimeOrdinal = 1;
// Scenic ImagePipe doesn't allow image_id 0, so start here.
constexpr uint32_t kFirstValidImageId = 1;
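// H.264 Annex B start codes: in a raw byte stream, each NAL unit is preceded by either a
// 4-byte (00 00 00 01) or 3-byte (00 00 01) start code.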
constexpr uint8_t kLongStartCodeArray[] = {0x00, 0x00, 0x00, 0x01};
constexpr uint8_t kShortStartCodeArray[] = {0x00, 0x00, 0x01};
// For now, we need to ensure we can process frames up to this large for h264. This doesn't add in
// the 128 * 1024 that we reserve for SEI/SPS/PPS that can be in the same buffer as a frame.
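// 1920 * 1080 * 3 / 2 is the size of a raw 1080p 4:2:0 frame; the additional / 2 appears to
// assume a compressed frame never exceeds half the size of a raw frame.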
constexpr uint32_t kInputLargeFrameSizeH264 = 1920 * 1080 * 3 / 2 / 2;
constexpr uint32_t kInputReservedForBigHeadersSizeH264 = 128 * 1024;
constexpr uint32_t kInputMinBufferSizeH264 =
kInputLargeFrameSizeH264 + kInputReservedForBigHeadersSizeH264;
// We need to ensure we can process frames/superframes up to this large for VP9. This is the size
// Chromium currently sets for input buffers (for both h264 and vp9).
constexpr uint32_t kInputLargeFrameSizeVp9 = 1920 * 1080 * 3 / 2 / 2 + 128 * 1024;
constexpr uint32_t kInputMinBufferSizeVp9 = kInputLargeFrameSizeVp9;
constexpr uint32_t kInputMinBufferSize = std::max(kInputMinBufferSizeH264, kInputMinBufferSizeVp9);
// If readable_bytes is 0, that's considered a "start code", to allow the caller
// to terminate a NAL the same way regardless of whether another start code is
// found or the end of the buffer is found.
//
// ptr has readable_bytes of data - the function only evaluates whether there is
// a start code at the beginning of the data at ptr.
//
// readable_bytes - the caller indicates how many bytes are readable starting at
// ptr.
//
// *start_code_size_bytes_out will have the length of the start code in bytes when the
// function returns true - unchanged otherwise. Normally this would be 3 or 4, but 0 is
// possible if readable_bytes is 0.
bool is_start_code(uint8_t* ptr, size_t readable_bytes, size_t* start_code_size_bytes_out) {
if (readable_bytes == 0) {
*start_code_size_bytes_out = 0;
return true;
}
if (readable_bytes >= 4) {
if (!memcmp(ptr, kLongStartCodeArray, sizeof(kLongStartCodeArray))) {
*start_code_size_bytes_out = 4;
return true;
}
}
if (readable_bytes >= 3) {
if (!memcmp(ptr, kShortStartCodeArray, sizeof(kShortStartCodeArray))) {
*start_code_size_bytes_out = 3;
return true;
}
}
return false;
}
// Test-only. Not for production use. Caller must ensure there are at least 5
// bytes at nal_unit.
uint8_t GetNalUnitType(const uint8_t* nal_unit) {
  // Also works with 4-byte start codes.
  static const uint8_t start_code[3] = {0, 0, 1};
  uint8_t* next_start =
      static_cast<uint8_t*>(memmem(nal_unit, 5, start_code, sizeof(start_code)));
  ZX_ASSERT(next_start);
  // The NAL unit type is the low 5 bits of the NAL header byte after the start code.
  return *(next_start + sizeof(start_code)) & 0x1f;
}
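// IVF is the simple container commonly used for raw VP8/VP9 streams: a 32-byte little-endian
// file header (signature "DKIF"), followed by frames each preceded by a 12-byte frame header.
// The packed structs below mirror that layout.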
struct __attribute__((__packed__)) IvfHeader {
uint32_t signature;
uint16_t version;
uint16_t header_length;
uint32_t fourcc;
uint16_t width;
uint16_t height;
uint32_t frame_rate;
uint32_t time_scale;
uint32_t frame_count;
uint32_t unused;
};
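// Per-frame IVF header: the frame payload size in bytes (not counting this header), then a
// 64-bit presentation timestamp in time_scale units.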
struct __attribute__((__packed__)) IvfFrameHeader {
uint32_t size_bytes;
uint64_t presentation_timestamp;
};
enum class Format {
kH264,
// This uses the multi-instance h.264 decoder.
kH264Multi,
kVp9,
};
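// H.264 NAL unit types that carry coded slice data (VCL NAL units): 1 (non-IDR slice), 2-4
// (slice data partitions A-C), 5 (IDR slice), 19 (auxiliary coded picture), 20-21 (slice
// extensions).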
const uint8_t kSliceNalUnitTypes[] = {1, 2, 3, 4, 5, 19, 20, 21};
bool IsSliceNalUnitType(uint8_t nal_unit_type) {
for (uint8_t type : kSliceNalUnitTypes) {
if (type == nal_unit_type) {
return true;
}
}
return false;
}
class VideoDecoderRunner {
public:
VideoDecoderRunner(Format format, UseVideoDecoderParams params);
void Run();
private:
uint64_t QueueH264Frames(uint64_t stream_lifetime_ordinal, uint64_t input_pts_counter_start);
uint64_t QueueVp9Frames(uint64_t stream_lifetime_ordinal, uint64_t input_pts_counter_start);
Format format_;
UseVideoDecoderParams params_;
std::optional<CodecClient> codec_client_;
  // For testing purposes, we share some info from output to input. Normally this sort of
  // sharing wouldn't tend to happen.
//
// Unlike the usual situation with video decoders, in this test, input PTS values are sequential,
// while output PTS values are impacted by frame reordering.
//
// For h264, the degree of reordering is supposed to be bounded by max_num_reorder_frames. This
// means that a given frame can be delayed by up to max_num_reorder_frames, requiring that many
// additional frames on input before the delayed frame's PTS is seen on output.
//
// This value is only ever written by the output thread. It is read by both the output thread
// and the input thread.
std::atomic<int64_t> max_output_pts_seen_{-1};
};
VideoDecoderRunner::VideoDecoderRunner(Format format, UseVideoDecoderParams params)
: format_(std::move(format)), params_(std::move(params)) {}
// Payload data for bear.h264 is 00 00 00 01 start code before each NAL, with
// SPS / PPS NALs and also frame NALs. We deliver to Codec NAL-by-NAL without
// the start code.
//
// Since the .h264 file has SPS + PPS NALs in addition to frame NALs, we don't
// use oob_bytes for this stream.
//
// TODO(dustingreen): Determine for .mp4 or similar which don't have SPS / PPS
// in band whether .mp4 provides ongoing OOB data, or just at the start, and
// document in codec.fidl how that's to be handled.
//
// Returns how many input packets queued with a PTS.
uint64_t VideoDecoderRunner::QueueH264Frames(uint64_t stream_lifetime_ordinal,
uint64_t input_pts_counter_start) {
// Raw .h264 has start code 00 00 01 or 00 00 00 01 before each NAL, and
// the start codes don't alias in the middle of NALs, so we just scan
// for NALs and send them in to the decoder.
uint64_t input_pts_counter = input_pts_counter_start;
uint64_t frame_count = 0;
std::vector<uint8_t> accumulator;
auto queue_access_unit = [this, stream_lifetime_ordinal, &input_pts_counter, &frame_count,
&accumulator](uint8_t* bytes, size_t byte_count) -> bool {
auto tvp = params_.input_copier;
size_t start_code_size_bytes = 0;
ZX_ASSERT(is_start_code(bytes, byte_count, &start_code_size_bytes));
ZX_ASSERT(start_code_size_bytes < byte_count);
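    // The NAL unit type is the low 5 bits of the NAL header byte that follows the start code.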
uint8_t nal_unit_type = bytes[start_code_size_bytes] & 0x1f;
size_t insert_offset = accumulator.size();
size_t new_size = insert_offset + byte_count;
if (accumulator.capacity() < new_size) {
size_t new_capacity = std::max(accumulator.capacity() * 2, new_size);
accumulator.reserve(new_capacity);
}
accumulator.resize(insert_offset + byte_count);
    // Zero-pad the first few frames heavily to verify that large frames can decode.
if (IsSliceNalUnitType(nal_unit_type) && frame_count < 5) {
ZX_DEBUG_ASSERT(byte_count < kInputLargeFrameSizeH264);
uint32_t zero_padding_bytes = kInputLargeFrameSizeH264 - byte_count;
accumulator.resize(accumulator.size() + zero_padding_bytes);
insert_offset += zero_padding_bytes;
}
memcpy(accumulator.data() + insert_offset, bytes, byte_count);
if (!kH264SeparateSpsPps && !IsSliceNalUnitType(nal_unit_type)) {
return true;
}
auto orig_bytes = bytes;
bytes = accumulator.data();
byte_count = accumulator.size();
auto clear_accumulator = fit::defer([&accumulator] { accumulator.clear(); });
size_t bytes_so_far = 0;
// printf("queuing offset: %ld byte_count: %zu\n", bytes -
// input_bytes.get(), byte_count);
while (bytes_so_far != byte_count) {
VLOGF("BlockingGetFreeInputPacket()...");
std::unique_ptr<fuchsia::media::Packet> packet = codec_client_->BlockingGetFreeInputPacket();
if (!packet) {
return false;
}
VLOGF("BlockingGetFreeInputPacket() done");
if (!packet->has_header()) {
Exit("broken server sent packet without header");
}
if (!packet->header().has_packet_index()) {
Exit("broken server sent packet without packet index");
}
// For input we do buffer_index == packet_index.
const CodecBuffer& buffer = codec_client_->BlockingGetFreeInputBufferForPacket(packet.get());
ZX_ASSERT(packet->buffer_index() == buffer.buffer_index());
uint32_t padding_length = tvp ? tvp->PaddingLength() : 0;
size_t bytes_to_copy =
std::min(byte_count - bytes_so_far, buffer.size_bytes() - padding_length);
// Force some frames to split across packet boundary.
//
// TODO(fxbug.dev/13483): Also cover more than one frame in a packet, and split headers.
//
      // TODO(fxbug.dev/13483): Enable testing frames split across packets once SW decode can do
      // that, or gate this on whether the capability was requested of the decoder: try
      // requesting the capability, then fall back if it's not available.
(void)kMaxFrameBytesPerPacket;
// bytes_to_copy = std::min(bytes_to_copy, kMaxFrameBytesPerPacket);
packet->set_stream_lifetime_ordinal(stream_lifetime_ordinal);
packet->set_start_offset(0);
packet->set_valid_length_bytes(bytes_to_copy);
if (bytes_so_far == 0) {
uint8_t nal_unit_type = GetNalUnitType(orig_bytes);
if (IsSliceNalUnitType(nal_unit_type)) {
constexpr zx::duration kComplainInterval = zx::sec(5);
zx::time complain_time = zx::clock::get_monotonic() + kComplainInterval;
// Wait until max_output_pts_seen_ increases to within the threshold, or time out while
// complaining every 5 seconds.
while (static_cast<int64_t>(input_pts_counter) >
max_output_pts_seen_ + 1 + params_.test_params->max_num_reorder_frames_threshold) {
zx::time now = zx::clock::get_monotonic();
if (now >= complain_time) {
fprintf(stderr,
"max_num_reorder_frames_threshold not satisfied? - keep waiting - may time "
"out...\n");
complain_time = now + kComplainInterval;
}
zx::nanosleep(zx::deadline_after(zx::msec(1)));
}
packet->set_timestamp_ish(input_pts_counter++);
}
}
packet->set_start_access_unit(bytes_so_far == 0);
packet->set_known_end_access_unit(bytes_so_far + bytes_to_copy == byte_count);
if (tvp) {
TEEC_Result result = tvp->DecryptVideo(bytes + bytes_so_far, bytes_to_copy, buffer.vmo());
ZX_ASSERT(result == TEEC_SUCCESS);
} else {
memcpy(buffer.base(), bytes + bytes_so_far, bytes_to_copy);
}
codec_client_->QueueInputPacket(std::move(packet));
bytes_so_far += bytes_to_copy;
}
if (IsSliceNalUnitType(nal_unit_type)) {
frame_count++;
}
if (frame_count == params_.test_params->frame_count) {
return false;
}
return true;
};
auto in_stream = params_.in_stream;
// Let caller-provided in_stream drive how far ahead we peek. If it's not far
// enough to find a start code or the EOS, then we'll error out.
uint32_t max_peek_bytes = in_stream->max_peek_bytes();
// default -1
int64_t input_stop_stream_after_frame_ordinal =
params_.test_params->input_stop_stream_after_frame_ordinal;
int64_t stream_frame_ordinal = 0;
while (true) {
// Until clang-tidy correctly interprets Exit(), this "= 0" satisfies it.
size_t start_code_size_bytes = 0;
uint32_t actual_peek_bytes;
uint8_t* peek;
VLOGF("PeekBytes()...");
zx_status_t status = in_stream->PeekBytes(max_peek_bytes, &actual_peek_bytes, &peek,
zx::deadline_after(kInStreamDeadlineDuration));
ZX_ASSERT(status == ZX_OK);
VLOGF("PeekBytes() done");
if (actual_peek_bytes == 0) {
// Out of input. Not an error. No more input AUs.
ZX_DEBUG_ASSERT(in_stream->eos_position_known() &&
in_stream->cursor_position() == in_stream->eos_position());
break;
}
    if (!is_start_code(&peek[0], actual_peek_bytes, &start_code_size_bytes)) {
      uint32_t dump_byte_count = std::min(64u, actual_peek_bytes);
      for (uint32_t i = 0; i < dump_byte_count; ++i) {
        LOGF("peek[%u] == 0x%x", i, peek[i]);
      }
      char buf[65] = {};
      memcpy(&buf[0], &peek[0], dump_byte_count);
      LOGF("peek[0..%u]: %s", dump_byte_count - 1, buf);
if (in_stream->cursor_position() == 0) {
Exit(
"Didn't find a start code at the start of the file, and this "
"example doesn't scan forward (for now).");
} else {
Exit(
"Fell out of sync somehow - previous NAL offset + previous "
"NAL length not a start code.");
}
}
if (in_stream->eos_position_known() &&
in_stream->cursor_position() + start_code_size_bytes == in_stream->eos_position()) {
Exit("Start code at end of file unexpected");
}
size_t nal_start_offset = start_code_size_bytes;
// Scan for end of NAL. The end of NAL can be because we're out of peeked
// data, or because we hit another start code.
size_t find_end_iter = nal_start_offset;
size_t ignore_start_code_size_bytes;
while (find_end_iter <= actual_peek_bytes &&
!is_start_code(&peek[find_end_iter], actual_peek_bytes - find_end_iter,
&ignore_start_code_size_bytes)) {
find_end_iter++;
}
ZX_DEBUG_ASSERT(find_end_iter <= actual_peek_bytes);
if (find_end_iter == nal_start_offset) {
Exit("Two adjacent start codes unexpected.");
}
ZX_DEBUG_ASSERT(find_end_iter > nal_start_offset);
size_t nal_length = find_end_iter - nal_start_offset;
if (!queue_access_unit(&peek[0], start_code_size_bytes + nal_length)) {
      // Reached on error, or once frame_count reaches test_params->frame_count.
break;
}
// start code + NAL payload
VLOGF("TossPeekedBytes()...");
in_stream->TossPeekedBytes(start_code_size_bytes + nal_length);
VLOGF("TossPeekedBytes() done");
if (stream_frame_ordinal == input_stop_stream_after_frame_ordinal) {
break;
}
stream_frame_ordinal++;
}
return input_pts_counter - input_pts_counter_start;
}
uint64_t VideoDecoderRunner::QueueVp9Frames(uint64_t stream_lifetime_ordinal,
uint64_t input_pts_counter_start) {
int64_t input_pts_counter = input_pts_counter_start;
uint32_t frame_ordinal = 0;
auto queue_access_unit = [this, stream_lifetime_ordinal, &input_pts_counter,
&frame_ordinal](size_t byte_count) {
auto in_stream = params_.in_stream;
auto tvp = params_.input_copier;
const int64_t skip_frame_ordinal = params_.test_params->skip_frame_ordinal;
std::unique_ptr<fuchsia::media::Packet> packet = codec_client_->BlockingGetFreeInputPacket();
if (!packet) {
fprintf(stderr, "Returning because failed to get input packet\n");
return false;
}
////////////////////////////////////////////////////////////////////////////////////////////////
    // No more returning false from here down. Before we return true, we must have consumed the
    // input data, incremented input_pts_counter, and returned the input packet to the
    // codec_client. The codec_client only wants the input packet back after it's been filled
    // out completely.
////////////////////////////////////////////////////////////////////////////////////////////////
auto do_not_return_early_interval = fit::defer([] {
ZX_PANIC("don't return early until packet is set up and returned to codec_client\n");
});
auto increment_input_pts_counter = fit::defer([&input_pts_counter] { input_pts_counter++; });
ZX_ASSERT(packet->has_header());
ZX_ASSERT(packet->header().has_packet_index());
const CodecBuffer& buffer = codec_client_->BlockingGetFreeInputBufferForPacket(packet.get());
ZX_ASSERT(packet->buffer_index() == buffer.buffer_index());
// VP9 decoder doesn't yet support splitting access units into multiple
// packets.
if (byte_count > buffer.size_bytes()) {
      fprintf(stderr,
              "byte_count > buffer.size_bytes() - byte_count: %zu buffer.size_bytes(): %zu\n",
              byte_count, buffer.size_bytes());
}
ZX_ASSERT(byte_count <= buffer.size_bytes());
// Check that we don't waste contiguous space on non-secure VP9 input buffers.
ZX_ASSERT(!buffer.is_physically_contiguous() || tvp);
packet->set_stream_lifetime_ordinal(stream_lifetime_ordinal);
packet->set_start_offset(0);
packet->set_valid_length_bytes(byte_count);
// We don't use frame_header->presentation_timestamp, because we want to
// send through frame index in timestamp_ish field instead, for consistency
// with .h264 files which don't have timestamps in them, and so tests can
// assume frame index as timestamp_ish on output.
packet->set_timestamp_ish(input_pts_counter);
packet->set_start_access_unit(true);
packet->set_known_end_access_unit(true);
uint32_t actual_bytes_read;
std::unique_ptr<uint8_t[]> bytes;
uint8_t* read_address = nullptr;
if (tvp) {
bytes = std::make_unique<uint8_t[]>(byte_count);
read_address = bytes.get();
} else {
read_address = buffer.base();
}
zx_status_t status =
in_stream->ReadBytesComplete(byte_count, &actual_bytes_read, read_address,
zx::deadline_after(kInStreamDeadlineDuration));
ZX_ASSERT(status == ZX_OK);
if (actual_bytes_read < byte_count) {
Exit("Frame truncated.");
}
ZX_DEBUG_ASSERT(actual_bytes_read == byte_count);
/////////////////////////////////////////////////////////////////////////////////
// Switch from not being able to return early to being able to return true early.
/////////////////////////////////////////////////////////////////////////////////
do_not_return_early_interval.cancel();
auto do_not_queue_input_packet_after_all = fit::defer(
[this, &packet] { codec_client_->DoNotQueueInputPacketAfterAll(std::move(packet)); });
if (input_pts_counter == skip_frame_ordinal) {
LOGF("skipping input frame: %" PRId64, input_pts_counter);
// ~do_not_queue_input_packet_after_all, ~increment_input_pts_counter
return true;
}
if (tvp) {
VLOGF("before DecryptVideo...");
TEEC_Result result = tvp->DecryptVideo(bytes.get(), byte_count, buffer.vmo());
VLOGF("after DecryptVideo");
ZX_ASSERT(result == TEEC_SUCCESS);
}
do_not_queue_input_packet_after_all.cancel();
// Ideally we'd figure out why this padding doesn't work / how to pad VP9 frames, if possible.
#if 0
if (frame_ordinal <= 5) {
// Assert not a superframe. If we have some of these in input, we can skip padding them
// instead, or find a way to pad a superframe.
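      // A VP9 superframe ends with an index whose marker byte has 0b110 in its top 3 bits,
      // which is what the 0xE0 mask / 0xC0 compare below detects.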
ZX_DEBUG_ASSERT((*(buffer.base() + packet->valid_length_bytes() - 1) & 0xE0) != 0xC0);
ZX_DEBUG_ASSERT(buffer.size_bytes() >= kInputLargeFrameSizeVp9);
if (byte_count < kInputLargeFrameSizeVp9) {
uint32_t zero_bytes_count = kInputLargeFrameSizeVp9 - byte_count;
memset(buffer.base() + byte_count, 0, zero_bytes_count);
packet->set_valid_length_bytes(packet->valid_length_bytes() + zero_bytes_count);
}
}
#endif
codec_client_->QueueInputPacket(std::move(packet));
++frame_ordinal;
// ~increment_input_pts_counter
return true;
};
auto in_stream = params_.in_stream;
IvfHeader header;
uint32_t actual_bytes_read;
zx_status_t status = in_stream->ReadBytesComplete(sizeof(header), &actual_bytes_read,
reinterpret_cast<uint8_t*>(&header),
zx::deadline_after(kInStreamDeadlineDuration));
// This could fail if a remote-source stream breaks.
ZX_ASSERT(status == ZX_OK);
// This could fail if the input is too short.
ZX_ASSERT(actual_bytes_read == sizeof(header));
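  // In IVF, header_length is the byte offset from the start of the file to the first frame
  // header; it's normally 32 (== sizeof(IvfHeader)), but may be larger.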
  ZX_ASSERT(header.header_length >= sizeof(header));
  size_t remaining_header_length = header.header_length - sizeof(header);
// We're not interested in any remaining portion of the header, but we should
// skip the rest of the header, if any.
if (remaining_header_length) {
uint8_t toss_buffer[1024];
while (remaining_header_length != 0) {
uint32_t bytes_to_read = std::min(sizeof(toss_buffer), remaining_header_length);
uint32_t actual_bytes_read;
status = in_stream->ReadBytesComplete(bytes_to_read, &actual_bytes_read, &toss_buffer[0],
zx::deadline_after(kInStreamDeadlineDuration));
ZX_ASSERT(status == ZX_OK);
ZX_ASSERT(actual_bytes_read == bytes_to_read);
remaining_header_length -= actual_bytes_read;
}
}
ZX_DEBUG_ASSERT(!remaining_header_length);
// default -1
int64_t input_stop_stream_after_frame_ordinal =
params_.test_params->input_stop_stream_after_frame_ordinal;
int64_t stream_frame_ordinal = 0;
while (true) {
IvfFrameHeader frame_header;
status = in_stream->ReadBytesComplete(sizeof(frame_header), &actual_bytes_read,
reinterpret_cast<uint8_t*>(&frame_header),
zx::deadline_after(kInStreamDeadlineDuration));
ZX_ASSERT(status == ZX_OK);
if (actual_bytes_read == 0) {
// No more frames. That's fine.
break;
}
if (actual_bytes_read < sizeof(frame_header)) {
Exit("Frame header truncated.");
}
ZX_DEBUG_ASSERT(actual_bytes_read == sizeof(frame_header));
if (params_.test_params->per_frame_debug_output) {
LOGF("input stream: %" PRIu64 " stream_frame_ordinal: %" PRId64 " input_pts_counter: %" PRIu64
" frame_header.size_bytes: %u",
stream_lifetime_ordinal, stream_frame_ordinal, input_pts_counter,
frame_header.size_bytes);
}
if (!queue_access_unit(frame_header.size_bytes)) {
      // Can be fine, e.g. for the VP9 input fuzzing test.
break;
}
if (stream_frame_ordinal == input_stop_stream_after_frame_ordinal) {
break;
}
stream_frame_ordinal++;
}
return input_pts_counter - input_pts_counter_start;
}
void VideoDecoderRunner::Run() {
const UseVideoDecoderTestParams default_test_params;
if (!params_.test_params) {
params_.test_params = &default_test_params;
}
params_.test_params->Validate();
VLOGF("before CodecClient::CodecClient()...");
codec_client_.emplace(params_.fidl_loop, params_.fidl_thread, std::move(params_.sysmem));
codec_client_->SetMinInputBufferSize(kInputMinBufferSize);
// no effect if 0
codec_client_->SetMinOutputBufferSize(params_.min_output_buffer_size);
// no effect if 0
codec_client_->SetMinOutputBufferCount(params_.min_output_buffer_count);
codec_client_->set_is_output_secure(params_.is_secure_output);
codec_client_->set_is_input_secure(params_.is_secure_input);
codec_client_->set_in_lax_mode(params_.lax_mode);
std::string mime_type;
switch (format_) {
case Format::kH264:
mime_type = "video/h264";
break;
case Format::kH264Multi:
mime_type = "video/h264-multi";
break;
case Format::kVp9:
mime_type = "video/vp9";
break;
}
if (params_.test_params->mime_type) {
mime_type = params_.test_params->mime_type.value();
}
async::PostTask(
params_.fidl_loop->dispatcher(),
[this, codec_client_request = codec_client_->GetTheRequestOnce(), mime_type]() mutable {
VLOGF("before codec_factory->CreateDecoder() (async)");
fuchsia::media::FormatDetails input_details;
input_details.set_format_details_version_ordinal(0);
input_details.set_mime_type(mime_type.c_str());
fuchsia::mediacodec::CreateDecoder_Params decoder_params;
decoder_params.set_input_details(std::move(input_details));
// This is required for timestamp_ish values to transit the
// Codec.
//
// TODO(fxbug.dev/57706): We shouldn't need to promise this to have PTS(s) flow through.
decoder_params.set_promise_separate_access_units_on_input(true);
if (params_.is_secure_output) {
decoder_params.set_secure_output_mode(fuchsia::mediacodec::SecureMemoryMode::ON);
}
if (params_.is_secure_input) {
decoder_params.set_secure_input_mode(fuchsia::mediacodec::SecureMemoryMode::ON);
}
// Bind the fuchsia::media::CodecFactoryHandle to a CodecFactoryPtr so we can send a
// CreateDecoder message. This unbinds params_.codec_factory.
auto codec_factory_ptr = params_.codec_factory.Bind();
codec_factory_ptr->CreateDecoder(std::move(decoder_params),
std::move(codec_client_request));
// Now that the CreateDecoder message is sent, we no longer need to keep a channel open
// to the CodecFactory so we can just let codec_factory_ptr fall out of scope.
});
VLOGF("before codec_client.Start()...");
codec_client_->Start();
VLOGF("before starting in_thread...");
auto in_thread = std::make_unique<std::thread>([this]() {
auto& in_stream = params_.in_stream;
auto& test_params = params_.test_params;
VLOGF("in_thread start");
// default 1
const uint32_t loop_stream_count = test_params->loop_stream_count;
// default 2
const uint64_t keep_stream_modulo = test_params->keep_stream_modulo;
uint64_t stream_lifetime_ordinal = kStreamLifetimeOrdinal;
uint64_t input_frame_pts_counter = 0;
uint32_t frames_queued = 0;
for (uint32_t loop_ordinal = 0; loop_ordinal < loop_stream_count;
++loop_ordinal, stream_lifetime_ordinal += 2) {
switch (format_) {
case Format::kH264:
case Format::kH264Multi:
frames_queued = QueueH264Frames(stream_lifetime_ordinal, input_frame_pts_counter);
break;
case Format::kVp9:
frames_queued = QueueVp9Frames(stream_lifetime_ordinal, input_frame_pts_counter);
break;
}
// Send through QueueInputEndOfStream().
VLOGF("QueueInputEndOfStream() - stream_lifetime_ordinal: %" PRIu64, stream_lifetime_ordinal);
// For debugging a flake:
if (test_params->loop_stream_count > 1) {
LOGF("QueueInputEndOfStream() - stream_lifetime_ordinal: %" PRIu64,
stream_lifetime_ordinal);
}
codec_client_->QueueInputEndOfStream(stream_lifetime_ordinal);
if (stream_lifetime_ordinal % keep_stream_modulo == 1) {
// We flush and close to run the handling code server-side. However, we don't
// yet verify that this successfully achieves what it says.
VLOGF("FlushEndOfStreamAndCloseStream() - stream_lifetime_ordinal: %" PRIu64,
stream_lifetime_ordinal);
// For debugging a flake:
if (test_params->loop_stream_count > 1) {
LOGF("FlushEndOfStreamAndCloseStream() - stream_lifetime_ordinal: %" PRIu64,
stream_lifetime_ordinal);
}
codec_client_->FlushEndOfStreamAndCloseStream(stream_lifetime_ordinal);
// Stitch together the PTS values of the streams which we're keeping.
input_frame_pts_counter += frames_queued;
}
if (loop_ordinal + 1 != loop_stream_count) {
zx_status_t status = in_stream->ResetToStart(zx::deadline_after(kInStreamDeadlineDuration));
ZX_ASSERT(status == ZX_OK);
}
}
VLOGF("in_thread done");
});
// Separate thread to process the output.
//
// codec_client outlives the thread (and for separate reasons below, all the
// frame_sink activity started by out_thread).
auto out_thread = std::make_unique<std::thread>([this]() {
VLOGF("out_thread start");
// We allow the server to send multiple output constraint updates if it
// wants; see implementation of BlockingGetEmittedOutput() which will hide
// multiple constraint updates before the first packet from this code. In
// contrast we assert if the server sends multiple format updates with no
// packets in between since that's not compliant with the protocol rules.
std::shared_ptr<const fuchsia::media::StreamOutputFormat> prev_stream_format;
const fuchsia::media::VideoUncompressedFormat* raw = nullptr;
std::optional<zx::time> frame_zero_time;
uint64_t frame_index = 0;
uint32_t image_id = kFirstValidImageId;
std::atomic<uint32_t> async_put_frame_count = 0;
while (true) {
if (params_.frame_sink) {
// Control concurrency of pending Present()s to scenic - this could be an issue for very
// large buffer collections, or in cases where we switch buffer collections a lot - in such
// cases Scenic can start complaining about too many queued Present()s. This avoids the
// frame_sink needing to block/delay the output thread anywhere other than here.
//
// It'd be better if this were event driven, but this works for now.
while (async_put_frame_count + params_.frame_sink->GetPendingCount() >= 10) {
zx::nanosleep(zx::deadline_after(zx::msec(10)));
}
}
VLOGF("BlockingGetEmittedOutput()...");
std::unique_ptr<CodecOutput> output = codec_client_->BlockingGetEmittedOutput();
VLOGF("BlockingGetEmittedOutput() done");
if (!output) {
return;
}
if (output->stream_lifetime_ordinal() % 2 == 0) {
Exit(
"server emitted a stream_lifetime_ordinal that client didn't set "
"on any input");
}
if (output->end_of_stream()) {
VLOGF("output end_of_stream() - stream_lifetime_ordinal: %" PRIu64,
output->stream_lifetime_ordinal());
// For debugging a flake:
if (params_.test_params->loop_stream_count > 1) {
LOGF("output end_of_stream() - stream_lifetime_ordinal: %" PRIu64,
output->stream_lifetime_ordinal());
}
// default 1
const int64_t loop_stream_count = params_.test_params->loop_stream_count;
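        // in_thread assigns odd stream_lifetime_ordinals 1, 3, 5, ... (one per loop
        // iteration), so the final stream's ordinal is (loop_stream_count - 1) * 2 + 1.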
const uint64_t max_stream_lifetime_ordinal = (loop_stream_count - 1) * 2 + 1;
if (output->stream_lifetime_ordinal() != max_stream_lifetime_ordinal) {
continue;
}
VLOGF("done with output - stream_lifetime_ordinal: %" PRIu64,
output->stream_lifetime_ordinal());
// For debugging a flake:
if (params_.test_params->loop_stream_count > 1) {
LOGF("done with output - stream_lifetime_ordinal: %" PRIu64,
output->stream_lifetime_ordinal());
}
// Just "break;" would be more fragile under code modification.
goto end_of_output;
}
const fuchsia::media::Packet& packet = output->packet();
if (!packet.has_header()) {
// The server should not generate any empty packets.
Exit("broken server sent packet without header");
}
// cleanup can run on any thread, and codec_client.RecycleOutputPacket()
// is ok with that. In addition, cleanup can run after codec_client is
// gone, since we don't block return from use_video_decoder() on Scenic
// actually freeing up all previously-queued frames.
auto cleanup =
fit::defer([this, packet_header = fidl::Clone(packet.header()), &frame_index]() mutable {
// Using an auto call for this helps avoid losing track of the
// output_buffer.
codec_client_->RecycleOutputPacket(std::move(packet_header));
++frame_index;
});
std::shared_ptr<const fuchsia::media::StreamOutputFormat> format = output->format();
if (!packet.has_buffer_index()) {
// The server should not generate any empty packets.
Exit("broken server sent packet without buffer index");
}
// This will remain live long enough because this thread is the only
// thread that re-allocates output buffers.
const CodecBuffer& buffer = codec_client_->GetOutputBufferByIndex(packet.buffer_index());
ZX_ASSERT(!prev_stream_format ||
(prev_stream_format->has_format_details() &&
prev_stream_format->format_details().format_details_version_ordinal()));
if (!format->has_format_details()) {
Exit("!format->has_format_details()");
}
if (!format->format_details().has_format_details_version_ordinal()) {
Exit("!format->format_details().has_format_details_version_ordinal()");
}
if (!packet.has_valid_length_bytes() || packet.valid_length_bytes() == 0) {
// The server should not generate any empty packets.
Exit("broken server sent empty packet");
}
if (!packet.has_start_offset()) {
// The server should not generate any empty packets.
Exit("broken server sent packet without start offset");
}
// We have a non-empty packet of the stream.
if (packet.has_timestamp_ish()) {
uint64_t timestamp_ish = packet.timestamp_ish();
ZX_ASSERT(timestamp_ish < std::numeric_limits<int64_t>::max());
if (static_cast<int64_t>(timestamp_ish) > max_output_pts_seen_) {
max_output_pts_seen_ = timestamp_ish;
}
}
if (!prev_stream_format || prev_stream_format.get() != format.get()) {
VLOGF("handling output format");
        // Every output has a format. This runs for the first output, and again only if the
        // server sends a new format.
        prev_stream_format = format;
        if (!format->has_format_details()) {
          Exit("!format_details");
        }
        ZX_ASSERT(format->format_details().has_domain());
const fuchsia::media::FormatDetails& format_details = format->format_details();
if (!format_details.has_domain()) {
Exit("!format.domain");
}
if (!format_details.domain().is_video()) {
Exit("!format.domain.is_video()");
}
const fuchsia::media::VideoFormat& video_format = format_details.domain().video();
if (!video_format.is_uncompressed()) {
Exit("!video.is_uncompressed()");
}
raw = &video_format.uncompressed();
switch (raw->fourcc) {
case make_fourcc('N', 'V', '1', '2'): {
size_t y_size = raw->primary_height_pixels * raw->primary_line_stride_bytes;
if (raw->secondary_start_offset < y_size) {
Exit("raw.secondary_start_offset < y_size");
}
// NV12 requires UV be same line stride as Y.
size_t total_size = raw->secondary_start_offset +
raw->primary_height_pixels / 2 * raw->primary_line_stride_bytes;
if (packet.valid_length_bytes() < total_size) {
Exit(
"packet.valid_length_bytes < total_size (1) - valid_length_bytes: %u total_size: "
"%lu",
packet.valid_length_bytes(), total_size);
}
break;
}
case make_fourcc('Y', 'V', '1', '2'): {
size_t y_size = raw->primary_height_pixels * raw->primary_line_stride_bytes;
size_t v_size = raw->secondary_height_pixels * raw->secondary_line_stride_bytes;
size_t u_size = v_size;
size_t total_size = y_size + u_size + v_size;
if (packet.valid_length_bytes() < total_size) {
Exit("packet.valid_length_bytes < total_size (2)");
}
if (raw->secondary_start_offset < y_size) {
Exit("raw.secondary_start_offset < y_size");
}
if (raw->tertiary_start_offset < y_size + v_size) {
Exit("raw.tertiary_start_offset < y_size + v_size");
}
break;
}
default:
Exit("fourcc != NV12 && fourcc != YV12");
}
}
if (frame_index == 0) {
ZX_ASSERT(!frame_zero_time);
frame_zero_time.emplace(zx::clock::get_monotonic());
}
if (params_.test_params->print_fps) {
zx::time now = zx::clock::get_monotonic();
zx::duration duration = now - frame_zero_time.value();
if (frame_index != 0 && frame_index % params_.test_params->print_fps_modulus == 0) {
printf("frame_index: %" PRIu64 " fps: %g\n", frame_index,
static_cast<double>(frame_index) * 1000000.0 /
static_cast<double>(duration.to_usecs()));
fflush(nullptr);
}
}
if (params_.emit_frame) {
// i420_bytes is in I420 format - Y plane first, then U plane, then V
// plane. The U and V planes are half size in both directions. Each
// plane is 8 bits per sample.
uint32_t i420_stride = fbl::round_up(raw->primary_display_width_pixels, 2u);
// When width is odd, we want a chroma sample for the right-most luma.
uint32_t uv_width = (raw->primary_display_width_pixels + 1) / 2;
// When height is odd, we want a chroma sample for the bottom-most luma.
uint32_t uv_height = (raw->primary_display_height_pixels + 1) / 2;
uint32_t uv_stride = i420_stride / 2;
std::unique_ptr<uint8_t[]> i420_bytes;
if (kVerifySecureOutput || !params_.is_secure_output) {
i420_bytes = std::make_unique<uint8_t[]>(
i420_stride * raw->primary_display_height_pixels + uv_stride * uv_height * 2);
switch (raw->fourcc) {
case make_fourcc('N', 'V', '1', '2'): {
// Y
uint8_t* y_src = buffer.base() + packet.start_offset() + raw->primary_start_offset;
uint8_t* y_dst = i420_bytes.get();
for (uint32_t y_iter = 0; y_iter < raw->primary_display_height_pixels; y_iter++) {
memcpy(y_dst, y_src, raw->primary_display_width_pixels);
y_src += raw->primary_line_stride_bytes;
y_dst += i420_stride;
}
// UV
uint8_t* uv_src = buffer.base() + packet.start_offset() + raw->secondary_start_offset;
uint8_t* u_dst_line = y_dst;
uint8_t* v_dst_line = u_dst_line + uv_stride * uv_height;
for (uint32_t uv_iter = 0; uv_iter < uv_height; uv_iter++) {
uint8_t* u_dst = u_dst_line;
uint8_t* v_dst = v_dst_line;
for (uint32_t uv_line_iter = 0; uv_line_iter < uv_width; ++uv_line_iter) {
*u_dst++ = uv_src[uv_line_iter * 2];
*v_dst++ = uv_src[uv_line_iter * 2 + 1];
}
uv_src += raw->primary_line_stride_bytes;
u_dst_line += uv_stride;
v_dst_line += uv_stride;
}
break;
}
case make_fourcc('Y', 'V', '1', '2'): {
// Y
uint8_t* y_src = buffer.base() + packet.start_offset() + raw->primary_start_offset;
uint8_t* y_dst = i420_bytes.get();
for (uint32_t y_iter = 0; y_iter < raw->primary_display_height_pixels; y_iter++) {
memcpy(y_dst, y_src, raw->primary_display_width_pixels);
y_src += raw->primary_line_stride_bytes;
y_dst += i420_stride;
}
// UV
uint8_t* v_src = buffer.base() + packet.start_offset() + raw->primary_start_offset +
raw->primary_line_stride_bytes * raw->primary_height_pixels;
uint8_t* u_src =
v_src + (raw->primary_line_stride_bytes / 2) * (raw->primary_height_pixels / 2);
uint8_t* u_dst = y_dst;
uint8_t* v_dst = u_dst + uv_stride * uv_height;
for (uint32_t uv_iter = 0; uv_iter < uv_height; uv_iter++) {
memcpy(u_dst, u_src, uv_width);
memcpy(v_dst, v_src, uv_width);
u_dst += uv_stride;
v_dst += uv_stride;
u_src += raw->primary_line_stride_bytes / 2;
v_src += raw->primary_line_stride_bytes / 2;
}
break;
}
default:
Exit("Feeding EmitFrame not yet implemented for fourcc: %s",
fourcc_to_string(raw->fourcc).c_str());
}
}
params_.emit_frame(output->stream_lifetime_ordinal(), i420_bytes.get(),
raw->primary_display_width_pixels, raw->primary_display_height_pixels,
i420_stride, packet.has_timestamp_ish(),
packet.has_timestamp_ish() ? packet.timestamp_ish() : 0);
}
if (params_.frame_sink) {
async_put_frame_count++;
zx::vmo image_vmo;
ZX_ASSERT(ZX_OK == buffer.vmo().duplicate(ZX_RIGHT_SAME_RIGHTS, &image_vmo));
async::PostTask(
params_.fidl_loop->dispatcher(),
[this, image_id = image_id++, vmo = std::move(image_vmo),
vmo_offset = buffer.vmo_offset() + packet.start_offset() + raw->primary_start_offset,
format, cleanup = std::move(cleanup), &async_put_frame_count]() mutable {
params_.frame_sink->PutFrame(image_id, std::move(vmo), vmo_offset, format,
[cleanup = std::move(cleanup)] {
// The ~cleanup can run on any thread (the
// current thread is main_loop's thread),
// and codec_client is ok with that
// (because it switches over to |loop|'s
// thread before sending a Codec message).
//
// ~cleanup
});
async_put_frame_count--;
});
}
// If we didn't std::move(cleanup) before here, then ~cleanup runs here.
}
end_of_output:;
VLOGF("out_thread done");
// output thread done
});
  // Decoding happens now; in_thread, the fidl_loop, out_thread, and the codec itself are
  // taking care of it.
// First wait for the input thread to be done feeding input data. Before the
// in_thread terminates, it'll have sent in a last empty EOS input buffer.
VLOGF("before in_thread->join()...");
in_thread->join();
VLOGF("after in_thread->join()");
// The EOS queued as an input buffer should cause the codec to output an EOS
// output buffer, at which point out_thread should terminate, after it has
// finalized the output file.
VLOGF("before out_thread->join()...");
out_thread->join();
VLOGF("after out_thread->join()");
// We wait for frame_sink to return all the frames for these reasons:
// * As of this writing, some noisy-in-the-log things can happen in Scenic
// if we don't.
// * We don't want to cancel display of any frames, because we want to see
// the frames on the screen.
// * We don't want the |cleanup| to run after codec_client is gone since the
// |cleanup| calls codec_client.
  // * It's easier to grok if activity started by use_video_decoder() is done by the time
  //   use_video_decoder() returns, given use_video_decoder()'s role as an overall sequencer.
if (params_.frame_sink) {
OneShotEvent frames_done_event;
fit::closure on_frames_returned = [&frames_done_event] { frames_done_event.Signal(); };
async::PostTask(params_.fidl_loop->dispatcher(), [frame_sink = params_.frame_sink,
on_frames_returned =
std::move(on_frames_returned)]() mutable {
frame_sink->PutEndOfStreamThenWaitForFramesReturnedAsync(std::move(on_frames_returned));
});
    // The just-posted wait will signal frames_done_event using the fidl_loop's thread, which
    // is not this thread.
FX_LOGS(INFO) << "waiting for all frames to be returned from Scenic...";
frames_done_event.Wait(zx::deadline_after(zx::sec(30)));
FX_LOGS(INFO) << "all frames have been returned from Scenic";
    // Now we know that there are zero frames in frame_sink, including zero in-flight
    // cleanup(s) (in the sense of a pending/running cleanup that's touching codec_client to
    // post new work). Work already posted via codec_client can still be in flight; see below.
}
// Close the channels explicitly (just so we can more easily print messages
// before and after vs. ~codec_client).
VLOGF("before codec_client stop...");
codec_client_->Stop();
VLOGF("after codec_client stop.");
codec_client_ = std::nullopt;
// success
return;
}
void use_video_decoder(Format format, UseVideoDecoderParams params) {
VLOGF("use_video_decoder()");
auto video_decoder_runner =
std::make_unique<VideoDecoderRunner>(std::move(format), std::move(params));
video_decoder_runner->Run();
// ~video_decoder_runner
}
} // namespace
void use_h264_decoder(UseVideoDecoderParams params) {
use_video_decoder(Format::kH264, std::move(params));
}
void use_h264_multi_decoder(UseVideoDecoderParams params) {
use_video_decoder(Format::kH264Multi, std::move(params));
}
void use_vp9_decoder(UseVideoDecoderParams params) {
use_video_decoder(Format::kVp9, std::move(params));
}