| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_MEDIA_CODEC_EXAMPLES_USE_MEDIA_DECODER_USE_VIDEO_DECODER_H_ |
| #define SRC_MEDIA_CODEC_EXAMPLES_USE_MEDIA_DECODER_USE_VIDEO_DECODER_H_ |
| |
| #include <fuchsia/mediacodec/cpp/fidl.h> |
| #include <inttypes.h> |
| #include <lib/async-loop/cpp/loop.h> |
| #include <lib/async-loop/default.h> |
| #include <stdint.h> |
| |
| #include <limits> |
| #include <variant> |
| |
| #include <openssl/sha.h> |
| |
// We only flush input EOS for streams whose stream_lifetime_ordinal %
// kFlushInputEosStreamLifetimeOrdinalPeriod == 1.
//
// Declared `inline` so that every translation unit including this header refers to the same
// variable, instead of each one getting its own internal-linkage copy.
inline constexpr uint64_t kFlushInputEosStreamLifetimeOrdinalPeriod = 16;
| |
| class FrameSink; |
| class InStreamPeeker; |
| class InputCopier; |
| |
| // An EmitFrame is passed I420 frames with stride == width, and with width |
| // and height being display_width and display_height (not coded_width and |
| // coded_height). The width and height must be even. |
| typedef fit::function<void(uint64_t stream_lifetime_ordinal, uint8_t* i420_data, uint32_t width, |
| uint32_t height, uint32_t stride, bool has_timestamp_ish, |
| uint64_t timestamp_ish)> |
| EmitFrame; |
| |
| struct FrameToCompare; |
| |
| // Keep fields in alphabetical order please, other than is_validated_. |
| struct UseVideoDecoderTestParams final { |
| ~UseVideoDecoderTestParams() { |
| // Ensure Validate() gets called at least once, if a bit later than ideal. |
| Validate(); |
| } |
| |
| UseVideoDecoderTestParams Clone() const { |
| UseVideoDecoderTestParams result = *this; |
| result.magic_validated_ = 0; |
| return result; |
| } |
| |
| // Validate() can be called at any time, preferably before the parameters are used. |
| // |
| // Validate() is also called from the destructor just in case as a backstop, but the call from the |
| // constructor shouldn't be the first call to Validate(). The destructor will catch invalid field |
| // values if nothing else blows up before then however. |
| void Validate() const { |
| if (magic_validated_ == kPrivateMagicValidated) { |
| return; |
| } |
| |
| if (first_expected_output_frame_ordinal != kDefaultFirstExpectedOutputFrameOrdinal) { |
| printf("first_expected_output_frame_ordinal: %" PRIu64 "\n", |
| first_expected_output_frame_ordinal); |
| } |
| // All values for first_expected_output_frame_ordinal are valid. |
| |
| if (keep_stream_modulo != kDefaultKeepStreamModulo) { |
| printf("keep_stream_modulo: %" PRIu64 "\n", keep_stream_modulo); |
| } |
| ZX_ASSERT(keep_stream_modulo != 0); |
| ZX_ASSERT(keep_stream_modulo % 2 == 0); |
| |
| if (loop_stream_count != kDefaultLoopStreamCount) { |
| printf("loop_stream_count: %u\n", loop_stream_count); |
| } |
| ZX_ASSERT(loop_stream_count != 0); |
| |
| if (reset_hash_each_iteration != kDefaultResetHashEachIteration) { |
| printf("reset_hash_each_iteration: %u\n", reset_hash_each_iteration); |
| } |
| |
| if (skip_frame_ordinal != kDefaultSkipFrameOrdinal) { |
| printf("skip_frame_ordinal: %" PRId64 "\n", skip_frame_ordinal); |
| } |
| ZX_ASSERT(skip_frame_ordinal >= -1); |
| |
| if (max_num_reorder_frames_threshold != kDefaultMaxNumReorderFramesThreshold) { |
| printf("max_num_reorder_frames_threshold: %" PRId64 "\n", max_num_reorder_frames_threshold); |
| } |
| ZX_ASSERT(max_num_reorder_frames_threshold >= 0); |
| |
| if (print_fps != kDefaultPrintFps) { |
| printf("print_fps: %u\n", print_fps); |
| if (print_fps && !skip_formatting_output_pixels) { |
| printf("Consider also setting skip_formatting_output_pixels"); |
| } |
| } |
| |
| if (print_fps_modulus != kDefaultPrintFpsModulus) { |
| printf("print_fps_modulus: %" PRIu64 "\n", print_fps_modulus); |
| } |
| ZX_ASSERT(print_fps_modulus != 0); |
| |
| if (per_frame_debug_output != kDefaultPerFrameDebugOutput) { |
| printf("per_frame_debug_output: %u\n", per_frame_debug_output); |
| } |
| |
| if (require_sw != kDefaultRequireSw) { |
| printf("require_sw: %u\n", require_sw); |
| } |
| |
| if (per_frame_golden_sha256 != kDefaultPerFrameGoldenSha256) { |
| uint32_t count = 0; |
| while (per_frame_golden_sha256[count]) { |
| ++count; |
| } |
| printf("per_frame_golden_sha256 provided - count: %u\n", count); |
| } |
| |
| if (compare_to_sw_decode != kDefaultCompareToSwDecode) { |
| printf("compare_to_sw_decode: %u\n", compare_to_sw_decode); |
| } |
| |
| if (frame_to_compare != kDefaultFrameToCompare) { |
| printf("frame_to_compare set\n"); |
| // avoid recursion beyond 2 |
| ZX_ASSERT(!compare_to_sw_decode); |
| } |
| |
| if (frame_num_gaps != kDefaultFrameNumGaps) { |
| printf("frame_num_gaps: %u\n", frame_num_gaps); |
| } |
| |
| if (min_expected_output_frame_count != kDefaultMinExpectedOutputFrameCount) { |
| printf("min_expected_ouput_frame_count: %d\n", min_expected_output_frame_count); |
| } |
| |
| if (golden_sha256 != kDefaultGoldenSha256) { |
| printf("golden_sha256: %s\n", golden_sha256); |
| } |
| |
| if (skip_formatting_output_pixels != kDefaultSkipFormattingOutputPixels) { |
| printf("skip_formatting_output_pixels: %u\n", skip_formatting_output_pixels); |
| ZX_ASSERT(skip_formatting_output_pixels); |
| ZX_ASSERT(!golden_sha256 && !per_frame_golden_sha256); |
| } |
| |
| magic_validated_ = kPrivateMagicValidated; |
| } |
| |
| // Client code should not touch this field. This field can't be protected or private without |
| // preventing aggregate initialization, so client code just needs to avoid initializing this |
| // field (to anything). Client code should pretend that client code can't possibly guess what |
| // kPrivateMagicValidated is. |
| // |
| // When set to kPrivateMagicValidated, all other fields have been validated. Else other fields |
| // have not been validated. |
| mutable uint64_t magic_validated_ = 0; |
| |
| // By default, the stream doesn't stop early. |
| int64_t input_stop_stream_after_frame_ordinal = -1; |
| |
| // The first output frame timestamp_ish that's expected on output. PTS values before this are not |
| // expected. |
| // |
| // For example if skip_frame_ordinal 0 is used, several frames after that are also skipped until |
| // the next keyframe, so first_expected_output_frame_ordinal can be set to the PTS of the next |
| // keyframe. |
| // |
| // By default PTS 0 is expected. |
| static constexpr uint64_t kDefaultFirstExpectedOutputFrameOrdinal = 0; |
| uint64_t first_expected_output_frame_ordinal = kDefaultFirstExpectedOutputFrameOrdinal; |
| |
| // If stream_lifetime_ordinal % keep_stream_modulo is 1, the input stream is flushed after |
| // queueing input EOS, so that any subsequent stream switch won't result in any discarded data |
| // from the flushed stream. |
| // |
| // By setting this to an even number larger than 2, some streams don't get flushed, which allows a |
| // test to cover that discard doesn't cause problems. |
| // |
| // The hash only pays attention to the frames from streams whose stream_lifetime_ordinal % |
| // keep_stream_modulo == 0. |
| // |
| // By default every stream is flushed. |
| static constexpr uint64_t kDefaultKeepStreamModulo = 2; |
| uint64_t keep_stream_modulo = kDefaultKeepStreamModulo; |
| |
| // If >1, loops through the input data this many times, each time using a new stream with new |
| // stream_lifetime_ordinal. |
| // |
| // 0 is invalid. |
| // |
| // By default, there's only one stream. |
| static constexpr uint32_t kDefaultLoopStreamCount = 1; |
| uint32_t loop_stream_count = kDefaultLoopStreamCount; |
| |
| // Reset sha256 context each iteration. This allows looping faster to get a flake to repro more |
| // often, and avoids the hash being dependent on loop_stream_count. |
| static constexpr bool kDefaultResetHashEachIteration = false; |
| bool reset_hash_each_iteration = kDefaultResetHashEachIteration; |
| |
| // If >= 0, skips any input NAL with PTS == skip_frame_ordinal. |
| // |
| // -1 is the only valid negative value. |
| // |
| // By default, no input NALs are skipped due to this parameter. |
| static constexpr int64_t kDefaultSkipFrameOrdinal = -1; |
| int64_t skip_frame_ordinal = kDefaultSkipFrameOrdinal; |
| |
| // This many frames get queued then stop queuing frames. |
| uint64_t frame_count = std::numeric_limits<uint64_t>::max(); |
| |
| // nullopt means no override |
| std::optional<std::string> mime_type; |
| |
| // If frames are out of order by more than this much, and/or the decoder is delaying output by |
| // more than this much vs input, or a combination of both, fail the test (by timing out). |
| // |
| // When set low enough, this (also) verifies that the codec doesn't impose extra input to output |
| // delay beyond what the stream requires due to frame reordering in the stream. This "extra" input |
| // to output delay is not a time delay, but a frame delay. In other words, the codec not |
| // outputting an output frame that it should be able to output already, until additional |
| // compressed input frame(s) are sent to the codec. Unfortunately it's fairly common for codec |
| // implementations to impose ~1 frame of extra input to output delay especially for h264 due to |
| // the way the bitstream spec makes it unnecessarily difficult for a decoder to know when it's |
| // safe to output a frame without messing up frame display ordering; AFAICT, a decoder doing this |
| // well for RTC use cases assuming a broad cross-section of encoders goes beyond what's nominally |
| // required of an h264 decoder per the h264 spec (non-"normative", at least to some degree). When |
| // the encoder generates minimal frame_num intervals from frame to frame in display order, decoder |
| // implementations have an easier time avoiding extra frame delay. |
| // |
| // This field is unrelated to VBV considerations and timing generally. This field is just about |
| // the decoder not demanding more compressed input frames than expected in order to generate |
| // uncompressed output frames. |
| // |
| // For h264, setting max_num_reorder_frames_threshold 0 means no frame reordering and no extra |
| // input to output delay imposed by the codec implementation. |
| // |
| // For vp9, setting max_num_reorder_frames_threshold 1 means no frame reordering and no extra |
| // input to output delay imposed by the codec implementation. This threshold is applied before any |
| // unpacking of vp9 "superframes". It'd make sense to change this to be more consistent with the |
| // h264 case (making this 0 for no frame reordering and no extra input to output delay) in a |
| // separate CL that only changes the definition and updates all the per-test thresholds. For vp9 |
| // decoders it's generally expected that no extra input to output delay is imposed, at least when |
| // an input stream isn't adversarial / pathological / particularly badly encoded in this regard. |
| // |
| // We intentionally use uint32_t max not int64_t max. |
| static constexpr int64_t kDefaultMaxNumReorderFramesThreshold = |
| std::numeric_limits<uint32_t>::max(); |
| int64_t max_num_reorder_frames_threshold = kDefaultMaxNumReorderFramesThreshold; |
| |
| // If true, print the frames-per-second each print_fps_modulus frames. |
| static constexpr bool kDefaultPrintFps = false; |
| bool print_fps = kDefaultPrintFps; |
| |
| // If print_fps is true, print the frames-per-second each print_fps_modulus frames. |
| static constexpr uint64_t kDefaultPrintFpsModulus = 1; |
| uint64_t print_fps_modulus = kDefaultPrintFpsModulus; |
| |
| static constexpr bool kDefaultPerFrameDebugOutput = true; |
| bool per_frame_debug_output = kDefaultPerFrameDebugOutput; |
| |
| // Require SW decode. |
| static constexpr bool kDefaultRequireSw = false; |
| bool require_sw = kDefaultRequireSw; |
| |
| // Should the decoder output in the Intel Y-tiling |
| // Since Y-tiling is currently only supported in NV12, setting this value to true will also |
| // constrain the output pixel format to NV12 and the colorspace to REC709. This option is really |
| // only applicable to VAAPI decoders |
| static constexpr bool kDefaultYTiling = false; |
| bool is_output_y_tiled = kDefaultYTiling; |
| |
| // Must be either nullptr, or point to a nullptr-terminated array. |
| static constexpr char** kDefaultPerFrameGoldenSha256 = nullptr; |
| const char** per_frame_golden_sha256 = nullptr; |
| |
| // If true, a failure to match per_frame_golden_sha256 will decode up to the mis-matching frame, |
| // and then compare that frame pixel-by-pixel, with stderr output indicating the diff in Y, U, and |
| // V. The SW decode for this purpose only occurs if a per_frame_golden_sha256 mis-match occurs |
| // first. |
| static constexpr bool kDefaultCompareToSwDecode = true; |
| bool compare_to_sw_decode = kDefaultCompareToSwDecode; |
| |
| // So far, this is only used recursively to compare a HW-decoded frame to a SW-decoded frame. |
| // |
| // This is the "actual" HW-decoded frame to compare to the corresponding "expected" SW-decoded |
| // frame. |
| static constexpr FrameToCompare* kDefaultFrameToCompare = nullptr; |
| FrameToCompare* frame_to_compare = kDefaultFrameToCompare; |
| |
| // Remove some of the frames, to force frame_num gap handling to run. Do this for lots of frames |
| // to check if leaks happen. We typically don't care what the golden_sha256 is in this case, nor |
| // do we expect that the hash would necessarily be consistent from decoder to decoder, as we don't |
| // require decoders to handle frame_num gaps in any particular way. We test that a decoder |
| // doesn't get stuck or crash. At least for now, we test that a decoder does not indicate |
| // failure. The first missing frame_num will be the frame_num of the second picture (ordinal 1, |
| // cardinal 2, regardless of what the frame_num values are). For now this only works with streams |
| // that have 1 slice per frame, as it doesn't actually parse the slices for the frame_num or first |
| // macroblock number. But the intent is to skip all frame_num(s) of a picture, not skip slices |
| // within a picture (which can be a separate thing). |
| static constexpr bool kDefaultFrameNumGaps = false; |
| bool frame_num_gaps = kDefaultFrameNumGaps; |
| |
| // When using frame_num_gaps true, we can expect a minimum number of output frames, to validate |
| // that the decoder outputs at least some frames after the first gap. We don't require a |
| // specific number of frames however, since handling strategies can differ. If the test stream |
| // only contains 1 IDR frame, then this verifies that the decoder doesn't require a new IDR frame |
| // to output pictures (which is desirable in that it provides slightly more visual motion |
| // continuity, but _will_ result in output pictures that are partly or fully corrupted visually.) |
| // |
| // None of this frame_num gap stuff is intended to condone input streams with corrupted/missing |
| // input data. Decoders are not required to handle general corrupted/missing data, other than |
| // not crashing and not getting stuck. It's fine if a decoder just indicates stream or codec |
| // failure on corrupted/missing input data other than frame_num gaps where exactly entire frames |
| // are missing. |
| static constexpr int32_t kDefaultMinExpectedOutputFrameCount = -1; |
| int32_t min_expected_output_frame_count = kDefaultMinExpectedOutputFrameCount; |
| |
| // If non-nullptr, the expected sha256 hash of all the output frame data in I420 format with |
| // stride == width. |
| static constexpr char* kDefaultGoldenSha256 = nullptr; |
| const char* golden_sha256 = kDefaultGoldenSha256; |
| |
| static constexpr bool kDefaultSkipFormattingOutputPixels = false; |
| bool skip_formatting_output_pixels = kDefaultSkipFormattingOutputPixels; |
| |
| private: |
| // Client code should not exploit knowledge of this value, and should not directly initialize or |
| // directly set magic_validated_ to any value. |
| static constexpr uint64_t kPrivateMagicValidated = 0xC001DECAFC0DE; |
| }; |
| |
// This represents the "actual" frame (not the "expected" frame).
//
// I420 format only, for now.
//
// Every member has a default initializer so that a default-constructed FrameToCompare holds
// well-defined values (null data, zero dimensions) rather than indeterminate ones. The struct is
// still an aggregate, so brace initialization in member order keeps working. This struct has no
// destructor and does not free the buffer that data points at.
struct FrameToCompare {
  // data must point at a complete frame, with at least width * height * 3 / 2 bytes
  uint8_t* data = nullptr;

  // Which "expected" frame ordinal needs to be compared to this "actual" frame.
  uint32_t ordinal = 0;

  // All the pixels width * height Y and width / 2 * height / 2 UV will be compared.
  // The stride == width.
  uint32_t width = 0;
  uint32_t height = 0;
};
| |
| struct UseVideoDecoderParams { |
| // the loop created and run/started by main(). The codec_factory is |
| // and sysmem are bound to fidl_loop->dispatcher(). |
| async::Loop* fidl_loop{}; |
| // the thread on which fidl_loop activity runs. |
| thrd_t fidl_thread{}; |
| // codec_factory to take ownership of, use, and close by the |
| // time the function returns. |
| fuchsia::mediacodec::CodecFactoryHandle codec_factory; |
| fuchsia::sysmem2::AllocatorHandle sysmem; |
| InStreamPeeker* in_stream = nullptr; |
| InputCopier* input_copier = nullptr; |
| uint64_t min_output_buffer_size = 0; |
| uint32_t min_output_buffer_count = 0; |
| bool is_secure_output = false; |
| bool is_secure_input = false; |
| bool lax_mode = false; |
| // if set, is called to emit each frame in i420 format + timestamp |
| // info. |
| EmitFrame emit_frame; |
| const UseVideoDecoderTestParams* test_params = nullptr; |
| }; |
// use_h264_decoder()
//
// If anything goes wrong, exit(-1) is used directly (until we have any reason
// to do otherwise).
//
// This function returns void, so the sha256 of the output data is not handed
// back as a return value. That sha256 is intended as a golden-file value when
// this function is used as part of a test (see
// UseVideoDecoderTestParams::golden_sha256). The sha256 accounts for all the
// output payload data and also the output format parameters. When the same
// input file is decoded we expect the sha256 to be the same.
//
| void use_h264_decoder(UseVideoDecoderParams params); |
| |
| // The same as use_h264_decoder, but for a VP9 file wrapped in an IVF container. |
| void use_vp9_decoder(UseVideoDecoderParams params); |
| |
| // MJPEG file is a series of JPEG images |
| void use_mjpeg_decoder(UseVideoDecoderParams params); |
| |
| // Common function pointer type shared by use_h264_decoder, use_vp9_decoder, use_mjpeg_decoder. |
| typedef void (*UseVideoDecoderFunction)(UseVideoDecoderParams params); |
| |
| #endif // SRC_MEDIA_CODEC_EXAMPLES_USE_MEDIA_DECODER_USE_VIDEO_DECODER_H_ |