[amlogic-decoder][h264] Fix DPB size calculation

We need to be more careful when trying to fix up the DPB size in the
face of invalid inputs, as otherwise we generate invalid values and hit
some asserts. Also some inputs in tests were hanging flakily because
there weren't enough buffers, so increase the limit in those cases and
add a new test to ensure we always test the minimum frame count.

Bug: 41492

Change-Id: I629654e7def288ad625892689b5c5a61b83dcf69
diff --git a/garnet/drivers/video/amlogic-decoder/h264_decoder.cc b/garnet/drivers/video/amlogic-decoder/h264_decoder.cc
index 7a0d7fe..5fff839 100644
--- a/garnet/drivers/video/amlogic-decoder/h264_decoder.cc
+++ b/garnet/drivers/video/amlogic-decoder/h264_decoder.cc
@@ -468,9 +468,9 @@
 
   uint32_t actual_dpb_size = frame_count;
   ZX_DEBUG_ASSERT(actual_dpb_size <= kMaxActualDPBSize);
-  ZX_DEBUG_ASSERT(next_max_reference_size_ <= next_max_dpb_size_);
+  ZX_DEBUG_ASSERT(next_mv_buffer_count_ <= next_max_dpb_size_ + 1);
   uint32_t av_scratch0 =
-      (next_max_reference_size_ << 24) | (actual_dpb_size << 16) | (next_max_dpb_size_ << 8);
+      (next_mv_buffer_count_ << 24) | (actual_dpb_size << 16) | (next_max_dpb_size_ << 8);
   AvScratch0::Get().FromValue(av_scratch0).WriteTo(owner_->dosbus());
 
   state_ = DecoderState::kRunning;
@@ -577,42 +577,60 @@
 
   uint32_t max_dpb_size = GetMaxDpbSize(level_idc, mb_width, mb_height);
   if (max_dpb_size == 0) {
-    LOG(WARN, "mb_width and/or mb_height invalid? - mb_width: %u mb_height: %u", mb_width,
-        mb_height);
-    max_dpb_size = kMaxActualDPBSize;
+    LOG(WARN,
+        "level_idc, mb_width and/or mb_height invalid? - level_idc: %u mb_width: %u mb_height: %u",
+        level_idc, mb_width, mb_height);
+    return ZX_ERR_INTERNAL;
   }
   // GetMaxDpbSize() returns max 16, but kMaxActualDPBSize is 24.
   ZX_DEBUG_ASSERT(max_dpb_size < kMaxActualDPBSize);
 
-  // The firmware says how many reference frames are needed by this stream.  We don't expect the
-  // firmware to mess this up, but warn if it does, and try to fixup.  If fixup is actually
-  // triggered, it won't necessarily help.
+  // |max_reference_size| comes directly from max_num_ref_frames from the bitstream. Fix it up at
+  // least enough to avoid crashes, but if this value is invalid it's possible anything else in the
+  // bitstream could be broken.
   uint32_t max_reference_size = stream_info.max_reference_size();
-  if (max_reference_size > kMaxActualDPBSize - 1) {
-    LOG(WARN, "max_reference_size is too large - clamping - max_reference_size: %u",
-        max_reference_size);
-    max_reference_size = kMaxActualDPBSize - 1;
+  if (max_reference_size > max_dpb_size) {
+    LOG(WARN,
+        "max_reference_size is too large - clamping - max_reference_size: %u max_dpb_size: %u",
+        max_reference_size, max_dpb_size);
+    max_reference_size = max_dpb_size;
   } else if (max_reference_size == 0) {
+    // This is technically permissible by the h.264 spec, but still try to increase it to avoid
+    // issues.
     LOG(WARN, "max_reference_size is zero - unexpected - using default: %u", max_dpb_size);
-    max_reference_size = max_dpb_size - 1;
+    max_reference_size = max_dpb_size;
   }
 
   // The HW decoder / firmware seems to require several extra frames or it won't continue decoding
-  // frames. TODO(fxb/43085): Verify that min_buffer_count_for_camping (as opposed to
-  // min_buffer_count) can't be reduced.
+  // frames. TODO(fxb/43085): Verify whether min_buffer_count_for_camping (as opposed to
+  // min_buffer_count) can be reduced to max_dpb_size + 1, which is what you would expect based on
+  // max_num_reorder_frames from the h.264 spec.
   constexpr uint32_t kDbpSizeAdj = 6;
-  // Use kDpbSizeAdj should include the frame that's decoded into, so we don't need to add 1 extra
-  // here.
-  uint32_t min_buffer_count = max_reference_size + kDbpSizeAdj;
-  min_buffer_count = std::min(min_buffer_count, kMaxActualDPBSize);
+  // Seems needed for decoding bear.h264, but unclear why.
+  constexpr uint32_t kAbsoluteMinBufferCount = 10u;
+  // Technically the max we should need to camp on to decode is max_dpb_size + 1. That's because a
+  // frame is guaranteed to be output when the DPB is full and the hardware tries to insert the
+  // newly-decoding frame into it. That's also the minimum because until the DPB is full we don't
+  // know which frame should be output first (except in certain special cases like IDR frames or SEI
+  // data reducing the limit).
+  // In practice the firmware won't necessarily output frames immediately, so we need to add on some
+  // slack. |max_reference_size| + 6 is what the linux driver does in low memory situations, so it
+  // should normally work. However, when max_dpb_size and max_reference_size are very low (like in
+  // bear.h264) that isn't always enough for the firmware, so we require at least 10.
+  uint32_t min_buffer_count = std::max(std::max(max_reference_size + kDbpSizeAdj, max_dpb_size + 1),
+                                       kAbsoluteMinBufferCount);
+  ZX_DEBUG_ASSERT(min_buffer_count < kMaxActualDPBSize);
 
-  // Add 1 because firmware may expect this.
-  max_reference_size++;
+  // These we pass back to the firmware later, having computed/adjusted as above.
+  next_max_dpb_size_ = max_dpb_size;
+  // We need to store reference MVs for all active reference frames, as well as 1 extra for the
+  // frame that's being decoded into (in case it later becomes a reference frame).
+  next_mv_buffer_count_ = max_reference_size + 1;
 
   // Rounding to 4 macroblocks is for matching the linux driver, in case the
   // hardware happens to round up as well.
-  uint32_t mv_buffer_size =
-      fbl::round_up(mb_height, 4u) * fbl::round_up(mb_width, 4u) * mb_mv_byte * max_reference_size;
+  uint32_t mv_buffer_size = fbl::round_up(mb_height, 4u) * fbl::round_up(mb_width, 4u) *
+                            mb_mv_byte * next_mv_buffer_count_;
   uint32_t mv_buffer_alloc_size = fbl::round_up(mv_buffer_size, ZX_PAGE_SIZE);
 
   auto create_result = InternalBuffer::Create("H264ReferenceMvs", &owner_->SysmemAllocatorSyncPtr(),
@@ -687,10 +705,6 @@
     }
   }
 
-  // These we pass back to the firmware later, having computed/adjusted as above.
-  next_max_reference_size_ = max_reference_size;
-  next_max_dpb_size_ = max_dpb_size;
-
   // The actual # of buffers is determined by sysmem, but constrainted by "max_dpb_size" as the min
   // # of needed buffers for reference and re-ordering purposes, not counting decode-into buffer.
   // The "max" means the max the stream might require, so that's actually the min # of buffers we
diff --git a/garnet/drivers/video/amlogic-decoder/h264_decoder.h b/garnet/drivers/video/amlogic-decoder/h264_decoder.h
index f29d294..6cebdab 100644
--- a/garnet/drivers/video/amlogic-decoder/h264_decoder.h
+++ b/garnet/drivers/video/amlogic-decoder/h264_decoder.h
@@ -64,10 +64,10 @@
   DecoderState state_ = DecoderState::kRunning;
 
   // These are set in InitializeFrames for use in InitializedFrames.
-  // next_max_reference_size_ and next_max_dpb_size_ are specified to the firmware, along with the
+  // next_mv_buffer_count_ and next_max_dpb_size_ are specified to the firmware, along with the
   // actual number of frames.  It's not immediately clear why/whether the firmware actually needs
   // these in addition to actual number of frames.
-  uint32_t next_max_reference_size_ = 0;
+  uint32_t next_mv_buffer_count_ = 0;
   uint32_t next_max_dpb_size_ = 0;
   uint32_t display_width_ = 0;
   uint32_t display_height_ = 0;
diff --git a/garnet/drivers/video/amlogic-decoder/tests/integration/test_frame_allocator.h b/garnet/drivers/video/amlogic-decoder/tests/integration/test_frame_allocator.h
index 4f250c2..e031814 100644
--- a/garnet/drivers/video/amlogic-decoder/tests/integration/test_frame_allocator.h
+++ b/garnet/drivers/video/amlogic-decoder/tests/integration/test_frame_allocator.h
@@ -25,6 +25,8 @@
 
   void set_decoder(VideoDecoder* decoder) { decoder_ = decoder; }
 
+  void set_use_minimum_frame_count(bool use_minimum) { use_minimum_frame_count_ = use_minimum; }
+
   zx_status_t InitializeFrames(zx::bti bti, uint32_t min_frame_count, uint32_t max_frame_count,
                                uint32_t coded_width, uint32_t coded_height, uint32_t stride,
                                uint32_t display_width, uint32_t display_height, bool has_sar,
@@ -39,7 +41,8 @@
       uint32_t frame_vmo_bytes = coded_height * stride * 3 / 2;
       std::uniform_int_distribution<uint32_t> frame_count_distribution(min_frame_count,
                                                                        max_frame_count);
-      uint32_t frame_count = frame_count_distribution(prng_);
+      uint32_t frame_count =
+          use_minimum_frame_count_ ? min_frame_count : frame_count_distribution(prng_);
       LOG(INFO, "AllocateFrames() - frame_count: %u min_frame_count: %u max_frame_count: %u",
           frame_count, min_frame_count, max_frame_count);
       for (uint32_t i = 0; i < frame_count; i++) {
@@ -79,6 +82,7 @@
   uint64_t next_non_codec_buffer_lifetime_ordinal_ = 1;
   std::random_device rd_;
   std::mt19937 prng_;
+  bool use_minimum_frame_count_ = false;
 };
 
 #endif  // GARNET_DRIVERS_VIDEO_AMLOGIC_DECODER_TESTS_INTEGRATION_TEST_FRAME_ALLOCATOR_H_
diff --git a/garnet/drivers/video/amlogic-decoder/tests/integration/test_h264.cc b/garnet/drivers/video/amlogic-decoder/tests/integration/test_h264.cc
index bdf7d4f..365cc941 100644
--- a/garnet/drivers/video/amlogic-decoder/tests/integration/test_h264.cc
+++ b/garnet/drivers/video/amlogic-decoder/tests/integration/test_h264.cc
@@ -59,10 +59,11 @@
 
 class TestH264 {
  public:
-  static void Decode(bool use_parser) {
+  static void Decode(bool use_parser, bool use_minimum_frame_count) {
     auto video = std::make_unique<AmlogicVideo>();
     ASSERT_TRUE(video);
     TestFrameAllocator client(video.get());
+    client.set_use_minimum_frame_count(use_minimum_frame_count);
 
     auto bear_h264 = TestSupport::LoadFirmwareFile("video_test_data/bear.h264");
     ASSERT_NE(nullptr, bear_h264);
@@ -316,7 +317,7 @@
     video.reset();
   }
 
-  static void DecodeMalformed(uint64_t location, uint8_t value) {
+  static void DecodeMalformed(uint64_t location, uint8_t value, bool enforce_no_frames) {
     auto video = std::make_unique<AmlogicVideo>();
     ASSERT_TRUE(video);
     TestFrameAllocator client(video.get());
@@ -368,8 +369,10 @@
     video->parser()->CancelParsing();
 
     std::this_thread::sleep_for(std::chrono::milliseconds(20));
-    // No frames should be returned because the error happened too early.
-    EXPECT_EQ(0u, frame_count);
+
+    if (enforce_no_frames) {
+      EXPECT_EQ(0u, frame_count);
+    }
 
     video.reset();
   }
@@ -382,9 +385,11 @@
   }
 };
 
-TEST(H264, Decode) { TestH264::Decode(true); }
+TEST(H264, Decode) { TestH264::Decode(true, /*use_minimum_frame_count=*/false); }
 
-TEST(H264, DecodeNoParser) { TestH264::Decode(false); }
+TEST(H264, DecodeMinimumFrames) { TestH264::Decode(true, /*use_minimum_frame_count=*/true); }
+
+TEST(H264, DecodeNoParser) { TestH264::Decode(false, /*use_minimum_frame_count=*/false); }
 
 TEST(H264, DelayedReturn) { TestH264::DelayedReturn(); }
 
@@ -394,11 +399,21 @@
 
 TEST(H264, DecodeMalformedHang) {
   // Parameters found through fuzzing.
-  TestH264::DecodeMalformed(638, 44);
+  TestH264::DecodeMalformed(638, 44, true);
 }
 
 TEST(H264, DecodeMalformedTooLarge) {
   // Parameters found through fuzzing - causes mb_width=3 and total_mbs=4986, so the height is
   // calculated as 26592 pixels.
-  TestH264::DecodeMalformed(593, 176);
+  TestH264::DecodeMalformed(593, 176, true);
+}
+
+TEST(H264, DecodeMalformedBadDPB) {
+  // Parameters found through fuzzing. Gives an invalid level_idc.
+  TestH264::DecodeMalformed(16016, 199, false);
+}
+
+TEST(H264, DecodeMalformedBadReferenceCount) {
+  // Parameters found through fuzzing. Gives an invalid number of reference frames.
+  TestH264::DecodeMalformed(591, 141, false);
 }