[mediaplayer] fix issues preventing opus playback

This CL addresses the following issues, which otherwise prevent
opus playback from working:
1) The opus decoder produces interleaved output (in contrast to most
   ffmpeg audio decoders), but it allocates buffers 6 times as large
   as the generated packet (AVFrame, in ffmpeg terminology). This
   creates a situation in which the VMO used by the audio renderer is
   quickly exhausted. The ffmpeg audio decoder wrapper was modified
   to allocate system memory for the decoder and then copy only the
   relevant part of the payload buffer into a buffer allocated from
   the provided allocator (the one managing the renderer's VMO, in
   this case).
2) ffmpeg was complaining that the codec context's pkt_timebase field
   was not set. Though this had no audible effect, this CL fixes that
   anyway.
3) The AudioConsumer test UnsupportedCodec was using the opus codec
   designation, which is now supported. That designation was replaced
   with a nonsense string.

TEST: added an end-to-end test for opus
      $ fx test mediaplayer_tests
      The new test will fail until opus integration into the ffmpeg
      prebuilt is complete.

Bug: 50035

Change-Id: I9d854253222224490a48e497082ae24510bd4eb7
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/380304
Commit-Queue: Dale Sather <dalesat@google.com>
Reviewed-by: Austin Foxley <afoxley@google.com>
Testability-Review: Austin Foxley <afoxley@google.com>
diff --git a/src/media/playback/mediaplayer/BUILD.gn b/src/media/playback/mediaplayer/BUILD.gn
index 5c65cbe..e433f02 100644
--- a/src/media/playback/mediaplayer/BUILD.gn
+++ b/src/media/playback/mediaplayer/BUILD.gn
@@ -220,6 +220,11 @@
               "//garnet/test_data/media/third_party/chromium_media_test_data/bear-1280x720.mp4")
       dest = "media_test_data/bear.mp4"
     },
+    {
+      path = rebase_path(
+              "//garnet/test_data/media/third_party/chromium_media_test_data/sfx-opus-441.webm")
+      dest = "media_test_data/sfx-opus-441.webm"
+    },
   ]
 }
 
diff --git a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.cc b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.cc
index 3989cf8..8b7285c 100644
--- a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.cc
+++ b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.cc
@@ -29,6 +29,18 @@
   if (av_sample_fmt_is_planar(context()->sample_fmt)) {
     // Prepare for interleaving.
     lpcm_util_ = LpcmUtil::Create(*stream_type_->audio());
+    copy_or_interleave_ = true;
+  }
+
+  // Codec-specific code goes here.
+  switch (context()->codec_id) {
+    case AV_CODEC_ID_OPUS:
+      // The opus decoder allocates buffers six times as large as the resulting payload. We need
+      // to copy the payloads, so we don't use up all the buffer space available to the renderer.
+      copy_or_interleave_ = true;
+      break;
+    default:
+      break;
   }
 }
 
@@ -48,6 +60,8 @@
   }
 
   context()->reordered_opaque = packet->discontinuity() ? 1 : 0;
+  context()->pkt_timebase.num = pts_rate().reference_delta();
+  context()->pkt_timebase.den = pts_rate().subject_delta();
 }
 
 int FfmpegAudioDecoder::BuildAVFrame(const AVCodecContext& av_codec_context, AVFrame* av_frame) {
@@ -63,10 +77,11 @@
     return buffer_size;
   }
 
-  // Get the right payload buffer. If we need to interleave later, we just get
+  // Get the right payload buffer. If we need to copy or interleave later, we just get
   // a buffer allocated using malloc. If not, we ask the stage for a buffer.
-  fbl::RefPtr<PayloadBuffer> buffer = lpcm_util_ ? PayloadBuffer::CreateWithMalloc(buffer_size)
-                                                 : AllocatePayloadBuffer(buffer_size);
+  fbl::RefPtr<PayloadBuffer> buffer = copy_or_interleave_
+                                          ? PayloadBuffer::CreateWithMalloc(buffer_size)
+                                          : AllocatePayloadBuffer(buffer_size);
 
   if (!buffer) {
     // TODO(dalesat): Record/report packet drop.
@@ -143,20 +158,24 @@
 
   uint64_t payload_size = stream_type_->audio()->min_buffer_size(av_frame.nb_samples);
 
-  if (lpcm_util_) {
-    // We need to interleave. The non-interleaved frames are in
+  if (copy_or_interleave_) {
+    // We need to copy or interleave. The original frames are in
     // |payload_buffer|, which was allocated from system memory. That buffer
     // will get released later in ReleaseBufferForAvFrame. We need a new
-    // buffer for the interleaved frames, which we get from the stage.
+    // buffer for the output payload, which we get from the stage.
     auto new_payload_buffer = AllocatePayloadBuffer(payload_size);
     if (!new_payload_buffer) {
       // TODO(dalesat): Record/report packet drop.
       return nullptr;
     }
 
-    lpcm_util_->Interleave(payload_buffer->data(),
-                           av_frame.linesize[0] * stream_type_->audio()->channels(),
-                           new_payload_buffer->data(), av_frame.nb_samples);
+    if (lpcm_util_) {
+      lpcm_util_->Interleave(payload_buffer->data(),
+                             av_frame.linesize[0] * stream_type_->audio()->channels(),
+                             new_payload_buffer->data(), av_frame.nb_samples);
+    } else {
+      memcpy(new_payload_buffer->data(), payload_buffer->data(), payload_size);
+    }
 
     // |new_payload_buffer| is the buffer we want to attach to the |Packet|.
     // This assignment drops the reference to the original |payload_buffer|, so
diff --git a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.h b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.h
index 3b006d7..915b886 100644
--- a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.h
+++ b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_audio_decoder.h
@@ -43,6 +43,8 @@
   // it aligns on 32 sample (not byte) boundaries.
   static const int kChannelAlign = 32;
 
+  bool copy_or_interleave_ = false;
+
   // For interleaving, if needed.
   std::unique_ptr<LpcmUtil> lpcm_util_;
 
diff --git a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_demux.cc b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_demux.cc
index 968c31d..b4fbd3a 100644
--- a/src/media/playback/mediaplayer/ffmpeg/ffmpeg_demux.cc
+++ b/src/media/playback/mediaplayer/ffmpeg/ffmpeg_demux.cc
@@ -402,6 +402,10 @@
   }
 
   ffmpeg::AvPacketPtr av_packet = ffmpeg::AvPacket::Create();
+  if (av_packet->side_data) {
+    FX_LOGS(WARNING) << "ON CREATE, av_packet->side_data 0x" << std::hex
+                     << reinterpret_cast<uintptr_t>(av_packet->side_data);
+  }
 
   av_packet->data = nullptr;
   av_packet->size = 0;
@@ -415,9 +419,22 @@
   *stream_index_out = static_cast<size_t>(av_packet->stream_index);
   // TODO(dalesat): What if the packet has no PTS or duration?
   next_pts_ = av_packet->pts + av_packet->duration;
-  // TODO(dalesat): Implement packet side data.
-  FX_DCHECK(av_packet->side_data == nullptr) << "side data not implemented";
-  FX_DCHECK(av_packet->side_data_elems == 0);
+
+  if (av_packet->side_data) {
+    FX_DCHECK(av_packet->side_data_elems > 0);
+    auto side_data = av_packet->side_data;
+    for (int i = 0; i < av_packet->side_data_elems; ++i, ++side_data) {
+      switch (side_data->type) {
+        case AV_PKT_DATA_SKIP_SAMPLES:
+          // TODO(dalesat): Implement sample skipping.
+          break;
+        default:
+          // TODO(dalesat): Handle more side-data types.
+          FX_DCHECK(false) << "Unhandled side data type " << side_data->type;
+          break;
+      }
+    }
+  }
 
   int64_t pts = (av_packet->pts == AV_NOPTS_VALUE) ? Packet::kNoPts : av_packet->pts;
   bool keyframe = av_packet->flags & AV_PKT_FLAG_KEY;
diff --git a/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.cc b/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.cc
index be95f9c..90b6857 100644
--- a/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.cc
+++ b/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.cc
@@ -64,23 +64,19 @@
               << std::dec << " },\n";
   }
 
-  if (!expected_packets_info_.empty()) {
-    if (expected_packets_info_iter_ == expected_packets_info_.end()) {
-      FX_LOGS(ERROR) << "packet supplied after expected packets";
-      expected_ = false;
+  if (!packet_expecters_.empty()) {
+    bool expecter_ok = false;
+    
+    for (auto& expecter : packet_expecters_) {
+      if (expecter.IsExpected(packet, vmo_mapper_.start())) {
+        expecter_ok = true;
+      }
     }
 
-    if (expected_packets_info_iter_->pts() != packet.pts ||
-        expected_packets_info_iter_->size() != packet.payload_size ||
-        expected_packets_info_iter_->hash() !=
-            PacketInfo::Hash(
-                reinterpret_cast<uint8_t*>(vmo_mapper_.start()) + packet.payload_offset,
-                packet.payload_size)) {
+    if (!expecter_ok) {
       FX_LOGS(ERROR) << "supplied packet doesn't match expected packet info";
       expected_ = false;
     }
-
-    ++expected_packets_info_iter_;
   }
 
   packet_queue_.push(std::make_pair(packet, std::move(callback)));
@@ -210,5 +206,33 @@
       zx::time(reference_time));
 }
 
+FakeAudioRenderer::PacketExpecter::PacketExpecter(const std::vector<PacketInfo>&& info)
+    : info_(std::move(info)), iter_(info_.begin()) {}
+
+// Checks |packet| against the next expected |PacketInfo| in this expecter's list.
+// |start| is the address that |packet.payload_offset| is relative to; the payload
+// bytes found there are hashed and compared with the expected hash. Returns true
+// and advances to the next expected entry only if pts, size and hash all match.
+// Returns false, without advancing, if the list is exhausted or anything differs.
+bool FakeAudioRenderer::PacketExpecter::IsExpected(const fuchsia::media::StreamPacket& packet,
+                                                   const void* start) {
+  if (iter_ == info_.end()) {
+    // More packets arrived than this list expects.
+    return false;
+  }
+
+  if (iter_->pts() != packet.pts || iter_->size() != packet.payload_size ||
+      iter_->hash() !=
+          PacketInfo::Hash(reinterpret_cast<const uint8_t*>(start) + packet.payload_offset,
+                           packet.payload_size)) {
+    // Mismatch. Without this return every packet would be accepted and the
+    // expectations would verify nothing.
+    return false;
+  }
+
+  ++iter_;
+  return true;
+}
+
 }  // namespace test
 }  // namespace media_player
diff --git a/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.h b/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.h
index d80ecd6..1d5bc19 100644
--- a/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.h
+++ b/src/media/playback/mediaplayer/test/fakes/fake_audio_renderer.h
@@ -39,8 +39,7 @@
   // Indicates that the renderer should verify supplied packets against the
   // indicated PacketInfos.
   void ExpectPackets(const std::vector<PacketInfo>&& expected_packets_info) {
-    expected_packets_info_ = std::move(expected_packets_info);
-    expected_packets_info_iter_ = expected_packets_info_.begin();
+    packet_expecters_.emplace_back(std::move(expected_packets_info));
   }
 
   // Returns true if everything has gone as expected so far.
@@ -50,10 +49,19 @@
       return false;
     }
 
-    if (!expected_packets_info_.empty() &&
-        expected_packets_info_iter_ != expected_packets_info_.end()) {
-      FX_LOGS(ERROR) << "Expected packets did not arrive.";
-      return false;
+    if (!packet_expecters_.empty()) {
+      bool expecter_done = false;
+      for (auto& expecter : packet_expecters_) {
+        if (expecter.done()) {
+          expecter_done = true;
+          break;
+        }
+      }
+
+      if (!expecter_done) {
+        FX_LOGS(ERROR) << "Expected packets did not arrive.";
+        return false;
+      }
     }
 
     if ((delay_packet_retirement_pts_ != fuchsia::media::NO_TIMESTAMP) && !packet_queue_.empty()) {
@@ -123,6 +131,26 @@
   void SetMute(bool muted) override;
 
  private:
+  // Tracks progress through one list of expected packets. Several expecters can be
+  // registered (one per |ExpectPackets| call) so a test can supply alternative lists.
+  class PacketExpecter {
+   public:
+    // Takes the list of packets to expect, in order.
+    explicit PacketExpecter(const std::vector<PacketInfo>&& info);
+
+    // Checks |packet| against the next entry in the list. |start| is the address that
+    // |packet.payload_offset| is relative to.
+    bool IsExpected(const fuchsia::media::StreamPacket& packet, const void* start);
+
+    // Returns true if every entry in the list has been consumed.
+    bool done() const { return iter_ == info_.end(); }
+
+   private:
+    std::vector<PacketInfo> info_;
+    // Position of the next expected entry in |info_|.
+    std::vector<PacketInfo>::iterator iter_;
+  };
+
   // Determines if we care currently playing.
   bool progressing() { return timeline_function_.invertible(); }
 
@@ -151,8 +179,7 @@
 
   bool dump_packets_ = false;
   uint64_t packets_received_;
-  std::vector<PacketInfo> expected_packets_info_;
-  std::vector<PacketInfo>::iterator expected_packets_info_iter_;
+  std::vector<PacketExpecter> packet_expecters_;
 
   std::queue<std::pair<fuchsia::media::StreamPacket, SendPacketCallback>> packet_queue_;
 
diff --git a/src/media/playback/mediaplayer/test/mediaplayer_tests.cc b/src/media/playback/mediaplayer/test/mediaplayer_tests.cc
index e07ebe1..1aef096 100644
--- a/src/media/playback/mediaplayer/test/mediaplayer_tests.cc
+++ b/src/media/playback/mediaplayer/test/mediaplayer_tests.cc
@@ -31,6 +31,7 @@
 static constexpr uint32_t kSinkFeedMaxPacketCount = 10;
 
 constexpr char kBearFilePath[] = "/pkg/data/media_test_data/bear.mp4";
+constexpr char kOpusFilePath[] = "/pkg/data/media_test_data/sfx-opus-441.webm";
 
 // Base class for mediaplayer tests.
 class MediaPlayerTests : public sys::testing::TestWithEnvironment {
@@ -408,9 +409,70 @@
 
 // Play a real A/V file from beginning to end.
 TEST_F(MediaPlayerTests, PlayBear) {
-  // TODO(dalesat): Use ExpectPackets for audio.
-  // This doesn't currently work, because the decoder behaves differently on
-  // different targets.
+  // Previously, we were getting different audio packets for arm64 vs x64. This doesn't appear
+  // to be happening anymore, but in case it recurs, we dump packets here. The output can be used
+  // to create a second list of packets to expect.
+  fake_audio_.renderer().DumpPackets();
+  fake_audio_.renderer().ExpectPackets(
+      {{1024, 8192, 0x0a68b3995a50a648},   {2048, 8192, 0x93bf522ee77e9d50},
+       {3072, 8192, 0x89cc3bcedd6034be},   {4096, 8192, 0x40931af9f379dd00},
+       {5120, 8192, 0x79dc4cfe61738988},   {6144, 8192, 0x2c831d823db62908},
+       {7168, 8192, 0x71561155059a2950},   {8192, 8192, 0x4581449f2e040ff0},
+       {9216, 8192, 0xb0429eeed8b7424e},   {10240, 8192, 0x5e7007ebe169fcc0},
+       {11264, 8192, 0x585fe50f30788fd8},  {12288, 8192, 0x7cba92a4ecaf59a2},
+       {13312, 8192, 0x8521ccbccc4d771e},  {14336, 8192, 0x5694e56b0fd93cc8},
+       {15360, 8192, 0x14abced62917c788},  {16384, 8192, 0x8e7f3918fa412a02},
+       {17408, 8192, 0xf095ec04d2238644},  {18432, 8192, 0x886cab3f4e3f9610},
+       {19456, 8192, 0x874a3d8d0f4e2190},  {20480, 8192, 0x1f70d5763dadf9ac},
+       {21504, 8192, 0x2619ff3221cbab46},  {22528, 8192, 0x33aa3594808f6b10},
+       {23552, 8192, 0x2da9b93cacd110a4},  {24576, 8192, 0x2f0def95d105b68c},
+       {25600, 8192, 0xef9acc73b96291c4},  {26624, 8192, 0xca8ed12c8f4b7b06},
+       {27648, 8192, 0x0ea5eddd4cc5e3bc},  {28672, 8192, 0xafe4007e4779438e},
+       {29696, 8192, 0xcefebc7fe3257f9e},  {30720, 8192, 0x4294978d0dc213ee},
+       {31744, 8192, 0x53ca41b8a5175774},  {32768, 8192, 0x9a16b082e9e5a95e},
+       {33792, 8192, 0x1a849b5e1f4ee80a},  {34816, 8192, 0xd1741d4e44972fea},
+       {35840, 8192, 0x7ecf5a82a4adf9a6},  {36864, 8192, 0x2878988793205f22},
+       {37888, 8192, 0x35a41b25f24ec2b8},  {38912, 8192, 0x2714de582b48ebc6},
+       {39936, 8192, 0xc8fdea128f0285f4},  {40960, 8192, 0xc5ab19b2405542ca},
+       {41984, 8192, 0x5d5d781722ba0392},  {43008, 8192, 0x02fe263969ba81a6},
+       {44032, 8192, 0x1acc5b7c24d197d4},  {45056, 8192, 0x18d713e058acfec8},
+       {46080, 8192, 0x83573b4a6f02c8da},  {47104, 8192, 0xacffcaaff833e850},
+       {48128, 8192, 0xa0cffe3e485c46c4},  {49152, 8192, 0xffd5680f78b7f7a2},
+       {50176, 8192, 0xc950e93a5272cda8},  {51200, 8192, 0x375e4dc1dc28eea4},
+       {52224, 8192, 0x5648dd0ed9d9d9d4},  {53248, 8192, 0xac945623bf04f5b6},
+       {54272, 8192, 0x3cff2936986fcdc8},  {55296, 8192, 0xbc049d18bdcca182},
+       {56320, 8192, 0x8d3646f2e29da29c},  {57344, 8192, 0xb5e72da09cd9f5b4},
+       {58368, 8192, 0x8597406852caa548},  {59392, 8192, 0x5221d69a113d9688},
+       {60416, 8192, 0xc4c0bdef8e07fb12},  {61440, 8192, 0x804e43c36110196e},
+       {62464, 8192, 0xd1d3ae38126dd618},  {63488, 8192, 0x846d01cfa3be6500},
+       {64512, 8192, 0xecca760a67eff43a},  {65536, 8192, 0x6624720182df5730},
+       {66560, 8192, 0x41eb3d61d94b2224},  {67584, 8192, 0x015efd07043b4e4c},
+       {68608, 8192, 0x2d4d9823e0e63b64},  {69632, 8192, 0xd5a845cbf966e23a},
+       {70656, 8192, 0x24c6ccf454693f72},  {71680, 8192, 0x368bea38398d5ecc},
+       {72704, 8192, 0x3602a6b0602a9458},  {73728, 8192, 0x48ea44911825e784},
+       {74752, 8192, 0x53e549d74eb26de0},  {75776, 8192, 0x3f7f7f5c7ee3d14e},
+       {76800, 8192, 0xdcafb6baa55625f6},  {77824, 8192, 0x472b007f3bc3c45c},
+       {78848, 8192, 0x53a8ecc580fff982},  {79872, 8192, 0xf59a57769900ca62},
+       {80896, 8192, 0xcc380147f73a1528},  {81920, 8192, 0x4f4b79f5ad21e67e},
+       {82944, 8192, 0xcee2192004c8066c},  {83968, 8192, 0x84672c98f8a1da4c},
+       {84992, 8192, 0x229246edd7b6c31c},  {86016, 8192, 0x3f3f4d7f8fcd62b4},
+       {87040, 8192, 0x46bc2a4e9e6d40ca},  {88064, 8192, 0xa6901df8e4afcc48},
+       {89088, 8192, 0x8e96017b64980fd8},  {90112, 8192, 0xdd9001f337c6a932},
+       {91136, 8192, 0xac5913cdd15b8a72},  {92160, 8192, 0xd9d59a367d561d4c},
+       {93184, 8192, 0xa76421aaa4b469c8},  {94208, 8192, 0x2e27a33a898c0056},
+       {95232, 8192, 0xb71592d727280bc0},  {96256, 8192, 0xb73b2e5a682cbf60},
+       {97280, 8192, 0x36d9f03861277c10},  {98304, 8192, 0xffa1d33f4aea2e40},
+       {99328, 8192, 0x4359627a59f6552e},  {100352, 8192, 0x82a76e3c810aee68},
+       {101376, 8192, 0x60066a5773c5dee2}, {102400, 8192, 0x809989d272e85654},
+       {103424, 8192, 0xd1cdd52e37d58702}, {104448, 8192, 0xe332d1115653f36c},
+       {105472, 8192, 0xa1189ac1a76c3bd0}, {106496, 8192, 0xaa20304ceb8e6daa},
+       {107520, 8192, 0x913ac8dcdc5cef52}, {108544, 8192, 0x891883b9326cd0f4},
+       {109568, 8192, 0xe8fbce45cf3990a4}, {110592, 8192, 0xc9301a9ef899455c},
+       {111616, 8192, 0x56cd5306b56e027a}, {112640, 8192, 0x5a1b088bce12b0f8},
+       {113664, 8192, 0xc697191375e99274}, {114688, 8192, 0x4d0f0798a59771c4},
+       {115712, 8192, 0x6571a4ff90e63490}, {116736, 8192, 0x20ffb62fff517f00},
+       {117760, 8192, 0x20ffb62fff517f00}, {118784, 8192, 0x20ffb62fff517f00},
+       {119808, 8192, 0x20ffb62fff517f00}, {120832, 8192, 0x20ffb62fff517f00}});
 
   fake_scenic_.session().SetExpectations(
       1,
@@ -478,6 +540,46 @@
   EXPECT_TRUE(fake_scenic_.session().expected());
 }
 
+// Play an opus file from beginning to end.
+TEST_F(MediaPlayerTests, PlayOpus) {
+  // The decoder works a bit differently on x64 vs arm64, hence the two lists here.
+  fake_audio_.renderer().ExpectPackets({{-336, 1296, 0x47ff30edd64831d6},
+                                        {312, 1920, 0xcc4016bbb348e52b},
+                                        {1272, 1920, 0xe54a89514c636028},
+                                        {2232, 1920, 0x8ef31ce86009d7da},
+                                        {3192, 1920, 0x36490fe70ca3bb81},
+                                        {4152, 1920, 0x4a8bdd8e9c2f42bb},
+                                        {5112, 1920, 0xbc8cea1839f0299e},
+                                        {6072, 1920, 0x868a68451d7ab814},
+                                        {7032, 1920, 0x84ac9b11a685a9a9},
+                                        {7992, 1920, 0xe4359c110afe8adb},
+                                        {8952, 1920, 0x2092c7fbf2ff0f0c},
+                                        {9912, 1920, 0x8002d77665736d63},
+                                        {10872, 1920, 0x541b415fbdc7b268},
+                                        {11832, 1920, 0xe81ef757a5953573},
+                                        {12792, 1920, 0xbc70aba0ed44f7dc}});
+  fake_audio_.renderer().ExpectPackets({{-336, 1296, 0xbf1f56243e245a2c},
+                                        {312, 1920, 0x670e69ee3076c4b2},
+                                        {1272, 1920, 0xe0667e312e65207d},
+                                        {2232, 1920, 0x291ffa6baf5dd2b1},
+                                        {3192, 1920, 0x1b408d840e27bcc1},
+                                        {4152, 1920, 0xdbf5034a75bc761b},
+                                        {5112, 1920, 0x46fa968eb705415b},
+                                        {7032, 1920, 0x7256d4c58d7afe56},
+                                        {7992, 1920, 0xb2a7bc50ce80c898},
+                                        {8952, 1920, 0xb314415fd9c3a694},
+                                        {9912, 1920, 0x34d9ce067ffacc37},
+                                        {11832, 1920, 0x05fd64442f53c5cc},
+                                        {12792, 1920, 0x3e2a98426c8680d0}});
+
+  commands_.SetFile(kOpusFilePath);
+  commands_.Play();
+  QuitOnEndOfStream();
+
+  Execute();
+  EXPECT_TRUE(fake_audio_.renderer().expected());
+}
+
 // Play a real A/V file from beginning to end, retaining audio packets. This
 // tests the ability of the player to handle the case in which the audio
 // renderer is holding on to packets for too long.