Fix FFmpeg errors when using the newer FFmpeg decoding API. (#2683)

The newer API requires decoding to be done in two stages:
1. send the packet to FFmpeg (avcodec_send_packet()), and
2. receive 0 or more decoded frames from FFmpeg (avcodec_receive_frame()).

Test: Served a webpage locally via Flask. The webpage contained a single audio or video element. I then ran, e.g.:

cobalt --url=http://127.0.0.1:5000/mp3 --enable_demuxer_extension

and played the content (e.g. `document.querySelector('audio').play()` from the JS inspector).

Previously, avcodec_receive_frame() would repeatedly fail with AVERROR(EAGAIN), because no packets had been pushed to the decoder with avcodec_send_packet().

Bug: b/330908454
Change-Id: I245d74f880ed9297dca35e8c548f482373077121
youtube · Mar 25, 2024 · 999de53 · 999de53
1 parent a652624
commit 999de53
Show file tree

Hide file tree

Showing 7 changed files with 160 additions and 69 deletions.
diff --git a/starboard/shared/ffmpeg/ffmpeg_audio_decoder_impl.cc b/starboard/shared/ffmpeg/ffmpeg_audio_decoder_impl.cc
@@ -17,6 +17,8 @@
 
 #include "starboard/shared/ffmpeg/ffmpeg_audio_decoder_impl.h"
 
+#include <string>
+
 #include "starboard/audio_sink.h"
 #include "starboard/common/log.h"
 #include "starboard/common/string.h"
@@ -160,18 +162,13 @@ void AudioDecoderImpl<FFMPEG>::Decode(const InputBuffers& input_buffers,
 #if LIBAVUTIL_VERSION_INT < LIBAVUTIL_VERSION_52_8
   ffmpeg_->avcodec_get_frame_defaults(av_frame_);
 #endif  // LIBAVUTIL_VERSION_INT < LIBAVUTIL_VERSION_52_8
-  int frame_decoded = 0;
 
   int result = 0;
   if (ffmpeg_->avcodec_version() < kAVCodecHasUniformDecodeAPI) {
+    int frame_decoded = 0;
     result = ffmpeg_->avcodec_decode_audio4(codec_context_, av_frame_,
                                             &frame_decoded, &packet);
-  } else {
-    result = ffmpeg_->avcodec_receive_frame(codec_context_, av_frame_);
-  }
-
-  if (result != input_buffer->size()) {
-    if (ffmpeg_->avcodec_version() < kAVCodecHasUniformDecodeAPI) {
+    if (result != input_buffer->size()) {
       // TODO: Consider fill it with silence.
       SB_DLOG(WARNING) << "avcodec_decode_audio4() failed with result: "
                        << result
@@ -180,60 +177,93 @@ void AudioDecoderImpl<FFMPEG>::Decode(const InputBuffers& input_buffers,
       error_cb_(kSbPlayerErrorDecode,
                 FormatString("avcodec_decode_audio4() failed with result %d.",
                              result));
-    } else {
-      SB_DLOG(WARNING) << "avcodec_receive_frame() failed with result: "
-                       << result
-                       << " with input buffer size: " << input_buffer->size();
-      error_cb_(kSbPlayerErrorDecode,
-                FormatString("avcodec_receive_frame() failed with result %d.",
-                             result));
+      return;
+    }
+
+    if (frame_decoded != 1) {
+      // TODO: Adjust timestamp accordingly when decoding result is shifted.
+      SB_DCHECK(frame_decoded == 0);
+      SB_DLOG(WARNING) << "avcodec_decode_audio4()/avcodec_receive_frame() "
+                          "returns with 0 frames decoded";
+      return;
     }
+
+    ProcessDecodedFrame(*input_buffer, *av_frame_);
     return;
   }
 
-  if (frame_decoded != 1) {
-    // TODO: Adjust timestamp accordingly when decoding result is shifted.
-    SB_DCHECK(frame_decoded == 0);
-    SB_DLOG(WARNING) << "avcodec_decode_audio4()/avcodec_receive_frame() "
-                        "returns with 0 frames decoded";
+  // Newer decode API.
+  const int send_packet_result =
+      ffmpeg_->avcodec_send_packet(codec_context_, &packet);
+  if (send_packet_result != 0) {
+    const std::string error_message = FormatString(
+        "avcodec_send_packet() failed with result %d.", send_packet_result);
+    SB_DLOG(WARNING) << error_message;
+    error_cb_(kSbPlayerErrorDecode, error_message);
     return;
   }
 
+  // Keep receiving frames until the decoder has processed the entire packet.
+  for (;;) {
+    result = ffmpeg_->avcodec_receive_frame(codec_context_, av_frame_);
+    if (result != 0) {
+      // We either hit an error or are done processing packet.
+      break;
+    }
+    ProcessDecodedFrame(*input_buffer, *av_frame_);
+  }
+
+  // A return value of AVERROR(EAGAIN) signifies that the decoder needs
+  // another packet, so we are done processing the existing packet at that
+  // point.
+  if (result != AVERROR(EAGAIN)) {
+    SB_DLOG(WARNING) << "avcodec_receive_frame() failed with result: "
+                     << result;
+    error_cb_(
+        kSbPlayerErrorDecode,
+        FormatString("avcodec_receive_frame() failed with result %d.", result));
+  }
+}
+
+void AudioDecoderImpl<FFMPEG>::ProcessDecodedFrame(
+    const InputBuffer& input_buffer,
+    const AVFrame& av_frame) {
   int decoded_audio_size = ffmpeg_->av_samples_get_buffer_size(
-      NULL, codec_context_->channels, av_frame_->nb_samples,
+      NULL, codec_context_->channels, av_frame.nb_samples,
       codec_context_->sample_fmt, 1);
   audio_stream_info_.samples_per_second = codec_context_->sample_rate;
 
-  if (decoded_audio_size > 0) {
-    scoped_refptr<DecodedAudio> decoded_audio = new DecodedAudio(
-        codec_context_->channels, GetSampleType(), GetStorageType(),
-        input_buffer->timestamp(),
-        codec_context_->channels * av_frame_->nb_samples *
-            starboard::media::GetBytesPerSample(GetSampleType()));
-    if (GetStorageType() == kSbMediaAudioFrameStorageTypeInterleaved) {
-      memcpy(decoded_audio->data(), *av_frame_->extended_data,
-             decoded_audio->size_in_bytes());
-    } else {
-      SB_DCHECK(GetStorageType() == kSbMediaAudioFrameStorageTypePlanar);
-      const int per_channel_size_in_bytes =
-          decoded_audio->size_in_bytes() / decoded_audio->channels();
-      for (int i = 0; i < decoded_audio->channels(); ++i) {
-        memcpy(decoded_audio->data() + per_channel_size_in_bytes * i,
-               av_frame_->extended_data[i], per_channel_size_in_bytes);
-      }
-      decoded_audio = decoded_audio->SwitchFormatTo(
-          GetSampleType(), kSbMediaAudioFrameStorageTypeInterleaved);
-    }
-    decoded_audio->AdjustForDiscardedDurations(
-        audio_stream_info_.samples_per_second,
-        input_buffer->audio_sample_info().discarded_duration_from_front,
-        input_buffer->audio_sample_info().discarded_duration_from_back);
-    decoded_audios_.push(decoded_audio);
-    Schedule(output_cb_);
-  } else {
+  if (decoded_audio_size <= 0) {
     // TODO: Consider fill it with silence.
     SB_LOG(ERROR) << "Decoded audio frame is empty.";
+    return;
+  }
+
+  scoped_refptr<DecodedAudio> decoded_audio = new DecodedAudio(
+      codec_context_->channels, GetSampleType(), GetStorageType(),
+      input_buffer.timestamp(),
+      codec_context_->channels * av_frame.nb_samples *
+          starboard::media::GetBytesPerSample(GetSampleType()));
+  if (GetStorageType() == kSbMediaAudioFrameStorageTypeInterleaved) {
+    memcpy(decoded_audio->data(), *av_frame.extended_data,
+           decoded_audio->size_in_bytes());
+  } else {
+    SB_DCHECK(GetStorageType() == kSbMediaAudioFrameStorageTypePlanar);
+    const int per_channel_size_in_bytes =
+        decoded_audio->size_in_bytes() / decoded_audio->channels();
+    for (int i = 0; i < decoded_audio->channels(); ++i) {
+      memcpy(decoded_audio->data() + per_channel_size_in_bytes * i,
+             av_frame.extended_data[i], per_channel_size_in_bytes);
+    }
+    decoded_audio = decoded_audio->SwitchFormatTo(
+        GetSampleType(), kSbMediaAudioFrameStorageTypeInterleaved);
   }
+  decoded_audio->AdjustForDiscardedDurations(
+      audio_stream_info_.samples_per_second,
+      input_buffer.audio_sample_info().discarded_duration_from_front,
+      input_buffer.audio_sample_info().discarded_duration_from_back);
+  decoded_audios_.push(decoded_audio);
+  Schedule(output_cb_);
 }
 
 void AudioDecoderImpl<FFMPEG>::WriteEndOfStream() {

diff --git a/starboard/shared/ffmpeg/ffmpeg_audio_decoder_impl.h b/starboard/shared/ffmpeg/ffmpeg_audio_decoder_impl.h
@@ -64,6 +64,11 @@ class AudioDecoderImpl<FFMPEG> : public AudioDecoder,
   void InitializeCodec();
   void TeardownCodec();
 
+  // Processes decoded (PCM) audio data received from FFmpeg. The audio data is
+  // ultimately enqueued in decoded_audios_.
+  void ProcessDecodedFrame(const InputBuffer& input_buffer,
+                           const AVFrame& av_frame);
+
   static const int kMaxDecodedAudiosSize = 64;
 
   FFMPEGDispatch* ffmpeg_;

diff --git a/starboard/shared/ffmpeg/ffmpeg_dispatch.h b/starboard/shared/ffmpeg/ffmpeg_dispatch.h
@@ -48,7 +48,12 @@ constexpr int kAVUtilSupportsBufferCreate = 3409920;
 // https://github.com/FFmpeg/FFmpeg/blob/70d25268c21cbee5f08304da95be1f647c630c15/doc/APIchanges#L195
 // avcodec_decode_audio4 and avcodec_decode_video2 replaced by
 // avcodec_receive_frame()
-constexpr int kAVCodecHasUniformDecodeAPI = 3940198;
+//
+// The APIs were removed in this change:
+// https://github.com/FFmpeg/FFmpeg/commit/7c1f347b184b6738abdc22fdcda40baa9f932522#diff-76418b674d0db8d5027d2e1e325dbe9b92b65b09d9f20cdd305ad14b0e46562d
+// (note the values in libavcodec/version.h)
+// AV_VERSION_INT(58, 137, 100)
+constexpr int kAVCodecHasUniformDecodeAPI = 3836260;
 
 // https://github.com/FFmpeg/FFmpeg/blob/70d25268c21cbee5f08304da95be1f647c630c15/doc/APIchanges#L86
 // no longer required
@@ -114,6 +119,7 @@ class FFMPEGDispatch {
                                AVFrame* picture,
                                int* got_picture_ptr,
                                const AVPacket* avpkt);
+  int (*avcodec_send_packet)(AVCodecContext* avctx, const AVPacket* avpkt);
   int (*avcodec_receive_frame)(AVCodecContext* avctx, AVFrame* frame);
   void (*avcodec_flush_buffers)(AVCodecContext* avctx);
   AVFrame* (*avcodec_alloc_frame)(void);

diff --git a/starboard/shared/ffmpeg/ffmpeg_dynamic_load_dispatch_impl.cc b/starboard/shared/ffmpeg/ffmpeg_dynamic_load_dispatch_impl.cc
@@ -311,6 +311,7 @@ void FFMPEGDispatchImpl::LoadSymbols() {
     INITSYMBOL(avcodec_, avcodec_decode_audio4);
     INITSYMBOL(avcodec_, avcodec_decode_video2);
   } else {
+    INITSYMBOL(avcodec_, avcodec_send_packet);
     INITSYMBOL(avcodec_, avcodec_receive_frame);
   }
 

diff --git a/starboard/shared/ffmpeg/ffmpeg_linked_dispatch_impl.cc b/starboard/shared/ffmpeg/ffmpeg_linked_dispatch_impl.cc
@@ -77,8 +77,13 @@ void LoadSymbols(FFMPEGDispatch* ffmpeg) {
   INITSYMBOL(avcodec_close);
   INITSYMBOL(avcodec_open2);
   INITSYMBOL(av_init_packet);
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 137, 100)
   INITSYMBOL(avcodec_decode_audio4);
   INITSYMBOL(avcodec_decode_video2);
+#else
+  INITSYMBOL(avcodec_send_packet);
+  INITSYMBOL(avcodec_receive_frame);
+#endif  // LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 137, 100)
   INITSYMBOL(avcodec_flush_buffers);
 #if LIBAVUTIL_VERSION_INT < LIBAVUTIL_VERSION_52_8
   INITSYMBOL(avcodec_alloc_frame);

diff --git a/starboard/shared/ffmpeg/ffmpeg_video_decoder_impl.cc b/starboard/shared/ffmpeg/ffmpeg_video_decoder_impl.cc
@@ -19,6 +19,8 @@
 
 #include <stdlib.h>
 
+#include <string>
+
 #include "starboard/common/string.h"
 #include "starboard/linux/shared/decode_target_internal.h"
 #include "starboard/thread.h"
@@ -270,54 +272,90 @@ bool VideoDecoderImpl<FFMPEG>::DecodePacket(AVPacket* packet) {
   } else {
     ffmpeg_->avcodec_get_frame_defaults(av_frame_);
   }
-  int frame_decoded = 0;
   int decode_result = 0;
 
   if (ffmpeg_->avcodec_version() < kAVCodecHasUniformDecodeAPI) {
+    // Old decode API.
+    int frame_decoded = 0;
     decode_result = ffmpeg_->avcodec_decode_video2(codec_context_, av_frame_,
                                                    &frame_decoded, packet);
-  } else {
-    decode_result = ffmpeg_->avcodec_receive_frame(codec_context_, av_frame_);
+    if (decode_result < 0) {
+      SB_DLOG(ERROR) << "avcodec_decode_video2() failed with result "
+                     << decode_result;
+      error_cb_(kSbPlayerErrorDecode,
+                FormatString("avcodec_decode_video2() failed with result %d.",
+                             decode_result));
+      error_occurred_ = true;
+      return false;
+    }
+
+    if (frame_decoded == 0) {
+      return false;
+    }
+
+    return ProcessDecodedFrame(*av_frame_);
   }
 
-  if (decode_result < 0) {
-    SB_DLOG(ERROR)
-        << "avcodec_decode_video2()/avcodec_receive_frame() failed with result "
-        << decode_result;
-    error_cb_(kSbPlayerErrorDecode,
-              FormatString("avcodec_decode_video2()/avcodec_receive_frame() "
-                           "failed with result %d.",
-                           decode_result));
-    error_occurred_ = true;
+  // Newer decode API.
+  const int send_packet_result =
+      ffmpeg_->avcodec_send_packet(codec_context_, packet);
+  if (send_packet_result != 0) {
+    const std::string error_message = FormatString(
+        "avcodec_send_packet() failed with result %d.", send_packet_result);
+    SB_DLOG(WARNING) << error_message;
+    error_cb_(kSbPlayerErrorDecode, error_message);
     return false;
   }
-  if (frame_decoded == 0) {
+
+  // Keep receiving frames until the decoder has processed the entire packet.
+  for (;;) {
+    decode_result = ffmpeg_->avcodec_receive_frame(codec_context_, av_frame_);
+    if (decode_result != 0) {
+      // We either hit an error or are done processing packet.
+      break;
+    }
+
+    if (!ProcessDecodedFrame(*av_frame_)) {
+      return false;
+    }
+  }
+
+  // A return value of AVERROR(EAGAIN) signifies that the decoder needs
+  // another packet, so we are done processing the existing packet at that
+  // point.
+  if (decode_result != AVERROR(EAGAIN)) {
+    SB_DLOG(WARNING) << "avcodec_receive_frame() failed with result: "
+                     << decode_result;
     return false;
   }
 
-  if (av_frame_->opaque == NULL) {
+  return true;
+}
+
+bool VideoDecoderImpl<FFMPEG>::ProcessDecodedFrame(const AVFrame& av_frame) {
+  if (av_frame.opaque == NULL) {
     SB_DLOG(ERROR) << "Video frame was produced yet has invalid frame data.";
     error_cb_(kSbPlayerErrorDecode,
               "Video frame was produced yet has invalid frame data.");
     error_occurred_ = true;
     return false;
   }
 
-  int codec_aligned_width = av_frame_->width;
-  int codec_aligned_height = av_frame_->height;
+  int codec_aligned_width = av_frame.width;
+  int codec_aligned_height = av_frame.height;
   int codec_linesize_align[AV_NUM_DATA_POINTERS];
   ffmpeg_->avcodec_align_dimensions2(codec_context_, &codec_aligned_width,
                                      &codec_aligned_height,
                                      codec_linesize_align);
 
-  int y_pitch = AlignUp(av_frame_->width, codec_linesize_align[0] * 2);
-  int uv_pitch = av_frame_->linesize[1];
+  int y_pitch = AlignUp(av_frame.width, codec_linesize_align[0] * 2);
+  int uv_pitch = av_frame.linesize[1];
 
   const int kBitDepth = 8;
   scoped_refptr<CpuVideoFrame> frame = CpuVideoFrame::CreateYV12Frame(
-      kBitDepth, av_frame_->width, av_frame_->height, y_pitch, uv_pitch,
-      av_frame_->reordered_opaque, av_frame_->data[0], av_frame_->data[1],
-      av_frame_->data[2]);
+      kBitDepth, av_frame.width, av_frame.height, y_pitch, uv_pitch,
+      av_frame.reordered_opaque, av_frame.data[0], av_frame.data[1],
+      av_frame.data[2]);
 
   bool result = true;
   if (output_mode_ == kSbPlayerOutputModeDecodeToTexture) {

diff --git a/starboard/shared/ffmpeg/ffmpeg_video_decoder_impl.h b/starboard/shared/ffmpeg/ffmpeg_video_decoder_impl.h
@@ -107,6 +107,12 @@ class VideoDecoderImpl<FFMPEG> : public VideoDecoder {
 
   void UpdateDecodeTarget_Locked(const scoped_refptr<CpuVideoFrame>& frame);
 
+  // Processes a decoded video frame received from FFmpeg. The frame is
+  // ultimately passed to decoder_status_cb_.
+  //
+  // Returns false if the frame contains invalid data.
+  bool ProcessDecodedFrame(const AVFrame& av_frame);
+
   FFMPEGDispatch* ffmpeg_;
 
   // |video_codec_| will be initialized inside ctor and won't be changed during